saveToCassandra works with Cassandra Lucene plugin? - apache-spark

I am implementing the example from the Lucene plugin for Cassandra page (https://github.com/Stratio/cassandra-lucene-index), and when I try to save the data using saveToCassandra I get a NoSuchElementException.
If I use CassandraConnector.withSessionDo I am able to add elements into Cassandra and no exception is raised.
The tables:
CREATE KEYSPACE demo
WITH REPLICATION = {'class' : 'SimpleStrategy', 'replication_factor': 1};

USE demo;

CREATE TABLE tweets (
    id INT PRIMARY KEY,
    user TEXT,
    body TEXT,
    time TIMESTAMP,
    latitude FLOAT,
    longitude FLOAT
);
CREATE CUSTOM INDEX tweets_index ON tweets ()
USING 'com.stratio.cassandra.lucene.Index'
WITH OPTIONS = {
    'refresh_seconds' : '1',
    'schema' : '{
        fields : {
            id    : {type : "integer"},
            user  : {type : "string"},
            body  : {type : "text", analyzer : "english"},
            time  : {type : "date", pattern : "yyyy/MM/dd", sorted : true},
            place : {type : "geo_point", latitude : "latitude", longitude : "longitude"}
        }
    }'
};
The code:
import org.apache.spark.{Logging, SparkConf, SparkContext}
import com.datastax.spark.connector.cql.CassandraConnector
import com.datastax.spark.connector._

object App extends Logging {
  def main(args: Array[String]) {
    // Get the Cassandra IP and create the Spark context
    val cassandraIP = System.getenv("CASSANDRA_IP")
    val sparkConf = new SparkConf(true)
      .set("spark.cassandra.connection.host", cassandraIP)
      .set("spark.cleaner.ttl", "3600")
      .setAppName("Simple Spark Cassandra Example")
    val sc = new SparkContext(sparkConf)

    // Works
    CassandraConnector(sparkConf).withSessionDo { session =>
      session.execute("INSERT INTO demo.tweets(id, user, body, time, latitude, longitude) VALUES (19, 'Name', 'Body', '2016-03-19 09:00:00-0300', 39, 39)")
    }

    // Does not work: raises the exception
    val demo = sc.parallelize(Seq((9, "Name", "Body", "2016-03-29 19:00:00-0300", 29, 29)))
    demo.saveToCassandra("demo", "tweets",
      SomeColumns("id", "user", "body", "time", "latitude", "longitude"))
  }
}
The exception:
16/03/28 14:15:41 INFO CassandraConnector: Connected to Cassandra cluster: Test Cluster
Exception in thread "main" java.util.NoSuchElementException: Column not found in demo.tweets
at com.datastax.spark.connector.cql.StructDef$$anonfun$columnByName$2.apply(Schema.scala:60)
at com.datastax.spark.connector.cql.StructDef$$anonfun$columnByName$2.apply(Schema.scala:60)
at scala.collection.Map$WithDefault.default(Map.scala:52)
at scala.collection.MapLike$class.apply(MapLike.scala:141)
at scala.collection.AbstractMap.apply(Map.scala:58)
at com.datastax.spark.connector.cql.TableDef$$anonfun$9.apply(Schema.scala:153)
at com.datastax.spark.connector.cql.TableDef$$anonfun$9.apply(Schema.scala:152)
at scala.collection.TraversableLike$WithFilter$$anonfun$map$2.apply(TraversableLike.scala:722)
at scala.collection.immutable.Map$Map1.foreach(Map.scala:109)
at scala.collection.TraversableLike$WithFilter.map(TraversableLike.scala:721)
at com.datastax.spark.connector.cql.TableDef.<init>(Schema.scala:152)
at com.datastax.spark.connector.cql.Schema$$anonfun$com$datastax$spark$connector$cql$Schema$$fetchTables$1$2.apply(Schema.scala:283)
at com.datastax.spark.connector.cql.Schema$$anonfun$com$datastax$spark$connector$cql$Schema$$fetchTables$1$2.apply(Schema.scala:271)
at scala.collection.TraversableLike$WithFilter$$anonfun$map$2.apply(TraversableLike.scala:722)
at scala.collection.immutable.Set$Set4.foreach(Set.scala:137)
at scala.collection.TraversableLike$WithFilter.map(TraversableLike.scala:721)
at com.datastax.spark.connector.cql.Schema$.com$datastax$spark$connector$cql$Schema$$fetchTables$1(Schema.scala:271)
at com.datastax.spark.connector.cql.Schema$$anonfun$com$datastax$spark$connector$cql$Schema$$fetchKeyspaces$1$2.apply(Schema.scala:295)
at com.datastax.spark.connector.cql.Schema$$anonfun$com$datastax$spark$connector$cql$Schema$$fetchKeyspaces$1$2.apply(Schema.scala:294)
at scala.collection.TraversableLike$WithFilter$$anonfun$map$2.apply(TraversableLike.scala:722)
at scala.collection.immutable.HashSet$HashSet1.foreach(HashSet.scala:153)
at scala.collection.immutable.HashSet$HashTrieSet.foreach(HashSet.scala:306)
at scala.collection.TraversableLike$WithFilter.map(TraversableLike.scala:721)
at com.datastax.spark.connector.cql.Schema$.com$datastax$spark$connector$cql$Schema$$fetchKeyspaces$1(Schema.scala:294)
at com.datastax.spark.connector.cql.Schema$$anonfun$fromCassandra$1.apply(Schema.scala:307)
at com.datastax.spark.connector.cql.Schema$$anonfun$fromCassandra$1.apply(Schema.scala:304)
at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$withClusterDo$1.apply(CassandraConnector.scala:121)
at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$withClusterDo$1.apply(CassandraConnector.scala:120)
at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$withSessionDo$1.apply(CassandraConnector.scala:110)
at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$withSessionDo$1.apply(CassandraConnector.scala:109)
at com.datastax.spark.connector.cql.CassandraConnector.closeResourceAfterUse(CassandraConnector.scala:139)
at com.datastax.spark.connector.cql.CassandraConnector.withSessionDo(CassandraConnector.scala:109)
at com.datastax.spark.connector.cql.CassandraConnector.withClusterDo(CassandraConnector.scala:120)
at com.datastax.spark.connector.cql.Schema$.fromCassandra(Schema.scala:304)
at com.datastax.spark.connector.writer.TableWriter$.apply(TableWriter.scala:275)
at com.datastax.spark.connector.RDDFunctions.saveToCassandra(RDDFunctions.scala:36)
at com.webradar.spci.spark.cassandra.App$.main(App.scala:27)
at com.webradar.spci.spark.cassandra.App.main(App.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
EDITED:
Versions:
Spark 1.6.0
Cassandra 3.0.3
Lucene plugin 3.0.3.1
For JAR creation I used the maven-assembly-plugin to get a fat JAR.
If I remove the custom index, I am able to use saveToCassandra.

It seems the problem is caused by the Spark Cassandra connector, not by the plugin.
Since CASSANDRA-10217, Cassandra 3.x per-row indexes no longer need to be created on a fake column. Thus, from Cassandra 3.x on, the column-based syntax "CREATE CUSTOM INDEX %s ON %s(%s)" is replaced by the new row-based syntax "CREATE CUSTOM INDEX %s ON %s()". However, the DataStax Spark connector doesn't seem to support this new feature yet.
When com.datastax.spark.connector.RDDFunctions.saveToCassandra is called, it tries to load the table schema and the schema of the index related to a table column. Since the new index syntax no longer has the fake column, the lookup fails with a NoSuchElementException due to an empty column name.
However, saveToCassandra works well if you run the same example with the prior fake-column syntax:
CREATE KEYSPACE demo
WITH REPLICATION = {'class' : 'SimpleStrategy', 'replication_factor': 1};

USE demo;

CREATE TABLE tweets (
    id INT PRIMARY KEY,
    user TEXT,
    body TEXT,
    time TIMESTAMP,
    latitude FLOAT,
    longitude FLOAT,
    lucene TEXT
);

CREATE CUSTOM INDEX tweets_index ON tweets (lucene)
USING 'com.stratio.cassandra.lucene.Index'
WITH OPTIONS = {
    'refresh_seconds' : '1',
    'schema' : '{
        fields : {
            id    : {type : "integer"},
            user  : {type : "string"},
            body  : {type : "text", analyzer : "english"},
            time  : {type : "date", pattern : "yyyy/MM/dd", sorted : true},
            place : {type : "geo_point", latitude : "latitude", longitude : "longitude"}
        }
    }'
};
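With this fake-column schema, the original save call from the question works unchanged; the dummy lucene column is never written, it only carries the index:

demo.saveToCassandra("demo", "tweets",
  SomeColumns("id", "user", "body", "time", "latitude", "longitude"))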

Related

Query CUstom Record using Custom Field in Mulesoft

I want to query a custom record based on a custom field of that record in Anypoint Studio. I tried using the Search operation of the NetSuite connector, but it seems I am unable to write the DataWeave code accordingly.
I used the code below:
%dw 2.0
output application/java
---
{
    customFieldList: {
        customField: [{
            internalId: "8",
            scriptId: "abc"
        } as Object {
            class: "org.mule.module.netsuite.extension.api.ListOrRecordRef"
        }]
    } as Object {
        class: "org.mule.module.netsuite.extension.api.CustomFieldList"
    },
    recType: {
        internalId: "10078"
    }
} as Object {
    class: "org.mule.module.netsuite.extension.api.CustomRecordSearchBasic"
}
I used a basic custom record search. I am getting the error below:
Message : null
Element : testFlow2/processors/0 # test:test.xml:18 (Transform Message)
Element DSL : <ee:transform doc:name="Transform Message" doc:id="c6dbb0ee-4979-4667-bab1-e0b5d72a6b2b">
<ee:message>
<ee:set-payload>%dw 2.0
output application/java
---
{
customFieldList: {
customField: [{
internalId: "8",
scriptId: "abc"
} as Object {
class : "org.mule.module.netsuite.extension.api.ListOrRecordRef"
} ]
} as Object {
class : "org.mule.module.netsuite.extension.api.CustomFieldList"
},
recType: {
internalId: "10078"
}
} as Object {
class : "org.mule.module.netsuite.extension.api.CustomRecordSearchBasic"
}</ee:set-payload>
</ee:message>
</ee:transform>
Error type : MULE:UNKNOWN
FlowStack : at testFlow2(testFlow2/processors/0 # test:test.xml:18 (Transform Message))
--------------------------------------------------------------------------------
Root Exception stack trace:
java.lang.NullPointerException
at org.mule.weave.v2.module.pojo.exception.CannotInstantiateException.message(CannotInstantiateException.scala:7)
at org.mule.weave.v2.parser.exception.LocatableException.getMessage(LocatableException.scala:18)
at org.mule.weave.v2.parser.exception.LocatableException.getMessage$(LocatableException.scala:15)
at org.mule.weave.v2.module.pojo.exception.CannotInstantiateException.getMessage(CannotInstantiateException.scala:6)
at org.mule.runtime.core.internal.el.dataweave.DataWeaveExpressionLanguageAdaptor$1.handledException(DataWeaveExpressionLanguageAdaptor.java:298)
at org.mule.runtime.core.internal.el.dataweave.DataWeaveExpressionLanguageAdaptor$1.evaluate(DataWeaveExpressionLanguageAdaptor.java:309)
at org.mule.runtime.core.internal.el.DefaultExpressionManagerSession.evaluate(DefaultExpressionManagerSession.java:105)
at com.mulesoft.mule.runtime.core.internal.processor.SetPayloadTransformationTarget.process(SetPayloadTransformationTarget.java:32)
at com.mulesoft.mule.runtime.core.internal.processor.TransformMessageProcessor.lambda$0(TransformMessageProcessor.java:92)
at java.util.Optional.ifPresent(Optional.java:159)
at com.mulesoft.mule.runtime.core.internal.processor.TransformMessageProcessor.process(TransformMessageProcessor.java:92)
at org.mule.runtime.core.api.util.func.CheckedFunction.apply(CheckedFunction.java:25)
at org.mule.runtime.core.api.rx.Exceptions.lambda$checkedFunction$2(Exceptions.java:84)
at org.mule.runtime.core.internal.util.rx.Operators.lambda$nullSafeMap$0(Operators.java:47)
at reactor.core.* (1 elements filtered from stack; set debug level logging or '-Dmule.verbose.exceptions=true' for everything)(Unknown Source)
at org.mule.runtime.core.privileged.processor.chain.* (2 elements filtered from stack; set debug level logging or '-Dmule.verbose.exceptions=true' for everything)(Unknown Source)
at reactor.core.* (6 elements filtered from stack; set debug level logging or '-Dmule.verbose.exceptions=true' for everything)(Unknown Source)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at org.mule.service.scheduler.internal.AbstractRunnableFutureDecorator.doRun(AbstractRunnableFutureDecorator.java:111)
at org.mule.service.scheduler.internal.RunnableFutureDecorator.run(RunnableFutureDecorator.java:54)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
I don't know what I am doing wrong.
Any help is appreciated.
Thank you so much, but I figured it out: I just added a class and it worked. I can still accept other solutions, though.
Thank you again.

Error while dropping the index in Cassandra

While creating a new index in Cassandra, I mistakenly gave the same directory_path to both indexes, which now seems to cause an error while dropping them. I am able to delete other indexes that point to unique directory paths. Please share your thoughts on how to delete the new index that was pointed at the same directory path as an existing index.
The indexes:
CREATE CUSTOM INDEX inbound_idx ON inventory.inbound (p_index)
USING 'com.stratio.cassandra.lucene.Index'
WITH OPTIONS = {
    'schema': '{
        fields : {
            symbol      : {type : "string", case_sensitive : false},
            destination : {type : "string", case_sensitive : false},
            ticket_id   : {type : "string", case_sensitive : false}
        }
    }',
    'refresh_seconds': '1',
    'directory_path': '/c05/stratio_index/inventory/inbound'
};

CREATE CUSTOM INDEX inbound_idx_new ON inventory.inbound (p_index_new)
USING 'com.stratio.cassandra.lucene.Index'
WITH OPTIONS = {
    'schema': '{
        fields : {
            symbol      : {type : "string", case_sensitive : false},
            destination : {type : "string", case_sensitive : false},
            ticket_id   : {type : "string", case_sensitive : false}
        }
    }',
    'refresh_seconds': '1',
    'directory_path': '/c05/stratio_index/inventory/inbound'
};
Error:
inventory_admin#cqlsh:inventory> drop index inbound_idx_new;
ServerError: java.lang.RuntimeException: java.util.concurrent.ExecutionException: java.lang.RuntimeException: java.util.concurrent.ExecutionException: java.lang.NullPointerException
inventory_admin#cqlsh:inventory> drop INDEX inbound_idx;
ServerError: java.lang.RuntimeException: java.util.concurrent.ExecutionException: java.lang.RuntimeException: java.util.concurrent.ExecutionException: java.lang.AssertionError: attempted to delete non-existing file inbound
inventory_admin#cqlsh:inventory> drop index inbound_idx1;
inventory_admin#cqlsh:inventory>

Groovy No such property error while creating dynamic json

I am trying to create a dynamic JSON payload from CSV data in JMeter with a JSR223 PreProcessor.
Below is the code; I am using the CSV for the Id and Name data:
def builder = new groovy.json.JsonBuilder()

@groovy.transform.Immutable
class Items {
    String Id
    String Name
}

def items = new File("Item.txt").readLines().collect { line ->
    new Items(line.split(",")[0], line.split(",")[1])
}

builder.Rule(
    __type: "DataCollectionRule",
    DeviceFamily: '${__P(DeviceFamily)}',
    RuleId: 0,
    Name: 'test-${__time(yyyy-MM-dd'T'hh:mm:ss)}-${__counter(TRUE,)}',
    Targets: [
        Groups: [
            [
                Id: '${logicalid1_1}',
            ]
        ],
        Devices: [
        ]
    ],
    StartDate: '/Date(${__time(,)})/',
    IsEnabled: true,
    Priority: 0,
    AlertType: 0,
    DeliverySchedule: [
        Id: 1,
        Name: "Every 30 Minutes",
        Period: "30M"
    ],
    CollectionSchedule: [
        Id: 1,
        Name: "Every 30 Minutes",
        Period: "30M"
    ],
    Items: items.collect() [
        [
            Id: it.Id,
            Name: it.Name
        ]
    ],
    LocationAccuracy: [
        UseGPS: false,
        DistanceInMeters: 100,
        ReportToServer: true,
        AccuracyInMeters: 10
    ],
    HasDolphinCounters: false,
    EnrollmentCertificateId: null,
    EnrollmentCertificateName: "",
    DatabaseHighWatermark: 28,
    DatabaseLowWatermark: 14,
    DeviceHighWatermark: 400,
    DeviceLowWatermark: 200
)

sampler.getArguments().removeAllArguments()
sampler.addNonEncodedArgument('', builder.toPrettyString(), '')
sampler.setPostBodyRaw(true)
While running the test I am getting HTTP 400 Bad Request.
The log message is shown below:
2018-09-24 13:49:23,669 ERROR o.a.j.m.JSR223PreProcessor: Problem in JSR223 script, JSR223 PreProcessor
javax.script.ScriptException: groovy.lang.MissingPropertyException: No such property: it for class: Script32
at org.codehaus.groovy.jsr223.GroovyScriptEngineImpl.eval(GroovyScriptEngineImpl.java:320) ~[groovy-all-2.4.13.jar:2.4.13]
at org.codehaus.groovy.jsr223.GroovyCompiledScript.eval(GroovyCompiledScript.java:72) ~[groovy-all-2.4.13.jar:2.4.13]
at javax.script.CompiledScript.eval(Unknown Source) ~[?:1.8.0_151]
at org.apache.jmeter.util.JSR223TestElement.processFileOrScript(JSR223TestElement.java:221) ~[ApacheJMeter_core.jar:4.0 r1823414]
at org.apache.jmeter.modifiers.JSR223PreProcessor.process(JSR223PreProcessor.java:44) [ApacheJMeter_components.jar:4.0 r1823414]
at org.apache.jmeter.threads.JMeterThread.runPreProcessors(JMeterThread.java:849) [ApacheJMeter_core.jar:4.0 r1823414]
at org.apache.jmeter.threads.JMeterThread.executeSamplePackage(JMeterThread.java:467) [ApacheJMeter_core.jar:4.0 r1823414]
at org.apache.jmeter.threads.JMeterThread.processSampler(JMeterThread.java:416) [ApacheJMeter_core.jar:4.0 r1823414]
at org.apache.jmeter.threads.JMeterThread.run(JMeterThread.java:250) [ApacheJMeter_core.jar:4.0 r1823414]
at java.lang.Thread.run(Unknown Source) [?:1.8.0_151]
Caused by: groovy.lang.MissingPropertyException: No such property: it for class: Script32
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.unwrap(ScriptBytecodeAdapter.java:53) ~[groovy-all-2.4.13.jar:2.4.13]
at org.codehaus.groovy.runtime.callsite.PogoGetPropertySite.getProperty(PogoGetPropertySite.java:52) ~[groovy-all-2.4.13.jar:2.4.13]
at org.codehaus.groovy.runtime.callsite.AbstractCallSite.callGroovyObjectGetProperty(AbstractCallSite.java:307) ~[groovy-all-2.4.13.jar:2.4.13]
at Script32.run(Script32.groovy:46) ~[?:?]
at org.codehaus.groovy.jsr223.GroovyScriptEngineImpl.eval(GroovyScriptEngineImpl.java:317) ~[groovy-all-2.4.13.jar:2.4.13]
... 9 more
The CSV is as follows:
-1,BatteryStatus
-3,AvailableMemory
-5,AvailableStorage
Thank you in advance
You have to use {} for a closure here:
items.collect() { // wrong: [
// ...
} // wrong: ]
Or just items.collect { ... }
With the [] the compiler sees that as a map literal, and you get the above error (it is undefined).
I believe you should copy and paste the example code more accurately; to wit, your "Items" section should look like:
Items: items.collect() {
    [
        Id  : it.Id,
        Name: it.Name
    ]
}
Also be aware that you should not use JMeter functions or variables in Groovy scripts directly, as it conflicts with the GString template feature and makes caching of compiled scripts impossible, negatively impacting performance.
So I would also recommend changing:
${__P(DeviceFamily)} to props.get('DeviceFamily')
${__time(yyyy-MM-dd'T'hh:mm:ss)} to new Date().format("yyyy-MM-dd'T'hh:mm:ss")
etc.
See The Groovy Templates Cheat Sheet for JMeter article for more information on Groovy scripting in JMeter if needed.

org.apache.spark.sql.AnalysisException: 'write' can not be called on streaming Dataset/DataFrame

I'm trying to write a Spark Structured Streaming (2.3) dataset to ScyllaDB (Cassandra).
My code to write the dataset:
def saveStreamSinkProvider(ds: Dataset[InvoiceItemKafka]) = {
  ds
    .writeStream
    .format("cassandra.ScyllaSinkProvider")
    .outputMode(OutputMode.Append)
    .queryName("KafkaToCassandraStreamSinkProvider")
    .options(
      Map(
        "keyspace" -> namespace,
        "table" -> StreamProviderTableSink,
        "checkpointLocation" -> "/tmp/checkpoints"
      )
    )
    .start()
}
My ScyllaDB Streaming Sinks:
class ScyllaSinkProvider extends StreamSinkProvider {
  override def createSink(sqlContext: SQLContext,
                          parameters: Map[String, String],
                          partitionColumns: Seq[String],
                          outputMode: OutputMode): ScyllaSink =
    new ScyllaSink(parameters)
}

class ScyllaSink(parameters: Map[String, String]) extends Sink {
  override def addBatch(batchId: Long, data: DataFrame): Unit =
    data.write
      .cassandraFormat(
        parameters("table"),
        parameters("keyspace")
        //parameters("cluster")
      )
      .mode(SaveMode.Append)
      .save()
}
However, when I run this code, I receive an exception:
...
[error] +- StreamingExecutionRelation KafkaSource[Subscribe[transactions_load]], [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]
[error] at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:295)
[error] at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:189)
[error] Caused by: org.apache.spark.sql.AnalysisException: 'write' can not be called on streaming Dataset/DataFrame;
[error] at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
[error] at org.apache.spark.sql.Dataset.write(Dataset.scala:3103)
[error] at cassandra.ScyllaSink.addBatch(CassandraDriver.scala:113)
[error] at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch$3$$anonfun$apply$16.apply(MicroBatchExecution.scala:477)
...
I have seen a similar question, but that is for CosmosDB - Spark CosmosDB Sink: org.apache.spark.sql.AnalysisException: 'write' can not be called on streaming Dataset/DataFrame
You could convert it to an RDD first and then write:
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.CatalystTypeConverters

class ScyllaSink(parameters: Map[String, String]) extends Sink {
  override def addBatch(batchId: Long, data: DataFrame): Unit = synchronized {
    val schema = data.schema
    // this ensures that the same query plan will be used
    val rdd: RDD[Row] = data.queryExecution.toRdd.mapPartitions { rows =>
      val converter = CatalystTypeConverters.createToScalaConverter(schema)
      rows.map(converter(_).asInstanceOf[Row])
    }
    // write the RDD to Cassandra
  }
}
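For the elided write step, one option (an assumption on my part, not spelled out in the answer above) is to reuse the connector's RDD API with its Row writer factory:

// Hedged sketch of the missing write step, assuming the spark-cassandra-connector
// 2.x API: SqlRowWriter adapts Spark SQL Rows for saveToCassandra.
import com.datastax.spark.connector._
import com.datastax.spark.connector.writer.SqlRowWriter

implicit val rowWriter = SqlRowWriter.Factory
rdd.saveToCassandra(parameters("keyspace"), parameters("table"))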

Error in simple spark application

I'm running a simple Spark application which does 'word to vector'. Here is my code (it is from the Spark website):
import org.apache.spark._
import org.apache.spark.rdd._
import org.apache.spark.SparkContext._
import org.apache.spark.mllib.feature.{Word2Vec, Word2VecModel}

object SimpleApp {
  def main(args: Array[String]) {
    val conf = new SparkConf().setAppName("Word2Vector")
    val sc = new SparkContext(conf)
    val input = sc.textFile("text8").map(line => line.split(" ").toSeq)
    val word2vec = new Word2Vec()
    val model = word2vec.fit(input)
    val synonyms = model.findSynonyms("china", 40)
    for ((synonym, cosineSimilarity) <- synonyms) {
      println(s"$synonym $cosineSimilarity")
    }
    // Save and load model
    model.save(sc, "myModelPath")
  }
}
When running it, I get the following error message:
Exception in thread "main" org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: hdfs://GXYDEVVM:8020/user/hadoop/YOUR_SPARK_HOME/README.md
at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:285)
at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:228)
at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:313)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:207)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:219)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:217)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:217)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:32)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:219)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:217)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:217)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:32)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:219)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:217)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:217)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1781)
at org.apache.spark.rdd.RDD.count(RDD.scala:1099)
at org.apache.spark.api.java.JavaRDDLike$class.count(JavaRDDLike.scala:442)
at org.apache.spark.api.java.AbstractJavaRDDLike.count(JavaRDDLike.scala:47)
at SimpleApp.main(SimpleApp.java:13)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:665)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:170)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:193)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:112)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
What is the problem? Where is this address coming from: /user/hadoop/YOUR_SPARK_HOME/README.md?
This is probably related to your default Spark configuration.
Take a look (or use grep) in the conf directory of your Spark home directory. You should find a spark-env.sh file, which could contain a reference to the strange file.
In fact, Spark is trying to load a file from HDFS (kind of a standard if you run Spark on a cluster: your input/output should be reachable by the master and the workers). If you use Spark locally, you have to configure the Spark context using the setMaster method. Here is my version:
object SparkDemo {

  def log[A](key: String)(job: => A): A = {
    val start = System.currentTimeMillis
    val output = job
    println("===> %s in %s seconds"
      .format(key, (System.currentTimeMillis - start) / 1000.0))
    output
  }

  def main(args: Array[String]): Unit = {
    val modelName = "w2vModel"

    val sc = new SparkContext(
      new SparkConf()
        .setAppName("SparkDemo")
        .set("spark.executor.memory", "4G")
        .set("spark.driver.maxResultSize", "16G")
        .setMaster("spark://192.168.1.53:7077") // IP of the Spark master
        // .setMaster("local[2]") // does not work... workers lose contact with the master after 120s
    )

    // take a look into the target folder if you are unsure how the jar is named
    // one-liner to compile / run: sbt package && sbt run
    sc.addJar("./target/scala-2.10/sparkling_2.10-0.1.jar")

    val input = sc.textFile("./text8").map(line => line.split(" ").toSeq)

    val word2vec = new Word2Vec()
    val model = log("compute model") { word2vec.fit(input) }

    log("save model") { model.save(sc, modelName) }

    val synonyms = model.findSynonyms("china", 40)
    for ((synonym, cosineSimilarity) <- synonyms) {
      println(s"$synonym $cosineSimilarity")
    }

    val model2 = log("reload model") { Word2VecModel.load(sc, modelName) }
  }
}
