Java heap space Error while running SVMWithSGD algorithm in MLlib - apache-spark

My fnl2 dataset is of the form:
scala> fnl2.first()
res4: org.apache.spark.mllib.regression.LabeledPoint = (0.0,(612515,[28693,86703,94568,162663,267733,292870,327313,347868,362660,396595,415817,436773,443713,470149,485282,486556,489594,496185,541453,570126,571088],[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]))
scala> fnl2.count()
res5: Long = 775946
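Each record is a LabeledPoint whose features are a sparse vector of dimension 612515, with 1.0 at the active indices. For reference, a minimal sketch of constructing such a point (using just the first two indices from the output above):

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

// Sparse vector arguments: total dimension, active indices, values at those indices
val p = LabeledPoint(0.0, Vectors.sparse(612515, Array(28693, 86703), Array(1.0, 1.0)))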
Then, I try to build a SVMWithSGD model:
import org.apache.spark.mllib.classification.SVMWithSGD
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
val splits = fnl2.randomSplit(Array(0.6, 0.4), seed = 11L)
val training = splits(0).cache()
val test = splits(1)
val numIterations = 100
val model = SVMWithSGD.train(training, numIterations)
But I get the following Java heap space error, and then the Spark context shuts down unexpectedly:
15/08/10 09:15:41 WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeSystemBLAS
15/08/10 09:15:41 WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeRefBLAS
15/08/10 09:23:50 ERROR ActorSystemImpl: Uncaught fatal error from thread [sparkDriver-akka.actor.default-dispatcher-30] shutting down ActorSystem [sparkDriver]
java.lang.OutOfMemoryError: Java heap space
at com.google.protobuf_spark.ByteString.toByteArray(ByteString.java:213)
at akka.remote.MessageSerializer$.deserialize(MessageSerializer.scala:24)
at akka.remote.DefaultMessageDispatcher.payload$lzycompute$1(Endpoint.scala:55)
at akka.remote.DefaultMessageDispatcher.payload$1(Endpoint.scala:55)
at akka.remote.DefaultMessageDispatcher.dispatch(Endpoint.scala:73)
at akka.remote.EndpointReader$$anonfun$receive$2.applyOrElse(Endpoint.scala:764)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
at akka.actor.ActorCell.invoke(ActorCell.scala:456)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
at akka.dispatch.Mailbox.run(Mailbox.scala:219)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
15/08/10 09:23:56 ERROR ConnectionManager: Corresponding SendingConnection to ConnectionManagerId(eastspark1,57211) not found
org.apache.spark.SparkException: Job cancelled because SparkContext was shut down
at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:694)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:693)
at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
at org.apache.spark.scheduler.DAGScheduler.cleanUpAfterSchedulerStop(DAGScheduler.scala:693)
at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.postStop(DAGScheduler.scala:1399)
at akka.actor.dungeon.FaultHandling$class.akka$actor$dungeon$FaultHandling$$finishTerminate(FaultHandling.scala:201)
at akka.actor.dungeon.FaultHandling$class.terminate(FaultHandling.scala:163)
at akka.actor.ActorCell.terminate(ActorCell.scala:338)
at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:431)
at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
at akka.dispatch.Mailbox.run(Mailbox.scala:218)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
scala> 15/08/10 09:23:56 ERROR ConnectionManager: Corresponding SendingConnection to ConnectionManagerId(10.2.0.14,37151) not found
15/08/10 09:23:56 ERROR SendingConnection: Exception while reading SendingConnection to ConnectionManagerId(10.2.0.16,54187)
java.nio.channels.ClosedChannelException
at sun.nio.ch.SocketChannelImpl.ensureReadOpen(SocketChannelImpl.java:257)
at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:300)
at org.apache.spark.network.SendingConnection.read(Connection.scala:390)
at org.apache.spark.network.ConnectionManager$$anon$7.run(ConnectionManager.scala:199)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
The Spark context runs on 12 cores, with 8 GB of memory on each node.
Any ideas?
EDIT
This is the error I get after increasing the driver's memory to 5 GB with export SPARK_DRIVER_MEMORY="5000M":
scala> val model = SVMWithSGD.train(training, numIterations)
15/08/10 11:33:07 WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeSystemBLAS
15/08/10 11:33:07 WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeRefBLAS
Exception in thread "qtp950243028-158" java.lang.OutOfMemoryError: GC overhead limit exceeded
15/08/10 11:46:26 ERROR ActorSystemImpl: Uncaught fatal error from thread [sparkDriver-akka.actor.default-dispatcher-32] shutting down ActorSystem [sparkDriver]
java.lang.OutOfMemoryError: Java heap space
at com.google.protobuf_spark.ByteString.copyFrom(ByteString.java:90)
at com.google.protobuf_spark.CodedInputStream.readBytes(CodedInputStream.java:289)
at akka.remote.WireFormats$SerializedMessage$Builder.mergeFrom(WireFormats.java:2700)
at akka.remote.WireFormats$SerializedMessage$Builder.mergeFrom(WireFormats.java:2546)
at com.google.protobuf_spark.CodedInputStream.readMessage(CodedInputStream.java:275)
at akka.remote.WireFormats$RemoteEnvelope$Builder.mergeFrom(WireFormats.java:1165)
at akka.remote.WireFormats$RemoteEnvelope$Builder.mergeFrom(WireFormats.java:949)
at com.google.protobuf_spark.CodedInputStream.readMessage(CodedInputStream.java:275)
at akka.remote.WireFormats$AckAndEnvelopeContainer$Builder.mergeFrom(WireFormats.java:479)
at akka.remote.WireFormats$AckAndEnvelopeContainer$Builder.mergeFrom(WireFormats.java:300)
at com.google.protobuf_spark.AbstractMessage$Builder.mergeFrom(AbstractMessage.java:300)
at com.google.protobuf_spark.AbstractMessage$Builder.mergeFrom(AbstractMessage.java:238)
at com.google.protobuf_spark.AbstractMessageLite$Builder.mergeFrom(AbstractMessageLite.java:162)
at com.google.protobuf_spark.AbstractMessage$Builder.mergeFrom(AbstractMessage.java:716)
at com.google.protobuf_spark.AbstractMessage$Builder.mergeFrom(AbstractMessage.java:238)
at com.google.protobuf_spark.AbstractMessageLite$Builder.mergeFrom(AbstractMessageLite.java:153)
at com.google.protobuf_spark.AbstractMessage$Builder.mergeFrom(AbstractMessage.java:709)
at akka.remote.WireFormats$AckAndEnvelopeContainer.parseFrom(WireFormats.java:234)
at akka.remote.transport.AkkaPduProtobufCodec$.decodeMessage(AkkaPduCodec.scala:181)
at akka.remote.EndpointReader.akka$remote$EndpointReader$$tryDecodeMessageAndAck(Endpoint.scala:821)
at akka.remote.EndpointReader$$anonfun$receive$2.applyOrElse(Endpoint.scala:755)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
at akka.actor.ActorCell.invoke(ActorCell.scala:456)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
at akka.dispatch.Mailbox.run(Mailbox.scala:219)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
15/08/10 11:46:45 WARN AbstractNioWorker: Unexpected exception in the selector loop.
java.lang.OutOfMemoryError: Java heap space
at org.jboss.netty.buffer.HeapChannelBuffer.<init>(HeapChannelBuffer.java:42)
at org.jboss.netty.buffer.BigEndianHeapChannelBuffer.<init>(BigEndianHeapChannelBuffer.java:34)
at org.jboss.netty.buffer.ChannelBuffers.buffer(ChannelBuffers.java:134)
at org.jboss.netty.buffer.HeapChannelBufferFactory.getBuffer(HeapChannelBufferFactory.java:69)
at org.jboss.netty.buffer.AbstractChannelBufferFactory.getBuffer(AbstractChannelBufferFactory.java:48)
at org.jboss.netty.channel.socket.nio.NioWorker.read(NioWorker.java:75)
at org.jboss.netty.channel.socket.nio.AbstractNioWorker.processSelectedKeys(AbstractNioWorker.java:472)
at org.jboss.netty.channel.socket.nio.AbstractNioWorker.run(AbstractNioWorker.java:333)
at org.jboss.netty.channel.socket.nio.NioWorker.run(NioWorker.java:35)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
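A note on scale: with 612515 features, each iteration of MLlib's SGD broadcasts the full dense weight vector and aggregates a dense gradient of the same dimension back at the driver (roughly 612515 x 8 bytes, about 5 MB per vector), so a small driver heap fills up quickly on wide data. Besides the SPARK_DRIVER_MEMORY environment variable used above, the heap sizes can also be set when launching the shell. A minimal sketch using the standard spark-shell flags (the 5g/6g values are illustrative):

./bin/spark-shell --driver-memory 5g --executor-memory 6g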

Related

Spark DataFrame Filter function throwing Task not Serializable exception

I am trying to filter DataFrame/Dataset records using the filter function with a Scala anonymous function, but it throws a Task not serializable exception. Can someone please look into the code and explain what the mistake is?
val spark = SparkSession.builder()
  .appName("test data frame")
  .master("local[*]")
  .getOrCreate()
val user_seq = Seq(
  Row(1,"John","London"),
  Row(1,"Martin","New York"),
  Row(1,"Abhishek","New York")
)
val user_schema = StructType(
  Array(
    StructField("user_id",IntegerType,true),
    StructField("user_name",StringType,true),
    StructField("user_city",StringType,true)
  ))
var user_df = spark.createDataFrame(spark.sparkContext.parallelize(user_seq),user_schema)
var user_rdd = user_df.filter((item)=>{
  return item.getString(2) == "New York"
})
user_rdd.count();
I can see the exception below on the console. When I filter with a column name instead, it works fine.
objc[48765]: Class JavaLaunchHelper is implemented in both /Library/Java/JavaVirtualMachines/jdk1.8.0_144.jdk/Contents/Home/bin/java (0x1059db4c0) and /Library/Java/JavaVirtualMachines/jdk1.8.0_144.jdk/Contents/Home/jre/lib/libinstrument.dylib (0x105a5f4e0). One of the two will be used. Which one is undefined.
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
20/07/18 20:10:09 INFO SparkContext: Running Spark version 2.4.6
20/07/18 20:10:09 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
20/07/18 20:10:09 INFO SparkContext: Submitted application: test data frame
20/07/18 20:10:09 INFO SecurityManager: Changing view acls groups to:
20/07/18 20:10:09 INFO SecurityManager: Changing modify acls groups to:
20/07/18 20:10:12 INFO StateStoreCoordinatorRef: Registered StateStoreCoordinator endpoint
20/07/18 20:10:12 INFO ContextCleaner: Cleaned accumulator 0
20/07/18 20:10:13 INFO CodeGenerator: Code generated in 170.789451 ms
20/07/18 20:10:13 INFO CodeGenerator: Code generated in 17.729004 ms
Exception in thread "main" org.apache.spark.SparkException: Task not serializable
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:416)
at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:406)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:163)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2326)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1.apply(RDD.scala:872)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1.apply(RDD.scala:871)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
at org.apache.spark.rdd.RDD.mapPartitionsWithIndex(RDD.scala:871)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:630)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.prepareShuffleDependency(ShuffleExchangeExec.scala:92)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$doExecute$1.apply(ShuffleExchangeExec.scala:128)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$doExecute$1.apply(ShuffleExchangeExec.scala:119)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:119)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:391)
at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:151)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:627)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247)
at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:296)
at org.apache.spark.sql.Dataset$$anonfun$count$1.apply(Dataset.scala:2836)
at org.apache.spark.sql.Dataset$$anonfun$count$1.apply(Dataset.scala:2835)
at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
at org.apache.spark.sql.Dataset.count(Dataset.scala:2835)
at DataFrameTest$.main(DataFrameTest.scala:65)
at DataFrameTest.main(DataFrameTest.scala)
Caused by: java.io.NotSerializableException: java.lang.Object
Serialization stack:
- object not serializable (class: java.lang.Object, value: java.lang.Object@cec590c)
- field (class: DataFrameTest$$anonfun$1, name: nonLocalReturnKey1$1, type: class java.lang.Object)
- object (class DataFrameTest$$anonfun$1, <function1>)
- element of array (index: 1)
- array (class [Ljava.lang.Object;, size 5)
- field (class: org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13, name: references$1, type: class [Ljava.lang.Object;)
- object (class org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13, <function2>)
at org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:40)
at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:46)
at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:100)
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:413)
... 48 more
20/07/18 20:10:13 INFO SparkContext: Invoking stop() from shutdown hook
20/07/18 20:10:13 INFO SparkUI: Stopped Spark web UI at http://192.168.31.239:4040
20/07/18 20:10:13 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
20/07/18 20:10:13 INFO MemoryStore: MemoryStore cleared
20/07/18 20:10:13 INFO BlockManager: BlockManager stopped
20/07/18 20:10:13 INFO BlockManagerMaster: BlockManagerMaster stopped
20/07/18 20:10:13 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
20/07/18 20:10:13 INFO SparkContext: Successfully stopped SparkContext
20/07/18 20:10:13 INFO ShutdownHookManager: Shutdown hook called
20/07/18 20:10:13 INFO ShutdownHookManager: Deleting directory /private/var/folders/33/3n6vtfs54mdb7x6882fyqy4mccfmvg/T/spark-3e071448-7ad7-47b8-bf70-68ab74721aa2
Process finished with exit code 1
Remove the return keyword from the line below. In Scala, a return inside an anonymous function compiles to a non-local return, which captures a control-flow marker object (the nonLocalReturnKey1$1 field of type java.lang.Object in the serialization stack above), and java.lang.Object is not serializable.
Change this code:
var user_rdd = user_df.filter((item)=>{
  return item.getString(2) == "New York"
})
to this:
var user_rdd = user_df.filter(_.getString(2) == "New York")
or
user_df.filter($"user_city" === "New York").count
You can also refactor your code as below:
val df = Seq((1,"John","London"),(1,"Martin","New York"),(1,"Abhishek","New York"))
.toDF("user_id","user_name","user_city")
df.filter($"user_city" === "New York").count
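For completeness, here is a self-contained version of the refactor; a minimal sketch, assuming import spark.implicits._, which is what provides both toDF and the $ column syntax:

import org.apache.spark.sql.SparkSession

object FilterExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("test data frame")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._ // brings toDF and $ into scope

    val df = Seq((1, "John", "London"), (1, "Martin", "New York"), (1, "Abhishek", "New York"))
      .toDF("user_id", "user_name", "user_city")

    // No return inside the lambda, so the closure serializes cleanly
    println(df.filter(_.getString(2) == "New York").count()) // prints 2

    spark.stop()
  }
}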

Though I have setMaster as local, my spark application gives error

I have the following application (it just starts and stops Spark) on Windows, using Scala IDE (Eclipse). I get the error "A master URL must be set in your configuration" even though I have set it here. I am using Spark 2.4.4.
Can someone please help me fix this issue?
import org.apache.spark._;
import org.apache.spark.sql._;

object SampleApp {
  def main(args: Array[String]) {
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("Simple Application")
    val sc = new SparkContext(conf)
    sc.stop()
  }
}
The error is:
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
19/10/28 22:58:56 INFO SparkContext: Running Spark version 2.4.4
19/10/28 22:58:56 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
19/10/28 22:58:56 ERROR SparkContext: Error initializing SparkContext.
org.apache.spark.SparkException: A master URL must be set in your configuration
at org.apache.spark.SparkContext.<init>(SparkContext.scala:368)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2520)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$5(SparkSession.scala:935)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:926)
at com.spark.renga.SampleApp$.main(SampleApp.scala:8)
at com.spark.renga.SampleApp.main(SampleApp.scala)
19/10/28 22:58:56 ERROR Utils: Uncaught exception in thread main
java.lang.NullPointerException
at org.apache.spark.SparkContext.postApplicationEnd(SparkContext.scala:2416)
at org.apache.spark.SparkContext.$anonfun$stop$2(SparkContext.scala:1931)
at org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1340)
at org.apache.spark.SparkContext.stop(SparkContext.scala:1931)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:585)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2520)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$5(SparkSession.scala:935)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:926)
at com.spark.renga.SampleApp$.main(SampleApp.scala:8)
at com.spark.renga.SampleApp.main(SampleApp.scala)
19/10/28 22:58:56 INFO SparkContext: Successfully stopped SparkContext
Exception in thread "main" org.apache.spark.SparkException: A master URL must be set in your configuration
at org.apache.spark.SparkContext.<init>(SparkContext.scala:368)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2520)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$5(SparkSession.scala:935)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:926)
at com.spark.renga.SampleApp$.main(SampleApp.scala:8)
at com.spark.renga.SampleApp.main(SampleApp.scala)
If you are using version 2.4.4, try this. (Note that your stack trace runs through SparkSession$Builder.getOrCreate at SampleApp.scala:8, so the code that actually ran built a SparkSession without a master set, rather than the SparkContext shown above.)
import org.apache.spark.sql.SparkSession

object SampleApp {
  def main(args: Array[String]) {
    val spark = SparkSession
      .builder
      .master("local[*]")
      .appName("test")
      .getOrCreate()
    println(spark.sparkContext.version)
    spark.stop()
  }
}
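Alternatively, the master can be supplied from outside the code; this relies on SparkConf picking up JVM system properties prefixed with spark., so in the Eclipse run configuration you can add the VM argument:

-Dspark.master=local[*]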

Apache Cassandra 3.11.2 - errors on startup

I have suddenly encountered some errors (Java exceptions) on startup of my Apache Cassandra 3.11.2 database on Windows 10. I didn't have such errors before on that version of the database. Here is an excerpt from my debug.log file:
DEBUG [SSTableBatchOpen:1] 2018-05-21 13:42:39,806 SSTableReader.java:504 - Opening C:\Users\Michał\Downloads\apache-cassandra-3.11.2\data\data\system\IndexInfo-9f5c6374d48532299a0a5094af9ad1e3\mc-53-big (0,139KiB)
ERROR [SSTableBatchOpen:1] 2018-05-21 13:42:40,092 DebuggableThreadPoolExecutor.java:239 - Error in ThreadPoolExecutor
java.lang.NoClassDefFoundError: com/github/benmanes/caffeine/cache/LocalCacheFactory$WISWR
at com.github.benmanes.caffeine.cache.BoundedLocalCache$BoundedLocalManualCache.<init>(BoundedLocalCache.java:2727) ~[caffeine-2.2.6.jar:na]
at com.github.benmanes.caffeine.cache.BoundedLocalCache$BoundedLocalLoadingCache.<init>(BoundedLocalCache.java:2944) ~[caffeine-2.2.6.jar:na]
at com.github.benmanes.caffeine.cache.Caffeine.build(Caffeine.java:830) ~[caffeine-2.2.6.jar:na]
at org.apache.cassandra.cache.ChunkCache.<init>(ChunkCache.java:145) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.cache.ChunkCache.<clinit>(ChunkCache.java:47) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.load(SSTableReader.java:763) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.load(SSTableReader.java:737) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.open(SSTableReader.java:517) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.open(SSTableReader.java:385) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader$3.run(SSTableReader.java:564) ~[apache-cassandra-3.11.2.jar:3.11.2]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[na:1.8.0_60]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[na:1.8.0_60]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[na:1.8.0_60]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_60]
at org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:81) [apache-cassandra-3.11.2.jar:3.11.2]
at java.lang.Thread.run(Thread.java:745) ~[na:1.8.0_60]
Caused by: java.lang.ClassNotFoundException: com.github.benmanes.caffeine.cache.LocalCacheFactory$WISWR
at java.net.URLClassLoader$1.run(URLClassLoader.java:370) ~[na:1.8.0_60]
at java.net.URLClassLoader$1.run(URLClassLoader.java:362) ~[na:1.8.0_60]
at java.security.AccessController.doPrivileged(Native Method) ~[na:1.8.0_60]
at java.net.URLClassLoader.findClass(URLClassLoader.java:361) ~[na:1.8.0_60]
at java.lang.ClassLoader.loadClass(ClassLoader.java:424) ~[na:1.8.0_60]
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331) ~[na:1.8.0_60]
at java.lang.ClassLoader.loadClass(ClassLoader.java:357) ~[na:1.8.0_60]
... 16 common frames omitted
Caused by: java.util.zip.ZipException: invalid distance too far back
at java.util.zip.InflaterInputStream.read(InflaterInputStream.java:164) ~[na:1.8.0_60]
at sun.misc.Resource.getBytes(Resource.java:124) ~[na:1.8.0_60]
at java.net.URLClassLoader.defineClass(URLClassLoader.java:462) ~[na:1.8.0_60]
at java.net.URLClassLoader.access$100(URLClassLoader.java:73) ~[na:1.8.0_60]
at java.net.URLClassLoader$1.run(URLClassLoader.java:368) ~[na:1.8.0_60]
... 22 common frames omitted
DEBUG [main] 2018-05-21 13:42:40,119 DiskBoundaryManager.java:53 - Refreshing disk boundary cache for system.IndexInfo
DEBUG [main] 2018-05-21 13:42:40,136 DiskBoundaryManager.java:92 - Got local ranges [] (ringVersion = 0)
DEBUG [main] 2018-05-21 13:42:40,138 DiskBoundaryManager.java:56 - Updating boundaries from null to DiskBoundaries{directories=[DataDirectory{location=C:\Users\Michał\Downloads\apache-cassandra-3.11.2\data\data}], positions=null, ringVersion=0, directoriesVersion=0} for system.IndexInfo
INFO [main] 2018-05-21 13:42:40,154 ColumnFamilyStore.java:411 - Initializing system.batches
INFO [main] 2018-05-21 13:42:40,166 ColumnFamilyStore.java:411 - Initializing system.paxos
DEBUG [main] 2018-05-21 13:42:40,168 DiskBoundaryManager.java:53 - Refreshing disk boundary cache for system.paxos
DEBUG [main] 2018-05-21 13:42:40,168 DiskBoundaryManager.java:92 - Got local ranges [] (ringVersion = 0)
DEBUG [main] 2018-05-21 13:42:40,168 DiskBoundaryManager.java:56 - Updating boundaries from null to DiskBoundaries{directories=[DataDirectory{location=C:\Users\Michał\Downloads\apache-cassandra-3.11.2\data\data}], positions=null, ringVersion=0, directoriesVersion=0} for system.paxos
INFO [main] 2018-05-21 13:42:40,184 ColumnFamilyStore.java:411 - Initializing system.local
DEBUG [SSTableBatchOpen:1] 2018-05-21 13:42:40,189 SSTableReader.java:504 - Opening C:\Users\Michał\Downloads\apache-cassandra-3.11.2\data\data\system\local-7ad54392bcdd35a684174e047860b377\mc-187-big (5,049KiB)
ERROR [SSTableBatchOpen:1] 2018-05-21 13:42:40,189 DebuggableThreadPoolExecutor.java:239 - Error in ThreadPoolExecutor
java.lang.NoClassDefFoundError: Could not initialize class org.apache.cassandra.cache.ChunkCache
at org.apache.cassandra.io.sstable.format.SSTableReader.load(SSTableReader.java:763) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.load(SSTableReader.java:737) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.open(SSTableReader.java:517) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.open(SSTableReader.java:385) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader$3.run(SSTableReader.java:564) ~[apache-cassandra-3.11.2.jar:3.11.2]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[na:1.8.0_60]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[na:1.8.0_60]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[na:1.8.0_60]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_60]
at org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:81) [apache-cassandra-3.11.2.jar:3.11.2]
at java.lang.Thread.run(Thread.java:745) ~[na:1.8.0_60]
DEBUG [main] 2018-05-21 13:42:40,190 DiskBoundaryManager.java:53 - Refreshing disk boundary cache for system.local
DEBUG [main] 2018-05-21 13:42:40,190 DiskBoundaryManager.java:92 - Got local ranges [] (ringVersion = 0)
DEBUG [main] 2018-05-21 13:42:40,190 DiskBoundaryManager.java:56 - Updating boundaries from null to DiskBoundaries{directories=[DataDirectory{location=C:\Users\Michał\Downloads\apache-cassandra-3.11.2\data\data}], positions=null, ringVersion=0, directoriesVersion=0} for system.local
INFO [main] 2018-05-21 13:42:40,201 ColumnFamilyStore.java:411 - Initializing system.peers
DEBUG [main] 2018-05-21 13:42:40,202 DiskBoundaryManager.java:53 - Refreshing disk boundary cache for system.peers
DEBUG [main] 2018-05-21 13:42:40,202 DiskBoundaryManager.java:92 - Got local ranges [] (ringVersion = 0)
DEBUG [main] 2018-05-21 13:42:40,202 DiskBoundaryManager.java:56 - Updating boundaries from null to DiskBoundaries{directories=[DataDirectory{location=C:\Users\Michał\Downloads\apache-cassandra-3.11.2\data\data}], positions=null, ringVersion=0, directoriesVersion=0} for system.peers
INFO [main] 2018-05-21 13:42:40,211 ColumnFamilyStore.java:411 - Initializing system.peer_events
DEBUG [main] 2018-05-21 13:42:40,213 DiskBoundaryManager.java:53 - Refreshing disk boundary cache for system.peer_events
DEBUG [main] 2018-05-21 13:42:40,213 DiskBoundaryManager.java:92 - Got local ranges [] (ringVersion = 0)
DEBUG [main] 2018-05-21 13:42:40,213 DiskBoundaryManager.java:56 - Updating boundaries from null to DiskBoundaries{directories=[DataDirectory{location=C:\Users\Michał\Downloads\apache-cassandra-3.11.2\data\data}], positions=null, ringVersion=0, directoriesVersion=0} for system.peer_events
INFO [main] 2018-05-21 13:42:40,223 ColumnFamilyStore.java:411 - Initializing system.range_xfers
DEBUG [main] 2018-05-21 13:42:40,224 DiskBoundaryManager.java:53 - Refreshing disk boundary cache for system.range_xfers
DEBUG [main] 2018-05-21 13:42:40,224 DiskBoundaryManager.java:92 - Got local ranges [] (ringVersion = 0)
DEBUG [main] 2018-05-21 13:42:40,224 DiskBoundaryManager.java:56 - Updating boundaries from null to DiskBoundaries{directories=[DataDirectory{location=C:\Users\Michał\Downloads\apache-cassandra-3.11.2\data\data}], positions=null, ringVersion=0, directoriesVersion=0} for system.range_xfers
INFO [main] 2018-05-21 13:42:40,237 ColumnFamilyStore.java:411 - Initializing system.compaction_history
DEBUG [SSTableBatchOpen:1] 2018-05-21 13:42:40,244 SSTableReader.java:504 - Opening C:\Users\Michał\Downloads\apache-cassandra-3.11.2\data\data\system\compaction_history-b4dbb7b4dc493fb5b3bfce6e434832ca\mc-32-big (3,121KiB)
DEBUG [SSTableBatchOpen:2] 2018-05-21 13:42:40,244 SSTableReader.java:504 - Opening C:\Users\Michał\Downloads\apache-cassandra-3.11.2\data\data\system\compaction_history-b4dbb7b4dc493fb5b3bfce6e434832ca\mc-33-big (0,354KiB)
ERROR [SSTableBatchOpen:2] 2018-05-21 13:42:40,244 DebuggableThreadPoolExecutor.java:239 - Error in ThreadPoolExecutor
java.lang.NoClassDefFoundError: Could not initialize class org.apache.cassandra.cache.ChunkCache
at org.apache.cassandra.io.sstable.format.SSTableReader.load(SSTableReader.java:763) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.load(SSTableReader.java:737) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.open(SSTableReader.java:517) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.open(SSTableReader.java:385) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader$3.run(SSTableReader.java:564) ~[apache-cassandra-3.11.2.jar:3.11.2]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[na:1.8.0_60]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[na:1.8.0_60]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[na:1.8.0_60]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_60]
at org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:81) [apache-cassandra-3.11.2.jar:3.11.2]
at java.lang.Thread.run(Thread.java:745) ~[na:1.8.0_60]
ERROR [SSTableBatchOpen:1] 2018-05-21 13:42:40,245 DebuggableThreadPoolExecutor.java:239 - Error in ThreadPoolExecutor
java.lang.NoClassDefFoundError: Could not initialize class org.apache.cassandra.cache.ChunkCache
at org.apache.cassandra.io.sstable.format.SSTableReader.load(SSTableReader.java:763) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.load(SSTableReader.java:737) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.open(SSTableReader.java:517) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.open(SSTableReader.java:385) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader$3.run(SSTableReader.java:564) ~[apache-cassandra-3.11.2.jar:3.11.2]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[na:1.8.0_60]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[na:1.8.0_60]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[na:1.8.0_60]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_60]
at org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:81) [apache-cassandra-3.11.2.jar:3.11.2]
at java.lang.Thread.run(Thread.java:745) ~[na:1.8.0_60]
DEBUG [main] 2018-05-21 13:42:40,246 DiskBoundaryManager.java:53 - Refreshing disk boundary cache for system.compaction_history
DEBUG [main] 2018-05-21 13:42:40,246 DiskBoundaryManager.java:92 - Got local ranges [] (ringVersion = 0)
DEBUG [main] 2018-05-21 13:42:40,246 DiskBoundaryManager.java:56 - Updating boundaries from null to DiskBoundaries{directories=[DataDirectory{location=C:\Users\Michał\Downloads\apache-cassandra-3.11.2\data\data}], positions=null, ringVersion=0, directoriesVersion=0} for system.compaction_history
INFO [main] 2018-05-21 13:42:40,262 ColumnFamilyStore.java:411 - Initializing system.sstable_activity
DEBUG [SSTableBatchOpen:2] 2018-05-21 13:42:40,268 SSTableReader.java:504 - Opening C:\Users\Michał\Downloads\apache-cassandra-3.11.2\data\data\system\sstable_activity-5a1ff267ace03f128563cfae6103c65e\mc-40-big (1,243KiB)
DEBUG [SSTableBatchOpen:1] 2018-05-21 13:42:40,268 SSTableReader.java:504 - Opening C:\Users\Michał\Downloads\apache-cassandra-3.11.2\data\data\system\sstable_activity-5a1ff267ace03f128563cfae6103c65e\mc-39-big (0,778KiB)
ERROR [SSTableBatchOpen:2] 2018-05-21 13:42:40,269 DebuggableThreadPoolExecutor.java:239 - Error in ThreadPoolExecutor
java.lang.NoClassDefFoundError: Could not initialize class org.apache.cassandra.cache.ChunkCache
at org.apache.cassandra.io.sstable.format.SSTableReader.load(SSTableReader.java:763) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.load(SSTableReader.java:737) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.open(SSTableReader.java:517) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader.open(SSTableReader.java:385) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableReader$3.run(SSTableReader.java:564) ~[apache-cassandra-3.11.2.jar:3.11.2]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[na:1.8.0_60]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[na:1.8.0_60]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[na:1.8.0_60]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_60]
at org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:81) [apache-cassandra-3.11.2.jar:3.11.2]
at java.lang.Thread.run(Thread.java:745) ~[na:1.8.0_60]
(...)
ERROR [main] 2018-05-21 13:42:51,336 CassandraDaemon.java:708 - Exception encountered during startup
java.lang.RuntimeException: java.util.concurrent.ExecutionException: java.lang.NoClassDefFoundError: Could not initialize class org.apache.cassandra.cache.ChunkCache
at org.apache.cassandra.utils.FBUtilities.waitOnFuture(FBUtilities.java:385) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.db.SystemKeyspace.forceBlockingFlush(SystemKeyspace.java:819) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.db.SystemKeyspace.removeTruncationRecord(SystemKeyspace.java:670) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.db.ColumnFamilyStore.invalidate(ColumnFamilyStore.java:553) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.db.ColumnFamilyStore.invalidate(ColumnFamilyStore.java:529) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.schema.LegacySchemaMigrator.lambda$unloadLegacySchemaTables$1(LegacySchemaMigrator.java:137) ~[apache-cassandra-3.11.2.jar:3.11.2]
at java.lang.Iterable.forEach(Iterable.java:75) ~[na:1.8.0_60]
at org.apache.cassandra.schema.LegacySchemaMigrator.unloadLegacySchemaTables(LegacySchemaMigrator.java:137) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.schema.LegacySchemaMigrator.migrate(LegacySchemaMigrator.java:83) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.service.CassandraDaemon.setup(CassandraDaemon.java:256) [apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.service.CassandraDaemon.activate(CassandraDaemon.java:602) [apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.service.CassandraDaemon.main(CassandraDaemon.java:691) [apache-cassandra-3.11.2.jar:3.11.2]
Caused by: java.util.concurrent.ExecutionException: java.lang.NoClassDefFoundError: Could not initialize class org.apache.cassandra.cache.ChunkCache
at java.util.concurrent.FutureTask.report(FutureTask.java:122) ~[na:1.8.0_60]
at java.util.concurrent.FutureTask.get(FutureTask.java:192) ~[na:1.8.0_60]
at org.apache.cassandra.utils.FBUtilities.waitOnFuture(FBUtilities.java:381) ~[apache-cassandra-3.11.2.jar:3.11.2]
... 11 common frames omitted
Caused by: java.lang.NoClassDefFoundError: Could not initialize class org.apache.cassandra.cache.ChunkCache
at org.apache.cassandra.io.sstable.format.big.BigTableWriter.<init>(BigTableWriter.java:64) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.big.BigFormat$WriterFactory.open(BigFormat.java:92) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.format.SSTableWriter.create(SSTableWriter.java:102) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.io.sstable.SimpleSSTableMultiWriter.create(SimpleSSTableMultiWriter.java:119) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.db.compaction.AbstractCompactionStrategy.createSSTableMultiWriter(AbstractCompactionStrategy.java:587) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.db.compaction.CompactionStrategyManager.createSSTableMultiWriter(CompactionStrategyManager.java:1027) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.db.ColumnFamilyStore.createSSTableMultiWriter(ColumnFamilyStore.java:518) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.db.Memtable$FlushRunnable.createFlushWriter(Memtable.java:504) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.db.Memtable$FlushRunnable.<init>(Memtable.java:443) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.db.Memtable$FlushRunnable.<init>(Memtable.java:420) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.db.Memtable.createFlushRunnables(Memtable.java:307) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.db.Memtable.flushRunnables(Memtable.java:298) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.db.ColumnFamilyStore$Flush.flushMemtable(ColumnFamilyStore.java:1140) ~[apache-cassandra-3.11.2.jar:3.11.2]
at org.apache.cassandra.db.ColumnFamilyStore$Flush.run(ColumnFamilyStore.java:1105) ~[apache-cassandra-3.11.2.jar:3.11.2]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[na:1.8.0_60]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[na:1.8.0_60]
at org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:81) ~[apache-cassandra-3.11.2.jar:3.11.2]
at java.lang.Thread.run(Thread.java:745) ~[na:1.8.0_60]
Is this issue caused by an old JDK version, or by Cassandra itself? How can I address it?
After a Windows reboot, these errors no longer occur. (The root cause in the trace is java.util.zip.ZipException: invalid distance too far back while loading a class from caffeine-2.2.6.jar, i.e. the JVM read corrupted data from the jar; most likely a bad cached read that the reboot cleared, rather than a JDK or Cassandra bug.)

Spring boot - Spark Application: Unable to process a file on its nodes

I have the below setup:
Spark master and slaves configured and running on my local machine.
17/11/01 18:03:52 INFO Utils: Successfully started service 'sparkMaster' on port 7077.
17/11/01 18:03:52 INFO Master: Starting Spark master at spark://127.0.0.1:7077
17/11/01 18:03:52 INFO Master: Running Spark version 2.2.0
17/11/01 18:03:52 INFO Utils: Successfully started service 'MasterUI' on port 8080.
I have a Spring Boot application whose properties file looks like this:
spark.home=/usr/local/Cellar/apache-spark/2.2.0/bin/
master.uri=spark://127.0.0.1:7077
The processing code is:
@Autowired
SparkConf sparkConf;

public void processFile(String inputFile, String outputFile) {
  JavaSparkContext javaSparkContext;
  SparkContext sc = new SparkContext(sparkConf);
  SerializationWrapper sw = new SerializationWrapper() {
    private static final long serialVersionUID = 1L;

    @Override
    public JavaSparkContext createJavaSparkContext() {
      // TODO Auto-generated method stub
      return JavaSparkContext.fromSparkContext(sc);
    }
  };
  javaSparkContext = sw.createJavaSparkContext();
  JavaRDD<String> lines = javaSparkContext.textFile(inputFile);
  Broadcast<JavaRDD<String>> outputLines;
  outputLines = javaSparkContext.broadcast(lines.map(new Function<String, String>() {
    private static final long serialVersionUID = 1L;

    @Override
    public String call(String arg0) throws Exception {
      // TODO Auto-generated method stub
      return arg0;
    }
  }));
  outputLines.getValue().saveAsTextFile(outputFile);
  //javaSparkContext.close();
}
When I run the code, I get the error below:
17/11/01 18:16:36 INFO TorrentBroadcast: Started reading broadcast variable 2
17/11/01 18:16:36 INFO TransportClientFactory: Successfully created connection to /192.168.0.135:51903 after 1 ms (0 ms spent in bootstraps)
17/11/01 18:16:36 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 24.4 KB, free 366.3 MB)
17/11/01 18:16:36 INFO TorrentBroadcast: Reading broadcast variable 2 took 82 ms
17/11/01 18:16:36 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 67.2 KB, free 366.2 MB)
17/11/01 18:16:36 ERROR Executor: Exception in task 1.0 in stage 0.0 (TID 1)
java.lang.ClassCastException: cannot assign instance of scala.collection.immutable.List$SerializationProxy to field org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$dependencies_ of type scala.collection.Seq in instance of org.apache.spark.rdd.MapPartitionsRDD
at java.io.ObjectStreamClass$FieldReflector.setObjFieldValues(ObjectStreamClass.java:2133)
at java.io.ObjectStreamClass.setObjFieldValues(ObjectStreamClass.java:1305)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2251)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2027)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2245)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2027)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:422)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:80)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
17/11/01 18:16:36 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0)
The Spring Boot/Spark app should process files based on a REST API call, from which I get the input and output file locations, which are shared across the Spark nodes.
Any suggestions to fix the above errors?
I think you should not broadcast a JavaRDD, since an RDD is already distributed across your cluster nodes; map it and save the result directly.
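A minimal sketch of the intended flow (written in Scala for brevity; the Java API mirrors it), assuming sc is the underlying SparkContext and inputFile/outputFile are the paths from the REST call:

val lines = sc.textFile(inputFile)    // lines are already partitioned across the cluster
val output = lines.map(line => line)  // per-line transformation goes here
output.saveAsTextFile(outputFile)     // each executor writes its own partitions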

ERROR TaskSchedulerImpl: Exception in statusUpdate

I ran Python code on Spark using MLlib. It works fine with small datasets, but I get the following error after two iterations with large datasets:
ERROR TaskSchedulerImpl: Exception in statusUpdate
java.util.concurrent.RejectedExecutionException: Task org.apache.spark.scheduler.TaskResultGetter$$anon$2@15b59543 rejected from java.util.concurrent.ThreadPoolExecutor@22427929[Terminated, pool size = 0, active threads = 0, queued tasks = 0, completed tasks = 2701]
at java.util.concurrent.ThreadPoolExecutor$AbortPolicy.rejectedExecution(ThreadPoolExecutor.java:2050)
at java.util.concurrent.ThreadPoolExecutor.reject(ThreadPoolExecutor.java:821)
at java.util.concurrent.ThreadPoolExecutor.execute(ThreadPoolExecutor.java:1372)
at org.apache.spark.scheduler.TaskResultGetter.enqueueSuccessfulTask(TaskResultGetter.scala:49)
at org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$liftedTree2$1$1.apply(TaskSchedulerImpl.scala:327)
at org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$liftedTree2$1$1.apply(TaskSchedulerImpl.scala:324)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.TaskSchedulerImpl.liftedTree2$1(TaskSchedulerImpl.scala:324)
at org.apache.spark.scheduler.TaskSchedulerImpl.statusUpdate(TaskSchedulerImpl.scala:309)
at org.apache.spark.scheduler.local.LocalEndpoint$$anonfun$receive$1.applyOrElse(LocalBackend.scala:61)
at org.apache.spark.rpc.akka.AkkaRpcEnv.org$apache$spark$rpc$akka$AkkaRpcEnv$$processMessage(AkkaRpcEnv.scala:178)
at org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1$$anonfun$receiveWithLogging$1$$anonfun$applyOrElse$4.apply$mcV$sp(AkkaRpcEnv.scala:127)
at org.apache.spark.rpc.akka.AkkaRpcEnv.org$apache$spark$rpc$akka$AkkaRpcEnv$$safelyCall(AkkaRpcEnv.scala:198)
at org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1$$anonfun$receiveWithLogging$1.applyOrElse(AkkaRpcEnv.scala:126)
at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
at org.apache.spark.util.ActorLogReceive$$anon$1.apply(ActorLogReceive.scala:59)
at org.apache.spark.util.ActorLogReceive$$anon$1.apply(ActorLogReceive.scala:42)
at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:118)
at org.apache.spark.util.ActorLogReceive$$anon$1.applyOrElse(ActorLogReceive.scala:42)
at akka.actor.Actor$class.aroundReceive(Actor.scala:465)
at org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1.aroundReceive(AkkaRpcEnv.scala:93)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
at akka.actor.ActorCell.invoke(ActorCell.scala:487)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
at akka.dispatch.Mailbox.run(Mailbox.scala:220)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Do you have any idea what might cause this?
