Error when starting spark-shell local on Mac - apache-spark

I get the following error when I try to start spark-shell locally on Mac. Because the error occurs while sc is being created, sc is not available afterwards.
[ender#koders:data] $ spark-shell
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel).
16/12/29 13:57:06 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
16/12/29 13:57:12 ERROR SparkContext: Error initializing SparkContext.
java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.executor.Executor.addReplClassLoaderIfNeeded(Executor.scala:453)
at org.apache.spark.executor.Executor.<init>(Executor.scala:99)
at org.apache.spark.scheduler.local.LocalEndpoint.<init>(LocalSchedulerBackend.scala:59)
at org.apache.spark.scheduler.local.LocalSchedulerBackend.start(LocalSchedulerBackend.scala:126)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:149)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:497)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2275)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:831)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:823)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:823)
at org.apache.spark.repl.Main$.createSparkSession(Main.scala:95)
at $line3.$read$$iw$$iw.<init>(<console>:15)
at $line3.$read$$iw.<init>(<console>:31)
at $line3.$read.<init>(<console>:33)
at $line3.$read$.<init>(<console>:37)
at $line3.$read$.<clinit>(<console>)
at $line3.$eval$.$print$lzycompute(<console>:7)
at $line3.$eval$.$print(<console>:6)
at $line3.$eval.$print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:786)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1047)
at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:638)
at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:637)
at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:637)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:569)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:565)
at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:807)
at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:681)
at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:395)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1.apply$mcV$sp(SparkILoop.scala:38)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1.apply(SparkILoop.scala:37)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1.apply(SparkILoop.scala:37)
at scala.tools.nsc.interpreter.IMain.beQuietDuring(IMain.scala:214)
at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:37)
at org.apache.spark.repl.SparkILoop.loadFiles(SparkILoop.scala:94)
at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply$mcZ$sp(ILoop.scala:920)
at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
at scala.reflect.internal.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:97)
at scala.tools.nsc.interpreter.ILoop.process(ILoop.scala:909)
at org.apache.spark.repl.Main$.doMain(Main.scala:68)
at org.apache.spark.repl.Main$.main(Main.scala:51)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:736)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.net.URISyntaxException: Malformed escape pair at index 38: spark://fe80:0:0:0:44cd:86ff:fe5b:cd90%7:61707/classes
at java.net.URI$Parser.fail(URI.java:2848)
at java.net.URI$Parser.scanEscape(URI.java:2978)
at java.net.URI$Parser.scan(URI.java:3001)
at java.net.URI$Parser.parseAuthority(URI.java:3142)
at java.net.URI$Parser.parseHierarchical(URI.java:3097)
at java.net.URI$Parser.parse(URI.java:3053)
at java.net.URI.<init>(URI.java:588)
at org.apache.spark.repl.ExecutorClassLoader.<init>(ExecutorClassLoader.scala:48)
... 63 more
16/12/29 13:57:12 ERROR Utils: Uncaught exception in thread main
java.lang.NullPointerException
at org.apache.spark.scheduler.local.LocalSchedulerBackend.org$apache$spark$scheduler$local$LocalSchedulerBackend$$stop(LocalSchedulerBackend.scala:158)
at org.apache.spark.scheduler.local.LocalSchedulerBackend.stop(LocalSchedulerBackend.scala:137)
at org.apache.spark.scheduler.TaskSchedulerImpl.stop(TaskSchedulerImpl.scala:455)
at org.apache.spark.scheduler.DAGScheduler.stop(DAGScheduler.scala:1605)
at org.apache.spark.SparkContext$$anonfun$stop$8.apply$mcV$sp(SparkContext.scala:1798)
at org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1287)
at org.apache.spark.SparkContext.stop(SparkContext.scala:1797)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:565)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2275)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:831)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:823)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:823)
at org.apache.spark.repl.Main$.createSparkSession(Main.scala:95)
at $line3.$read$$iw$$iw.<init>(<console>:15)
at $line3.$read$$iw.<init>(<console>:31)
at $line3.$read.<init>(<console>:33)
at $line3.$read$.<init>(<console>:37)
at $line3.$read$.<clinit>(<console>)
at $line3.$eval$.$print$lzycompute(<console>:7)
at $line3.$eval$.$print(<console>:6)
at $line3.$eval.$print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:786)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1047)
at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:638)
at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:637)
at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:637)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:569)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:565)
at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:807)
at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:681)
at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:395)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1.apply$mcV$sp(SparkILoop.scala:38)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1.apply(SparkILoop.scala:37)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1.apply(SparkILoop.scala:37)
at scala.tools.nsc.interpreter.IMain.beQuietDuring(IMain.scala:214)
at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:37)
at org.apache.spark.repl.SparkILoop.loadFiles(SparkILoop.scala:94)
at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply$mcZ$sp(ILoop.scala:920)
at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
at scala.reflect.internal.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:97)
at scala.tools.nsc.interpreter.ILoop.process(ILoop.scala:909)
at org.apache.spark.repl.Main$.doMain(Main.scala:68)
at org.apache.spark.repl.Main$.main(Main.scala:51)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:736)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
16/12/29 13:57:12 WARN MetricsSystem: Stopping a MetricsSystem that is not running
java.net.URISyntaxException: Malformed escape pair at index 38: spark://fe80:0:0:0:44cd:86ff:fe5b:cd90%7:61707/classes
at java.net.URI$Parser.fail(URI.java:2848)
at java.net.URI$Parser.scanEscape(URI.java:2978)
at java.net.URI$Parser.scan(URI.java:3001)
at java.net.URI$Parser.parseAuthority(URI.java:3142)
at java.net.URI$Parser.parseHierarchical(URI.java:3097)
at java.net.URI$Parser.parse(URI.java:3053)
at java.net.URI.<init>(URI.java:588)
at org.apache.spark.repl.ExecutorClassLoader.<init>(ExecutorClassLoader.scala:48)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.executor.Executor.addReplClassLoaderIfNeeded(Executor.scala:453)
at org.apache.spark.executor.Executor.<init>(Executor.scala:99)
at org.apache.spark.scheduler.local.LocalEndpoint.<init>(LocalSchedulerBackend.scala:59)
at org.apache.spark.scheduler.local.LocalSchedulerBackend.start(LocalSchedulerBackend.scala:126)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:149)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:497)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2275)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:831)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:823)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:823)
at org.apache.spark.repl.Main$.createSparkSession(Main.scala:95)
... 47 elided
<console>:14: error: not found: value spark
import spark.implicits._
^
<console>:14: error: not found: value spark
import spark.sql
^
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
/___/ .__/\_,_/_/ /_/\_\ version 2.0.1
/_/
Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_101)
Type in expressions to have them evaluated.
Type :help for more information.
scala> sc
<console>:18: error: not found: value sc
sc
^

After searching and investigating the logs, I suspected that it's because my hostname could not be found in /etc/hosts. After changing the hostname from "koders.local" to "koders" which exists inside /etc/hosts, problem solved.

Related

spark-shell throws java.lang.reflect.InvocationTargetException on running

When I execute run-example SparkPi, for example, it works perfectly, but
when I run spark-shell, it throws these exceptions:
WARNING: An illegal reflective access operation has occurred
WARNING: Illegal reflective access by org.apache.spark.unsafe.Platform (file:/C:/big_data/spark-3.2.0-bin-hadoop3.2-scala2.13/jars/spark-unsafe_2.13-3.2.0.jar) to constructor java.nio.DirectByteBuffer(long,int)
WARNING: Please consider reporting this to the maintainers of org.apache.spark.unsafe.Platform
WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations
WARNING: All illegal access operations will be denied in a future release
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
/___/ .__/\_,_/_/ /_/\_\ version 3.2.0
/_/
Using Scala version 2.13.5 (OpenJDK 64-Bit Server VM, Java 11.0.9.1)
Type in expressions to have them evaluated.
Type :help for more information.
21/12/11 19:28:36 ERROR SparkContext: Error initializing SparkContext.
java.lang.reflect.InvocationTargetException
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:490)
at org.apache.spark.executor.Executor.addReplClassLoaderIfNeeded(Executor.scala:909)
at org.apache.spark.executor.Executor.<init>(Executor.scala:160)
at org.apache.spark.scheduler.local.LocalEndpoint.<init>(LocalSchedulerBackend.scala:64)
at org.apache.spark.scheduler.local.LocalSchedulerBackend.start(LocalSchedulerBackend.scala:132)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:220)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:581)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2690)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$2(SparkSession.scala:949)
at scala.Option.getOrElse(Option.scala:201)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:943)
at org.apache.spark.repl.Main$.createSparkSession(Main.scala:114)
at $line3.$read$$iw.<init>(<console>:5)
at $line3.$read.<init>(<console>:4)
at $line3.$read$.<clinit>(<console>)
at $line3.$eval$.$print$lzycompute(<synthetic>:6)
at $line3.$eval$.$print(<synthetic>:5)
at $line3.$eval.$print(<synthetic>)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:670)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1006)
at scala.tools.nsc.interpreter.IMain.$anonfun$doInterpret$1(IMain.scala:506)
at scala.reflect.internal.util.ScalaClassLoader.asContext(ScalaClassLoader.scala:36)
at scala.reflect.internal.util.ScalaClassLoader.asContext$(ScalaClassLoader.scala:116)
at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:43)
at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:505)
at scala.tools.nsc.interpreter.IMain.$anonfun$doInterpret$3(IMain.scala:519)
at scala.tools.nsc.interpreter.IMain.doInterpret(IMain.scala:519)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:503)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:501)
at scala.tools.nsc.interpreter.IMain.$anonfun$quietRun$1(IMain.scala:216)
at scala.tools.nsc.interpreter.shell.ReplReporterImpl.withoutPrintingResults(Reporter.scala:64)
at scala.tools.nsc.interpreter.IMain.quietRun(IMain.scala:216)
at scala.tools.nsc.interpreter.shell.ILoop.$anonfun$interpretPreamble$1(ILoop.scala:924)
at scala.collection.immutable.List.foreach(List.scala:333)
at scala.tools.nsc.interpreter.shell.ILoop.interpretPreamble(ILoop.scala:924)
at scala.tools.nsc.interpreter.shell.ILoop.$anonfun$run$3(ILoop.scala:963)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at scala.tools.nsc.interpreter.shell.ILoop.echoOff(ILoop.scala:90)
at scala.tools.nsc.interpreter.shell.ILoop.$anonfun$run$2(ILoop.scala:963)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at scala.tools.nsc.interpreter.IMain.withSuppressedSettings(IMain.scala:1406)
at scala.tools.nsc.interpreter.shell.ILoop.$anonfun$run$1(ILoop.scala:954)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at scala.tools.nsc.interpreter.shell.ReplReporterImpl.withoutPrintingResults(Reporter.scala:64)
at scala.tools.nsc.interpreter.shell.ILoop.run(ILoop.scala:954)
at org.apache.spark.repl.Main$.doMain(Main.scala:84)
at org.apache.spark.repl.Main$.main(Main.scala:59)
at org.apache.spark.repl.Main.main(Main.scala)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:955)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1043)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1052)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.net.URISyntaxException: Illegal character in path at index 42: spark://DESKTOP-JO73CF4.mshome.net:2103/C:\classes
at java.base/java.net.URI$Parser.fail(URI.java:2913)
at java.base/java.net.URI$Parser.checkChars(URI.java:3084)
at java.base/java.net.URI$Parser.parseHierarchical(URI.java:3166)
at java.base/java.net.URI$Parser.parse(URI.java:3114)
at java.base/java.net.URI.<init>(URI.java:600)
at org.apache.spark.repl.ExecutorClassLoader.<init>(ExecutorClassLoader.scala:57)
... 67 more
21/12/11 19:28:36 ERROR Utils: Uncaught exception in thread main
java.lang.NullPointerException
at org.apache.spark.scheduler.local.LocalSchedulerBackend.org$apache$spark$scheduler$local$LocalSchedulerBackend$$stop(LocalSchedulerBackend.scala:173)
at org.apache.spark.scheduler.local.LocalSchedulerBackend.stop(LocalSchedulerBackend.scala:144)
at org.apache.spark.scheduler.TaskSchedulerImpl.stop(TaskSchedulerImpl.scala:927)
at org.apache.spark.scheduler.DAGScheduler.stop(DAGScheduler.scala:2516)
at org.apache.spark.SparkContext.$anonfun$stop$12(SparkContext.scala:2086)
at org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1442)
at org.apache.spark.SparkContext.stop(SparkContext.scala:2086)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:677)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2690)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$2(SparkSession.scala:949)
at scala.Option.getOrElse(Option.scala:201)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:943)
at org.apache.spark.repl.Main$.createSparkSession(Main.scala:114)
at $line3.$read$$iw.<init>(<console>:5)
at $line3.$read.<init>(<console>:4)
at $line3.$read$.<clinit>(<console>)
at $line3.$eval$.$print$lzycompute(<synthetic>:6)
at $line3.$eval$.$print(<synthetic>:5)
at $line3.$eval.$print(<synthetic>)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:670)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1006)
at scala.tools.nsc.interpreter.IMain.$anonfun$doInterpret$1(IMain.scala:506)
at scala.reflect.internal.util.ScalaClassLoader.asContext(ScalaClassLoader.scala:36)
at scala.reflect.internal.util.ScalaClassLoader.asContext$(ScalaClassLoader.scala:116)
at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:43)
at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:505)
at scala.tools.nsc.interpreter.IMain.$anonfun$doInterpret$3(IMain.scala:519)
at scala.tools.nsc.interpreter.IMain.doInterpret(IMain.scala:519)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:503)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:501)
at scala.tools.nsc.interpreter.IMain.$anonfun$quietRun$1(IMain.scala:216)
at scala.tools.nsc.interpreter.shell.ReplReporterImpl.withoutPrintingResults(Reporter.scala:64)
at scala.tools.nsc.interpreter.IMain.quietRun(IMain.scala:216)
at scala.tools.nsc.interpreter.shell.ILoop.$anonfun$interpretPreamble$1(ILoop.scala:924)
at scala.collection.immutable.List.foreach(List.scala:333)
at scala.tools.nsc.interpreter.shell.ILoop.interpretPreamble(ILoop.scala:924)
at scala.tools.nsc.interpreter.shell.ILoop.$anonfun$run$3(ILoop.scala:963)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at scala.tools.nsc.interpreter.shell.ILoop.echoOff(ILoop.scala:90)
at scala.tools.nsc.interpreter.shell.ILoop.$anonfun$run$2(ILoop.scala:963)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at scala.tools.nsc.interpreter.IMain.withSuppressedSettings(IMain.scala:1406)
at scala.tools.nsc.interpreter.shell.ILoop.$anonfun$run$1(ILoop.scala:954)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at scala.tools.nsc.interpreter.shell.ReplReporterImpl.withoutPrintingResults(Reporter.scala:64)
at scala.tools.nsc.interpreter.shell.ILoop.run(ILoop.scala:954)
at org.apache.spark.repl.Main$.doMain(Main.scala:84)
at org.apache.spark.repl.Main$.main(Main.scala:59)
at org.apache.spark.repl.Main.main(Main.scala)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:955)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1043)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1052)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
21/12/11 19:28:36 WARN MetricsSystem: Stopping a MetricsSystem that is not running
21/12/11 19:28:36 ERROR Main: Failed to initialize Spark session.
java.lang.reflect.InvocationTargetException
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:490)
at org.apache.spark.executor.Executor.addReplClassLoaderIfNeeded(Executor.scala:909)
at org.apache.spark.executor.Executor.<init>(Executor.scala:160)
at org.apache.spark.scheduler.local.LocalEndpoint.<init>(LocalSchedulerBackend.scala:64)
at org.apache.spark.scheduler.local.LocalSchedulerBackend.start(LocalSchedulerBackend.scala:132)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:220)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:581)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2690)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$2(SparkSession.scala:949)
at scala.Option.getOrElse(Option.scala:201)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:943)
at org.apache.spark.repl.Main$.createSparkSession(Main.scala:114)
at $line3.$read$$iw.<init>(<console>:5)
at $line3.$read.<init>(<console>:4)
at $line3.$read$.<clinit>(<console>)
at $line3.$eval$.$print$lzycompute(<synthetic>:6)
at $line3.$eval$.$print(<synthetic>:5)
at $line3.$eval.$print(<synthetic>)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:670)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1006)
at scala.tools.nsc.interpreter.IMain.$anonfun$doInterpret$1(IMain.scala:506)
at scala.reflect.internal.util.ScalaClassLoader.asContext(ScalaClassLoader.scala:36)
at scala.reflect.internal.util.ScalaClassLoader.asContext$(ScalaClassLoader.scala:116)
at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:43)
at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:505)
at scala.tools.nsc.interpreter.IMain.$anonfun$doInterpret$3(IMain.scala:519)
at scala.tools.nsc.interpreter.IMain.doInterpret(IMain.scala:519)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:503)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:501)
at scala.tools.nsc.interpreter.IMain.$anonfun$quietRun$1(IMain.scala:216)
at scala.tools.nsc.interpreter.shell.ReplReporterImpl.withoutPrintingResults(Reporter.scala:64)
at scala.tools.nsc.interpreter.IMain.quietRun(IMain.scala:216)
at scala.tools.nsc.interpreter.shell.ILoop.$anonfun$interpretPreamble$1(ILoop.scala:924)
at scala.collection.immutable.List.foreach(List.scala:333)
at scala.tools.nsc.interpreter.shell.ILoop.interpretPreamble(ILoop.scala:924)
at scala.tools.nsc.interpreter.shell.ILoop.$anonfun$run$3(ILoop.scala:963)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at scala.tools.nsc.interpreter.shell.ILoop.echoOff(ILoop.scala:90)
at scala.tools.nsc.interpreter.shell.ILoop.$anonfun$run$2(ILoop.scala:963)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at scala.tools.nsc.interpreter.IMain.withSuppressedSettings(IMain.scala:1406)
at scala.tools.nsc.interpreter.shell.ILoop.$anonfun$run$1(ILoop.scala:954)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at scala.tools.nsc.interpreter.shell.ReplReporterImpl.withoutPrintingResults(Reporter.scala:64)
at scala.tools.nsc.interpreter.shell.ILoop.run(ILoop.scala:954)
at org.apache.spark.repl.Main$.doMain(Main.scala:84)
at org.apache.spark.repl.Main$.main(Main.scala:59)
at org.apache.spark.repl.Main.main(Main.scala)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:955)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1043)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1052)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.net.URISyntaxException: Illegal character in path at index 42: spark://DESKTOP-JO73CF4.mshome.net:2103/C:\classes
at java.base/java.net.URI$Parser.fail(URI.java:2913)
at java.base/java.net.URI$Parser.checkChars(URI.java:3084)
at java.base/java.net.URI$Parser.parseHierarchical(URI.java:3166)
at java.base/java.net.URI$Parser.parse(URI.java:3114)
at java.base/java.net.URI.<init>(URI.java:600)
at org.apache.spark.repl.ExecutorClassLoader.<init>(ExecutorClassLoader.scala:57)
... 67 more
21/12/11 19:28:36 ERROR Utils: Uncaught exception in thread shutdown-hook-0
java.lang.ExceptionInInitializerError
at org.apache.spark.executor.Executor.stop(Executor.scala:333)
at org.apache.spark.executor.Executor.$anonfun$stopHookReference$1(Executor.scala:76)
at org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:214)
at org.apache.spark.util.SparkShutdownHookManager.$anonfun$runAll$2(ShutdownHookManager.scala:188)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2019)
at org.apache.spark.util.SparkShutdownHookManager.$anonfun$runAll$1(ShutdownHookManager.scala:188)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at scala.util.Try$.apply(Try.scala:210)
at org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:178)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.lang.NullPointerException
at org.apache.spark.shuffle.ShuffleBlockPusher$.<clinit>(ShuffleBlockPusher.scala:465)
... 16 more
21/12/11 19:28:36 WARN ShutdownHookManager: ShutdownHook '' failed, java.util.concurrent.ExecutionException: java.lang.ExceptionInInitializerError
java.util.concurrent.ExecutionException: java.lang.ExceptionInInitializerError
at java.base/java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.base/java.util.concurrent.FutureTask.get(FutureTask.java:205)
at org.apache.hadoop.util.ShutdownHookManager.executeShutdown(ShutdownHookManager.java:124)
at org.apache.hadoop.util.ShutdownHookManager$1.run(ShutdownHookManager.java:95)
Caused by: java.lang.ExceptionInInitializerError
at org.apache.spark.executor.Executor.stop(Executor.scala:333)
at org.apache.spark.executor.Executor.$anonfun$stopHookReference$1(Executor.scala:76)
at org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:214)
at org.apache.spark.util.SparkShutdownHookManager.$anonfun$runAll$2(ShutdownHookManager.scala:188)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2019)
at org.apache.spark.util.SparkShutdownHookManager.$anonfun$runAll$1(ShutdownHookManager.scala:188)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at scala.util.Try$.apply(Try.scala:210)
at org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:178)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.lang.NullPointerException
at org.apache.spark.shuffle.ShuffleBlockPusher$.<clinit>(ShuffleBlockPusher.scala:465)
... 16 more
As I can see it caused by Illegal character in path at index 42: spark://DESKTOP-JO73CF4.mshome.net:2103/C:\classes, but I don't understand what does it mean exactly and how to deal with that
How can I solve this problem?
I use Spark 3.2.0 Pre-built for Apache Hadoop 3.3 and later (Scala 2.13)
JAVA_HOME, HADOOP_HOME, SPARK_HOME path variables are set.
i face the same problem, i think Spark 3.2 is the problem itself
switched to Spark 3.1.2, it works fine
I've faced the same issue. as of April 2022, I've used spark-3.1.3-bin-hadoop3.2 with winutils.exe (hadoop-3.2.0). It worked

spark giving requirement failed: Literal must have a corresponding value to string, but class String found

I have a spark 2.4.6 with dataframe write as
df
.select((struct(df.columns.map(column): _*)).alias("value"))
.write
.format("kafka")
.options(........)
.save
But after making a jar and doing spark submit in version 3.0.2 it gives:
Exception in thread "main" java.lang.IllegalArgumentException: requirement failed: Literal must have a corresponding value to string, but class String found.
at scala.Predef$.require(Predef.scala:281)
at org.apache.spark.sql.catalyst.expressions.Literal$.validateLiteralValue(literals.scala:215)
at org.apache.spark.sql.catalyst.expressions.Literal.<init>(literals.scala:292)
at org.apache.spark.sql.kafka010.KafkaWriter$.$anonfun$validateQuery$2(KafkaWriter.scala:55)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.kafka010.KafkaWriter$.validateQuery(KafkaWriter.scala:50)
at org.apache.spark.sql.kafka010.KafkaWriter$.write(KafkaWriter.scala:86)
at org.apache.spark.sql.kafka010.KafkaSourceProvider.createRelation(KafkaSourceProvider.scala:255)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:46)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:90)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:127)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:126)
at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:962)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:962)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:414)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:398)
at part4integrations.IntegratingKafka$.nimbleWrite(IntegratingKafka.scala:145)
at part4integrations.IntegratingKafka$.main(IntegratingKafka.scala:154)
at part4integrations.IntegratingKafka.main(IntegratingKafka.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:928)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1007)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1016)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
On checking the functions
we have that validation function in 3.0.2
https://github.com/apache/spark/blob/648457905c4ea7d00e3d88048c63f360045f0714/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala#L292
but not in the previous version
https://github.com/apache/spark/blob/v2.4.6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
Is there any resolution to this without upgrading the spark version?
as rebuilding the jar with version 3.0.2 works fine.

exception thrown on worker gets "Task exception could not be deserialized" ClassNotFoundException error

When an exception defined in my application is thrown on a worker, for example in a map:
sc.parallelize(0 until 10000).map(x =>
if (x == 7938)
throw new FatalException("oops")
else
x).sum()
I get a error of the form:
16/04/06 15:07:55 WARN ThrowableSerializationWrapper: Task exception could not be deserialized
java.lang.ClassNotFoundException: $line15.$read$$iwC$$iwC$FatalException
which is propagated to the driver as a SparkException: ReasonUnknown:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 0.0 failed 1 times, most recent failure: Lost task 6.0 in stage 0.0 (TID 6, localhost): UnknownReason
This is a serious problem since I can't diagnose errors in the driver or report them meaningfully to the application user.
If I throw a SparkException, on the other hand, I get a nice:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 1.0 failed 1 times, most recent failure: Lost task 6.0 in stage 1.0 (TID 14, localhost): org.apache.spark.SparkException: oops
I'm guess the class loader deserializing the propagated exception isn't seeing my application jar (or in this case, the class defined in spark-shell), but I haven't been able to find a workaround. Any ideas?
I'm seeing this in an application run through spark-submit, but here's a full example through spark-shell:
$ /usr/local/spark-1.5.2-bin-hadoop2.6/bin/spark-shell
log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
Using Spark's repl log4j profile: org/apache/spark/log4j-defaults-repl.properties
To adjust logging level use sc.setLogLevel("INFO")
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
/___/ .__/\_,_/_/ /_/\_\ version 1.5.2
/_/
Using Scala version 2.10.4 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_45)
Type in expressions to have them evaluated.
Type :help for more information.
16/04/06 15:07:38 WARN MetricsSystem: Using default name DAGScheduler for source because spark.app.id is not set.
Spark context available as sc.
16/04/06 15:07:39 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies)
16/04/06 15:07:39 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies)
16/04/06 15:07:41 WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 1.2.0
16/04/06 15:07:41 WARN ObjectStore: Failed to get database default, returning NoSuchObjectException
16/04/06 15:07:42 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
16/04/06 15:07:42 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies)
16/04/06 15:07:42 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies)
SQL context available as sqlContext.
scala> class FatalException(msg: String) extends RuntimeException(msg)
defined class FatalException
scala> sc.parallelize(0 until 10000).map(x => if (x == 7938) throw new FatalException("oops") else x).sum()
[Stage 0:> (0 + 0) / 8]16/04/06 15:07:55 ERROR Executor: Exception in task 6.0 in stage 0.0 (TID 6)
$line15.$read$$iwC$$iwC$FatalException: oops
at $line19.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply$mcII$sp(<console>:23)
at $line19.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:23)
at $line19.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:23)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:144)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.fold(TraversableOnce.scala:199)
at scala.collection.AbstractIterator.fold(Iterator.scala:1157)
at org.apache.spark.rdd.RDD$$anonfun$fold$1$$anonfun$19.apply(RDD.scala:1061)
at org.apache.spark.rdd.RDD$$anonfun$fold$1$$anonfun$19.apply(RDD.scala:1061)
at org.apache.spark.SparkContext$$anonfun$36.apply(SparkContext.scala:1943)
at org.apache.spark.SparkContext$$anonfun$36.apply(SparkContext.scala:1943)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
16/04/06 15:07:55 WARN ThrowableSerializationWrapper: Task exception could not be deserialized
java.lang.ClassNotFoundException: $line15.$read$$iwC$$iwC$FatalException
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1613)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
at org.apache.spark.ThrowableSerializationWrapper.readObject(TaskEndReason.scala:167)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1017)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1896)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:72)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:98)
at org.apache.spark.scheduler.TaskResultGetter$$anon$3$$anonfun$run$2.apply$mcV$sp(TaskResultGetter.scala:108)
at org.apache.spark.scheduler.TaskResultGetter$$anon$3$$anonfun$run$2.apply(TaskResultGetter.scala:105)
at org.apache.spark.scheduler.TaskResultGetter$$anon$3$$anonfun$run$2.apply(TaskResultGetter.scala:105)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1699)
at org.apache.spark.scheduler.TaskResultGetter$$anon$3.run(TaskResultGetter.scala:105)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
16/04/06 15:07:55 ERROR TaskResultGetter: Could not deserialize TaskEndReason: ClassNotFound with classloader org.apache.spark.repl.SparkIMain$TranslatingClassLoader#4538856f
16/04/06 15:07:55 WARN TaskSetManager: Lost task 6.0 in stage 0.0 (TID 6, localhost): UnknownReason
16/04/06 15:07:55 ERROR TaskSetManager: Task 6 in stage 0.0 failed 1 times; aborting job
org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 0.0 failed 1 times, most recent failure: Lost task 6.0 in stage 0.0 (TID 6, localhost): UnknownReason
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1824)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1944)
at org.apache.spark.rdd.RDD$$anonfun$fold$1.apply(RDD.scala:1063)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:310)
at org.apache.spark.rdd.RDD.fold(RDD.scala:1057)
at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply$mcD$sp(DoubleRDDFunctions.scala:34)
at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply(DoubleRDDFunctions.scala:34)
at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply(DoubleRDDFunctions.scala:34)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:310)
at org.apache.spark.rdd.DoubleRDDFunctions.sum(DoubleRDDFunctions.scala:33)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:23)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:28)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:30)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:32)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:34)
at $iwC$$iwC$$iwC.<init>(<console>:36)
at $iwC$$iwC.<init>(<console>:38)
at $iwC.<init>(<console>:40)
at <init>(<console>:42)
at .<init>(<console>:46)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1340)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:674)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
scala> import org.apache.spark._
import org.apache.spark._
scala> sc.parallelize(0 until 10000).map(x => if (x == 7938) throw new SparkException("oops") else x).sum()
16/04/06 15:08:05 ERROR Executor: Exception in task 6.0 in stage 1.0 (TID 14)
org.apache.spark.SparkException: oops
at $line23.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply$mcII$sp(<console>:25)
at $line23.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:25)
at $line23.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:25)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:144)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.fold(TraversableOnce.scala:199)
at scala.collection.AbstractIterator.fold(Iterator.scala:1157)
at org.apache.spark.rdd.RDD$$anonfun$fold$1$$anonfun$19.apply(RDD.scala:1061)
at org.apache.spark.rdd.RDD$$anonfun$fold$1$$anonfun$19.apply(RDD.scala:1061)
at org.apache.spark.SparkContext$$anonfun$36.apply(SparkContext.scala:1943)
at org.apache.spark.SparkContext$$anonfun$36.apply(SparkContext.scala:1943)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
16/04/06 15:08:05 WARN TaskSetManager: Lost task 6.0 in stage 1.0 (TID 14, localhost): org.apache.spark.SparkException: oops
at $line23.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply$mcII$sp(<console>:25)
at $line23.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:25)
at $line23.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:25)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:144)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.fold(TraversableOnce.scala:199)
at scala.collection.AbstractIterator.fold(Iterator.scala:1157)
at org.apache.spark.rdd.RDD$$anonfun$fold$1$$anonfun$19.apply(RDD.scala:1061)
at org.apache.spark.rdd.RDD$$anonfun$fold$1$$anonfun$19.apply(RDD.scala:1061)
at org.apache.spark.SparkContext$$anonfun$36.apply(SparkContext.scala:1943)
at org.apache.spark.SparkContext$$anonfun$36.apply(SparkContext.scala:1943)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
16/04/06 15:08:05 ERROR TaskSetManager: Task 6 in stage 1.0 failed 1 times; aborting job
org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 1.0 failed 1 times, most recent failure: Lost task 6.0 in stage 1.0 (TID 14, localhost): org.apache.spark.SparkException: oops
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply$mcII$sp(<console>:25)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:25)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:25)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:144)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.fold(TraversableOnce.scala:199)
at scala.collection.AbstractIterator.fold(Iterator.scala:1157)
at org.apache.spark.rdd.RDD$$anonfun$fold$1$$anonfun$19.apply(RDD.scala:1061)
at org.apache.spark.rdd.RDD$$anonfun$fold$1$$anonfun$19.apply(RDD.scala:1061)
at org.apache.spark.SparkContext$$anonfun$36.apply(SparkContext.scala:1943)
at org.apache.spark.SparkContext$$anonfun$36.apply(SparkContext.scala:1943)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1824)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1944)
at org.apache.spark.rdd.RDD$$anonfun$fold$1.apply(RDD.scala:1063)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:310)
at org.apache.spark.rdd.RDD.fold(RDD.scala:1057)
at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply$mcD$sp(DoubleRDDFunctions.scala:34)
at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply(DoubleRDDFunctions.scala:34)
at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply(DoubleRDDFunctions.scala:34)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:310)
at org.apache.spark.rdd.DoubleRDDFunctions.sum(DoubleRDDFunctions.scala:33)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:25)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:30)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:32)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:34)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:36)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:38)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:40)
at $iwC$$iwC$$iwC.<init>(<console>:42)
at $iwC$$iwC.<init>(<console>:44)
at $iwC.<init>(<console>:46)
at <init>(<console>:48)
at .<init>(<console>:52)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1340)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:674)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: org.apache.spark.SparkException: oops
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply$mcII$sp(<console>:25)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:25)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:25)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:144)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.fold(TraversableOnce.scala:199)
at scala.collection.AbstractIterator.fold(Iterator.scala:1157)
at org.apache.spark.rdd.RDD$$anonfun$fold$1$$anonfun$19.apply(RDD.scala:1061)
at org.apache.spark.rdd.RDD$$anonfun$fold$1$$anonfun$19.apply(RDD.scala:1061)
at org.apache.spark.SparkContext$$anonfun$36.apply(SparkContext.scala:1943)
at org.apache.spark.SparkContext$$anonfun$36.apply(SparkContext.scala:1943)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
scala>
Any ideas?
edit 1:
The relevant code in Spark is in TaskResultGetter.scala:
try {
if (serializedData != null && serializedData.limit() > 0) {
reason = serializer.get().deserialize[TaskEndReason](
serializedData, Utils.getSparkClassLoader)
}
} catch {
case cnd: ClassNotFoundException =>
// Log an error but keep going here -- the task failed, so not catastrophic
// if we can't deserialize the reason.
val loader = Utils.getContextOrSparkClassLoader
logError(
"Could not deserialize TaskEndReason: ClassNotFound with classloader " + loader)
case ex: Exception => {}
}
which calls spark.util.Utils.getSparkClassLoader:
def getSparkClassLoader: ClassLoader = getClass.getClassLoader
which is using the class loader which loaded the Spark jar.
This is a known issue: SPARK-11195. You need to upgrade your Spark to 1.6.0 or 1.6.1.

titan-1.0.0-hadoop1 compatibilty issues with apache-cassandra-3.2

I am trying to connect titan-1.0.0-hadoop1 with apache-cassandra-3.2.
I am using following " titan-cassandra-myfile.properties ":
storage.cassandra-config-dir=titan-1.0.0- hadoop1/conf/cassandra/cassandra.yaml
storage.backend=cassandra
storage.hostname=localhost
When I use gremlin to execute following command:
g=TitanFactory.open("/path to titan/ titan-1.0.0-hadoop1/conf/titan-cassandra-myfile.properties");
I am getting following error:
java.lang.IllegalArgumentException: Could not instantiate implementation: com.thinkaurelius.titan.diskstorage.cassandra.astyanax.AstyanaxStoreManager
at com.thinkaurelius.titan.util.system.ConfigurationUtil.instantiate(ConfigurationUtil.java:55)
at com.thinkaurelius.titan.diskstorage.Backend.getImplementationClass(Backend.java:473)
at com.thinkaurelius.titan.diskstorage.Backend.getStorageManager(Backend.java:407)
at com.thinkaurelius.titan.graphdb.configuration.GraphDatabaseConfiguration.(GraphDatabaseConfiguration.java:1320)
at com.thinkaurelius.titan.core.TitanFactory.open(TitanFactory.java:94)
at com.thinkaurelius.titan.core.TitanFactory.open(TitanFactory.java:62)
at com.thinkaurelius.titan.core.TitanFactory$open.call(Unknown Source)
at org.codehaus.groovy.runtime.callsite.CallSiteArray.defaultCall(CallSiteArray.java:45)
at org.codehaus.groovy.runtime.callsite.AbstractCallSite.call(AbstractCallSite.java:110)
at org.codehaus.groovy.runtime.callsite.AbstractCallSite.call(AbstractCallSite.java:122)
at groovysh_evaluate.run(groovysh_evaluate:3)
at org.codehaus.groovy.vmplugin.v7.IndyInterface.selectMethod(IndyInterface.java:215)
at org.codehaus.groovy.tools.shell.Interpreter.evaluate(Interpreter.groovy:69)
at org.codehaus.groovy.tools.shell.Groovysh.execute(Groovysh.groovy:185)
at org.codehaus.groovy.tools.shell.Shell.leftShift(Shell.groovy:119)
at org.codehaus.groovy.tools.shell.ShellRunner.work(ShellRunner.groovy:94)
at org.codehaus.groovy.tools.shell.InteractiveShellRunner.super$2$work(InteractiveShellRunner.groovy)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:90)
at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:324)
at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1207)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodOnSuperN(ScriptBytecodeAdapter.java:130)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodOnSuper0(ScriptBytecodeAdapter.java:150)
at org.codehaus.groovy.tools.shell.InteractiveShellRunner.work(InteractiveShellRunner.groovy:123)
at org.codehaus.groovy.tools.shell.ShellRunner.run(ShellRunner.groovy:58)
at org.codehaus.groovy.tools.shell.InteractiveShellRunner.super$2$run(InteractiveShellRunner.groovy)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:90)
at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:324)
at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1207)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodOnSuperN(ScriptBytecodeAdapter.java:130)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodOnSuper0(ScriptBytecodeAdapter.java:150)
at org.codehaus.groovy.tools.shell.InteractiveShellRunner.run(InteractiveShellRunner.groovy:82)
at org.codehaus.groovy.vmplugin.v7.IndyInterface.selectMethod(IndyInterface.java:215)
at org.apache.tinkerpop.gremlin.console.Console.(Console.groovy:144)
at org.codehaus.groovy.vmplugin.v7.IndyInterface.selectMethod(IndyInterface.java:215)
at org.apache.tinkerpop.gremlin.console.Console.main(Console.groovy:303)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
at com.thinkaurelius.titan.util.system.ConfigurationUtil.instantiate(ConfigurationUtil.java:44)
... 42 more
Caused by: com.thinkaurelius.titan.diskstorage.TemporaryBackendException: Temporary failure in storage backend
at com.thinkaurelius.titan.diskstorage.cassandra.astyanax.AstyanaxStoreManager.ensureKeyspaceExists(AstyanaxStoreManager.java:572)
at com.thinkaurelius.titan.diskstorage.cassandra.astyanax.AstyanaxStoreManager.(AstyanaxStoreManager.java:291)
... 47 more
Caused by: com.netflix.astyanax.connectionpool.exceptions.PoolTimeoutException: PoolTimeoutException: [host=localhost(127.0.0.1):9160, latency=10000(10000), attempts=1]Timed out waiting for connection
at com.netflix.astyanax.connectionpool.impl.SimpleHostConnectionPool.waitForConnection(SimpleHostConnectionPool.java:231)
at com.netflix.astyanax.connectionpool.impl.SimpleHostConnectionPool.borrowConnection(SimpleHostConnectionPool.java:198)
at com.netflix.astyanax.connectionpool.impl.RoundRobinExecuteWithFailover.borrowConnection(RoundRobinExecuteWithFailover.java:84)
at com.netflix.astyanax.connectionpool.impl.AbstractExecuteWithFailoverImpl.tryOperation(AbstractExecuteWithFailoverImpl.java:117)
at com.netflix.astyanax.connectionpool.impl.AbstractHostPartitionConnectionPool.executeWithFailover(AbstractHostPartitionConnectionPool.java:352)
at com.netflix.astyanax.thrift.ThriftClusterImpl.executeSchemaChangeOperation(ThriftClusterImpl.java:146)
at com.netflix.astyanax.thrift.ThriftClusterImpl.internalCreateKeyspace(ThriftClusterImpl.java:321)
at com.netflix.astyanax.thrift.ThriftClusterImpl.addKeyspace(ThriftClusterImpl.java:294)
at com.thinkaurelius.titan.diskstorage.cassandra.astyanax.AstyanaxStoreManager.ensureKeyspaceExists(AstyanaxStoreManager.java:567)
... 48 more
```
When I use same configurations with apache-cassandra-1.2.5 and titan-all-0.3.1 , the above config works and it creates a graph.
Please suggest some solution on this , whether is this a compatibility issue or I am doing something wrong.
Thanks,
Laxmikant
Titan 1.0 has been released using Cassandra 2.1.9. I'm almost sure that it's not compatible with Cassandra 3.x because there was a storage engine re-write
See here: https://github.com/thinkaurelius/titan/blob/titan10/pom.xml#L65

Error when writing a repartitioned SchemaRDD to Parquet with Spark SQL

I am trying to write save Spark SQL tables to Parquet files. Because of other issues I need to reduce the number of partitions before writing. My code is
data.coalesce(1000,shuffle=true).saveAsParquetFile("s3n://...")
This throws
java.lang.NullPointerException
at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:927)
at org.apache.spark.rdd.PartitionCoalescer.currPrefLocs(CoalescedRDD.scala:174)
at org.apache.spark.rdd.PartitionCoalescer$LocationIterator$$anonfun$4$$anonfun$apply$2.apply(CoalescedRDD.scala:191)
at org.apache.spark.rdd.PartitionCoalescer$LocationIterator$$anonfun$4$$anonfun$apply$2.apply(CoalescedRDD.scala:190)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:350)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:350)
at org.apache.spark.rdd.PartitionCoalescer$LocationIterator.<init>(CoalescedRDD.scala:185)
at org.apache.spark.rdd.PartitionCoalescer.setupGroups(CoalescedRDD.scala:236)
at org.apache.spark.rdd.PartitionCoalescer.run(CoalescedRDD.scala:337)
at org.apache.spark.rdd.CoalescedRDD.getPartitions(CoalescedRDD.scala:83)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1128)
at org.apache.spark.sql.parquet.InsertIntoParquetTable.saveAsHadoopFile(ParquetTableOperations.scala:318)
at org.apache.spark.sql.parquet.InsertIntoParquetTable.execute(ParquetTableOperations.scala:246)
at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:409)
at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:409)
at org.apache.spark.sql.SchemaRDDLike$class.saveAsParquetFile(SchemaRDDLike.scala:77)
at org.apache.spark.sql.SchemaRDD.saveAsParquetFile(SchemaRDD.scala:103)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:22)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:27)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:29)
at $iwC$$iwC$$iwC.<init>(<console>:31)
at $iwC$$iwC.<init>(<console>:33)
at $iwC.<init>(<console>:35)
at <init>(<console>:37)
at .<init>(<console>:41)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:789)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1062)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:615)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:646)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:610)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:814)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:859)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:771)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:616)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:624)
at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:629)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:954)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:902)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:997)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
The code works fine if I take out the coalesce step and changing the code to use shuffle=true or using repartition throws the same error. I am using spark-1.1.0.

Resources