Spark 2.3.0 giving error : Provider org.apache.spark.ml.source.libsvm.LibSVMFileFormat not a subtype - apache-spark

I am running Spark 2.3.0, and trying to load dataset from MySql (MariaDB)
using : mysql-connector-java : version 6.0.6
val dataSet:Dataset[Row] = spark
.read
.format("jdbc")
.options(Map("url" -> jdbcUrl
,"user" -> username
,"password" -> password
,"dbtable" -> dataSourceTableName
,"driver" -> driver
))
.load()
When I am running using spark-submit .. .. , I am getting the following error
Exception in thread "main" java.util.ServiceConfigurationError: org.apache.spark.sql.sources.DataSourceRegister: Provider org.apache.spark.ml.source.libsvm.LibSVMFileFormat not a subtype
at java.util.ServiceLoader.fail(ServiceLoader.java:239)
at java.util.ServiceLoader.access$300(ServiceLoader.java:185)
at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:376)
at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404)
at java.util.ServiceLoader$1.next(ServiceLoader.java:480)
at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:43)
at scala.collection.Iterator$class.foreach(Iterator.scala:742)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableLike$class.filterImpl(TraversableLike.scala:258)
at scala.collection.TraversableLike$class.filter(TraversableLike.scala:270)
at scala.collection.AbstractTraversable.filter(Traversable.scala:104)
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:614)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:190)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:164)
at analytics.utils.QuantumDataSets$.loadDataSet(QuantumDataSets.scala:93)
at analytics.utils.QuantumDataSets$.<init>(QuantumDataSets.scala:50)
at analytics.utils.QuantumDataSets$.<clinit>(QuantumDataSets.scala)
at analytics.utils.QuantumDataSets.main(QuantumDataSets.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:879)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:197)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:227)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:136)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

Related

Failed to commit spark job out of CDH 6.2 servers

CDH 6.2 is deployed on 3 servers: cm01, cm02, cm03.
spark-shell --master yarn --deploy-mode client --conf spark.yarn.archive=hdfs:///share/spark-libs.jar works if I login with rd account and run command on any one of these servers.
Now I want to submit spark job from server dev01 with user rd. I copied some directories from CDH to dev01 and setup environment HADOOP_CONF_DIR and HADOOP_HOME. However, yarn always complains:
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
19/07/30 14:03:38 ERROR cluster.YarnClientSchedulerBackend: YARN application has exited unexpectedly with state FAILED! Check the YARN application logs for more details.
19/07/30 14:03:38 ERROR cluster.YarnClientSchedulerBackend: Diagnostics message: Uncaught exception: org.apache.spark.SparkException: Exception thrown in awaitResult:
at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:226)
at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:101)
at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:109)
at org.apache.spark.deploy.yarn.ApplicationMaster.runExecutorLauncher(ApplicationMaster.scala:493)
at org.apache.spark.deploy.yarn.ApplicationMaster.run(ApplicationMaster.scala:277)
at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$3.run(ApplicationMaster.scala:799)
at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$3.run(ApplicationMaster.scala:798)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875)
at org.apache.spark.deploy.yarn.ApplicationMaster$.main(ApplicationMaster.scala:798)
at org.apache.spark.deploy.yarn.ExecutorLauncher$.main(ApplicationMaster.scala:830)
at org.apache.spark.deploy.yarn.ExecutorLauncher.main(ApplicationMaster.scala)
Caused by: java.io.IOException: Failed to connect to iZ8vb501or059rpllhr7i9Z:35937
at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:250)
at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:192)
at org.apache.spark.rpc.netty.NettyRpcEnv.createClient(NettyRpcEnv.scala:198)
at org.apache.spark.rpc.netty.Outbox$$anon$1.call(Outbox.scala:194)
at org.apache.spark.rpc.netty.Outbox$$anon$1.call(Outbox.scala:190)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.net.UnknownHostException: iZ8vb501or059rpllhr7i9Z
at java.net.InetAddress.getAllByName0(InetAddress.java:1280)
at java.net.InetAddress.getAllByName(InetAddress.java:1192)
at java.net.InetAddress.getAllByName(InetAddress.java:1126)
at java.net.InetAddress.getByName(InetAddress.java:1076)
at org.spark_project.io.netty.util.internal.SocketUtils$8.run(SocketUtils.java:146)
at org.spark_project.io.netty.util.internal.SocketUtils$8.run(SocketUtils.java:143)
at java.security.AccessController.doPrivileged(Native Method)
at org.spark_project.io.netty.util.internal.SocketUtils.addressByName(SocketUtils.java:143)
at org.spark_project.io.netty.resolver.DefaultNameResolver.doResolve(DefaultNameResolver.java:43)
at org.spark_project.io.netty.resolver.SimpleNameResolver.resolve(SimpleNameResolver.java:63)
at org.spark_project.io.netty.resolver.SimpleNameResolver.resolve(SimpleNameResolver.java:55)
at org.spark_project.io.netty.resolver.InetSocketAddressResolver.doResolve(InetSocketAddressResolver.java:57)
at org.spark_project.io.netty.resolver.InetSocketAddressResolver.doResolve(InetSocketAddressResolver.java:32)
at org.spark_project.io.netty.resolver.AbstractAddressResolver.resolve(AbstractAddressResolver.java:108)
at org.spark_project.io.netty.bootstrap.Bootstrap.doResolveAndConnect0(Bootstrap.java:208)
at org.spark_project.io.netty.bootstrap.Bootstrap.access$000(Bootstrap.java:49)
at org.spark_project.io.netty.bootstrap.Bootstrap$1.operationComplete(Bootstrap.java:188)
at org.spark_project.io.netty.bootstrap.Bootstrap$1.operationComplete(Bootstrap.java:174)
at org.spark_project.io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:507)
at org.spark_project.io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:481)
at org.spark_project.io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:420)
at org.spark_project.io.netty.util.concurrent.DefaultPromise.trySuccess(DefaultPromise.java:104)
at org.spark_project.io.netty.channel.DefaultChannelPromise.trySuccess(DefaultChannelPromise.java:82)
at org.spark_project.io.netty.channel.AbstractChannel$AbstractUnsafe.safeSetSuccess(AbstractChannel.java:978)
at org.spark_project.io.netty.channel.AbstractChannel$AbstractUnsafe.register0(AbstractChannel.java:512)
at org.spark_project.io.netty.channel.AbstractChannel$AbstractUnsafe.access$200(AbstractChannel.java:423)
at org.spark_project.io.netty.channel.AbstractChannel$AbstractUnsafe$1.run(AbstractChannel.java:482)
at org.spark_project.io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:163)
at org.spark_project.io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:403)
at org.spark_project.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:463)
at org.spark_project.io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)
at org.spark_project.io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138)
... 1 more
19/07/30 14:03:38 WARN cluster.YarnSchedulerBackend$YarnSchedulerEndpoint: Attempted to request executors before the AM has registered!
19/07/30 14:03:38 ERROR spark.SparkContext: Error initializing SparkContext.
java.lang.IllegalStateException: Spark context stopped while waiting for backend
at org.apache.spark.scheduler.TaskSchedulerImpl.waitBackendReady(TaskSchedulerImpl.scala:818)
at org.apache.spark.scheduler.TaskSchedulerImpl.postStartHook(TaskSchedulerImpl.scala:196)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:560)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2520)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$7.apply(SparkSession.scala:935)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$7.apply(SparkSession.scala:926)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:926)
at org.apache.spark.repl.Main$.createSparkSession(Main.scala:106)
at $line3.$read$$iw$$iw.<init>(<console>:15)
at $line3.$read$$iw.<init>(<console>:43)
at $line3.$read.<init>(<console>:45)
at $line3.$read$.<init>(<console>:49)
at $line3.$read$.<clinit>(<console>)
at $line3.$eval$.$print$lzycompute(<console>:7)
at $line3.$eval$.$print(<console>:6)
at $line3.$eval.$print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:793)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1054)
at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:645)
at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:644)
at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:644)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:576)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:572)
at scala.tools.nsc.interpreter.IMain$$anonfun$quietRun$1.apply(IMain.scala:231)
at scala.tools.nsc.interpreter.IMain$$anonfun$quietRun$1.apply(IMain.scala:231)
at scala.tools.nsc.interpreter.IMain.beQuietDuring(IMain.scala:221)
at scala.tools.nsc.interpreter.IMain.quietRun(IMain.scala:231)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1$$anonfun$apply$mcV$sp$1.apply(SparkILoop.scala:109)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1$$anonfun$apply$mcV$sp$1.apply(SparkILoop.scala:109)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1.apply$mcV$sp(SparkILoop.scala:109)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1.apply(SparkILoop.scala:109)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1.apply(SparkILoop.scala:109)
at scala.tools.nsc.interpreter.ILoop.savingReplayStack(ILoop.scala:91)
at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:108)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1$$anonfun$org$apache$spark$repl$SparkILoop$$anonfun$$loopPostInit$1$1.apply$mcV$sp(SparkILoop.scala:211)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1$$anonfun$org$apache$spark$repl$SparkILoop$$anonfun$$loopPostInit$1$1.apply(SparkILoop.scala:199)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1$$anonfun$org$apache$spark$repl$SparkILoop$$anonfun$$loopPostInit$1$1.apply(SparkILoop.scala:199)
at scala.tools.nsc.interpreter.ILoop$$anonfun$mumly$1.apply(ILoop.scala:189)
at scala.tools.nsc.interpreter.IMain.beQuietDuring(IMain.scala:221)
at scala.tools.nsc.interpreter.ILoop.mumly(ILoop.scala:186)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.org$apache$spark$repl$SparkILoop$$anonfun$$loopPostInit$1(SparkILoop.scala:199)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1$$anonfun$startup$1$1.apply(SparkILoop.scala:267)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1$$anonfun$startup$1$1.apply(SparkILoop.scala:247)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.withSuppressedSettings$1(SparkILoop.scala:235)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.startup$1(SparkILoop.scala:247)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:282)
at org.apache.spark.repl.SparkILoop.runClosure(SparkILoop.scala:159)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:182)
at org.apache.spark.repl.Main$.doMain(Main.scala:78)
at org.apache.spark.repl.Main$.main(Main.scala:58)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:849)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:924)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:933)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
19/07/30 14:03:38 ERROR repl.Main: Failed to initialize Spark session.
java.lang.IllegalStateException: Spark context stopped while waiting for backend
at org.apache.spark.scheduler.TaskSchedulerImpl.waitBackendReady(TaskSchedulerImpl.scala:818)
at org.apache.spark.scheduler.TaskSchedulerImpl.postStartHook(TaskSchedulerImpl.scala:196)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:560)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2520)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$7.apply(SparkSession.scala:935)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$7.apply(SparkSession.scala:926)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:926)
at org.apache.spark.repl.Main$.createSparkSession(Main.scala:106)
at $line3.$read$$iw$$iw.<init>(<console>:15)
at $line3.$read$$iw.<init>(<console>:43)
at $line3.$read.<init>(<console>:45)
at $line3.$read$.<init>(<console>:49)
at $line3.$read$.<clinit>(<console>)
at $line3.$eval$.$print$lzycompute(<console>:7)
at $line3.$eval$.$print(<console>:6)
at $line3.$eval.$print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:793)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1054)
at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:645)
at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:644)
at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:644)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:576)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:572)
at scala.tools.nsc.interpreter.IMain$$anonfun$quietRun$1.apply(IMain.scala:231)
at scala.tools.nsc.interpreter.IMain$$anonfun$quietRun$1.apply(IMain.scala:231)
at scala.tools.nsc.interpreter.IMain.beQuietDuring(IMain.scala:221)
at scala.tools.nsc.interpreter.IMain.quietRun(IMain.scala:231)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1$$anonfun$apply$mcV$sp$1.apply(SparkILoop.scala:109)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1$$anonfun$apply$mcV$sp$1.apply(SparkILoop.scala:109)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1.apply$mcV$sp(SparkILoop.scala:109)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1.apply(SparkILoop.scala:109)
at org.apache.spark.repl.SparkILoop$$anonfun$initializeSpark$1.apply(SparkILoop.scala:109)
at scala.tools.nsc.interpreter.ILoop.savingReplayStack(ILoop.scala:91)
at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:108)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1$$anonfun$org$apache$spark$repl$SparkILoop$$anonfun$$loopPostInit$1$1.apply$mcV$sp(SparkILoop.scala:211)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1$$anonfun$org$apache$spark$repl$SparkILoop$$anonfun$$loopPostInit$1$1.apply(SparkILoop.scala:199)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1$$anonfun$org$apache$spark$repl$SparkILoop$$anonfun$$loopPostInit$1$1.apply(SparkILoop.scala:199)
at scala.tools.nsc.interpreter.ILoop$$anonfun$mumly$1.apply(ILoop.scala:189)
at scala.tools.nsc.interpreter.IMain.beQuietDuring(IMain.scala:221)
at scala.tools.nsc.interpreter.ILoop.mumly(ILoop.scala:186)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.org$apache$spark$repl$SparkILoop$$anonfun$$loopPostInit$1(SparkILoop.scala:199)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1$$anonfun$startup$1$1.apply(SparkILoop.scala:267)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1$$anonfun$startup$1$1.apply(SparkILoop.scala:247)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.withSuppressedSettings$1(SparkILoop.scala:235)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.startup$1(SparkILoop.scala:247)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:282)
at org.apache.spark.repl.SparkILoop.runClosure(SparkILoop.scala:159)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:182)
at org.apache.spark.repl.Main$.doMain(Main.scala:78)
at org.apache.spark.repl.Main$.main(Main.scala:58)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:849)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:924)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:933)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
It seems RPC timeout exception. I have no clue to fix it. Please help. Thank you in advance.

Provider org.apache.spark.sql.hive.orc.DefaultSource could not be instantiated

I have a simple spark job that is reading data from Hive and some from db2, does some calculations and puts the results in db2. In the line of code where I try to read data from db2, I see the following error :
Exception in thread "main" java.util.ServiceConfigurationError: org.apache.spark.sql.sources.DataSourceRegister: Provider org.apache.spark.sql.hive.orc.DefaultSource could not be instantiated
at java.util.ServiceLoader.fail(ServiceLoader.java:232)
at java.util.ServiceLoader.access$100(ServiceLoader.java:185)
at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384)
at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404)
at java.util.ServiceLoader$1.next(ServiceLoader.java:480)
at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:43)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableLike$class.filterImpl(TraversableLike.scala:247)
at scala.collection.TraversableLike$class.filter(TraversableLike.scala:259)
at scala.collection.AbstractTraversable.filter(Traversable.scala:104)
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:533)
at org.apache.spark.sql.execution.datasources.DataSource.providingClass$lzycompute(DataSource.scala:89)
at org.apache.spark.sql.execution.datasources.DataSource.providingClass(DataSource.scala:89)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:304)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:178)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:146)
at com.ibm.sifs.trade.framework.persistence.TradePersistence.persistTradeSummary(TradePersistence.java:697)
at com.ibm.sifs.trade.summaries.IntraDaySummary.main(IntraDaySummary.java:136)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:782)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:119)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.VerifyError: Bad return type
Exception Details:
Location:
org/apache/spark/sql/hive/orc/DefaultSource.createRelation(Lorg/apache/
spark/sql/SQLContext;
[Ljava/lang/String;Lscala/Option;Lscala/Option;Lscala/collection/immutable
/Map;)Lorg/apache/spark/sql/sources/HadoopFsRelation; #35: areturn
Reason:
Type 'org/apache/spark/sql/hive/orc/OrcRelation' (current frame, stack[0]) is not assignable to 'org/apache/spark/sql/sources/HadoopFsRelation' (from method signature)
Current Frame:
bci: #35
flags: { }
locals: { 'org/apache/spark/sql/hive/orc/DefaultSource', 'org/apache/spark/sql/SQLContext', '[Ljava/lang/String;', 'scala/Option', 'scala/Option', 'scala/collection/immutable/Map' }
stack: { 'org/apache/spark/sql/hive/orc/OrcRelation' }
Bytecode:
0x0000000: b200 1c2b c100 1ebb 000e 592a b700 22b6
0x0000010: 0026 bb00 2859 2c2d b200 2d19 0419 052b
0x0000020: b700 30b0
at java.lang.Class.getDeclaredConstructors0(Native Method)
at java.lang.Class.privateGetDeclaredConstructors(Class.java:2671)
at java.lang.Class.getConstructor0(Class.java:3075)
at java.lang.Class.newInstance(Class.java:412)
at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:380)
... 27 more
These are the jars in my spark-submit command :
--files /etc/spark2/2.6.4.0-91/0/hive-site.xml,/etc/spark2/2.6.4.0-91/0/hbase-site.xml --jars /usr/hdp/current/spark2-client/jars/spark-yarn_2.11-2.2.0.2.6.4.0-91.jar,/usr/hdp/2.6.4.0-91/hbase/lib/hbase-server.jar,/usr/hdp/2.6.4.0-91/hbase/lib/hbase-protocol.jar,/usr/hdp/2.6.4.0-91/hbase/lib/hbase-client.jar,/usr/hdp/2.6.4.0-91/hbase/lib/hbase-common.jar --driver-java-options "-Dlog4j.configuration=file:/etc/spark2/2.6.4.0-91/0/log4j.properties" --conf "spark.driver.extraClassPath=/home/sifsuser/lib/*:/usr/hdp/2.6.4.0-91/hive/lib/*:/usr/hdp/current/spark2-client/jars/*:/usr/hdp/2.6.4.0-91/hbase/lib/*"
This is due to spark version mismatch. Check if you have same version of spark in both your local and also in your project bundle.
I ran into this issue when I had spark 2.2 at my mac and 2.4 at my maven project. When I matched the version, everything worked fine.

Cannot connect to hive through scala in spark sql

I tried to connect to hive and select some data for test in spark sql by scala, but I failed to do so. The code I use is as follows:
object LoadHive {
def main(args: Array[String]) {
if (args.length < 2) {
println("Usage: [sparkmaster] [tablename]")
exit(1)
}
val master = args(0)
val tableName = args(1)
val sc = new SparkContext(master, "LoadHive", System.getenv("SPARK_HOME"))
val hiveCtx = new HiveContext(sc)
val input = hiveCtx.sql("FROM src SELECT key, value")
val data = input.map(_.getInt(0))
println(data.collect().toList)
}
}
And the exception log is as follows:
16/11/28 07:25:34 WARN Hive: Failed to access metastore. This class should not accessed in runtime.
org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
at org.apache.hadoop.hive.ql.metadata.Hive.getAllDatabases(Hive.java:1236)
at org.apache.hadoop.hive.ql.metadata.Hive.reloadFunctions(Hive.java:174)
at org.apache.hadoop.hive.ql.metadata.Hive.<clinit>(Hive.java:166)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
at org.apache.spark.sql.hive.client.ClientWrapper.<init>(ClientWrapper.scala:204)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:249)
at org.apache.spark.sql.hive.HiveContext.metadataHive$lzycompute(HiveContext.scala:327)
at org.apache.spark.sql.hive.HiveContext.metadataHive(HiveContext.scala:237)
at org.apache.spark.sql.hive.HiveContext.setConf(HiveContext.scala:441)
at org.apache.spark.sql.hive.HiveContext.defaultOverrides(HiveContext.scala:226)
at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:229)
at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:101)
at com.oreilly.learningsparkexamples.scala.LoadHive$.main(LoadHive.scala:19)
at com.oreilly.learningsparkexamples.scala.LoadHive.main(LoadHive.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1523)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
at org.apache.hadoop.hive.ql.metadata.Hive.getAllDatabases(Hive.java:1234)
... 26 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
... 32 more
Caused by: java.lang.NoSuchMethodError: org.apache.thrift.EncodingUtils.setBit(BIZ)B
at org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo.setCreateTimeIsSet(PrivilegeGrantInfo.java:245)
at org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo.<init>(PrivilegeGrantInfo.java:163)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultRoles_core(HiveMetaStore.java:675)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultRoles(HiveMetaStore.java:645)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:462)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:66)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:72)
at org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:5762)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:199)
at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
... 37 more
I feel very confused about this, has anybody met this issue? Do I need to built the spark with hive to solve this issue? I think the spark already support hive. The spark version I use is 1.6.1. Any help is appreciated!
To verify that Spark is working with Hive run
./bin/spark-sql --master local[*]
and execute any sql query (e.g. select count(1) from src;)
Note that Spark has to be built with Hive support and hive-site.xml exists on classpath.

Error initializating SparkContext with "spark-shell --master yarn-client", just "spark-shell" works fine

I have a single node with hadoop cluster running and now Im trying to start spark with yarn. Im starting spark like this spark-shell --master yarn-client, but Im getting this error below. For spark I just download, extract and configure the spark-env.sh file, and start all spark processes like this:
SPARK_JAVA_OPTS=-Dspark.driver.port=53411
HADOOP_CONF_DIR=/usr/local/hadoop-2.7.1/etc/hadoop
SPARK_MASTER_IP=master
And then I start all processes of spark with `
./start-all.sh
Do you have any idea why this error can be happening?
Error:
16/05/14 00:26:10 INFO cluster.YarnClientSchedulerBackend: SchedulerBackend is ready for scheduling beginning after waiting maxRegisteredResourcesWaitingTime: 30000(ms)
16/05/14 00:26:10 ERROR spark.SparkContext: Error initializing SparkContext.
java.lang.NullPointerException
at org.apache.spark.SparkContext.<init>(SparkContext.scala:584)
at org.apache.spark.repl.SparkILoop.createSparkContext(SparkILoop.scala:1017)
at $line3.$read$$iwC$$iwC.<init>(<console>:15)
at $line3.$read$$iwC.<init>(<console>:24)
at $line3.$read.<init>(<console>:26)
at $line3.$read$.<init>(<console>:30)
at $line3.$read$.<clinit>(<console>)
at $line3.$eval$.<init>(<console>:7)
at $line3.$eval$.<clinit>(<console>)
at $line3.$eval.$print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:125)
at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:124)
at org.apache.spark.repl.SparkIMain.beQuietDuring(SparkIMain.scala:324)
at org.apache.spark.repl.SparkILoopInit$class.initializeSpark(SparkILoopInit.scala:124)
at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1$$anonfun$apply$mcZ$sp$5.apply$mcV$sp(SparkILoop.scala:974)
at org.apache.spark.repl.SparkILoopInit$class.runThunks(SparkILoopInit.scala:159)
at org.apache.spark.repl.SparkILoop.runThunks(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoopInit$class.postInitialization(SparkILoopInit.scala:108)
at org.apache.spark.repl.SparkILoop.postInitialization(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:991)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
16/05/14 00:26:10 INFO spark.SparkContext: SparkContext already stopped.
java.lang.NullPointerException
at org.apache.spark.SparkContext.<init>(SparkContext.scala:584)
at org.apache.spark.repl.SparkILoop.createSparkContext(SparkILoop.scala:1017)
at $iwC$$iwC.<init>(<console>:15)
at $iwC.<init>(<console>:24)
at <init>(<console>:26)
at .<init>(<console>:30)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:125)
at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:124)
at org.apache.spark.repl.SparkIMain.beQuietDuring(SparkIMain.scala:324)
at org.apache.spark.repl.SparkILoopInit$class.initializeSpark(SparkILoopInit.scala:124)
at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1$$anonfun$apply$mcZ$sp$5.apply$mcV$sp(SparkILoop.scala:974)
at org.apache.spark.repl.SparkILoopInit$class.runThunks(SparkILoopInit.scala:159)
at org.apache.spark.repl.SparkILoop.runThunks(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoopInit$class.postInitialization(SparkILoopInit.scala:108)
at org.apache.spark.repl.SparkILoop.postInitialization(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:991)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
java.lang.NullPointerException
at org.apache.spark.sql.SQLContext$.createListenerAndUI(SQLContext.scala:1367)
at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:101)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.repl.SparkILoop.createSQLContext(SparkILoop.scala:1028)
at $iwC$$iwC.<init>(<console>:15)
at $iwC.<init>(<console>:24)
at <init>(<console>:26)
at .<init>(<console>:30)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:132)
at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:124)
at org.apache.spark.repl.SparkIMain.beQuietDuring(SparkIMain.scala:324)
at org.apache.spark.repl.SparkILoopInit$class.initializeSpark(SparkILoopInit.scala:124)
at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1$$anonfun$apply$mcZ$sp$5.apply$mcV$sp(SparkILoop.scala:974)
at org.apache.spark.repl.SparkILoopInit$class.runThunks(SparkILoopInit.scala:159)
at org.apache.spark.repl.SparkILoop.runThunks(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoopInit$class.postInitialization(SparkILoopInit.scala:108)
at org.apache.spark.repl.SparkILoop.postInitialization(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:991)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
<console>:16: error: not found: value sqlContext
import sqlContext.implicits._
^
<console>:16: error: not found: value sqlContext
import sqlContext.sql
If I start spark with just spark-shell it works fine, sql context is initialized.
yarn-site.xml:
<configuration>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
</configuration>
Jps Command:
id ResourceManager
id RunJar
id SecondaryNameNode
id DataNode
id NodeManager
id Jps
id NameNode
Additional error with spark debug mode on:
16/05/14 18:06:48 DEBUG Client: IPC Client (1374243709) connection to master/10.10.0.11:8032 from hadoopadmin: closed
16/05/14 18:06:48 DEBUG Client: IPC Client (1374243709) connection to master/10.10.0.11:8032 from hadoopadmin: stopped, remaining connections 1
16/05/14 18:06:48 DEBUG Client: IPC Client (1374243709) connection to master/10.10.0.11:9000 from hadoopadmin: closed
16/05/14 18:06:48 DEBUG Client: IPC Client (1374243709) connection to master/10.10.0.11:9000 from hadoopadmin: stopped, remaining connections 0
16/05/14 18:06:49 INFO YarnClientSchedulerBackend: SchedulerBackend is ready for scheduling beginning after waiting maxRegisteredResourcesWaitingTime: 30000(ms)
16/05/14 18:06:49 ERROR SparkContext: Error initializing SparkContext.
java.lang.NullPointerException
Similar exception is thrown when YARN Resource Manager/Node Manager is unable to start a dedicated container for Spark. There is a good chance that it is related to resource allocation (memory/CPU). Check logs in the $HADOOP_HOME/logs. If node manager fails due to the lack of resources, you should find an error/warn message in node manager logs.

Exception when "spark-shell --master yarn-client"

When I run
spark-shell --master yarn-client
An error occurs as follows:
14/12/23 12:06:33 INFO client.RMProxy: Connecting to ResourceManager at main/10.20.236.201:8032
14/12/23 12:06:33 INFO yarn.Client: Got cluster metric info from ResourceManager, number of NodeManagers: 2
14/12/23 12:06:33 INFO yarn.Client: Max mem capabililty of a single resource in this cluster 2048
14/12/23 12:06:33 INFO yarn.Client: Preparing Local resources
java.lang.IllegalArgumentException: Can not create a Path from an empty string
at org.apache.hadoop.fs.Path.checkPathArg(Path.java:127)
at org.apache.hadoop.fs.Path.<init>(Path.java:135)
at org.apache.hadoop.fs.Path.<init>(Path.java:94)
at org.apache.spark.deploy.yarn.ClientBase$class.copyRemoteFile(ClientBase.scala:159)
at org.apache.spark.deploy.yarn.Client.copyRemoteFile(Client.scala:37)
at org.apache.spark.deploy.yarn.ClientBase$$anonfun$prepareLocalResources$5$$anonfun$apply$2.apply(ClientBase.scala:236)
at org.apache.spark.deploy.yarn.ClientBase$$anonfun$prepareLocalResources$5$$anonfun$apply$2.apply(ClientBase.scala:231)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108)
at org.apache.spark.deploy.yarn.ClientBase$$anonfun$prepareLocalResources$5.apply(ClientBase.scala:231)
at org.apache.spark.deploy.yarn.ClientBase$$anonfun$prepareLocalResources$5.apply(ClientBase.scala:229)
at scala.collection.immutable.List.foreach(List.scala:318)
at org.apache.spark.deploy.yarn.ClientBase$class.prepareLocalResources(ClientBase.scala:229)
at org.apache.spark.deploy.yarn.Client.prepareLocalResources(Client.scala:37)
at org.apache.spark.deploy.yarn.Client.runApp(Client.scala:74)
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:92)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:141)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:333)
at org.apache.spark.repl.SparkILoop.createSparkContext(SparkILoop.scala:981)
at $iwC$$iwC.<init>(<console>:8)
at $iwC.<init>(<console>:14)
at <init>(<console>:16)
at .<init>(<console>:20)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:789)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1062)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:615)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:646)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:610)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:823)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:868)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:780)
at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:121)
at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:120)
at org.apache.spark.repl.SparkIMain.beQuietDuring(SparkIMain.scala:264)
at org.apache.spark.repl.SparkILoopInit$class.initializeSpark(SparkILoopInit.scala:120)
at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:57)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1$$anonfun$apply$mcZ$sp$5.apply$mcV$sp(SparkILoop.scala:940)
at org.apache.spark.repl.SparkILoopInit$class.runThunks(SparkILoopInit.scala:142)
at org.apache.spark.repl.SparkILoop.runThunks(SparkILoop.scala:57)
at org.apache.spark.repl.SparkILoopInit$class.postInitialization(SparkILoopInit.scala:104)
at org.apache.spark.repl.SparkILoop.postInitialization(SparkILoop.scala:57)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:957)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:911)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:911)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:911)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1006)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:329)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
This part of the error stands out:
java.lang.IllegalArgumentException: Can not create a Path from an empty string
How can I troubleshoot this error?

Resources