On Apache Cassandra 3.11.1, during a decommission I noticed that the node tries to send hints. I checked the hints folder and found hints that are either very old or belong to nodes that have already been removed. I deleted them so they would not be part of the decommission process, but the decommission then fails and the node gets stuck in the UL state.
Stacktrace:
ERROR [HintsDispatcher:3] 2022-12-12 17:58:39,364 CassandraDaemon.java:228 - Exception in thread Thread[HintsDispatcher:3,1,RMI Runtime]
java.lang.RuntimeException: java.nio.file.NoSuchFileException: /var/lib/cassandra/hints/xxxx-1670867104313-1.hints
at org.apache.cassandra.io.util.ChannelProxy.openChannel(ChannelProxy.java:55) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.io.util.ChannelProxy.<init>(ChannelProxy.java:66) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.ChecksummedDataInput.open(ChecksummedDataInput.java:77) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.HintsReader.open(HintsReader.java:78) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.HintsDispatcher.create(HintsDispatcher.java:73) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.HintsDispatchExecutor$DispatchHintsTask.deliver(HintsDispatchExecutor.java:273) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.HintsDispatchExecutor$DispatchHintsTask.dispatch(HintsDispatchExecutor.java:260) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.HintsDispatchExecutor$DispatchHintsTask.dispatch(HintsDispatchExecutor.java:238) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.HintsDispatchExecutor$DispatchHintsTask.run(HintsDispatchExecutor.java:217) ~[apache-cassandra-3.11.1.jar:3.11.1]
at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:184) ~[na:1.8.0_131]
at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193) ~[na:1.8.0_131]
at java.util.concurrent.ConcurrentHashMap$ValueSpliterator.forEachRemaining(ConcurrentHashMap.java:3566) ~[na:1.8.0_131]
at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481) ~[na:1.8.0_131]
at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471) ~[na:1.8.0_131]
at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:151) ~[na:1.8.0_131]
at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:174) ~[na:1.8.0_131]
at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) ~[na:1.8.0_131]
at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:418) ~[na:1.8.0_131]
at org.apache.cassandra.hints.HintsDispatchExecutor$TransferHintsTask.transfer(HintsDispatchExecutor.java:186) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.HintsDispatchExecutor$TransferHintsTask.run(HintsDispatchExecutor.java:159) ~[apache-cassandra-3.11.1.jar:3.11.1]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[na:1.8.0_131]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[na:1.8.0_131]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[na:1.8.0_131]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_131]
at org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:81) [apache-cassandra-3.11.1.jar:3.11.1]
at java.lang.Thread.run(Thread.java:748) ~[na:1.8.0_131]
Caused by: java.nio.file.NoSuchFileException: /var/lib/cassandra/hints/0891d19f-7ba9-4fc6-973c-79f98253cf4e-1670867104313-1.hints
at sun.nio.fs.UnixException.translateToIOException(UnixException.java:86) ~[na:1.8.0_131]
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102) ~[na:1.8.0_131]
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107) ~[na:1.8.0_131]
at sun.nio.fs.UnixFileSystemProvider.newFileChannel(UnixFileSystemProvider.java:177) ~[na:1.8.0_131]
at java.nio.channels.FileChannel.open(FileChannel.java:287) ~[na:1.8.0_131]
at java.nio.channels.FileChannel.open(FileChannel.java:335) ~[na:1.8.0_131]
at org.apache.cassandra.io.util.ChannelProxy.openChannel(ChannelProxy.java:51) ~[apache-cassandra-3.11.1.jar:3.11.1]
... 25 common frames omitted
Final Error:
ERROR [RMI TCP Connection(350)-127.0.0.1] 2022-12-12 18:05:33,190 StorageService.java:3954 - Error while decommissioning node
java.lang.RuntimeException: java.nio.file.NoSuchFileException: /var/lib/cassandra/hints/xxxx-1670867104313-1.hints
at org.apache.cassandra.io.util.ChannelProxy.openChannel(ChannelProxy.java:55) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.io.util.ChannelProxy.<init>(ChannelProxy.java:66) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.ChecksummedDataInput.open(ChecksummedDataInput.java:77) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.HintsReader.open(HintsReader.java:78) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.HintsDispatcher.create(HintsDispatcher.java:73) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.HintsDispatchExecutor$DispatchHintsTask.deliver(HintsDispatchExecutor.java:273) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.HintsDispatchExecutor$DispatchHintsTask.dispatch(HintsDispatchExecutor.java:260) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.HintsDispatchExecutor$DispatchHintsTask.dispatch(HintsDispatchExecutor.java:238) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.HintsDispatchExecutor$DispatchHintsTask.run(HintsDispatchExecutor.java:217) ~[apache-cassandra-3.11.1.jar:3.11.1]
at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:184) ~[na:1.8.0_131]
at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193) ~[na:1.8.0_131]
at java.util.concurrent.ConcurrentHashMap$ValueSpliterator.forEachRemaining(ConcurrentHashMap.java:3566) ~[na:1.8.0_131]
at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481) ~[na:1.8.0_131]
at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471) ~[na:1.8.0_131]
at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:151) ~[na:1.8.0_131]
at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:174) ~[na:1.8.0_131]
at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) ~[na:1.8.0_131]
at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:418) ~[na:1.8.0_131]
at org.apache.cassandra.hints.HintsDispatchExecutor$TransferHintsTask.transfer(HintsDispatchExecutor.java:186) ~[apache-cassandra-3.11.1.jar:3.11.1]
at org.apache.cassandra.hints.HintsDispatchExecutor$TransferHintsTask.run(HintsDispatchExecutor.java:159) ~[apache-cassandra-3.11.1.jar:3.11.1]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[na:1.8.0_131]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[na:1.8.0_131]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[na:1.8.0_131]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[na:1.8.0_131]
at org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:81) ~[apache-cassandra-3.11.1.jar:3.11.1]
at java.lang.Thread.run(Thread.java:748) ~[na:1.8.0_131]
Caused by: java.nio.file.NoSuchFileException: /var/lib/cassandra/hints/0891d19f-7ba9-4fc6-973c-79f98253cf4e-1670867104313-1.hints
at sun.nio.fs.UnixException.translateToIOException(UnixException.java:86) ~[na:1.8.0_131]
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102) ~[na:1.8.0_131]
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107) ~[na:1.8.0_131]
at sun.nio.fs.UnixFileSystemProvider.newFileChannel(UnixFileSystemProvider.java:177) ~[na:1.8.0_131]
at java.nio.channels.FileChannel.open(FileChannel.java:287) ~[na:1.8.0_131]
at java.nio.channels.FileChannel.open(FileChannel.java:335) ~[na:1.8.0_131]
at org.apache.cassandra.io.util.ChannelProxy.openChannel(ChannelProxy.java:51) ~[apache-cassandra-3.11.1.jar:3.11.1]
... 25 common frames omitted
How can I avoid such a scenario?
I tried nodetool truncatehints, but it does nothing on the node.
At this point, I would just run nodetool removenode on it. The removenode process will:
get rid of the stuck DL/UL node.
stream the data for its token ranges from the remaining replicas to the nodes that now own them.
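A minimal sketch of the commands involved (run from another live node; the host ID placeholder is illustrative, not taken from this cluster):

# find the Host ID of the stuck node
nodetool status
# remove it from the ring using that Host ID
nodetool removenode <host-id>
# if the removal itself stalls, check progress or force completion
nodetool removenode status
nodetool removenode force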
What happens if all of the data was already migrated off the leaving node? Will the removenode command then stream it again and create duplicate data?
Yes, it streams it again, but it doesn't create "duplicate data" in the same way that in-place writes do.
So a cleanup will clear out this re-streamed data?
No. The streaming happens at the SSTable file level, so essentially a whole new file is streamed during the decommission/remove process. If there was already duplicate, in-place-updated data present, it will still be there on the new node. If there was only a single write at the partition level, then that is all that will be there. If a file already exists, this process simply writes one file over another, and Cassandra will never know it was even there.
When you add or remove a node, a token range recalculation is triggered. This means that some nodes end up holding data that they are no longer responsible for (because another node now is). Running nodetool cleanup removes data which:
has already been streamed somewhere else.
remains on a node that will never use it.
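A minimal sketch, assuming you want to reclaim that space after the topology change (the keyspace name is illustrative; omitting it cleans all keyspaces):

# run on each node that gave up token ranges
nodetool cleanup my_keyspace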
Related
When I add an external library to Zeppelin's Spark interpreter, sometimes, seemingly at random, the Spark interpreter does not start. Other times it starts, but then I cannot access the added library. And sometimes it just works.
My best guess is that some jars are conflicting.
This is the error output:
java.lang.NullPointerException
at org.apache.zeppelin.spark.SparkInterpreter.open(SparkInterpreter.java:821)
at org.apache.zeppelin.interpreter.LazyOpenInterpreter.open(LazyOpenInterpreter.java:69)
at org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer$InterpretJob.jobRun(RemoteInterpreterServer.java:612)
at org.apache.zeppelin.scheduler.Job.run(Job.java:186)
at org.apache.zeppelin.scheduler.FIFOScheduler$1.run(FIFOScheduler.java:139)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Cassandra is giving a digest mismatch error, and restarting the service on all nodes isn't helping.
ERROR 10:55:11 Exception in thread Thread[HintsDispatcher:2,1,main]
org.apache.cassandra.io.FSReadError: java.io.IOException: Digest mismatch exception
at org.apache.cassandra.hints.HintsReader$HintsIterator.computeNext(HintsReader.java:199) ~[apache-cassandra-3.0.14.jar:3.0.14]
at org.apache.cassandra.hints.HintsReader$HintsIterator.computeNext(HintsReader.java:164) ~[apache-cassandra-3.0.14.jar:3.0.14]
at org.apache.cassandra.utils.AbstractIterator.hasNext(AbstractIterator.java:47) ~[apache-cassandra-3.0.14.jar:3.0.14]
at org.apache.cassandra.hints.HintsDispatcher.sendHints(HintsDispatcher.java:157) ~[apache-cassandra-3.0.14.jar:3.0.14]
at org.apache.cassandra.hints.HintsDispatcher.sendHintsAndAwait(HintsDispatcher.java:139) ~[apache-cassandra-3.0.14.jar:3.0.14]
at org.apache.cassandra.hints.HintsDispatcher.dispatch(HintsDispatcher.java:123) ~[apache-cassandra-3.0.14.jar:3.0.14]
at org.apache.cassandra.hints.HintsDispatcher.dispatch(HintsDispatcher.java:95) ~[apache-cassandra-3.0.14.jar:3.0.14]
at org.apache.cassandra.hints.HintsDispatchExecutor$DispatchHintsTask.deliver(HintsDispatchExecutor.java:268) ~[apache-cassandra-3.0.14.jar:3.0.14]
at org.apache.cassandra.hints.HintsDispatchExecutor$DispatchHintsTask.dispatch(HintsDispatchExecutor.java:251) ~[apache-cassandra-3.0.14.jar:3.0.14]
at org.apache.cassandra.hints.HintsDispatchExecutor$DispatchHintsTask.dispatch(HintsDispatchExecutor.java:229) ~[apache-cassandra-3.0.14.jar:3.0.14]
at org.apache.cassandra.hints.HintsDispatchExecutor$DispatchHintsTask.run(HintsDispatchExecutor.java:208) ~[apache-cassandra-3.0.14.jar:3.0.14]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[na:1.8.0_131]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[na:1.8.0_131]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[na:1.8.0_131]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_131]
at org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:79) [apache-cassandra-3.0.14.jar:3.0.14]
at java.lang.Thread.run(Thread.java:748) ~[na:1.8.0_131]
Caused by: java.io.IOException: Digest mismatch exception
at org.apache.cassandra.hints.HintsReader$HintsIterator.computeNextInternal(HintsReader.java:216) ~[apache-cassandra-3.0.14.jar:3.0.14]
at org.apache.cassandra.hints.HintsReader$HintsIterator.computeNext(HintsReader.java:190) ~[apache-cassandra-3.0.14.jar:3.0.14]
... 16 common frames omitted
After some digging I found https://issues.apache.org/jira/browse/CASSANDRA-13696, and I think I need to delete the hint files so the nodes can reach a consistent state. However, Cassandra is running in DC/OS (Mesosphere) and I am not able to connect nodetool to truncate the hint files.
Is there any way I can delete the hint files? Or any other way to make the cluster consistent? Thanks in advance.
Your files are probably getting corrupted.
I would try to keep your data in mount points managed by something like REX-Ray.
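If nodetool really cannot reach the node, one workaround (my assumption, mirroring what the poster of the first question above did, and only acceptable if you can afford to lose those hints) is to remove the hint files while Cassandra is stopped on that node:

# with the Cassandra process on that node stopped (however your DC/OS task manages it)
rm /var/lib/cassandra/hints/*.hints   # default hints_directory; adjust to your cassandra.yaml
# then start Cassandra again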
I have a Spark cluster and a Vertica database. I use
spark.read.jdbc( # etc
to load Spark dataframes into the cluster. When I run a certain groupBy aggregation,
df2 = df.groupby('factor').agg(F.stddev('sum(PnL)'))
df2.show()
I then get a Vertica syntax exception:
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1667)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1622)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1611)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1890)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1903)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1916)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:347)
at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:39)
at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2193)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2546)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2192)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2199)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:1935)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:1934)
at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2576)
at org.apache.spark.sql.Dataset.head(Dataset.scala:1934)
at org.apache.spark.sql.Dataset.take(Dataset.scala:2149)
at org.apache.spark.sql.Dataset.showString(Dataset.scala:239)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:237)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.sql.SQLSyntaxErrorException: [Vertica][VJDBC](4856) ERROR: Syntax error at or near "Window"
at com.vertica.util.ServerErrorData.buildException(Unknown Source)
at com.vertica.io.ProtocolStream.readExpectedMessage(Unknown Source)
at com.vertica.dataengine.VDataEngine.prepareImpl(Unknown Source)
at com.vertica.dataengine.VDataEngine.prepare(Unknown Source)
at com.vertica.dataengine.VDataEngine.prepare(Unknown Source)
at com.vertica.jdbc.common.SPreparedStatement.<init>(Unknown Source)
at com.vertica.jdbc.jdbc4.S4PreparedStatement.<init>(Unknown Source)
at com.vertica.jdbc.VerticaJdbc4PreparedStatementImpl.<init>(Unknown Source)
at com.vertica.jdbc.VJDBCObjectFactory.createPreparedStatement(Unknown Source)
at com.vertica.jdbc.common.SConnection.prepareStatement(Unknown Source)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$$anon$1.<init>(JDBCRDD.scala:400)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD.compute(JDBCRDD.scala:379)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:79)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:47)
at org.apache.spark.scheduler.Task.run(Task.scala:86)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
... 1 more
Caused by: com.vertica.support.exceptions.SyntaxErrorException: [Vertica][VJDBC](4856) ERROR: Syntax error at or near "Window"
... 27 more
What I want to know is: what exactly did Spark try to execute against the Vertica database? Is there a trace config I can set somewhere?
Thanks!
You can look at the query_requests system table to see what SQL has been run against your database. You can filter on user_name and start_timestamp to help find the query.
Typically, when you control the SQL, you would want to add a label. But in this case you'll have to search for it.
Also be aware that the retention period of this table is governed by the data collector settings.
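A minimal sketch of such a query (the user name and time window are placeholders, not from the original post):

SELECT start_timestamp, request_duration_ms, request
FROM v_monitor.query_requests
WHERE user_name = 'spark_user'
  AND start_timestamp > '2017-01-01 00:00:00'
ORDER BY start_timestamp DESC;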
Using the Spark web UI you can check the behavior and performance of your Spark application; it also shows the executed SQL statements in its SQL tab. You can also explore the Resource Manager log for more details.
The Spark web UI is at http://<host ip>:4040.
You can access the SQL tab under the /SQL URL, e.g. http://<host ip>:4040/SQL/.
I tried to implement what is explained here. It works when I keep the number of partitions in my custom partitioner equal to one, but when I change it to any other value it throws an ArrayIndexOutOfBoundsException:
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 6, deenbandhu): java.lang.ArrayIndexOutOfBoundsException: -2
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:151)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1858)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:920)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:918)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.RDD.foreachPartition(RDD.scala:918)
at DataSetCreation$.main(CreateDataSet.scala:100)
at DataSetCreation.main(CreateDataSet.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ArrayIndexOutOfBoundsException: -2
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:151)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
I am unable to figure out what is causing this error.
Thanks in advance.
I found the issue: in my custom partitioner, when calculating the hash code to derive the partition number, it was producing a negative value, hence the array-out-of-bounds error. I have now taken the absolute value and it is working fine.
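A minimal sketch of that kind of fix, assuming a Spark custom Partitioner keyed on arbitrary objects (the class name is illustrative, not from the original code):

import org.apache.spark.Partitioner

class KeyPartitioner(override val numPartitions: Int) extends Partitioner {
  override def getPartition(key: Any): Int = {
    // key.hashCode can be negative; using it directly as a partition index makes the
    // shuffle writer fail with ArrayIndexOutOfBoundsException.
    // Taking the absolute value of the remainder keeps the result in [0, numPartitions).
    math.abs(key.hashCode % numPartitions)
  }
}

java.lang.Math.floorMod(key.hashCode, numPartitions) is an equivalent way to get a non-negative partition index.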
I got the following exception when I was trying to decommission a live node. There is no reference in the manual saying that I need to stop the data coming into this node; if anything, decommissioning is specified for live nodes.
Can anyone let me know if I am doing something wrong, or if this is a bug on Cassandra's part?
Cassandra version used: 2.0.3
Exception in thread "main" java.lang.UnsupportedOperationException: data is currently moving to this node; unable to leave the ring
at org.apache.cassandra.service.StorageService.decommission(StorageService.java:2629)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at com.sun.jmx.mbeanserver.StandardMBeanIntrospector.invokeM2(StandardMBeanIntrospector.java:111)
at com.sun.jmx.mbeanserver.StandardMBeanIntrospector.invokeM2(StandardMBeanIntrospector.java:45)
at com.sun.jmx.mbeanserver.MBeanIntrospector.invokeM(MBeanIntrospector.java:235)
at com.sun.jmx.mbeanserver.PerInterface.invoke(PerInterface.java:138)
at com.sun.jmx.mbeanserver.MBeanSupport.invoke(MBeanSupport.java:250)
at com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.invoke(DefaultMBeanServerInterceptor.java:819)
at com.sun.jmx.mbeanserver.JmxMBeanServer.invoke(JmxMBeanServer.java:791)
at javax.management.remote.rmi.RMIConnectionImpl.doOperation(RMIConnectionImpl.java:1486)
at javax.management.remote.rmi.RMIConnectionImpl.access$300(RMIConnectionImpl.java:96)
at javax.management.remote.rmi.RMIConnectionImpl$PrivilegedOperation.run(RMIConnectionImpl.java:1327)
at javax.management.remote.rmi.RMIConnectionImpl.doPrivilegedOperation(RMIConnectionImpl.java:1419)
at javax.management.remote.rmi.RMIConnectionImpl.invoke(RMIConnectionImpl.java:847)
at sun.reflect.GeneratedMethodAccessor28.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:322)
at sun.rmi.transport.Transport$1.run(Transport.java:177)
at sun.rmi.transport.Transport$1.run(Transport.java:174)
at java.security.AccessController.doPrivileged(Native Method)
at sun.rmi.transport.Transport.serviceCall(Transport.java:173)
at sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:556)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:811)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:670)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603)
at java.lang.Thread.run(Thread.java:722)