Spark launcher handle not updating state on Standalone cluster mode - apache-spark

I'm trying to programmatically submit Spark jobs using the Spark Launcher library in a Spring web application.
Everything works fine in yarn-client, yarn-cluster, and standalone-client modes. However, in standalone-cluster mode, SparkAppHandle.getState() stays in UNKNOWN forever. Any advice? Thanks.
Here is the code of the service:
import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;
import org.springframework.stereotype.Service;
@Service
public class SparkServices {
public String launchJob(String master, String mode) throws Exception {
SparkAppHandle handle = new SparkLauncher()
.setAppName("test1")
.setSparkHome("/usr/local/spark")
.setAppResource("hdfs://nn:9000/spark-application.jar")
.setMainClass("my.App")
.setMaster(master)
.setDeployMode(mode)
.setConf("spark.executor.instances", "2")
.setConf("spark.driver.memory", "2g")
.setConf("spark.driver.cores", "1")
.setConf("spark.executor.memory", "2g")
.setConf("spark.executor.cores", "1")
.addAppArgs("hdfs://nn:9000/spark-project/files/")
.setVerbose(true)
.startApplication(new SparkAppHandle.Listener() {
@Override
public void stateChanged(SparkAppHandle sparkAppHandle) {
System.out.println("state >>> " + sparkAppHandle.getState());
}
@Override
public void infoChanged(SparkAppHandle sparkAppHandle) {
System.out.println("info >>> " + sparkAppHandle.getState());
}
});
while (!handle.getState().isFinal()){
System.out.println("state >>> " + handle.getState());
Thread.sleep(10000);
}
return "finished with >>>" + handle.getState();
}
}
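As a side note on the waiting logic above: a minimal alternative sketch (same SparkLauncher configuration assumed; it does not change the standalone-cluster behaviour being asked about) is to wait on a CountDownLatch released by the listener instead of sleeping in the request thread.
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;

public class SparkLaunchSketch {
    public String launchAndWait(SparkLauncher launcher) throws Exception {
        CountDownLatch done = new CountDownLatch(1);
        SparkAppHandle handle = launcher.startApplication(new SparkAppHandle.Listener() {
            @Override
            public void stateChanged(SparkAppHandle h) {
                // Release the waiting thread once the state is FINISHED, FAILED or KILLED
                if (h.getState().isFinal()) {
                    done.countDown();
                }
            }
            @Override
            public void infoChanged(SparkAppHandle h) { }
        });
        // Bound the wait so a request cannot hang forever; in standalone-cluster mode
        // the state may simply never leave UNKNOWN, as described in the question.
        done.await(30, TimeUnit.MINUTES);
        return "finished with >>> " + handle.getState();
    }
}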
And here is the code of the controller:
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;
@RestController
public class TaskController {
@Autowired
private SparkServices sparkServices;
@GetMapping("/sparkJobs/{master}/{mode}")
public String sparkJob(@PathVariable("master") String master, @PathVariable("mode") String mode) throws Exception {
return sparkServices.launchJob(master, mode);
}
}

Related

Cucumber testng with PowerMockTestCase to mock static classes

I am using Cucumber BDD, TestNG, and Java to write some BDD tests. I would like to mock static classes in order to write my tests. However, when I write this test runner, it fails to initialize the BDD scenarios.
Complete example (note the commented-out PrepareForTest line):
import gherkin.events.PickleEvent;
import io.cucumber.testng.CucumberOptions;
import io.cucumber.testng.PickleEventWrapper;
import io.cucumber.testng.TestNGCucumberRunner;
import org.powermock.api.mockito.PowerMockito;
import org.powermock.core.classloader.annotations.PrepareForTest;
import org.powermock.modules.testng.PowerMockTestCase;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.concurrent.TimeUnit;
import static org.mockito.Matchers.any;
@CucumberOptions(
features = {
"src/test/resources/features/sample"
},
glue = {
"com.demo.stepdefinitions.sample"
},
plugin = {
"pretty",
"html:target/cucumber-reports/cucumber-pretty",
"json:target/cucumber-reports/sampple-report.json",
"rerun:target/cucumber-reports/sample-rerun.txt"
}
)
//@PrepareForTest({Util.class})
public class TestngWithDataproviderTest extends PowerMockTestCase {
private TestNGCucumberRunner testNGCucumberRunner;
private void mockActiveBucket() {
PowerMockito.mockStatic(Util.class);
PowerMockito.when(Util.getBucketId(any(Long.class))).thenReturn(3);
}
@BeforeClass(alwaysRun = true)
public void setUpClass() throws Exception {
testNGCucumberRunner = new TestNGCucumberRunner(this.getClass());
}
@Test(dataProvider = "users")
public void testMockStatic(String username){
System.out.println("username: " + username);
System.out.println("static test passed");
mockActiveBucket();
Assert.assertTrue(true);
}
@Test(groups = "cucumber scenarios", description = "Runs Cucumber Scenarios", dataProvider = "scenarios")
public void testCucumberCcenario(PickleEventWrapper pickleEvent) throws Throwable {
PickleEvent event = pickleEvent.getPickleEvent();
mockActiveBucket();
testNGCucumberRunner.runScenario(pickleEvent.getPickleEvent());
Assert.assertTrue(true);
}
@DataProvider(name = "scenarios")
public Object[][] scenarios() {
Object[][] scenarios = testNGCucumberRunner.provideScenarios();
return new Object[][]{{scenarios[0][0]}};
}
@DataProvider(name = "users")
public Object[][] users() {
return new Object[][]{{"user1"}, {"user2"}};
}
}
class Util {
public static int getBucketId(long eventTimestamp){
Long minsPast5MinBoundary = (eventTimestamp % TimeUnit.MINUTES.toMillis(5))/TimeUnit.MINUTES.toMillis(1);
return minsPast5MinBoundary.intValue();
}
}
The above test fails to load the BDD scenarios dataProvider if I enable the PrepareForTest annotation on the test. However, the other test which uses a dataProvider works fine in both cases (PrepareForTest enabled or disabled).
ERROR:
Data provider mismatch
Method: testCucumberCcenario([Parameter{index=0, type=io.cucumber.testng.PickleEventWrapper, declaredAnnotations=[]}])
Arguments: [(io.cucumber.testng.PickleEventWrapperImpl) "Sunday isn't Friday"]
at org.testng.internal.reflect.DataProviderMethodMatcher.getConformingArguments(DataProviderMethodMatcher.java:45)
at org.testng.internal.Parameters.injectParameters(Parameters.java:796)
at org.testng.internal.Invoker.invokeTestMethods(Invoker.java:983)
at org.testng.internal.TestMethodWorker.invokeTestMethods(TestMethodWorker.java:125)
at org.testng.internal.TestMethodWorker.run(TestMethodWorker.java:109)
at org.testng.TestRunner.privateRun(TestRunner.java:648)
at org.testng.TestRunner.run(TestRunner.java:505)
As a side effect of this, I am unable to mock static methods of the Util class while writing the BDD tests. I am new to Cucumber BDD. Any help/pointers are appreciated.
After getting some help with root-causing from the #help-cucumber-jvm Slack channel, I was able to narrow it down to TestNG + PowerMock with data providers that use custom classes.
For example, this test fails:
import org.powermock.api.mockito.PowerMockito;
import org.powermock.core.classloader.annotations.PrepareForTest;
import org.powermock.modules.testng.PowerMockTestCase;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.ObjectFactory;
import org.testng.annotations.Test;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import static org.mockito.Matchers.any;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;
@PrepareForTest(Util2.class)
public class TestngWithDataproviderTestngTest extends PowerMockTestCase {
@ObjectFactory
public org.testng.IObjectFactory getObjectFactory() {
return new org.powermock.modules.testng.PowerMockObjectFactory();
}
private void mockActiveBucket() {
PowerMockito.mockStatic(Util.class);
PowerMockito.when(Util.getBucketId(any(Long.class))).thenReturn(3);
}
@Test(dataProvider = "users")
public void testMockStatic(MyTestCaseImpl myTestCase) {
System.out.println("myTestCase: " + myTestCase);
System.out.println("static test passed");
mockActiveBucket();
Assert.assertTrue(true);
}
@DataProvider(name = "users")
public Object[][] users() {
return new Object[][]{{new MyTestCaseImpl(5)}};
}
}
//interface MyTestCase {
//}
class MyTestCaseImpl { //implements MyTestCase{
int i;
public MyTestCaseImpl() {
}
public MyTestCaseImpl(int i) {
this.i = i;
}
public int getI() {
return i;
}
public void setI(int i) {
this.i = i;
}
}
class Util2 {
public static int getBucketId(long eventTimestamp) {
Long minsPast5MinBoundary = (eventTimestamp % TimeUnit.MINUTES.toMillis(5)) / TimeUnit.MINUTES.toMillis(1);
return minsPast5MinBoundary.intValue();
}
}
As mentioned here, this seems to be a known issue with a workaround. Hope this helps.
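For completeness, a hedged sketch of one way to sidestep the mismatch; this is my own assumption, not necessarily the workaround referenced above. Declaring the data-provider parameter as Object (always loaded by the bootstrap classloader) means TestNG's argument type matching cannot fail, at the cost of handling the value generically inside the test. It would be dropped into the TestngWithDataproviderTestngTest class above:
// Hypothetical sketch only: keep the custom type out of the test method signature so
// the data-provider argument matching cannot fail across PowerMock's classloader.
@Test(dataProvider = "users")
public void testMockStaticGeneric(Object testCase) {
    System.out.println("testCase: " + testCase);  // stay type-agnostic (toString/reflection)
    mockActiveBucket();
    Assert.assertTrue(true);
}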

Error org.picocontainer.PicoCompositionException: Duplicate Keys not allowed. Duplicate

I was trying to achieve Cucumber feature-level parallel execution using PicoContainer.
When I use a shared driver in a context class as below, I get org.picocontainer.PicoCompositionException: Duplicate Keys not allowed. Duplicate
public class Context{
private ThreadLocal<WebDriver> drivers = new ThreadLocal<>();
public void setDriver(WebDriver wd) {
drivers.set(wd);
}
public WebDriver getDriver() {
return drivers.get();
}
}
//Runner Class
import java.net.MalformedURLException;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import cucumber.api.CucumberOptions;
import cucumber.api.testng.CucumberFeatureWrapper;
import cucumber.api.testng.TestNGCucumberRunner;
import net.thumbtack.cucumber.picocontainer.example.step.SharedDriver;
import cucumber.api.testng.*;
@CucumberOptions (glue = {"net.thumbtack.cucumber.picocontainer.example.step"},
features = "src/main/resources/"
,tags = {"@Scenario2,@Scenario3"})
public class TestRunner {
public TestRunner() throws MalformedURLException {
super();
// TODO Auto-generated constructor stub
}
private TestNGCucumberRunner testNGCucumberRunner;
@BeforeClass(alwaysRun = true)
public void setUpClass() throws Exception {
testNGCucumberRunner = new TestNGCucumberRunner(this.getClass());
System.setProperty("ExecEnv","Docker");
}
// @Test(dataProvider = "features")
// public void feature(PickleEventWrapper eventwrapper,CucumberFeatureWrapper cucumberFeature) throws Throwable {
@Test(groups="cucumber", description="Runs CucumberFeature",dataProvider = "features")
public void feature(CucumberFeatureWrapper cucumberFeature){
testNGCucumberRunner.runCucumber(cucumberFeature.getCucumberFeature());
// testNGCucumberRunner.runScenario(eventwrapper.getPickleEvent());
}
@DataProvider(parallel=true)
public Object[][] features() {
return testNGCucumberRunner.provideFeatures();
// return testNGCucumberRunner.provideScenarios();
}
@AfterClass(alwaysRun = true)
public void tearDownClass() throws Exception {
testNGCucumberRunner.finish();
}
}
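For context, this is how a shared Context class is normally consumed with cucumber-picocontainer; a minimal sketch in which the step class name, step text, and URL are assumptions, and which does not by itself explain the duplicate-keys error. PicoContainer builds one Context per scenario and injects it into every step-definition class that declares it as a constructor parameter:
import cucumber.api.java.en.Given;

public class ExampleSteps {  // hypothetical step-definition class
    private final Context context;

    public ExampleSteps(Context context) {  // constructor injection handled by PicoContainer
        this.context = context;
    }

    @Given("^the browser is open$")
    public void theBrowserIsOpen() {
        // Each scenario thread sees its own WebDriver through the ThreadLocal in Context
        context.getDriver().get("https://example.org");
    }
}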

Run Appium test with TestNG and log4j, getting error instantiating class org.apache.logging.log4j.spi.Provider

I'm trying to run an Appium test on a real iOS device using Java, TestNG, and log4j. I am not too familiar with log4j. I am getting the following error in Eclipse:
org.testng.TestNGException:
An error occurred while instantiating class MobileTests.AppiumIOSTestAppTest1: org.apache.logging.log4j.spi.Provider: Provider org.apache.logging.slf4j.SLF4JProvider not found
package MobileTests;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.Date;
import org.testng.Assert;
import org.testng.ITestResult;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.Test;
import org.testng.asserts.SoftAssert;
import org.apache.log4j.Logger;
import Base.TestBase;
import Common.ScreenshotURL;
import Locators.LocatorMethods;
public class AppiumIOSTestAppTest1 extends TestBase{
static SoftAssert softAssert = new SoftAssert();
static Logger log = Logger.getLogger(AppiumIOSTestAppTest1.class);
String className = this.getClass().getSimpleName();
Date date1= new Date();
String originaltimestamp = new Timestamp(date1.getTime()).toString();
String timestamp = originaltimestamp.replace(':', 'x').substring(11);
String foldername = folderpath+className+timestamp;
String error = "";
String errorname = "";
@Test
public void iosTestAppTest1 () throws IOException, InterruptedException
{
try
{
LocatorMethods.clickByXpath(driver, "textfield1.xpath");
LocatorMethods.sendKeysIntoElementByXpath(driver, "textfield1.xpath", Integer.toString(8));
LocatorMethods.clickByXpath(driver, "textfield2.xpath");
LocatorMethods.sendKeysIntoElementByXpath(driver, "textfield2.xpath", Integer.toString(9));
LocatorMethods.clickByXpath(driver, "compute.xpath");
String answer = LocatorMethods.getTextByXpath(driver, "answer.xpath");
try
{
Assert.assertTrue(answer.equalsIgnoreCase(Integer.toString(17)), "Answer is wrong.");
}
catch(AssertionError e)
{
log.debug("Wrong answer was calculated.");
log.error("This is an exception", e);
//error = e.toString();
//System.out.println(error);
errorname = "wronganswer";
ScreenshotURL.screenshotURL(driver, foldername, errorname, error);
softAssert.fail();
}
}
catch(AssertionError e)
{
System.out.println(e);
}
softAssert.assertAll();
}
@AfterMethod
public static void OnFailure(ITestResult testResult) throws IOException {
if (testResult.getStatus() == ITestResult.FAILURE)
{
System.out.println(testResult.getStatus());
}
}
}
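One observation worth noting: the error comes from log4j 2's provider lookup (org.apache.logging.log4j.spi.Provider and org.apache.logging.slf4j.SLF4JProvider are log4j 2 / log4j-to-slf4j classes), while the test imports the log4j 1.x API (org.apache.log4j.Logger). As a hedged sketch only, and not a fix for the missing-provider error itself, this is how the logger would be obtained if the project is actually on the log4j 2 API:
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

public class Log4j2Example {  // hypothetical class, not part of the test above
    private static final Logger log = LogManager.getLogger(Log4j2Example.class);

    public static void main(String[] args) {
        log.debug("logger obtained via the log4j 2 LogManager");
    }
}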

OnDataChanged is not called

I have a problem with communication between my phone and my Wear device. I decided to add a Wear module to my app. The Wear app has just one class (MainActivity):
package cz.johrusk.myapplication;
import android.app.Activity;
import android.os.Bundle;
import android.support.annotation.NonNull;
import android.support.wearable.view.WatchViewStub;
import android.util.Log;
import android.widget.TextView;
import com.google.android.gms.common.ConnectionResult;
import com.google.android.gms.common.api.GoogleApiClient;
import com.google.android.gms.common.api.ResultCallback;
import com.google.android.gms.wearable.DataApi;
import com.google.android.gms.wearable.DataEventBuffer;
import com.google.android.gms.wearable.PutDataMapRequest;
import com.google.android.gms.wearable.PutDataRequest;
import com.google.android.gms.wearable.Wearable;
public class MainActivity extends Activity implements GoogleApiClient.OnConnectionFailedListener,DataApi.DataListener {
GoogleApiClient mGoogleApiClient;
private TextView mTextView;
static final String TAG = MainActivity.class.getSimpleName();
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
final WatchViewStub stub = (WatchViewStub) findViewById(R.id.watch_view_stub);
stub.setOnLayoutInflatedListener(new WatchViewStub.OnLayoutInflatedListener() {
@Override
public void onLayoutInflated(WatchViewStub stub) {
mTextView = (TextView) stub.findViewById(R.id.text);
}
});
mGoogleApiClient = new GoogleApiClient.Builder(this)
.addApi(Wearable.API)
.addConnectionCallbacks(new GoogleApiClient.ConnectionCallbacks() {
@Override
public void onConnected(Bundle connectionHint) {
Log.d(TAG, "onConnected: " + connectionHint);
sendNumber(1);
Log.d(TAG,"BBBBBBBB");
}
@Override
public void onConnectionSuspended(int cause) {
Log.d(TAG, "onConnectionSuspended: " + cause);
}
})
.addOnConnectionFailedListener(this)
.build();
mGoogleApiClient.connect();
Log.d(TAG,"mGoogleApiClient connected;");
}
@Override
public void onConnectionFailed(@NonNull ConnectionResult connectionResult) {
Log.d(TAG,"FAILE" + connectionResult);
}
public void sendNumber(int number) {
PutDataMapRequest putDataMapRequest = PutDataMapRequest.create("/number");
putDataMapRequest.getDataMap().putInt("number",number);
putDataMapRequest.getDataMap().putLong("Time",System.currentTimeMillis());
PutDataRequest putDataReq = putDataMapRequest.asPutDataRequest();
putDataReq.setUrgent();
Wearable.DataApi.putDataItem(mGoogleApiClient, putDataReq)
.setResultCallback(new ResultCallback<DataApi.DataItemResult>() {
@Override
public void onResult(@NonNull DataApi.DataItemResult dataItemResult) {
if (!dataItemResult.getStatus().isSuccess()) {
Log.d(TAG,"Fail");
}
else{
Log.d(TAG,"Succes");
}
}
});
}
@Override
protected void onStart() {
super.onStart();
mGoogleApiClient.connect();
}
@Override
public void onDataChanged(DataEventBuffer dataEventBuffer) {
Log.d(TAG,"TEST");
}
}
I guess that "Wearable.DataApi.putDataItem" should trigger the WearableListenerService in my phone app. Here is that service:
package cz.johrusk.showsmscode.service;
import android.util.Log;
import com.google.android.gms.common.api.GoogleApiClient;
import com.google.android.gms.wearable.DataEvent;
import com.google.android.gms.wearable.DataEventBuffer;
import com.google.android.gms.wearable.DataMap;
import com.google.android.gms.wearable.DataMapItem;
import com.google.android.gms.wearable.Wearable;
import com.google.android.gms.wearable.WearableListenerService;
public class WatchListener_service extends WearableListenerService {
@Override
public void onCreate() {
super.onCreate();
GoogleApiClient mGoogleApiClient = new GoogleApiClient.Builder(this)
.addApi(Wearable.API)
.build();
mGoogleApiClient.connect();
}
@Override
public void onDataChanged(DataEventBuffer dataEventBuffer) {
Log.d("prijato","number is: ");
for (DataEvent dataEvent : dataEventBuffer) {
if (dataEvent.getType() == DataEvent.TYPE_CHANGED) {
DataMap dataMap = DataMapItem.fromDataItem(dataEvent.getDataItem()).getDataMap();
String path = dataEvent.getDataItem().getUri().getPath();
if (path.equals("/number")){
int number = dataMap.getInt("numa");
long time = dataMap.getInt("timestamp");
Log.d("received","number is: " + number);
}
}
}
}
}
However, the onDataChanged method in WatchListener_service isn't called. The onResult method inside the ResultCallback prints "Succes", so it seems the DataItem is sent correctly.
I have already found many similar problems on Stack Overflow, so I checked all of these things:
Both modules have the same applicationId
Both modules use 'com.google.android.gms:play-services-wearable:9.0.0'
setUrgent() is called on the PutDataRequest, so there shouldn't be any delay.
The WearableListenerService declares the correct intent filter in the manifest:
<action android:name="com.google.android.gms.wearable.DATA_CHANGED" />
<data android:scheme="wear" android:host="*" />
Both the phone and the Wear app run on physical devices. My question is: what should I do to fix this issue?
Thanks
Check if both apps are signed with the same debug key. I had some problems with that, and the problem turned out to be different keys for the two apps (wear and mobile).
PS: The Android Wear API is ridiculous. I quit developing for Wear because of how terrible that API is; it does not work the way they say it should.
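If the signing keys match and onDataChanged is still not delivered, one extra check worth doing from the Wear side is to list the connected nodes, to confirm the phone is actually reachable over the Data Layer. A minimal sketch using the same play-services-wearable 9.0.0 APIs as the question (not part of the original answer); it would go somewhere after the GoogleApiClient has connected:
import com.google.android.gms.wearable.Node;
import com.google.android.gms.wearable.NodeApi;
import com.google.android.gms.wearable.Wearable;

// ...inside the activity, once mGoogleApiClient is connected:
Wearable.NodeApi.getConnectedNodes(mGoogleApiClient)
        .setResultCallback(new ResultCallback<NodeApi.GetConnectedNodesResult>() {
            @Override
            public void onResult(@NonNull NodeApi.GetConnectedNodesResult result) {
                for (Node node : result.getNodes()) {
                    // An empty list means the devices are not paired/connected for the Data Layer
                    Log.d(TAG, "connected node: " + node.getDisplayName() + " / " + node.getId());
                }
            }
        });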

How to load Spark Cassandra Connector in the shell?

I am trying to use Spark Cassandra Connector in Spark 1.1.0.
I have successfully built the jar file from the master branch on GitHub and have gotten the included demos to work. However, when I try to load the jar files into the spark-shell I can't import any of the classes from the com.datastax.spark.connector package.
I have tried using the --jars option on spark-shell and adding the directory with the jar file to Java's CLASSPATH. Neither of these options works. In fact, when I use the --jars option, the logging output shows that the Datastax jar is being loaded, but I still cannot import anything from com.datastax.
I have been able to load the Tuplejump Calliope Cassandra connector into the spark-shell using --jars, so I know that's working. It's just the Datastax connector which is failing for me.
I got it. Below is what I did:
$ git clone https://github.com/datastax/spark-cassandra-connector.git
$ cd spark-cassandra-connector
$ sbt/sbt assembly
$ $SPARK_HOME/bin/spark-shell --jars ~/spark-cassandra-connector/spark-cassandra-connector/target/scala-2.10/connector-assembly-1.2.0-SNAPSHOT.jar
At the Scala prompt:
scala> sc.stop
scala> import com.datastax.spark.connector._
scala> import org.apache.spark.SparkContext
scala> import org.apache.spark.SparkContext._
scala> import org.apache.spark.SparkConf
scala> val conf = new SparkConf(true).set("spark.cassandra.connection.host", "my cassandra host")
scala> val sc = new SparkContext("spark://spark host:7077", "test", conf)
Edit: Things are a bit easier now
For in-depth instructions check out the project website
https://github.com/datastax/spark-cassandra-connector/blob/master/doc/13_spark_shell.md
Or feel free to use Spark-Packages to load the Library (Not all versions published)
http://spark-packages.org/package/datastax/spark-cassandra-connector
> $SPARK_HOME/bin/spark-shell --packages com.datastax.spark:spark-cassandra-connector_2.10:1.4.0-M3-s_2.10
The following assumes you are running with OSS Apache C*
You'll want to start the shell with --driver-class-path set to include all your connector libs.
I'll quote a blog post from the illustrious Amy Tobey:
The easiest way I've found is to set the classpath with --driver-class-path and then restart the context in the REPL with the necessary classes imported to make sc.cassandraTable() visible.
The newly loaded methods will not show up in tab completion. I don’t know why.
/opt/spark/bin/spark-shell --driver-class-path $(echo /path/to/connector/*.jar |sed 's/ /:/g')
It will print a bunch of log information and then present the scala> prompt.
scala> sc.stop
Now that the context is stopped, it’s time to import the connector.
scala> import com.datastax.spark.connector._
scala> val conf = new SparkConf()
scala> conf.set("cassandra.connection.host", "node1.pc.datastax.com")
scala> val sc = new SparkContext("local[2]", "Cassandra Connector Test", conf)
scala> val table = sc.cassandraTable("keyspace", "table")
scala> table.count
If you are running with DSE < 4.5.1
There is a slight issue with the DSE Classloader and previous package naming conventions that will prevent you from finding the new spark-connector libraries. You should be able to get around this by removing the line specifying the DSE Class loader in the scripts starting spark-shell.
If you want to avoid stopping/starting the context in the shell, you can also add it to your Spark properties in:
{spark_install}/conf/spark-defaults.conf
spark.cassandra.connection.host=192.168.10.10
To access Cassandra from the spark-shell, I've built an assembly out of the cassandra-spark-driver with all dependencies (an "uberjar"). I provide it to the spark-shell using the --jars option like this:
spark-shell --jars spark-cassandra-assembly-1.0.0-SNAPSHOT-jar-with-dependencies.jar
I was facing the same issue described here, and this method is both simple and convenient (compared to loading the long list of dependencies).
I've created a gist with the POM file that you can download. To create the uberjar with the POM, run:
mvn package
If you're using sbt, look into the sbt-assembly plugin.
The following steps describe how to set up a server with both a Spark node and a Cassandra node.
Setting Up Open Source Spark
This assumes you already have Cassandra setup.
Step 1: Download and setup Spark
Go to http://spark.apache.org/downloads.html.
a) To make things simple, we will use one of the prebuilt Spark packages.
Choose Spark version 2.0.0 and Pre-built for Hadoop 2.7 then Direct Download. This will download an archive with the built binaries for Spark.
b) Extract this to a directory of your choosing. I will put mine in ~/apps/spark-1.2
c) Test Spark is working by opening the Shell
Step 2: Test that Spark Works
a) cd into the Spark directory
Run "./bin/spark-shell". This will open up the Spark interactive shell program
b) If everything worked it should display this prompt: "scala>"
Run a simple calculation:
sc.parallelize(1 to 50).sum()
which should output 1275.0.
c) Congratulations Spark is working!
Exit the Spark shell with the command "exit"
The Spark Cassandra Connector
To connect Spark to a Cassandra cluster, the Cassandra Connector will need to be added to the Spark project. DataStax provides their own Cassandra Connector on GitHub and we will use that.
Clone the Spark Cassandra Connector repository:
https://github.com/datastax/spark-cassandra-connector
cd into "spark-cassandra-connector" and build the Spark Cassandra Connector
by executing the command
./sbt/sbt -Dscala-2.11=true assembly
This should output compiled jar files to the directory named "target". There will be two jar files, one for Scala and one for Java.
The jar we are interested in is: "spark-cassandra-connector-assembly-1.1.1-SNAPSHOT.jar" the one for Scala.
Move the jar file into an easy to find directory: I put mine into ~/apps/spark-1.2/jars
To load the connector into the Spark Shell:
start the shell with this command:
../bin/spark-shell --jars ~/apps/spark-1.2/jars/spark-cassandra-connector-assembly-1.1.1-SNAPSHOT.jar
Connect the Spark Context to the Cassandra cluster and stop the default context:
sc.stop
Import the necessary classes:
import com.datastax.spark.connector._, org.apache.spark.SparkContext, org.apache.spark.SparkContext._, org.apache.spark.SparkConf
Make a new SparkConf with the Cassandra connection details:
val conf = new SparkConf(true).set("spark.cassandra.connection.host",
"localhost")
Create a new Spark Context:
val sc = new SparkContext(conf)
You now have a new SparkContext which is connected to your Cassandra cluster.
Spark Cassandra Connector complete code in Java (useful on Windows 7/8/10).
import com.datastax.driver.core.Session;
import com.datastax.spark.connector.cql.CassandraConnector;
import com.google.common.base.Optional;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import scala.Tuple2;
import spark_conn.Spark_connection;
import java.io.Serializable;
import java.math.BigDecimal;
import java.text.MessageFormat;
import java.util.*;
import static com.datastax.spark.connector.CassandraJavaUtil.*;
public class App implements Serializable
{
private transient SparkConf conf;
private App(SparkConf conf) {
this.conf = conf;
}
private void run() {
JavaSparkContext sc = new JavaSparkContext(conf);
generateData(sc);
compute(sc);
showResults(sc);
sc.stop();
}
private void generateData(JavaSparkContext sc) {
CassandraConnector connector = CassandraConnector.apply(sc.getConf());
// Prepare the schema
try{
Session session=connector.openSession();
session.execute("DROP KEYSPACE IF EXISTS java_api");
session.execute("CREATE KEYSPACE java_api WITH
replication = {'class': 'SimpleStrategy', 'replication_factor': 1}");
session.execute("CREATE TABLE java_api.products
(id INT PRIMARY KEY, name TEXT, parents LIST<INT>)");
session.execute("CREATE TABLE java_api.sales
(id UUID PRIMARY KEY, product INT, price DECIMAL)");
session.execute("CREATE TABLE java_api.summaries
(product INT PRIMARY KEY, summary DECIMAL)");
}catch(Exception e){System.out.println(e);}
// Prepare the products hierarchy
List<Product> products = Arrays.asList(
new Product(0, "All products", Collections.<Integer>emptyList()),
new Product(1, "Product A", Arrays.asList(0)),
new Product(4, "Product A1", Arrays.asList(0, 1)),
new Product(5, "Product A2", Arrays.asList(0, 1)),
new Product(2, "Product B", Arrays.asList(0)),
new Product(6, "Product B1", Arrays.asList(0, 2)),
new Product(7, "Product B2", Arrays.asList(0, 2)),
new Product(3, "Product C", Arrays.asList(0)),
new Product(8, "Product C1", Arrays.asList(0, 3)),
new Product(9, "Product C2", Arrays.asList(0, 3))
);
JavaRDD<Product> productsRDD = sc.parallelize(products);
javaFunctions(productsRDD, Product.class).
saveToCassandra("java_api", "products");
JavaRDD<Sale> salesRDD = productsRDD.filter
(new Function<Product, Boolean>() {
@Override
public Boolean call(Product product) throws Exception {
return product.getParents().size() == 2;
}
}).flatMap(new FlatMapFunction<Product, Sale>() {
@Override
public Iterable<Sale> call(Product product) throws Exception {
Random random = new Random();
List<Sale> sales = new ArrayList<>(1000);
for (int i = 0; i < 1000; i++) {
sales.add(new Sale(UUID.randomUUID(),
product.getId(), BigDecimal.valueOf(random.nextDouble())));
}
return sales;
}
});
javaFunctions(salesRDD, Sale.class).saveToCassandra
("java_api", "sales");
}
private void compute(JavaSparkContext sc) {
JavaPairRDD<Integer, Product> productsRDD = javaFunctions(sc)
.cassandraTable("java_api", "products", Product.class)
.keyBy(new Function<Product, Integer>() {
@Override
public Integer call(Product product) throws Exception {
return product.getId();
}
});
JavaPairRDD<Integer, Sale> salesRDD = javaFunctions(sc)
.cassandraTable("java_api", "sales", Sale.class)
.keyBy(new Function<Sale, Integer>() {
@Override
public Integer call(Sale sale) throws Exception {
return sale.getProduct();
}
});
JavaPairRDD<Integer, Tuple2<Sale, Product>> joinedRDD = salesRDD.join(productsRDD);
JavaPairRDD<Integer, BigDecimal> allSalesRDD = joinedRDD.flatMapToPair(new PairFlatMapFunction<Tuple2<Integer, Tuple2<Sale, Product>>, Integer, BigDecimal>() {
@Override
public Iterable<Tuple2<Integer, BigDecimal>> call(Tuple2<Integer, Tuple2<Sale, Product>> input) throws Exception {
Tuple2<Sale, Product> saleWithProduct = input._2();
List<Tuple2<Integer, BigDecimal>> allSales = new ArrayList<>(saleWithProduct._2().getParents().size() + 1);
allSales.add(new Tuple2<>(saleWithProduct._1().getProduct(), saleWithProduct._1().getPrice()));
for (Integer parentProduct : saleWithProduct._2().getParents()) {
allSales.add(new Tuple2<>(parentProduct, saleWithProduct._1().getPrice()));
}
return allSales;
}
});
JavaRDD<Summary> summariesRDD = allSalesRDD.reduceByKey(new Function2<BigDecimal, BigDecimal, BigDecimal>() {
@Override
public BigDecimal call(BigDecimal v1, BigDecimal v2) throws Exception {
return v1.add(v2);
}
}).map(new Function<Tuple2<Integer, BigDecimal>, Summary>() {
@Override
public Summary call(Tuple2<Integer, BigDecimal> input) throws Exception {
return new Summary(input._1(), input._2());
}
});
javaFunctions(summariesRDD, Summary.class).saveToCassandra("java_api", "summaries");
}
private void showResults(JavaSparkContext sc) {
JavaPairRDD<Integer, Summary> summariesRdd = javaFunctions(sc)
.cassandraTable("java_api", "summaries", Summary.class)
.keyBy(new Function<Summary, Integer>() {
@Override
public Integer call(Summary summary) throws Exception {
return summary.getProduct();
}
});
JavaPairRDD<Integer, Product> productsRdd = javaFunctions(sc)
.cassandraTable("java_api", "products", Product.class)
.keyBy(new Function<Product, Integer>() {
@Override
public Integer call(Product product) throws Exception {
return product.getId();
}
});
List<Tuple2<Product, Optional<Summary>>> results = productsRdd.leftOuterJoin(summariesRdd).values().toArray();
for (Tuple2<Product, Optional<Summary>> result : results) {
System.out.println(result);
}
}
public static void main(String[] args) {
// if (args.length != 2) {
// System.err.println("Syntax: com.datastax.spark.demo.App <Spark Master URL> <Cassandra contact point>");
// System.exit(1);
// }
// SparkConf conf = new SparkConf(true)
// .set("spark.cassandra.connection.host", "127.0.1.1")
// .set("spark.cassandra.auth.username", "cassandra")
// .set("spark.cassandra.auth.password", "cassandra");
//SparkContext sc = new SparkContext("spark://127.0.1.1:9045", "test", conf);
//return ;
/* try{
SparkConf conf = new SparkConf(true);
conf.setAppName("Spark-Cassandra Integration");
conf.setMaster("yarn-cluster");
conf.set("spark.cassandra.connection.host", "192.168.1.200");
conf.set("spark.cassandra.connection.rpc.port", "9042");
conf.set("spark.cassandra.connection.timeout_ms", "40000");
conf.set("spark.cassandra.read.timeout_ms", "200000");
System.out.println("Hi.......Main Method1111...");
conf.set("spark.cassandra.auth.username","cassandra");
conf.set("spark.cassandra.auth.password","cassandra");
System.out.println("Connected Successful...!\n");
App app = new App(conf);
app.run();
}catch(Exception e){System.out.println(e);}*/
SparkConf conf = new SparkConf();
conf.setAppName("Java API demo");
// conf.setMaster(args[0]);
// conf.set("spark.cassandra.connection.host", args[1]);
conf.setMaster("spark://192.168.1.117:7077");
conf.set("spark.cassandra.connection.host", "192.168.1.200");
conf.set("spark.cassandra.connection.port", "9042");
conf.set("spark.ui.port","4040");
conf.set("spark.cassandra.auth.username","cassandra");
conf.set("spark.cassandra.auth.password","cassandra");
App app = new App(conf);
app.run();
}
public static class Product implements Serializable {
private Integer id;
private String name;
private List<Integer> parents;
public Product() { }
public Product(Integer id, String name, List<Integer> parents) {
this.id = id;
this.name = name;
this.parents = parents;
}
public Integer getId() { return id; }
public void setId(Integer id) { this.id = id; }
public String getName() { return name; }
public void setName(String name) { this.name = name; }
public List<Integer> getParents() { return parents; }
public void setParents(List<Integer> parents) { this.parents = parents; }
@Override
public String toString() {
return MessageFormat.format("Product'{'id={0}, name=''{1}'', parents={2}'}'", id, name, parents);
}
}
public static class Sale implements Serializable {
private UUID id;
private Integer product;
private BigDecimal price;
public Sale() { }
public Sale(UUID id, Integer product, BigDecimal price) {
this.id = id;
this.product = product;
this.price = price;
}
public UUID getId() { return id; }
public void setId(UUID id) { this.id = id; }
public Integer getProduct() { return product; }
public void setProduct(Integer product) { this.product = product; }
public BigDecimal getPrice() { return price; }
public void setPrice(BigDecimal price) { this.price = price; }
@Override
public String toString() {
return MessageFormat.format("Sale'{'id={0}, product={1}, price={2}'}'", id, product, price);
}
}
public static class Summary implements Serializable {
private Integer product;
private BigDecimal summary;
public Summary() { }
public Summary(Integer product, BigDecimal summary) {
this.product = product;
this.summary = summary;
}
public Integer getProduct() { return product; }
public void setProduct(Integer product) { this.product = product; }
public BigDecimal getSummary() { return summary; }
public void setSummary(BigDecimal summary) { this.summary = summary; }
@Override
public String toString() {
return MessageFormat.format("Summary'{'product={0}, summary={1}'}'", product, summary);
}
}
}
