Adding Cassandra as a sink in Flink fails with "All host(s) tried for query failed" - cassandra

I was following the example at https://ci.apache.org/projects/flink/flink-docs-release-1.4/dev/connectors/cassandra.html to connect Cassandra as a sink in Flink.
My code is shown below:
public class writeToCassandra {

    private static final String CREATE_KEYSPACE_QUERY = "CREATE KEYSPACE test WITH replication= {'class':'SimpleStrategy', 'replication_factor':1};";
    private static final String createTable = "CREATE TABLE test.cassandraData(id varchar, heart_rate varchar, PRIMARY KEY(id));";
    private final static Collection<String> collection = new ArrayList<>(50);

    static {
        for (int i = 1; i <= 50; ++i) {
            collection.add("element " + i);
        }
    }

    public static void main(String[] args) throws Exception {

        //setting the env variable to local
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.createLocalEnvironment(1);

        DataStream<Tuple2<String, String>> dataStream = environment
                .fromCollection(collection)
                .map(new MapFunction<String, Tuple2<String, String>>() {

                    final String mapped = " mapped ";
                    String[] splitted;

                    @Override
                    public Tuple2<String, String> map(String s) throws Exception {
                        splitted = s.split("\\s+");
                        return Tuple2.of(
                                UUID.randomUUID().toString(),
                                splitted[0] + mapped + splitted[1]
                        );
                    }
                });

        CassandraSink.addSink(dataStream)
                .setQuery("INSERT INTO test.cassandraData(id,heart_rate) values (?,?);")
                .setHost("127.0.0.1")
                .build();

        environment.execute();
    } //main
} //writeToCassandra
I am getting the following error
Caused by: com.datastax.driver.core.exceptions.NoHostAvailableException: All host(s) tried for query failed (tried: /127.0.0.1:9042 (com.datastax.driver.core.exceptions.TransportException: [/127.0.0.1] Cannot connect))
at com.datastax.driver.core.ControlConnection.reconnectInternal(ControlConnection.java:231)

Not sure if this is always required, but the way that I set up my CassandraSink is like this:
CassandraSink
    .addSink(dataStream)
    .setClusterBuilder(new ClusterBuilder() {
        @Override
        protected Cluster buildCluster(Cluster.Builder builder) {
            return Cluster.builder()
                    .addContactPoints(myListOfCassandraUrlsString.split(","))
                    .withPort(portNumber)
                    .build();
        }
    })
    .build();
I have annotated POJOs that are returned by the dataStream so I don't need the query, but you would just include ".setQuery(...)" after the ".addSink(...)" line.
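For the Tuple2 stream in the original question, a minimal sketch combining setQuery with a ClusterBuilder (the contact point and port below are placeholder values, not from the original post) would look something like this:

CassandraSink.addSink(dataStream)
        .setQuery("INSERT INTO test.cassandraData(id, heart_rate) values (?, ?);")
        .setClusterBuilder(new ClusterBuilder() {
            @Override
            protected Cluster buildCluster(Cluster.Builder builder) {
                return builder
                        .addContactPoint("127.0.0.1")   // placeholder: use a C* host the Task Manager can reach
                        .withPort(9042)
                        .build();
            }
        })
        .build();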

The exception simply indicates that the example program cannot reach the C* database.
The flink-cassandra-connector offers a streaming API to connect to a designated C* database, so you need to have a C* instance running.
Each streaming job is serialized and shipped to the node that the Task Manager runs on. In your example, you assume C* is running on the same node as the TM. An alternative is to change the C* address from 127.0.0.1 to an address that is reachable from that node.
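As a quick sanity check (a standalone sketch, independent of Flink; the address and port are assumptions), you can try to connect with the DataStax driver directly. If this fails with the same NoHostAvailableException, the problem is the C* instance or its address, not the sink:

Cluster cluster = Cluster.builder()
        .addContactPoint("127.0.0.1")   // replace with the address the TM node can actually reach
        .withPort(9042)
        .build();
Session session = cluster.connect();
System.out.println("Connected to " + cluster.getMetadata().getClusterName());
cluster.close();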

Related

Mockito tests work when run independently but fail when run together

I'm mocking the JDBC Connection, ResultSet and PreparedStatement.
When I run the tests one by one they pass, but when I run all the tests in the class, the method whenSelectB fails:
java.lang.AssertionError: There are 2 rows
Expected: <2>
but: was <0>
at org.hamcrest.MatcherAssert.assertThat(MatcherAssert.java:20)
at net.sf.jkniv.whinstone.jdbc.dml.MockitoSample.whenSelectB(MockitoSample.java:155)
Is there some trick to make this work?
public class MockitoSample
{
private DataSource dataSource;
private Connection connection;
private PreparedStatement stmt;
private ResultSet rs;
private ResultSetMetaData rsMetadata;
private DatabaseMetaData dbMetadata;
private RepositoryConfig repositoryConfig;
private SqlContext sqlContext;
private Selectable sql;
@Before
public void setUp() throws SQLException
{
this.connection = mock(Connection.class);
this.dataSource = mock(DataSource.class);
this.stmt = mock(PreparedStatement.class);
this.rs = mock(ResultSet.class);
this.rsMetadata = mock(ResultSetMetaData.class);
this.dbMetadata = mock(DatabaseMetaData.class);
this.repositoryConfig = mock(RepositoryConfig.class);
this.sqlContext = mock(SqlContext.class);
this.sql = mock(Selectable.class);
given(this.dataSource.getConnection()).willReturn(this.connection);
given(this.connection.prepareStatement(anyString(), anyInt(), anyInt())).willReturn(this.stmt);
given(this.stmt.executeQuery()).willReturn(this.rs);
given(this.stmt.executeQuery(anyString())).willReturn(this.rs);
given(this.dbMetadata.getJDBCMajorVersion()).willReturn(1);
given(this.dbMetadata.getJDBCMinorVersion()).willReturn(0);
given(this.dbMetadata.getDriverName()).willReturn("MOCKITO");
given(this.dbMetadata.getDriverVersion()).willReturn("1");
given(this.rs.getMetaData()).willReturn(this.rsMetadata);
given(this.repositoryConfig.getName()).willReturn("Mockito");
given(this.repositoryConfig.lookup()).willReturn(this.dataSource);
given(this.repositoryConfig.getJndiDataSource()).willReturn("jdbc/Mockito");
given(this.repositoryConfig.getProperty(RepositoryProperty.JDBC_ADAPTER_FACTORY.key()))
.willReturn(DataSourceAdapter.class.getName());
given(this.repositoryConfig.getTransactionType()).willReturn(TransactionType.LOCAL);
given(this.repositoryConfig.getQueryNameStrategy()).willReturn("net.sf.jkniv.sqlegance.HashQueryNameStrategy");
given(this.sql.getValidateType()).willReturn(ValidateType.NONE);
given(this.sql.getSql(any())).willReturn("select * from dual");
given(this.sql.getSqlDialect()).willReturn(new AnsiDialect());
given(this.sql.getParamParser()).willReturn(ParamParserFactory.getInstance(ParamMarkType.COLON));
given(this.sql.getStats()).willReturn(NoSqlStats.getInstance());
given(this.sql.getSqlType()).willReturn(SqlType.SELECT);
given(this.sql.asSelectable()).willReturn((Selectable) this.sql);
given(this.sqlContext.getRepositoryConfig()).willReturn(this.repositoryConfig);
given(this.sqlContext.getQuery(anyString())).willReturn(this.sql);
}
@Test
public void whenSelectA() throws SQLException
{
Repository repository = RepositoryService.getInstance().lookup(RepositoryType.JDBC).newInstance(sqlContext);
given(this.rsMetadata.getColumnCount()).willReturn(2);
given(this.rsMetadata.getColumnLabel(1)).willReturn("id");
given(this.rsMetadata.getColumnName(1)).willReturn("id");
given(this.rsMetadata.getColumnLabel(2)).willReturn("name");
given(this.rsMetadata.getColumnName(2)).willReturn("name");
given(this.rs.getMetaData()).willReturn(this.rsMetadata);
given(this.sql.getReturnType()).willReturn(FlatBook.class.getName());
doReturn(FlatBook.class).when(this.sql).getReturnTypeAsClass();
given(rs.next()).willReturn(true, true, false);
given(rs.getObject(1)).willReturn(1001L, 1002L);
given(rs.getObject(2)).willReturn("Beyond Good and Evil", "The Rebel: An Essay on Man in Revolt");
Queryable q = QueryFactory.of("2 FlatBook");
List<FlatBook> books = repository.list(q);
assertThat("There are 2 rows", books.size(), equalTo(2));
assertThat("Row is a FlatBook object", books.get(0), instanceOf(FlatBook.class));
for (FlatBook b : books)
{
assertThat(b.getId(), notNullValue());
assertThat(b.getName(), notNullValue());
}
}
@Test
public void whenSelectB() throws SQLException
{
Repository repository = RepositoryService.getInstance().lookup(RepositoryType.JDBC).newInstance(sqlContext);
given(rsMetadata.getColumnCount()).willReturn(2);
given(this.rsMetadata.getColumnLabel(1)).willReturn("id");
given(this.rsMetadata.getColumnName(1)).willReturn("id");
given(this.rsMetadata.getColumnLabel(2)).willReturn("name");
given(this.rsMetadata.getColumnName(2)).willReturn("name");
given(this.rs.getMetaData()).willReturn(this.rsMetadata);
given(this.sql.getReturnType()).willReturn(FlatAuthor.class.getName());
doReturn(FlatAuthor.class).when(this.sql).getReturnTypeAsClass();
given(rs.next()).willReturn(true, true, false);
given(rs.getObject(1)).willReturn(1L, 2L);
given(rs.getObject(2)).willReturn("Author 1", "Author 2");
Queryable q = QueryFactory.of("2 FlatAuthor");
List<FlatAuthor> books = repository.list(q);
assertThat("There are 2 rows", books.size(), equalTo(2));
assertThat("Row is a FlatAuthor object", books.get(0), instanceOf(FlatAuthor.class));
for (FlatAuthor a : books)
{
assertThat(a.getId(), notNullValue());
assertThat(a.getName(), notNullValue());
}
verify(rs).close();
verify(stmt).close();
verify(connection, atLeast(1)).close();
}
The error happens inside the Repository instance: it calls the rs.next() (ResultSet) method, but it returns false when it should return true twice.
My Repository instance holds the DataSource in a ThreadLocal, so when whenSelectB tries to get the new mock it retrieves the old DataSource, which returns the old Connection, which returns the old Statement, which returns the old ResultSet. In other words, I have a dirty context between tests. The Repository must hold the connection only while a transaction is active.
Thanks @Joakim-Danielson and @Antoniossss
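For illustration, here is a minimal sketch of the dirty context described above (the class and field names are hypothetical, not from jkniv-whinstone): a ThreadLocal cache populated by the first test silently survives into the second one, so the fresh mocks are never used.

import javax.sql.DataSource;

public final class ConnectionHolder {

    private static final ThreadLocal<DataSource> CACHE = new ThreadLocal<>();

    public static DataSource get(DataSource candidate) {
        if (CACHE.get() == null) {
            CACHE.set(candidate);   // the first test's mock wins
        }
        return CACHE.get();         // later tests keep getting the stale one
    }

    // Clearing the cache between tests (e.g. from an @After method)
    // gives each test a clean context.
    public static void clear() {
        CACHE.remove();
    }
}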

CassandraOperations queryForObject() method always returning PrimaryKey instead of Entity Object

I am trying to access Cassandra at localhost using a standalone main() method. The main() method uses the DataStax driver and the CassandraOperations class from the spring-data-cassandra module. CassandraOperations' queryForObject() method always returns the primary key instead of the entity object.
I am just using the code example given in the Spring Data Documentation.
Apache-Cassandra version : 2.1.2
Spring-Data-Cassandra version : 1.2.0.RELEASE
Entity Class :
import org.springframework.data.cassandra.mapping.PrimaryKey;
import org.springframework.data.cassandra.mapping.Table;
@Table
public class Person {
@PrimaryKey
private String id;
private String name;
private int age;
public Person(String id, String name, int age) {
this.id = id;
this.name = name;
this.age = age;
}
public String getId() {
return id;
}
public String getName() {
return name;
}
public int getAge() {
return age;
}
@Override
public String toString() {
return "Person [id=" + id + ", name=" + name + ", age=" + age + "]";
}
}
Client Code:
public class CassandraApp {
private static final Logger LOG = LoggerFactory.getLogger(CassandraApp.class);
private static Cluster cluster;
private static Session session;
public static void main(String[] args) {
try {
cluster = Cluster.builder().addContactPoints(InetAddress.getLocalHost()).build();
session = cluster.connect("person");
CassandraOperations cassandraOps = new CassandraTemplate(session);
cassandraOps.insert(new Person("1234567890", "David", 40));
Select s = QueryBuilder.select().from("person");
s.where(QueryBuilder.eq("id", "1234567890"));
System.out.println(cassandraOps.queryForObject(s, Person.class).getId());
cassandraOps.truncate("person");
} catch (UnknownHostException e) {
e.printStackTrace();
}
}
}
Runtime exception:
Exception in thread "main" java.lang.ClassCastException: java.lang.String cannot be cast to com.prashanth.ts.entity.Person
at com.prashanth.ts.client.CassandraApp.main(CassandraApp.java:40)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140)
I am new to Spring Data. Can anyone help me identify what I am doing wrong here?
I also tried removing the QueryBuilder and passing a simple query String like "select * from person" to the queryForObject() method.
Note :
The insert operation is working perfectly.
I was able to make it work using selectOne method instead of queryForObject.
LOG.info(cassandraOps.selectOne(s, Person.class).getId());
Judging by the documentation, you need to add one more method (all()) to the QueryBuilder chain:
Select s = QueryBuilder.select().all().from("person");
On a side note, you are using spring-data-cassandra but you are not utilizing its best features, which would make your code much simpler.
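For example, a minimal sketch of the repository-interface approach (the interface name is illustrative, and it assumes the usual Spring Data Cassandra configuration is in place):

import org.springframework.data.repository.CrudRepository;

public interface PersonRepository extends CrudRepository<Person, String> {
}

// usage: Person p = personRepository.findOne("1234567890");
// no QueryBuilder and no manual row mapping needed.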
I can see where the OP could have gotten confused here. Finding no code completion provided by Eclipse, I went to the documentation for the org.springframework.data.cassandra.core
CassandraOperations interface. There is no queryForObject documented, so unless someone can explain otherwise,
LOG.info(cassandraOps.queryForObject(s, Person.class).getId());
is just bad code. I tried to find the correct usage of queryForObject, but all my searches took me back to the example in question, which seems to have originated in 2008. Who knows, at one point it may have worked. The OP was trying to use CassandraOperations to extract information from "s". I liked the idea of Amit T and got something working. I used my own class, Companies, instead of Person, but the idea is the same.
try {
cluster = Cluster.builder().withoutMetrics().addContactPoints(InetAddress.getByName("192.168.1.5") ).build();
session = cluster.connect("rant");
CassandraOperations cassandraOps = new CassandraTemplate(session);
cassandraOps.insert(new Companies("name1", "user", "category", "first", "last", "city", "state", "zipcode", "phone", "email",
"addr1c", "adddr2c", "cityc", "statec", "zipcodec", "phonec", "emailc", "website", 0.0, 0.0,
0, 0, "pr", 0, "text"));
Select s = QueryBuilder.select().from("companies");
s.where(QueryBuilder.eq("company_company", "name1"));
// LOG.info(cassandraOps.queryForObject(s, Companies.class).getId());
LOG.info(cassandraOps.selectOne(s, Companies.class).CompanyInformation());
cassandraOps.truncate(Companies.class); // empties the table
} catch (UnknownHostException e) {
e.printStackTrace();
}
}
I also created my own CompanyInformation() just as an exercise.
public String CompanyInformation() {
System.out.println("Company Information " + this.company_company);
return this.company_userid;
}
The output was as expected.
19:35:24.456 [cluster1-nio-worker-2] DEBUG com.datastax.driver.core.Connection - Connection[/192.168.1.5:9042-2, inFlight=1, closed=false] Keyspace set to rant
Company Information name1 <== from CompanyInformation()
19:35:24.483 [main] INFO com.androidcommand.app.SpringRbsApplication - user <== from LOG.Info
19:35:24.485 [main] DEBUG org.springframework.data.cassandra.core.cql.CqlTemplate - Executing CQL Statement [TRUNCATE companies;]

Cassandra queries not having any effect

I'm running a bunch of queries one after the other, but it seems like some queries have no effect, even though no errors are thrown, unless I restart the session after each query. I'm using the DataStax Cassandra driver for this.
Here are the queries, which I'm storing in a file separated by ####.
DROP KEYSPACE if exists test_space;
####
CREATE KEYSPACE test_space WITH replication = {'class': 'NetworkTopologyStrategy','0':'2'};
####
CREATE TABLE test_space.fr_core (
frid text PRIMARY KEY,
attributes text,
pk1 text,
pk2 text,
pk3 text,
pk4 text,
pk5 text,
pk6 text
);
####
Here's the code for executing the above statements :
public class CassandraKeyspaceDelete {
public static void main(String[] args) {
try {
new CassandraKeyspaceDelete().run();
} catch (Exception e) {
e.printStackTrace();
}
}
public void run() {
// Get file from resources folder
ClassLoader classloader = Thread.currentThread().getContextClassLoader();
InputStream is = classloader.getResourceAsStream("create_keyspace.txt");
BufferedReader reader = new BufferedReader(new InputStreamReader(is));
StringBuilder out = new StringBuilder();
String line;
try {
while ((line = reader.readLine()) != null) {
out.append(line);
}
// read from input stream
reader.close();
} catch (Exception e) {
System.out.println("Error reading kespace creation script.");
return;
}
// System.out.println();
com.datastax.driver.core.Session readSession = CassandraManager.connect("12.10.1.122", "", "READ");
String selectStmnts[] = out.toString().split("####");// { };
for (String selectStmnt : selectStmnts) {
System.out.println("" + selectStmnt.trim());
if (selectStmnt.trim().length() > 0) {
ResultSet res = readSession.execute(selectStmnt.trim());
}
// readSession.close();
if (readSession.isClosed()) {
readSession = CassandraManager.connect("12.10.1.122", "", "READ");
}
}
System.out.println("Done");
return;
}
}
Here's the CassandraManager class :
public class CassandraManager {
static Cluster cluster;
public static Session session;
static PreparedStatement statement;
static BoundStatement boundStatement;
public static HashMap<String, Session> sessionStore = new HashMap<String, Session>();
public static Session connect(String ip, String keySpace,String type) {
PoolingOptions poolingOpts = new PoolingOptions();
poolingOpts.setCoreConnectionsPerHost(HostDistance.REMOTE, 2);
poolingOpts.setMaxConnectionsPerHost(HostDistance.REMOTE, 400);
poolingOpts.setMaxSimultaneousRequestsPerConnectionThreshold(HostDistance.REMOTE, 128);
poolingOpts.setMinSimultaneousRequestsPerConnectionThreshold(HostDistance.REMOTE, 2);
cluster = Cluster
.builder()
.withPoolingOptions( poolingOpts )
.addContactPoint(ip)
.withRetryPolicy( DowngradingConsistencyRetryPolicy.INSTANCE )
.withReconnectionPolicy( new ConstantReconnectionPolicy( 100L ) ).build();
Session s = cluster.connect();
return s;
}
}
When I run this, the first two CQL queries run without errors. When the third one runs, I get an error saying Keyspace test_space doesn't exist.
If I uncomment readSession.close(), all the queries execute, though each time the session is closed and then reopened, resulting in slow execution.
Why aren't the queries working unless the session is restarted after each query?
I created a new project and tried your code in my Cassandra sandbox. It worked with four changes:
My datacenter is defined as "DC1", so the replication factor I used for the test_space keyspace was {'class': 'NetworkTopologyStrategy','DC1':'1'};
My sandbox instance is secured, so I had to use .withCredentials in the Cluster.builder
I couldn't get getResourceAsStream to work, so I replaced that with a FileInputStream instead.
I moved readSession.close(); outside of the for loop.
Based on the fact that it worked on mine, I can't speak to the behaviour that you are seeing, so I will offer a few observations:
Is your datacenter really named 0? Your keyspace replication factor {'class': 'NetworkTopologyStrategy','0':'2'} is telling Cassandra to put two replicas in the 0 datacenter. If that really is the case, you should make your datacenter name something a little more intuitive.
None of the statements in your text file return a result set. So doing this ResultSet res = readSession.execute(selectStmnt.trim()); really doesn't get you anything.
Given the name of your keyspace, I can only assume that you are testing some things out. So how do you know that you need all of these options on your cluster builder? My advice to you, is to start simple. Don't add the other options unless you know that you need them, and more importantly, what they do.
cluster = Cluster.builder()
.addContactPoint(ip)
.build();
Session s = cluster.connect();
Make sure that your readSession.close(); is outside of your for loop.
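In other words, something like this (a sketch of your loop restructured so the session is created once and closed once, after the loop):

Session readSession = CassandraManager.connect("12.10.1.122", "", "READ");
String[] statements = out.toString().split("####");
for (String statement : statements) {
    if (statement.trim().length() > 0) {
        readSession.execute(statement.trim());   // same session for every statement
    }
}
readSession.close();   // close once, after all statements have run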
Something else that might help you, is to read through Things You Should Be Doing When Using Cassandra Drivers by DataStax's Rebecca Mills.

spark-streaming: how to output streaming data to cassandra

I am reading kafka streaming messages using spark-streaming.
Now I want to set Cassandra as my output.
I have created a table in cassandra "test_table" with columns "key:text primary key" and "value:text"
I have mapped the data successfully into JavaDStream<Tuple2<String,String>> data like this:
JavaSparkContext sc = new JavaSparkContext("local[4]", "SparkStream",conf);
JavaStreamingContext jssc = new JavaStreamingContext(sc, new Duration(3000));
JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(jssc, args[0], args[1], topicMap );
JavaDStream<Tuple2<String,String>> data = messages.map(new Function< Tuple2<String,String>, Tuple2<String,String> >()
{
public Tuple2<String,String> call(Tuple2<String, String> message)
{
return new Tuple2<String,String>( message._1(), message._2() );
}
}
);
Then I have created a List:
List<TestTable> list = new ArrayList<TestTable>();
where TestTable is my custom class having the same structure as my Cassandra table, with members "key" and "value":
class TestTable
{
String key;
String val;
public TestTable() {}
public TestTable(String k, String v)
{
key=k;
val=v;
}
public String getKey(){
return key;
}
public void setKey(String k){
key=k;
}
public String getVal(){
return val;
}
public void setVal(String v){
val=v;
}
public String toString(){
return "Key:"+key+",Val:"+val;
}
}
Please suggest how I can add the data from JavaDStream<Tuple2<String,String>> data into the List<TestTable> list.
I am doing this so that I can subsequently use
JavaRDD<TestTable> rdd = sc.parallelize(list);
javaFunctions(rdd, TestTable.class).saveToCassandra("testkeyspace", "test_table");
to save the RDD data into Cassandra.
I tried coding it this way:
messages.foreachRDD(new Function<Tuple2<String,String>, String>()
{
public List<TestTable> call(Tuple2<String,String> message)
{
String k = message._1();
String v = message._2();
TestTable tbl = new TestTable(k,v);
list.put(tbl);
}
}
);
but it seems some type mismatch is happening.
Please help.
Assuming that the intention of this program is to save the streaming data from kafka into Cassandra, it's not necessary to dump the JavaDStream<Tuple2<String,String>> data into a List<TestTable> list.
The Spark-Cassandra connector by DataStax supports this functionality directly through the Spark Streaming extensions.
It should be sufficient to use such extensions on the JavaDStream:
javaFunctions(data).writerBuilder("testkeyspace", "test_table", mapToRow(TestTable.class)).saveToCassandra();
instead of draining the data into an intermediate list.
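Put together, a minimal sketch (the mapping step and the final jssc.start() call are my additions; keyspace, table and class names are taken from the question) could look like this:

// static imports assumed:
// import static com.datastax.spark.connector.japi.CassandraStreamingJavaUtil.javaFunctions;
// import static com.datastax.spark.connector.japi.CassandraJavaUtil.mapToRow;

// map the Tuple2 stream into TestTable objects so mapToRow(TestTable.class) can be used
JavaDStream<TestTable> rows = data.map(
        new Function<Tuple2<String, String>, TestTable>() {
            public TestTable call(Tuple2<String, String> t) {
                return new TestTable(t._1(), t._2());
            }
        });

javaFunctions(rows)
        .writerBuilder("testkeyspace", "test_table", mapToRow(TestTable.class))
        .saveToCassandra();

jssc.start();   // nothing is written until the streaming context is started

// note: mapToRow maps bean properties to column names, so TestTable's
// getKey()/getVal() must line up with the table's "key"/"value" columns.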

Large data processing using Spring Batch Multi-threaded Step and RepositoryItemWriter/ RepositoryItemReader

I am trying to write a batch processing application using Spring Batch with a multi-threaded step. It is a simple application that reads data from one table and writes to another, but the data set is large, around 2 million records.
I am using RepositoryItemReader & RepositoryItemWriter for reading and writing the data, but after processing some records it fails with "Unable to acquire JDBC Connection".
//Config.Java
@Bean
public TaskExecutor taskExecutor() {
SimpleAsyncTaskExecutor taskExecutor = new SimpleAsyncTaskExecutor();
taskExecutor.setConcurrencyLimit(10);
return taskExecutor;
}
@Bean(name = "personJob")
public Job personKeeperJob() {
Step step = stepBuilderFactory.get("step-1")
.<User, Person> chunk(1000)
.reader(userReader)
.processor(jpaProcessor)
.writer(personWriter)
.taskExecutor(taskExecutor())
.throttleLimit(10)
.build();
Job job = jobBuilderFactory.get("person-job")
.incrementer(new RunIdIncrementer())
.listener(this)
.start(step)
.build();
return job;
}
//Processor.Java
@Override
public Person process(User user) throws Exception {
Optional<User> userFromDb = userRepo.findById(user.getUserId());
Person person = new Person();
if(userFromDb.isPresent()) {
person.setName(userFromDb.get().getName());
person.setUserId(userFromDb.get().getUserId());
person.setDept(userFromDb.get().getDept());
}
return person;
}
//Reader.Java
@Autowired
public UserItemReader(final UserRepository repository) {
super();
this.repository = repository;
}
@PostConstruct
protected void init() {
final Map<String, Sort.Direction> sorts = new HashMap<>();
sorts.put("userId", Direction.ASC);
this.setRepository(this.repository);
this.setSort(sorts);
this.setMethodName("findAll");
}
//Writer.Java
@PostConstruct
protected void init() {
this.setRepository(repository);
}
@Transactional
public void write(List<? extends Person> persons) throws Exception {
repository.saveAll(persons);
}
application.properties
# Datasource
spring.datasource.platform=h2
spring.datasource.url=jdbc:h2:mem:batchdb
spring.main.allow-bean-definition-overriding=true
spring.datasource.hikari.maximum-pool-size=500
Error :
org.springframework.transaction.CannotCreateTransactionException: Could not open JPA EntityManager for transaction; nested exception is org.hibernate.exception.JDBCConnectionException: Unable to acquire JDBC Connection
at org.springframework.orm.jpa.JpaTransactionManager.doBegin(JpaTransactionManager.java:447)
......................
Caused by: org.hibernate.exception.JDBCConnectionException: Unable to acquire JDBC Connection
at org.hibernate.exception.internal.SQLExceptionTypeDelegate.convert(SQLExceptionTypeDelegate.java:48)
............................
Caused by: java.sql.SQLTransientConnectionException: HikariPool-1 - Connection is not available, request timed out after 30927ms.
You are running out of connections.
Try setting the Hikari connection pool to a bigger number:
spring.datasource.hikari.maximum-pool-size=20
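If the pool still times out, another option (a sketch on my part, not from the original answer) is to define the DataSource explicitly so the pool size comfortably exceeds the number of concurrent chunk threads:

import javax.sql.DataSource;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;

@Bean
public DataSource batchDataSource() {
    HikariConfig config = new HikariConfig();
    config.setJdbcUrl("jdbc:h2:mem:batchdb");     // same in-memory H2 URL as in application.properties
    config.setMaximumPoolSize(20);                // comfortably above throttleLimit(10)
    config.setConnectionTimeout(60_000);          // give waiting threads more time than the default 30s
    return new HikariDataSource(config);
}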
