Cassandra trigger on composite blob key

I use Cassandra 2.1.9 and have a table like
create table "Keyspace1"."Standard4" ( id blob, user_name blob, data blob, primary key(id, user_name));
and I followed the post Cassandra Sample Trigger Code to read the inserted values. My trigger code looks like this:
public class InvertedIndex implements ITrigger
{
    private static final Logger logger = LoggerFactory.getLogger(InvertedIndex.class);

    public Collection augment(ByteBuffer key, ColumnFamily update)
    {
        CFMetaData cfm = update.metadata();

        ByteBuffer id_bb = key;
        String id_Value = new String(id_bb.array());

        Iterator col_itr = update.iterator();

        Cell username_col = (Cell) col_itr.next();
        ByteBuffer username_bb = CompositeType.extractComponent(username_col.name().collectionElement(), 0);
        String username_Value = new String(username_bb.array());

        Cell data_col = (Cell) col_itr.next();
        ByteBuffer data_bb = BytesType.instance.compose(data_col.value());
        String data_Value = new String(data_bb.array());

        logger.info(" id --> " + id_Value);
        logger.info(" username--> " + username_Value);
        logger.info(" data ---> " + data_Value);
        return null;
    }
}
I tried insert into "Keyspace1"."Standard4" (id, user_name, data) values (textAsBlob('id1'), textAsBlob('user_name1'), textAsBlob('data1'));
and got a runtime exception at the line ByteBuffer username_bb = CompositeType.extractComponent(username_col.name().collectionElement(), 0);
Caused by: java.lang.NullPointerException: null
at org.apache.cassandra.db.marshal.CompositeType.extractComponent(CompositeType.java:191) ~[apache-cassandra-2.1.9.jar:2.1.9]
at org.apache.cassandra.triggers.InvertedIndex.augment(InvertedIndex.java:52) ~[na:na]
at org.apache.cassandra.triggers.TriggerExecutor.executeInternal(TriggerExecutor.java:223) ~[apache-cassandra-2.1.9.jar:2.1.9]
... 17 common frames omitted
Can anybody tell me how to fix this?

You are trying to show all the inserted column names and values, right? The NullPointerException most likely comes from collectionElement(), which returns null for a regular (non-collection) column, so CompositeType.extractComponent is handed a null buffer. You can let the table metadata do the decoding instead.
Here is the code:
@Override
public Collection<Mutation> augment(ByteBuffer key, ColumnFamily update) {
    CFMetaData cfm = update.metadata();
    System.out.println("key => " + ByteBufferUtil.toInt(key));
    for (Cell cell : update) {
        if (cell.value().remaining() > 0) {
            try {
                String name = cfm.comparator.getString(cell.name());
                String value = cfm.getValueValidator(cell.name()).getString(cell.value());
                System.out.println("Column Name => " + name + " Value => " + value);
            } catch (Exception e) {
                System.out.println("Exception : " + e.getMessage());
            }
        }
    }
    return null;
}
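Applied to the question's "Standard4" table, the same metadata-driven approach removes the need for CompositeType.extractComponent entirely. A minimal sketch, untested on 2.1.9 and using only the calls shown above plus CFMetaData.getKeyValidator():

@Override
public Collection<Mutation> augment(ByteBuffer key, ColumnFamily update)
{
    CFMetaData cfm = update.metadata();
    // The key validator knows how to render the blob partition key (id).
    logger.info(" id --> " + cfm.getKeyValidator().getString(key));
    for (Cell cell : update)
    {
        // comparator.getString() renders the full cell name, which includes the
        // user_name clustering component; the value validator renders the data blob.
        logger.info(" name --> " + cfm.comparator.getString(cell.name()));
        logger.info(" value --> " + cfm.getValueValidator(cell.name()).getString(cell.value()));
    }
    return null;
}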

Related

Cannot insert into Cassandra table, getting SyntaxError

I have an assignment where I have to build a Cassandra database. I have connected Cassandra with IntelliJ; I'm writing in Java and the output is shown in the command line.
My keyspace farm_db contains a couple of tables into which I would like to insert data. I would like to insert data with two columns and a list, all in one row, in the table 'farmers'. This is a part of my database so far:
cqlsh:farm_db> use farm_db;
cqlsh:farm_db> Describe tables;
farmers foods_dairy_eggs foods_meat
foods_bread_cookies foods_fruit_vegetables
cqlsh:farm_db> select * from farmers;
farmer_id | delivery | the_farmer
-----------+----------+------------
This is what I'm trying to do:
[Picture of what I'm trying to do][1]
I need to insert the collection types 'list' and 'map' into 'farmers', but after a couple of failed attempts I tried using a HashMap and an ArrayList instead. I think this could work, but I seem to have an error in my syntax and I have no idea what the problem is:
Exception in thread "main" com.datastax.driver.core.exceptions.SyntaxError: line 1:31 mismatched input 'int' expecting ')' (INSERT INTO farmers (farmer_id [int]...)
Am I missing something or am I doing something wrong?
This is my code:
public class FarmersClass {
    public static String serverIP = "127.0.0.1";
    public static String keyspace = "";

    //Create db
    public void crateDatabase(String databaseName) {
        Cluster cluster = Cluster.builder()
                .addContactPoints(serverIP)
                .build();
        keyspace = databaseName;
        Session session = cluster.connect();
        String create_db_query = "CREATE KEYSPACE farm_db WITH replication "
                + "= {'class':'SimpleStrategy', 'replication_factor':1};";
        session.execute(create_db_query);
    }

    //Create table
    public void createFarmersTable() {
        Cluster cluster = Cluster.builder()
                .addContactPoints(serverIP)
                .build();
        Session session = cluster.connect("farm_db");
        String create_farm_table_query = "CREATE TABLE farmers(farmer_id int PRIMARY KEY, the_farmer Map <text, text>, delivery list<text>); ";
        session.execute(create_farm_table_query);
    }

    //Insert data in table 'farmer'.
    public void insertFarmers(int id, HashMap<String, String> the_farmer, ArrayList<String> delivery) {
        Cluster cluster = Cluster.builder()
                .addContactPoints(serverIP)
                .build();
        Session session = cluster.connect("farm_db");
        String insert_query = "INSERT INTO farmers (farmer_id int PRIMARY KEY, the_farmer, delivery) values (" + id + "," + the_farmer + "," + delivery + ");";
        System.out.println(insert_query);
        session.execute(insert_query);
    }
}

public static void main(String[] args) {
    FarmersClass farmersClass = new FarmersClass();
    // FarmersClass.crateDatabase("farm_db");
    // FarmersClass.createFarmersTable();

    //Collection type map
    HashMap<String, String> the_farmer = new HashMap<>();
    the_farmer.put("Name", "Ana Petersen ");
    the_farmer.put("Farmhouse", "The great farmhouse");
    the_farmer.put("Foods", "Fruits & Vegetables");

    //Collection type list
    ArrayList<String> delivery = new ArrayList<String>();
    String delivery_1 = "Village 1";
    String delivery_2 = "Village 2";
    delivery.add(delivery_1);
    delivery.add(delivery_2);

    FarmersClass.insertFarmers(1, the_farmer, delivery);
}
The problem is the syntax of your CQL INSERT query:
String insert_query = \
"INSERT INTO farmers (farmer_id int PRIMARY KEY, the_farmer, delivery) \
values (" + id + "," + the_farmer + "," + delivery + ");";
You've incorrectly added int PRIMARY KEY in the list of columns.
The correct format is:
INSERT INTO table_name (pk, col2, col3) VALUES ( ... )
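Note also that even with the column list fixed, concatenating a Java HashMap or ArrayList into the query string will not produce valid CQL collection literals (HashMap.toString() renders {Name=..., ...} rather than {'Name': '...'}). A safer sketch, sticking with the driver API you are already using, binds the collections through a prepared statement:

public void insertFarmers(int id, HashMap<String, String> the_farmer, ArrayList<String> delivery) {
    Cluster cluster = Cluster.builder()
            .addContactPoints(serverIP)
            .build();
    Session session = cluster.connect("farm_db");
    // The driver serializes the Java map and list into the CQL map<text, text> and list<text> columns.
    PreparedStatement prepared = session.prepare(
            "INSERT INTO farmers (farmer_id, the_farmer, delivery) VALUES (?, ?, ?)");
    session.execute(prepared.bind(id, the_farmer, delivery));
}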
For details and examples, see CQL INSERT. Cheers!

Cassandra 3.6.0 bug: StackOverflowError thrown by HashedWheelTimer on Connection.release

When running some inserts and updates on a Cassandra database via the Java driver version 3.6.0, I get the following StackOverflowError. I am showing just the top of the trace here; the last 10 frames repeat endlessly.
There is no mention of any line of my code, so I don't know which specific operation triggered it.
2018-09-03 00:19:58,294 WARN {cluster1-timeouter-0} [c.d.s.n.u.HashedWheelTimer] : An exception was thrown by TimerTask.
java.lang.StackOverflowError: null
at java.util.regex.Pattern$Branch.match(Pattern.java:4604)
at java.util.regex.Pattern$BranchConn.match(Pattern.java:4568)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4717)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4279)
at java.util.regex.Pattern$Curly.match(Pattern.java:4234)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4658)
at java.util.regex.Pattern$Branch.match(Pattern.java:4604)
at java.util.regex.Pattern$Branch.match(Pattern.java:4602)
at java.util.regex.Pattern$BmpCharProperty.match(Pattern.java:3798)
at java.util.regex.Pattern$Start.match(Pattern.java:3461)
at java.util.regex.Matcher.search(Matcher.java:1248)
at java.util.regex.Matcher.find(Matcher.java:664)
at java.util.Formatter.parse(Formatter.java:2549)
at java.util.Formatter.format(Formatter.java:2501)
at java.util.Formatter.format(Formatter.java:2455)
at java.lang.String.format(String.java:2940)
at com.datastax.driver.core.exceptions.BusyConnectionException.<init>(BusyConnectionException.java:29)
at com.datastax.driver.core.Connection$ResponseHandler.<init>(Connection.java:1538)
at com.datastax.driver.core.Connection.write(Connection.java:711)
at com.datastax.driver.core.RequestHandler$SpeculativeExecution.write(RequestHandler.java:451)
at com.datastax.driver.core.RequestHandler$SpeculativeExecution.access$1600(RequestHandler.java:307)
at com.datastax.driver.core.RequestHandler$SpeculativeExecution$1.onSuccess(RequestHandler.java:397)
at com.datastax.driver.core.RequestHandler$SpeculativeExecution$1.onSuccess(RequestHandler.java:384)
at com.google.common.util.concurrent.Futures$CallbackListener.run(Futures.java:1355)
at com.google.common.util.concurrent.MoreExecutors$DirectExecutor.execute(MoreExecutors.java:398)
at com.google.common.util.concurrent.AbstractFuture.executeListener(AbstractFuture.java:1024)
at com.google.common.util.concurrent.AbstractFuture.complete(AbstractFuture.java:866)
at com.google.common.util.concurrent.AbstractFuture.set(AbstractFuture.java:689)
at com.google.common.util.concurrent.SettableFuture.set(SettableFuture.java:48)
at com.datastax.driver.core.HostConnectionPool$PendingBorrow.set(HostConnectionPool.java:755)
at com.datastax.driver.core.HostConnectionPool.dequeue(HostConnectionPool.java:407)
at com.datastax.driver.core.HostConnectionPool.returnConnection(HostConnectionPool.java:366)
at com.datastax.driver.core.Connection.release(Connection.java:810)
at com.datastax.driver.core.RequestHandler$SpeculativeExecution$1.onSuccess(RequestHandler.java:407)
at com.datastax.driver.core.RequestHandler$SpeculativeExecution$1.onSuccess(RequestHandler.java:384)
at com.google.common.util.concurrent.Futures$CallbackListener.run(Futures.java:1355)
at com.google.common.util.concurrent.MoreExecutors$DirectExecutor.execute(MoreExecutors.java:398)
at com.google.common.util.concurrent.AbstractFuture.executeListener(AbstractFuture.java:1024)
at com.google.common.util.concurrent.AbstractFuture.complete(AbstractFuture.java:866)
at com.google.common.util.concurrent.AbstractFuture.set(AbstractFuture.java:689)
at com.google.common.util.concurrent.SettableFuture.set(SettableFuture.java:48)
at com.datastax.driver.core.HostConnectionPool$PendingBorrow.set(HostConnectionPool.java:755)
at com.datastax.driver.core.HostConnectionPool.dequeue(HostConnectionPool.java:407)
at com.datastax.driver.core.HostConnectionPool.returnConnection(HostConnectionPool.java:366)
at com.datastax.driver.core.Connection.release(Connection.java:810)
I do not use any UDTs.
Here are the keyspace and table creation code:
session.execute(session.prepare(
"CREATE KEYSPACE IF NOT EXISTS myspace WITH REPLICATION = {'class': 'NetworkTopologyStrategy', 'dc1': '3'} AND DURABLE_WRITES = true;").bind());
session.execute(session.prepare("CREATE TABLE IF NOT EXISTS myspace.tasks (myId TEXT PRIMARY KEY, pointer BIGINT)").bind());
session.execute(session.prepare("CREATE TABLE IF NOT EXISTS myspace.counters (key TEXT PRIMARY KEY, cnt COUNTER)").bind());
This is the prepared statement that I use:
PreparedStatement quickSearchTasksInsert = session.prepare("INSERT INTO myspace.tasks (myId, pointer) VALUES (:oid,:loc)");
The code that reproduces the issue does the following:
1. Runs the writeTask() method about 10,000 times with different values, such as the following example rows selected from a SQL database:
05043FA57ECEAABC3E096B281A55356B, 1678192046
5DE661E77D19C157C31EB7309494EA89, 3959390363
85D6211384E6E190299093E501169625, 3146521416
0327817F8BD59039069C13D581E8EBBE, 2907072247
D913FA0F306D6516D8DF87EB0CB1EE9B, 2507147331
DC946B409CD1E59F560A0ED75559CB16, 2810148057
2A24B1DC71D395938BA77C6CA822A5F7, 1182061065
F70705303980DA40D125CC3497174A5D, 1735385855
2. Runs the setLocNum() method with some long value.
3. Loops back to (1) above.
public void writeTask(String myId, long pointer) {
    try {
        session.executeAsync(quickSearchTasksInsert.bind().setString("oid", myId).setLong("loc", pointer));
        incrementCounter("tasks_count", 1);
    } catch (OperationTimedOutException | NoHostAvailableException e) {
        // some error handling omitted from post
    }
}

public synchronized void setLocNum(long num) {
    setCounter("loc_num", num);
}

public void incrementCounter(String key, long incVal) {
    try {
        session.executeAsync(
                "UPDATE myspace.counters SET cnt = cnt + " + incVal + " WHERE key = '" + key.toLowerCase() + "'");
    } catch (OperationTimedOutException | NoHostAvailableException e) {
        // some error handling omitted from post
    }
}

public void decrementCounter(String key, long decVal) {
    try {
        session.executeAsync(
                "UPDATE myspace.counters SET cnt = cnt - " + decVal + " WHERE key = '" + key.toLowerCase() + "'");
    } catch (OperationTimedOutException | NoHostAvailableException e) {
        // some error handling omitted from post
    }
}

public synchronized void setCounter(String key, long newVal) {
    try {
        Long prevCounterValue = countersCache.get(key);
        long oldCounter = prevCounterValue == null ? readCounter(key) : prevCounterValue.longValue();
        decrementCounter(key, oldCounter);
        incrementCounter(key, newVal);
        countersCache.put(key, newVal);
    } catch (OperationTimedOutException | NoHostAvailableException e) {
        // some error handling omitted from post
    }
}

SchemaCrawler can't print out table names

The code below only prints out the database name. Why?
public static void main(final String[] args) throws Exception
{
    // Create a database connection
    final DataSource dataSource = new DatabaseConnectionOptions("jdbc:mysql://localhost:3306/target_db");
    final Connection connection = dataSource.getConnection("root", "password");

    // Create the options
    final SchemaCrawlerOptions options = new SchemaCrawlerOptions();
    options.setSchemaInfoLevel(SchemaInfoLevelBuilder.standard());
    options.setTableTypes(Lists.newArrayList("BASE TABLE", "TABLE", "VIEW"));
    options.setRoutineInclusionRule(new ExcludeAll());
    options.setSchemaInclusionRule(new RegularExpressionInclusionRule("target_db"));
    options.setTableNamePattern("*");

    // Get the schema definition
    final Catalog catalog = SchemaCrawlerUtility.getCatalog(connection, options);
    for (final Schema schema : catalog.getSchemas())
    {
        System.out.print("c--> " + schema.getCatalogName() + "\n");
        for (final Table table : catalog.getTables(schema))
        {
            System.out.print("o--> " + table);
            if (table instanceof View)
            {
                System.out.println(" (VIEW)");
            }
            else
            {
                System.out.println();
            }
            for (final Column column : table.getColumns())
            {
                System.out.println("   o--> " + column + " (" + column.getColumnDataType() + ")");
            }
        }
    }
}
Strangely,
./schemacrawler.sh -server=mysql -database=target_db -user=root -password=password -infolevel=ALL -command=schema
outputs the tables and their corresponding columns.
Update: my configuration
schemacrawler-14.09.03-main
Ubuntu 16.04 64bit
MariaDB 10.2.1-MariaDB-1~xenial
(I assumed MariaDB might not be supported yet, so I switched between the two drivers below, but neither works.)
mysql-connector-java-6.0.3
mariadb-java-client-1.4.6
Finally, I figured it out:
options.setTableTypes(Lists.newArrayList("BASE TABLE", "TABLE", "VIEW", "UNKNOWN"));
Caution: with MariaDB, the table type is reported as "UNKNOWN".

Persisting data to DynamoDB using Apache Spark

I have an application where:
1. I read JSON files from S3 into a DataFrame using SqlContext.read.json.
2. I then do some transformations on the DataFrame.
3. Finally, I want to persist the records to DynamoDB, using one of the record values as the key and the rest of the JSON attributes as values/columns.
I am trying something like:
JobConf jobConf = new JobConf(sc.hadoopConfiguration());
jobConf.set("dynamodb.servicename", "dynamodb");
jobConf.set("dynamodb.input.tableName", "my-dynamo-table");   // Pointing to DynamoDB table
jobConf.set("dynamodb.endpoint", "dynamodb.us-east-1.amazonaws.com");
jobConf.set("dynamodb.regionid", "us-east-1");
jobConf.set("dynamodb.throughput.read", "1");
jobConf.set("dynamodb.throughput.read.percent", "1");
jobConf.set("dynamodb.version", "2011-12-05");
jobConf.set("mapred.output.format.class", "org.apache.hadoop.dynamodb.write.DynamoDBOutputFormat");
jobConf.set("mapred.input.format.class", "org.apache.hadoop.dynamodb.read.DynamoDBInputFormat");

DataFrame df = sqlContext.read().json("s3n://mybucket/abc.json");
RDD<String> jsonRDD = df.toJSON();
JavaRDD<String> jsonJavaRDD = jsonRDD.toJavaRDD();

PairFunction<String, Text, DynamoDBItemWritable> keyData = new PairFunction<String, Text, DynamoDBItemWritable>() {
    public Tuple2<Text, DynamoDBItemWritable> call(String row) {
        DynamoDBItemWritable writeable = new DynamoDBItemWritable();
        try {
            System.out.println("JSON : " + row);
            JSONObject jsonObject = new JSONObject(row);
            System.out.println("JSON Object: " + jsonObject);

            Map<String, AttributeValue> attributes = new HashMap<String, AttributeValue>();

            AttributeValue attributeValue = new AttributeValue();
            attributeValue.setS(row);
            attributes.put("values", attributeValue);

            AttributeValue attributeKeyValue = new AttributeValue();
            attributeValue.setS(jsonObject.getString("external_id"));
            attributes.put("primary_key", attributeKeyValue);

            AttributeValue attributeSecValue = new AttributeValue();
            attributeValue.setS(jsonObject.getString("123434335"));
            attributes.put("creation_date", attributeSecValue);

            writeable.setItem(attributes);
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        return new Tuple2(new Text(row), writeable);
    }
};

JavaPairRDD<Text, DynamoDBItemWritable> pairs = jsonJavaRDD
        .mapToPair(keyData);

Map<Text, DynamoDBItemWritable> map = pairs.collectAsMap();
System.out.println("Results : " + map);
pairs.saveAsHadoopDataset(jobConf);
However I do not see any data getting written to DynamoDB. Nor do I get any error messages.
I'm not sure, but yours seems more complex than it may need to be.
I've used the following to write an RDD to DynamoDB successfully (note that it sets dynamodb.output.tableName, whereas your JobConf only sets dynamodb.input.tableName):
val ddbInsertFormattedRDD = inputRDD.map { case (skey, svalue) =>
  val ddbMap = new util.HashMap[String, AttributeValue]()

  val key = new AttributeValue()
  key.setS(skey.toString)
  ddbMap.put("DynamoDbKey", key)

  val value = new AttributeValue()
  value.setS(svalue.toString)
  // the value goes under its own attribute name, separate from the key attribute
  ddbMap.put("DynamoDbValue", value)

  val item = new DynamoDBItemWritable()
  item.setItem(ddbMap)

  (new Text(""), item)
}

val ddbConf = new JobConf(sc.hadoopConfiguration)
ddbConf.set("dynamodb.output.tableName", "my-dynamo-table")
ddbConf.set("dynamodb.throughput.write.percent", "0.5")
ddbConf.set("mapred.input.format.class", "org.apache.hadoop.dynamodb.read.DynamoDBInputFormat")
ddbConf.set("mapred.output.format.class", "org.apache.hadoop.dynamodb.write.DynamoDBOutputFormat")

ddbInsertFormattedRDD.saveAsHadoopDataset(ddbConf)
Also, have you checked that you have upped the capacity correctly?
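For reference, a rough Java equivalent of the same write path, shaped like the code in the question, might look like the sketch below. The attribute names (primary_key, values) and the external_id field are reused from your snippet, so treat them as placeholders for your real schema, and keep your endpoint/region settings if you run outside EMR.

JobConf ddbConf = new JobConf(sc.hadoopConfiguration());
ddbConf.set("dynamodb.output.tableName", "my-dynamo-table");   // output, not input, table name for writes
ddbConf.set("dynamodb.throughput.write.percent", "0.5");
ddbConf.set("mapred.input.format.class", "org.apache.hadoop.dynamodb.read.DynamoDBInputFormat");
ddbConf.set("mapred.output.format.class", "org.apache.hadoop.dynamodb.write.DynamoDBOutputFormat");

JavaPairRDD<Text, DynamoDBItemWritable> ddbWrites = jsonJavaRDD.mapToPair(row -> {
    JSONObject jsonObject = new JSONObject(row);
    Map<String, AttributeValue> attributes = new HashMap<>();

    AttributeValue keyValue = new AttributeValue();
    keyValue.setS(jsonObject.getString("external_id"));           // hash key attribute of the target table
    attributes.put("primary_key", keyValue);

    AttributeValue dataValue = new AttributeValue();
    dataValue.setS(row);                                          // the full JSON document as a string attribute
    attributes.put("values", dataValue);

    DynamoDBItemWritable writable = new DynamoDBItemWritable();
    writable.setItem(attributes);

    // Following the Scala example above, the Text key can be left empty.
    return new Tuple2<>(new Text(""), writable);
});

ddbWrites.saveAsHadoopDataset(ddbConf);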

Retrieving data from composite key via astyanax

I am very new to Cassandra and am using Astyanax.
CREATE TABLE employees (empID int, deptID int, first_name varchar,
last_name varchar, PRIMARY KEY (empID, deptID));
I want to get the values for this query:
select * from employees where empID =2 and deptID = 800;
public void read(Integer empID, String deptID) {
    OperationResult<ColumnList<String>> result;
    try {
        columnFamilies = ColumnFamily.newColumnFamily("employees", IntegerSerializer.get(), StringSerializer.get());
        result = keyspace.prepareQuery(columnFamilies).getKey(empID).execute();
        ColumnList<String> cols = result.getResult();
        //Other stuff
    }
}
How should I achieve this?
As far as I can find, there isn't a super clean way to do this. You have to execute a CQL query and then iterate through the rows. This code is taken from the Astyanax examples file:
public void read(int empId) {
    logger.debug("read()");
    try {
        OperationResult<CqlResult<Integer, String>> result
            = keyspace.prepareQuery(EMP_CF)
                .withCql(String.format("SELECT * FROM %s WHERE %s=%d;", EMP_CF_NAME, COL_NAME_EMPID, empId))
                .execute();
        for (Row<Integer, String> row : result.getResult().getRows()) {
            logger.debug("row: " + row.getKey() + "," + row); // why is rowKey null?
            ColumnList<String> cols = row.getColumns();
            logger.debug("emp");
            logger.debug("- emp id: " + cols.getIntegerValue(COL_NAME_EMPID, null));
            logger.debug("- dept: " + cols.getIntegerValue(COL_NAME_DEPTID, null));
            logger.debug("- firstName: " + cols.getStringValue(COL_NAME_FIRST_NAME, null));
            logger.debug("- lastName: " + cols.getStringValue(COL_NAME_LAST_NAME, null));
        }
    } catch (ConnectionException e) {
        logger.error("failed to read from C*", e);
        throw new RuntimeException("failed to read from C*", e);
    }
}
You just have to tune the CQL query to return what you want (see the sketch at the end of this answer). This is a bit frustrating because, according to the documentation, you can do:
Column<String> result = keyspace.prepareQuery(CF_COUNTER1)
    .getKey(rowKey)
    .getColumn("Column1")
    .execute().getResult();
Long counterValue = result.getLongValue();
However, I don't know what rowKey is. I've posted a question about what rowKey can be; hopefully that will help.
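For the specific query in the question (empID = 2 and deptID = 800), tuning the CQL string just means restricting on both the partition key and the clustering column. A sketch, using the COL_NAME_DEPTID constant already referenced in the read() example above:

OperationResult<CqlResult<Integer, String>> result = keyspace.prepareQuery(EMP_CF)
    .withCql(String.format("SELECT * FROM %s WHERE %s = %d AND %s = %d;",
            EMP_CF_NAME, COL_NAME_EMPID, 2, COL_NAME_DEPTID, 800))
    .execute();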
