I'm working with a Cassandra cluster of 4 nodes (on Amazon). I have a keyspace test_ks_r2 with replication factor = 2; writes and updates use consistency level ALL, reads use consistency level ONE. The code uses the DataStax Java driver (version 2.1.5):
// schema of user column family
CREATE TABLE user (
id text PRIMARY KEY,
password text,
username text
);
// ResultSet class
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.HashMap;
import java.util.Map;
public class ResultSet {
private List<String> idList = new ArrayList<String>();
private Map<String, List<String>> data = new HashMap<String, List<String>>();
public void setIdList(List<String> v) {idList = v;}
public List<String> getIdList() { return idList;}
public void setData(Map<String, List<String>> v) {data = v;}
public Map<String, List<String>> getData() {return data;}
}
// test cassandra class
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.HashMap;
import java.util.Map;
import com.datastax.driver.core.*;
import com.datastax.driver.core.Cluster.Builder;
import com.datastax.driver.core.querybuilder.QueryBuilder;
class TestCassandra {
private Session session; // assumed to be initialized elsewhere, e.g. Cluster.builder().addContactPoint(...).build().connect("test_ks_r2")
private ConsistencyLevel getConsistencyLevel(String consistency) {
System.out.println("consistency level string: " + consistency);
if(consistency.equals("one")) {
System.out.println("using consistency level: ONE");
return ConsistencyLevel.ONE;
}
else if(consistency.equals("two")) {
System.out.println("using consistency level: TWO");
return ConsistencyLevel.TWO;
}
else if(consistency.equals("quorum")) {
System.out.println("using consistency level: QUORUM");
return ConsistencyLevel.QUORUM;
}
else if(consistency.equals("all")) {
System.out.println("using consistency level: ALL");
return ConsistencyLevel.ALL;
}
else {
System.out.println("using default consistency level: ONE");
return ConsistencyLevel.ONE;
}
}
public void testWriteUpdateRead(int n, String mode, int delay) {
if (mode.equals("QueryBuilder")) {
System.out.println("test write & read with QueryBuilder ...");
} else if (mode.equals("PreparedStatement")) {
System.out.println("test write & read with PreparedStatement ...");
} else {
System.exit(-1);
}
ResultSet resultSet = write(n, mode, getConsistencyLevel("all"));
read(resultSet.getIdList(), resultSet.getData(), getConsistencyLevel("one"));
// update data
update(resultSet, mode, getConsistencyLevel("all"), delay);
// read again
read(resultSet.getIdList(), resultSet.getData(), getConsistencyLevel("one"));
}
public ResultSet update(ResultSet resultSet, String mode, ConsistencyLevel consistencyLevel, int delay) {
PreparedStatement ps = session.prepare("UPDATE user SET password = ? WHERE id = ?")
.setConsistencyLevel(consistencyLevel);
List<String> idList = resultSet.getIdList();
Map<String, List<String>> data = resultSet.getData();
for (String idRead: idList) {
try {
Thread.sleep(delay);
}
catch (Exception e) {
System.out.println(e);
}
String newPassword = "new password" + idRead;
if (mode.equals("QueryBuilder")) {
session.execute(
QueryBuilder.update("user")
.where(QueryBuilder.eq("id", idRead))
.with(QueryBuilder.set("password", newPassword))
.setConsistencyLevel(consistencyLevel)
);
} else {
com.datastax.driver.core.ResultSet result = session.execute(ps.bind(newPassword, idRead)); // fully qualified so it does not clash with the ResultSet class above
System.out.println(result);
}
List<String> itemValues = new ArrayList<String>();
itemValues.add("empty");
itemValues.add(newPassword);
data.put(idRead, itemValues);
}
System.out.println("Update " + Objects.toString(idList.size(), null) + " times");
resultSet.setData(data);
return resultSet;
}
public ResultSet write(int n, String mode, ConsistencyLevel consistencyLevel) {
PreparedStatement ps = session
.prepare("INSERT INTO user (id, username, password) VALUES (?, ?, ?)")
.setConsistencyLevel(consistencyLevel);
String idWrite = "";
List<String> idList = new ArrayList<String>();
Map<String, List<String>> data = new HashMap<String, List<String>>();
for (int i=0; i<n;i++) {
idWrite = Objects.toString(System.currentTimeMillis(),null);
String username = "username" + idWrite;
String password = "password" + idWrite;
List<String> itemValues = new ArrayList<String>();
itemValues.add(username);
itemValues.add(password);
if (mode.equals("QueryBuilder")) {
session.execute(
QueryBuilder.insertInto("user")
.value("id", idWrite)
.value("username", username)
.value("password", password)
.setConsistencyLevel(consistencyLevel)
);
} else {
session.execute(ps.bind(idWrite, username, password));
}
idList.add(idWrite);
data.put(idWrite, itemValues);
}
ResultSet resultSet = new ResultSet();
resultSet.setIdList(idList);
resultSet.setData(data);
System.out.println("Write " + Objects.toString(n, null) + " times");
return resultSet;
}
public void read(List<String> idList, Map<String, List<String>> data, ConsistencyLevel consistencyLevel) {
int readCount;
int success = 0;
PreparedStatement stmt = session.prepare("SELECT * FROM user WHERE id = ?").setConsistencyLevel(consistencyLevel); // prepare once, outside the loop
for (String idRead: idList) {
List<String> itemValues = data.get(idRead);
try {
readCount = 0;
Row resultRow = null;
for (Row row : session.execute(stmt.bind(idRead))) {
readCount++;
resultRow = row;
}
// there should be only 1 read, in such case, this is a successful read of a recent write
if (readCount==1) {
if(itemValues.get(1).equals(resultRow.getString("password"))) {
success++;
} else {
System.out.println("password: " + itemValues.get(1));
System.out.println("password in database: " + resultRow.getString("password"));
}
}
} catch (Exception e) {
success = success + 0; // do not increase success since there is an exception
}
}
System.out.println("Successfully read back " + Objects.toString(success, null) + " times");
}
}
Here is my result
Write 40 times
consistency level string: one
using consistency level: ONE
Successfully read back 40 times
consistency level string: all
using consistency level: ALL
Update 40 times
consistency level string: one
using consistency level: ONE
Successfully read back 32 times
Around 30% of the updates always fail to read back correctly. What am I doing wrong?
Our program displays a tree control showing the metadata structure of the XML file the user has selected as a datasource. So it displays all elements & attributes in use in the XML file, like this:
Employees
Employee
FirstName
LastName
Orders
Order
OrderId
For the case where the user does not pass us an XSD file, we need to walk the XML file and build up the metadata structure.
The full code for this is at SaxonQuestions.zip, TestBuildTreeWithSchema.java and is also listed below.
The below code works but it has a problem. Suppose under Employee there's an element for SpouseName. This is only populated if the employee is married. What if the sample data file I have is all unmarried employees? Then the below code does not know there's a SpouseName element.
So my question is - how can I read the schema directly, instead of using the below code. If I read the schema then I get every node & attribute including the optional ones. I also get the type. And the schema optionally has a description for each node and I get that too.
Therefore, I need to read the schema itself. How can I do that?
And a secondary question - why is the type for an int BigInteger instead of Integer or Long? I see this with Employee/@EmployeeID in Southwind.xml & Southwind.xsd.
TestBuildTreeWithSchema.java
import net.sf.saxon.s9api.*;
import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.List;
public class TestBuildTreeWithSchema {
public static void main(String[] args) throws Exception {
XmlDatasource datasource = new XmlDatasource(
new FileInputStream(new File("files", "SouthWind.xml").getCanonicalPath()),
new FileInputStream(new File("files", "SouthWind.xsd").getCanonicalPath()));
// get the root element
XdmNode rootNode = null;
for (XdmNode node : datasource.getXmlRootNode().children()) {
if (node.getNodeKind() == XdmNodeKind.ELEMENT) {
rootNode = node;
break;
}
}
TestBuildTreeWithSchema buildTree = new TestBuildTreeWithSchema(rootNode);
Element root = buildTree.addNode();
System.out.println("Schema:");
printElement("", root);
}
private static void printElement(String indent, Element element) {
System.out.println(indent + "<" + element.name + "> (" + (element.type == null ? "null" : element.type.getSimpleName()) + ")");
indent += " ";
for (Attribute attr : element.attributes)
System.out.println(indent + "=" + attr.name + " (" + (attr.type == null ? "null" : attr.type.getSimpleName()) + ")");
for (Element child : element.children)
printElement(indent, child);
}
protected XdmItem currentNode;
public TestBuildTreeWithSchema(XdmItem currentNode) {
this.currentNode = currentNode;
}
private Element addNode() throws SaxonApiException {
String name = ((XdmNode)currentNode).getNodeName().getLocalName();
// Question:
// Is this the best way to determine that this element has data (as opposed to child elements)?
Boolean elementHasData;
try {
((XdmNode) currentNode).getTypedValue();
elementHasData = true;
} catch (Exception ex) {
elementHasData = false;
}
// Questions:
// Is this the best way to get the type of the element value?
// Why BigInteger instead of Long for int?
Class valueClass = ! elementHasData ? null : ((XdmAtomicValue)((XdmNode)currentNode).getTypedValue()).getValue().getClass();
Element element = new Element(name, valueClass, null);
// add in attributes
XdmSequenceIterator currentSequence;
if ((currentSequence = moveTo(Axis.ATTRIBUTE)) != null) {
do {
name = ((XdmNode) currentNode).getNodeName().getLocalName();
// Questions:
// Is this the best way to get the type of the attribute value?
// Why BigInteger instead of Long for int?
valueClass = ((XdmAtomicValue)((XdmNode)currentNode).getTypedValue()).getValue().getClass();
Attribute attr = new Attribute(name, valueClass, null);
element.attributes.add(attr);
} while (moveToNextInCurrentSequence(currentSequence));
moveTo(Axis.PARENT);
}
// add in children elements
if ((currentSequence = moveTo(Axis.CHILD)) != null) {
do {
Element child = addNode();
// if we don't have this, add it
Element existing = element.getChildByName(child.name);
if (existing == null)
element.children.add(child);
else
// add in any children this does not have
existing.addNewItems (child);
} while (moveToNextInCurrentSequence(currentSequence));
moveTo(Axis.PARENT);
}
return element;
}
// moves to element or attribute
private XdmSequenceIterator moveTo(Axis axis) {
XdmSequenceIterator en = ((XdmNode) currentNode).axisIterator(axis);
boolean gotIt = false;
while (en.hasNext()) {
currentNode = en.next();
if (((XdmNode) currentNode).getNodeKind() == XdmNodeKind.ELEMENT || ((XdmNode) currentNode).getNodeKind() == XdmNodeKind.ATTRIBUTE) {
gotIt = true;
break;
}
}
if (gotIt) {
if (axis == Axis.ATTRIBUTE || axis == Axis.CHILD || axis == Axis.NAMESPACE)
return en;
return null;
}
return null;
}
// moves to next element or attribute
private Boolean moveToNextInCurrentSequence(XdmSequenceIterator currentSequence)
{
if (currentSequence == null)
return false;
while (currentSequence.hasNext())
{
currentNode = currentSequence.next();
if (((XdmNode)currentNode).getNodeKind() == XdmNodeKind.ELEMENT || ((XdmNode)currentNode).getNodeKind() == XdmNodeKind.ATTRIBUTE)
return true;
}
return false;
}
static class Node {
String name;
Class type;
String description;
public Node(String name, Class type, String description) {
this.name = name;
this.type = type;
this.description = description;
}
}
static class Element extends Node {
List<Element> children;
List<Attribute> attributes;
public Element(String name, Class type, String description) {
super(name, type, description);
children = new ArrayList<>();
attributes = new ArrayList<>();
}
public Element getChildByName(String name) {
for (Element child : children) {
if (child.name.equals(name))
return child;
}
return null;
}
public void addNewItems(Element child) {
for (Attribute attrAdd : child.attributes) {
boolean haveIt = false;
for (Attribute attrExist : attributes)
if (attrExist.name.equals(attrAdd.name)) {
haveIt = true;
break;
}
if (!haveIt)
attributes.add(attrAdd);
}
for (Element elemAdd : child.children) {
Element exist = null;
for (Element elemExist : children)
if (elemExist.name.equals(elemAdd.name)) {
exist = elemExist;
break;
}
if (exist == null)
children.add(elemAdd);
else
exist.addNewItems(elemAdd);
}
}
}
static class Attribute extends Node {
public Attribute(String name, Class type, String description) {
super(name, type, description);
}
}
}
XmlDatasource.java
import com.saxonica.config.EnterpriseConfiguration;
import com.saxonica.ee.s9api.SchemaValidatorImpl;
import net.sf.saxon.Configuration;
import net.sf.saxon.lib.FeatureKeys;
import net.sf.saxon.s9api.*;
import net.sf.saxon.type.SchemaException;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import javax.xml.transform.Source;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamSource;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
public class XmlDatasource {
/** the DOM all searches are against */
private XdmNode xmlRootNode;
private XPathCompiler xPathCompiler;
/** key == the prefix; value == the uri mapped to that prefix */
private HashMap<String, String> prefixToUriMap = new HashMap<>();
/** key == the uri mapped to that prefix; value == the prefix */
private HashMap<String, String> uriToPrefixMap = new HashMap<>();
public XmlDatasource (InputStream xmlData, InputStream schemaFile) throws SAXException, SchemaException, SaxonApiException, IOException {
boolean haveSchema = schemaFile != null;
// call this before any instantiation of Saxon classes.
Configuration config = createEnterpriseConfiguration();
if (haveSchema) {
Source schemaSource = new StreamSource(schemaFile);
config.addSchemaSource(schemaSource);
}
Processor processor = new Processor(config);
DocumentBuilder doc_builder = processor.newDocumentBuilder();
XMLReader reader = createXMLReader();
InputSource xmlSource = new InputSource(xmlData);
SAXSource saxSource = new SAXSource(reader, xmlSource);
if (haveSchema) {
SchemaValidator validator = new SchemaValidatorImpl(processor);
doc_builder.setSchemaValidator(validator);
}
xmlRootNode = doc_builder.build(saxSource);
xPathCompiler = processor.newXPathCompiler();
if (haveSchema)
xPathCompiler.setSchemaAware(true);
declareNameSpaces();
}
public XdmNode getXmlRootNode() {
return xmlRootNode;
}
public XPathCompiler getxPathCompiler() {
return xPathCompiler;
}
/**
* Create an XMLReader set to disallow XXE attacks.
* @return a safe XMLReader.
*/
public static XMLReader createXMLReader() throws SAXException {
XMLReader reader = XMLReaderFactory.createXMLReader();
// stop XXE https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Prevention_Cheat_Sheet#JAXP_DocumentBuilderFactory.2C_SAXParserFactory_and_DOM4J
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
return reader;
}
private void declareNameSpaces() throws SaxonApiException {
// saxon has some of their functions set up with this.
prefixToUriMap.put("saxon", "http://saxon.sf.net");
uriToPrefixMap.put("http://saxon.sf.net", "saxon");
XdmValue list = xPathCompiler.evaluate("//namespace::*", xmlRootNode);
if (list == null || list.size() == 0)
return;
for (int index=0; index<list.size(); index++) {
XdmNode node = (XdmNode) list.itemAt(index);
String prefix = node.getNodeName() == null ? "" : node.getNodeName().getLocalName();
// xml, xsd, & xsi are XML structure ones, not ones used in the XML
if (prefix.equals("xml") || prefix.equals("xsd") || prefix.equals("xsi"))
continue;
// use default prefix if prefix is empty.
if (prefix == null || prefix.isEmpty())
prefix = "def";
// this returns repeats, so if a repeat, go on to next.
if (prefixToUriMap.containsKey(prefix))
continue;
String uri = node.getStringValue();
if (uri != null && !uri.isEmpty()) {
xPathCompiler.declareNamespace(prefix, uri);
prefixToUriMap.put(prefix, uri);
uriToPrefixMap.put(uri, prefix); }
}
}
public static EnterpriseConfiguration createEnterpriseConfiguration()
{
EnterpriseConfiguration configuration = new EnterpriseConfiguration();
configuration.supplyLicenseKey(new BufferedReader(new java.io.StringReader(deobfuscate(key))));
configuration.setConfigurationProperty(FeatureKeys.SUPPRESS_XPATH_WARNINGS, Boolean.TRUE);
return configuration;
}
}
Thanks for the clarifications. I think your real goal is to find a way to parse and process an XML Schema in Java without having to treat the XSD as an ordinary XML document (it is an ordinary XML document, but processing it using the standard facilities is not easy).
On that basis, I think this thread should help: In Java, how do I parse an xml schema (xsd) to learn what's valid at a given element?
Personally, I've never found any library that does a better job than the EMF XSD model. It's complex, but comprehensive.
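If you go the EMF XSD route, the basic loading step looks roughly like the sketch below. This is only a hedged outline: it assumes the org.eclipse.xsd jar (plus the EMF common/ecore jars) is on the classpath, it reuses the files/Southwind.xsd path from the question, and the DumpSchema class name is made up for illustration; walking the type definitions for child elements, attributes and annotations is left out.
import org.eclipse.emf.common.util.URI;
import org.eclipse.emf.ecore.resource.ResourceSet;
import org.eclipse.emf.ecore.resource.impl.ResourceSetImpl;
import org.eclipse.xsd.XSDElementDeclaration;
import org.eclipse.xsd.XSDSchema;
import org.eclipse.xsd.util.XSDResourceFactoryImpl;
import org.eclipse.xsd.util.XSDResourceImpl;
public class DumpSchema {
    public static void main(String[] args) {
        ResourceSet resourceSet = new ResourceSetImpl();
        // tell EMF how to load *.xsd resources
        resourceSet.getResourceFactoryRegistry().getExtensionToFactoryMap()
                   .put("xsd", new XSDResourceFactoryImpl());
        XSDResourceImpl resource = (XSDResourceImpl)
                resourceSet.getResource(URI.createFileURI("files/Southwind.xsd"), true);
        XSDSchema schema = resource.getSchema();
        // top-level element declarations; recurse into getTypeDefinition() for children and attributes
        for (XSDElementDeclaration el : schema.getElementDeclarations()) {
            System.out.println(el.getName() + " : "
                    + (el.getTypeDefinition() == null ? "anonymous" : el.getTypeDefinition().getName()));
        }
    }
}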
I want to create a better web service that displays a collection from a NotesView with pagination.
I have found some performance issues with View.getAllEntries on bigger views.
In MongoDB, I can use findAll() with skip() and limit().
How can I do the same on Domino?
Use the ViewNavigator class. If you are paging through a large view, it is much faster than view.getAllEntries().
You can acquire an instance of ViewNavigator with view.createViewNav() or a similar method. For best performance, call view.setAutoUpdate(false) before you acquire the navigator.
You can find lots more information by searching the web. This article looks like a good place to start.
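As a minimal sketch of that pattern (an outline only: error handling is trimmed, the View is assumed to have been opened from the session elsewhere, and position/pageSize are whatever your paging parameters happen to be):
import lotus.domino.View;
import lotus.domino.ViewEntry;
import lotus.domino.ViewNavigator;

void page(View view, int position, int pageSize) throws Exception {
    view.setAutoUpdate(false);                 // do this before acquiring the navigator
    ViewNavigator nav = view.createViewNav();
    if (nav.skip(position) == position) {      // skip() returns how many entries it actually skipped
        ViewEntry entry = nav.getCurrent();
        int read = 0;
        while (entry != null && read < pageSize) {
            System.out.println(entry.getColumnValues());
            read++;
            ViewEntry next = nav.getNext(entry);
            entry.recycle();                   // release the backend object before moving on
            entry = next;
        }
    }
    nav.recycle();
}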
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.ibm.commons.util.io.json.JsonJavaObject;
import lotus.domino.NotesException;
import lotus.domino.View;
import lotus.domino.ViewColumn;
import lotus.domino.ViewEntryCollection;
import lotus.domino.ViewNavigator;
import lotus.domino.ViewEntry;
private String consultView(View view, int counter,int position) throws Exception{
String strValue = "";
ViewNavigator nav;
int count = 0;
view.setAutoUpdate(false);
nav = view.createViewNav();
nav.setEntryOptions(ViewNavigator.VN_ENTRYOPT_NOCOUNTDATA);
nav.setBufferMaxEntries(400);
int limit = counter;
int skippedEntries = nav.skip(position);
String number = "";
if (skippedEntries == position) {
Map<Integer, String> columnNameMap = new HashMap<Integer, String>();
for (ViewColumn col : (List<ViewColumn>) view.getColumns()) {
if (col.getColumnValuesIndex() < 65535) {
columnNameMap.put(col.getColumnValuesIndex(), col.getItemName());
}
}
List nodeData = new ArrayList();
ViewEntry entry = nav.getCurrent();
while (entry != null && count <= (limit - 1)) {
if (!entry.isCategory()) {
try {
HashMap<String, Object> entryMap = new HashMap<String, Object>();
count++;
List<Object> columnValues = entry.getColumnValues();
entryMap.put("unid", entry.getUniversalID());
entryMap.put("position", entry.getPosition('.'));
entryMap.put("pos", entry.getPosition('.'));
entryMap.put("userpos", count);
for (Integer index : columnNameMap.keySet())
entryMap.put(columnNameMap.get(index).toString(),columnValues.get(index));
nodeData.add(entryMap);
} catch (Exception e) {
e.printStackTrace();
}
}
ViewEntry tmpentry = nav.getNext(entry);
entry.recycle();
entry = tmpentry;
}
JsonJavaObject returnJSON = new JsonJavaObject();
returnJSON.put("errorcode", 0);
returnJSON.put("errormessage", "");
returnJSON.put("total",getViewCount(view));
returnJSON.put("data", nodeData);
strValue = returnJSON.toString();
}
nav.recycle();
view.recycle();
return strValue;
}
private int getViewCount(View view) throws NotesException {
int count = 0;
ViewEntryCollection entryCollection = view.getAllEntries();
count = entryCollection.getCount();
entryCollection.recycle();
return count;
}
}
The function below reads the view entries and outputs the result as a JSON object. Please try the following and let me know if it works.
private String consultView(View view, int counter,int position) throws Exception{
String strValue = "";
ViewNavigator nav;
int count = 0;
view.setAutoUpdate(false);
nav = view.createViewNav();
nav.setEntryOptions(ViewNavigator.VN_ENTRYOPT_NOCOUNTDATA);
nav.setBufferMaxEntries(400);
int limit = counter;
int skippedEntries = nav.skip(position);
String number = "";
int inde = 111;
if (skippedEntries == position) {
Map<Integer, String> columnNameMap = new HashMap<Integer, String>();
for (ViewColumn col : (List<ViewColumn>) view.getColumns()) {
if (col.getColumnValuesIndex() < 65535 && Utilisties.containsVar(viewObject.getRetCols(), col.getItemName())) {
columnNameMap.put(col.getColumnValuesIndex(), col.getItemName());
}
}
List nodeData = new ArrayList();
ViewEntry entry = nav.getCurrent();
while (entry != null && count <= (limit - 1)) {
if (!entry.isCategory()) {
try {
HashMap<String, Object> entryMap = new HashMap<String, Object>();
count++;
List<Object> columnValues = entry.getColumnValues();
entryMap.put("unid", entry.getUniversalID());
entryMap.put("position", entry.getPosition('.'));
entryMap.put("pos", entry.getPosition('.'));
entryMap.put("userpos", count);
for (Integer index : columnNameMap.keySet())
entryMap.put(columnNameMap.get(index).toString(),columnValues.get(index));
nodeData.add(entryMap);
} catch (Exception e) {
e.printStackTrace();
}
}
ViewEntry tmpentry = nav.getNext(entry);
entry.recycle();
entry = tmpentry;
}
JsonJavaObject returnJSON = new JsonJavaObject();
returnJSON.put("errorcode", 0);
returnJSON.put("errormessage", "");
if(viewObject.getGetCount())
returnJSON.put("total",getViewCount(view));
returnJSON.put("data", nodeData);
strValue = returnJSON.toString();
}
nav.recycle();
view.recycle();
return strValue;
}
I am using a Spark session to save a DataFrame to a Hive table. The code is as below.
df.write.mode(SaveMode.Append).format("orc").insertInto("table")
The data comes to Spark from Kafka, and it can be a huge amount of data arriving throughout the day. Does the Spark DataFrame save internally do Hive compaction? If not, what is the best way to do compaction at regular intervals without affecting table insertions?
In your example you should add partitionBy, since the amount of data can be huge:
df.write.mode(SaveMode.Append).format("orc").partitionBy("age")
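A hedged side note on the snippet above, based on the public DataFrameWriter API: partitionBy() pairs with save()/saveAsTable(), while insertInto() relies on the table's existing partitioning and rejects an explicit partitionBy(). A minimal Java sketch, with "age" and "table" as placeholder names:
// Java API equivalent of the snippet above; Spark throws an AnalysisException if
// partitionBy() is combined with insertInto(), so saveAsTable() is used here.
df.write()
  .mode(SaveMode.Append)
  .format("orc")
  .partitionBy("age")      // placeholder partition column
  .saveAsTable("table");   // placeholder table name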
Or you can also achieve it as below:
The way I have done this is to first register a temp table in Spark job itself and then leverage the sql method of the HiveContext to create a new table in hive using the data from the temp table. For example if I have a dataframe df and HiveContext hc the general process is:
df.registerTempTable("my_temp_table")
hc.sql("Insert into overwrite table_name PARTITION SELECT a,b, PARTITION_col from my_temp_table")
// Imports added for completeness (logger and springutil are project-specific helpers that are not shown here).
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.StructField;
import scala.collection.JavaConverters;
import scala.collection.Seq;
public class HiveCompaction {
private static SparkConf sparkConf;
private static JavaSparkContext sc;
private static SparkSession sqlContext = springutil.getBean("testSparkSession");
private static HashMap<Object, Object> partitionColumns;
public static void compact(String table, Dataset<Row> dataToCompact) {
logger.info("Started Compaction for - " + table);
if (!partitionColumns.containsKey(table)) {
compact_table_without_partition(table, dataToCompact);
} else {
compact_table_with_partition(table, dataToCompact, partitionColumns);
}
logger.info("Data Overwritten in HIVE table : " + table + " successfully");
}
private static void compact_table_with_partition(String table, Dataset<Row> dataToCompact,
Map<Object, Object> partitionData) {
String[] partitions = ((String) partitionData.get(table)).split(",");
List<Map<Object, Object>> partitionMap = getPartitionsToCompact(dataToCompact, Arrays.asList(partitions));
for (Map mapper : partitionMap) {
// sqlContext.sql("REFRESH TABLE staging.dummy_table");
String query = "select * from " + table + " where " + frameQuery(" and ", mapper);
Dataset<Row> originalTable = sqlContext.sql(query.toString());
if (originalTable.count() == 0) {
dataToCompact.write().mode("append").format("parquet").insertInto(table);
} else {
String location = getHdfsFileLocation(table);
String uuid = getUUID();
updateTable(table, dataToCompact, originalTable, uuid);
String destinationPath = framePath(location, frameQuery("/", mapper), uuid);
sqlContext.sql("Alter table " + table + " partition(" + frameQuery(",", mapper) + ") set location '"
+ destinationPath + "'");
}
}
}
private static void compact_table_without_partition(String table, Dataset<Row> dataToCompact) {
String query = "select * from " + table;
Dataset<Row> originalTable = sqlContext.sql(query.toString());
if (originalTable.count() == 0) {
dataToCompact.write().mode("append").format("parquet").insertInto(table);
} else {
String location = getHdfsFileLocation(table);
String uuid = getUUID();
String destinationPath = framePath(location, null, uuid);
updateTable(table, dataToCompact, originalTable, uuid);
sqlContext.sql("Alter table " + table + " set location '" + destinationPath + "'");
}
}
private static void updateTable(String table, Dataset<Row> dataToCompact, Dataset<Row> originalTable, String uuid) {
Seq<String> joinColumnSeq = getPrimaryKeyColumns();
Dataset<Row> unModifiedRecords = originalTable.join(dataToCompact, joinColumnSeq, "leftanti");
Dataset<Row> dataToInsert1 = dataToCompact.withColumn("uuid", functions.lit(uuid));
Dataset<Row> dataToInsert2 = unModifiedRecords.withColumn("uuid", functions.lit(uuid));
dataToInsert1.write().mode("append").format("parquet").insertInto(table + "_compacted");
dataToInsert2.write().mode("append").format("parquet").insertInto(table + "_compacted");
}
private static String getHdfsFileLocation(String table) {
Dataset<Row> tableDescription = sqlContext.sql("describe formatted " + table + "_compacted");
List<Row> rows = tableDescription.collectAsList();
String location = null;
for (Row r : rows) {
if (r.get(0).equals("Location")) {
location = r.getString(1);
break;
}
}
return location;
}
private static String frameQuery(String delimiter, Map mapper) {
StringBuilder modifiedQuery = new StringBuilder();
int i = 1;
for (Object key : mapper.keySet()) {
modifiedQuery.append(key + "=");
modifiedQuery.append(mapper.get(key));
if (mapper.size() > i)
modifiedQuery.append(delimiter);
i++;
}
return modifiedQuery.toString();
}
private static String framePath(String location, String framedpartition, String uuid) {
StringBuilder loc = new StringBuilder(location);
loc.append("/");
if (StringUtils.isNotEmpty(framedpartition)) {
loc.append(framedpartition);
loc.append("/");
}
loc.append("uuid=");
loc.append(uuid);
logger.info(loc.toString());
return loc.toString();
}
public static Seq<String> getColumnSeq(List<String> joinColumns) {
List<String> cols = new ArrayList<>(joinColumns.size());
for (int i = 0; i < joinColumns.size(); i++) {
cols.add(joinColumns.get(i).toLowerCase());
}
return JavaConverters.asScalaBufferConverter(cols).asScala().readOnly();
}
private static String getUUID() {
StringBuilder uri = new StringBuilder();
Random rand = new Random();
int randNum = rand.nextInt(200);
String uuid = DateTimeFormatter.ofPattern("yyyyMMddHHmmSSS").format(LocalDateTime.now()).toString()
+ (String.valueOf(randNum));
return uuid;
}
private static List<Map<Object, Object>> getPartitionsToCompact(Dataset<Row> filteredRecords,
List<String> partitions) {
Column[] columns = new Column[partitions.size()];
int index = 0;
for (String c : partitions) {
columns[index] = new Column(c);
index++;
}
Dataset<Row> partitionsToCompact = filteredRecords.select(columns)
.distinct(); // TODO: add a filter condition for selecting known partitions
JavaRDD<Map<Object, Object>> querywithPartitions = partitionsToCompact.toJavaRDD().map(row -> {
return convertRowToMap(row);
});
return querywithPartitions.collect();
}
private static Map<Object, Object> convertRowToMap(Row row) {
StructField[] fields = row.schema().fields();
List<StructField> structFields = Arrays.asList(fields);
Map<Object, Object> a = structFields.stream()
.collect(Collectors.toMap(e -> ((StructField) e).name(), e -> row.getAs(e.name())));
return a;
}
private static Seq<String> getPrimaryKeyColumns() {
ArrayList<String> primaryKeyColumns = new ArrayList<String>();
Seq<String> joinColumnSeq = getColumnSeq(primaryKeyColumns);
return joinColumnSeq;
}
/*
* public static void initSpark(String jobname) { sparkConf = new
* SparkConf().setAppName(jobname); sparkConf.setMaster("local[3]");
* sparkConf.set("spark.driver.allowMultipleContexts", "true"); sc = new
* JavaSparkContext(); sqlContext = new SQLContext(sc); }
*/
public static HashMap<Object, Object> getParitionColumns() {
HashMap<Object, Object> paritionColumns = new HashMap<Object, Object>();
paritionColumns.put((Object) "staging.dummy_table", "trade_date,dwh_business_date,region_cd");
return paritionColumns;
}
public static void initialize(String table) {
// initSpark("Hive Table Compaction -" + table);
partitionColumns = getParitionColumns();
}
}
Usage:
String table = "staging.dummy_table";
HiveCompaction.initialize(table);
Dataset<Row> dataToCompact = sparkSession.sql("select * from staging.dummy_table");
HiveCompaction.compact(table, dataToCompact);
sparkSession.sql("select * from staging.dummy_table_compacted").show();
System.out.println("Compaction successful");
I have a composite column (Int32Type,BytesType,AsciiType) that I need to read its value (based on criteria), modify it and save it back (something like manual counter column).
The composite column that I'm querying might exist or it may not.
What is the best way to do that in Hector?
I cannot vouch that the following solution is the best, but it covers the basic functionality, such as creating composite columns, and it does the reading and writing that your requirement boils down to ("I need to read its value (based on criteria), modify it and save it back (something like manual counter column)"). With this sample code as a base, you should be able to improve it here and there until it fits. :-) I will test it thoroughly when there is free time. With that said, the following is my suggestion.
package com.hector.dataTypes;
import java.util.Iterator;
import me.prettyprint.cassandra.serializers.ByteBufferSerializer;
import me.prettyprint.cassandra.serializers.CompositeSerializer;
import me.prettyprint.cassandra.serializers.IntegerSerializer;
import me.prettyprint.cassandra.serializers.StringSerializer;
import me.prettyprint.cassandra.service.CassandraHostConfigurator;
import me.prettyprint.cassandra.service.ColumnSliceIterator;
import me.prettyprint.cassandra.service.ThriftCluster;
import me.prettyprint.hector.api.Cluster;
import me.prettyprint.hector.api.Keyspace;
import me.prettyprint.hector.api.beans.AbstractComposite.ComponentEquality;
import me.prettyprint.hector.api.beans.ColumnSlice;
import me.prettyprint.hector.api.beans.Composite;
import me.prettyprint.hector.api.beans.HColumn;
import me.prettyprint.hector.api.ddl.ColumnFamilyDefinition;
import me.prettyprint.hector.api.ddl.ColumnType;
import me.prettyprint.hector.api.ddl.ComparatorType;
import me.prettyprint.hector.api.exceptions.HectorException;
import me.prettyprint.hector.api.factory.HFactory;
import me.prettyprint.hector.api.mutation.Mutator;
import me.prettyprint.hector.api.query.QueryResult;
import me.prettyprint.hector.api.query.SliceQuery;
import org.apache.cassandra.db.marshal.Int32Type;
import org.apache.cassandra.utils.ByteBufferUtil;
import com.google.common.base.Joiner;
/**
*
* @author jasonw
*
*/
public class CompositeExample
{
private String m_node;
private String m_keyspace;
private String m_column_family;
private ThriftCluster m_cassandraCluster;
private CassandraHostConfigurator m_cassandraHostConfigurator;
private Mutator<String> mutator;
private SliceQuery<String, Composite, String> sliceQuery;
public CompositeExample(String p_node, String p_keyspace, String p_column_family, String p_cluster)
{
m_node = p_node;
m_keyspace = p_keyspace;
m_column_family = p_column_family;
m_cassandraHostConfigurator = new CassandraHostConfigurator(m_node);
m_cassandraCluster = new ThriftCluster(p_cluster, m_cassandraHostConfigurator);
Cluster cluster = HFactory.getOrCreateCluster(p_cluster, m_cassandraHostConfigurator);
Keyspace keyspace = HFactory.createKeyspace(m_keyspace, cluster);
mutator = HFactory.createMutator(keyspace, StringSerializer.get());
sliceQuery = HFactory.createSliceQuery(keyspace, StringSerializer.get(), CompositeSerializer.get(), StringSerializer.get());
}
public boolean createCompositeColumn(String... p_new_columns)
{
try
{
ColumnFamilyDefinition cfDef = HFactory.createColumnFamilyDefinition(m_keyspace, m_column_family, ComparatorType.COMPOSITETYPE);
cfDef.setColumnType(ColumnType.STANDARD);
cfDef.setComparatorTypeAlias("(".concat(Joiner.on(",").join(p_new_columns)).concat(")"));
cfDef.setKeyValidationClass("UTF8Type");
cfDef.setDefaultValidationClass("UTF8Type");
m_cassandraCluster.addColumnFamily(cfDef, true);
return true;
}
catch (HectorException e)
{
e.printStackTrace();
}
return false;
}
public boolean saveColumn(String p_field_one, String p_field_two, String p_field_three)
{
try
{
Composite c = new Composite();
c.addComponent(Int32Type.instance.fromString(p_field_one), ByteBufferSerializer.get());
c.addComponent(ByteBufferUtil.bytes(p_field_two), ByteBufferSerializer.get());
c.addComponent(p_field_three, StringSerializer.get());
HColumn<Composite, String> col = HFactory.createColumn(c, "composite_value", CompositeSerializer.get(), StringSerializer.get());
mutator.addInsertion("key", m_column_family, col);
mutator.execute();
return true;
}
catch (HectorException e)
{
e.printStackTrace();
}
return false;
}
public boolean readColumn(String p_key, int p_column_number, ComponentEquality p_equality, int p_value)
{
if (p_column_number < 0 || p_column_number > 2)
{
return false;
}
try
{
sliceQuery.setColumnFamily(m_column_family);
sliceQuery.setKey(p_key);
Composite start = new Composite();
start.addComponent(0, p_value, p_equality);
Composite end = new Composite();
end.addComponent(0, p_value, ComponentEquality.GREATER_THAN_EQUAL);
sliceQuery.setRange(start, end, false, 1000);
QueryResult<ColumnSlice<Composite, String>> qr = sliceQuery.execute();
System.out.println("size = " + qr.get().getColumns().size());
Iterator<HColumn<Composite, String>> iter = qr.get().getColumns().iterator();
while (iter.hasNext())
{
HColumn<Composite, String> column = iter.next();
System.out.print(column.getName().get(0, IntegerSerializer.get()));
System.out.print(":");
System.out.print(column.getName().get(1, StringSerializer.get()));
System.out.print(":");
System.out.print(column.getName().get(2, StringSerializer.get()));
System.out.println("=" + column.getValue());
}
return true;
}
catch (HectorException e)
{
e.printStackTrace();
}
catch (Exception why)
{
why.printStackTrace();
}
return false;
}
public static void main(String[] args)
{
boolean isSuccess = false;
String node_ip = "192.168.0.1";
String keyspace_name = "mykeyspace";
String column_family_name = "compositecf";
String cluster_name = "Test Cluster";
CompositeExample test1 = new CompositeExample(node_ip, keyspace_name, column_family_name, cluster_name);
isSuccess = test1.createCompositeColumn("Int32Type", "BytesType", "AsciiType");
if (!isSuccess)
{
System.err.println("failed to create cf");
System.exit(-1);
}
isSuccess = test1.saveColumn("1027", "blablabla", "this is ascii field");
if (!isSuccess)
{
System.err.println("failed to write");
System.exit(-1);
}
isSuccess = test1.readColumn("key", 0, ComponentEquality.EQUAL, 1027);
if (!isSuccess)
{
System.err.println("failed to read");
System.exit(-1);
}
}
}
Composite col = new Composite(yourInt, yourBytes, yourString);
ColumnSlice<Composite, valueType> result = HFactory.createSliceQuery(keyspace, keySerializer, compositeSerializer, intSerializer)
.setColumnFamily(columnFamily)
.setKey(key)
.setRange(col, col, false, 1)
.execute()
.get();
if (result.getColumns().isEmpty()) {
// do whatever you need to do if there's no value
} else {
int value = result.getColumns().get(0).getValue();
int newValue = ...; // some modification to value
Mutator<keyType> mutator = HFactory.createMutator(keyspace, keySerializer);
HColumn<Composite, Integer> column = HFactory.createColumn(col, newValue, CompositeSerializer.get(), intSerializer); // Integer, not the primitive int, as the value type
mutator.addInsertion(key, columnFamily, column);
mutator.execute();
}
For example, there are three records in a RecordStore, and the structure of a record is: lastname;firstname;moneyborrowed
I want to show these three records inside a LWUIT Table, sorted by the lastname column. How do I achieve that?
save using
Preferences preferences = new Preferences("mydbname");
preferences.put("key","lastname;firstname;moneyborrowed");
preferences.save();
and retrieve using
String val = (String) preferences.get("key");
Preferences.java
import java.util.Enumeration;
import java.util.Hashtable;
import javax.microedition.rms.RecordEnumeration;
import javax.microedition.rms.RecordStore;
import javax.microedition.rms.RecordStoreException;
public class Preferences {
private final String mRecordStoreName;
private final Hashtable mHashtable;
public Preferences(String recordStoreName)
throws RecordStoreException {
mRecordStoreName = recordStoreName;
mHashtable = new Hashtable();
load();
}
public String get(String key) {
return (String)mHashtable.get(key);
}
public void put(String key, String value) {
if (value == null) value = "";
mHashtable.put(key, value);
}
private void load() throws RecordStoreException {
RecordStore rs = null;
RecordEnumeration re = null;
try {
rs = RecordStore.openRecordStore(mRecordStoreName, true);
re = rs.enumerateRecords(null, null, false);
while (re.hasNextElement()) {
byte[] raw = re.nextRecord();
String pref = new String(raw);
// Parse out the name.
int index = pref.indexOf('|');
String name = pref.substring(0, index);
String value = pref.substring(index + 1);
put(name, value);
}
}
finally {
if (re != null) re.destroy();
if (rs != null) rs.closeRecordStore();
}
}
public void save() throws RecordStoreException {
RecordStore rs = null;
RecordEnumeration re = null;
try {
rs = RecordStore.openRecordStore(mRecordStoreName, true);
re = rs.enumerateRecords(null, null, false);
// First remove all records, a little clumsy.
while (re.hasNextElement()) {
int id = re.nextRecordId();
rs.deleteRecord(id);
}
// Now save the preferences records.
Enumeration keys = mHashtable.keys();
while (keys.hasMoreElements()) {
String key = (String)keys.nextElement();
String value = get(key);
String pref = key + "|" + value;
byte[] raw = pref.getBytes();
rs.addRecord(raw, 0, raw.length);
}
}
finally {
if (re != null) re.destroy();
if (rs != null) rs.closeRecordStore();
}
}
}
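The Preferences class above covers storing and reloading the lastname;firstname;moneyborrowed strings; the LWUIT Table and the sort by last name from the original question still have to be done in the UI code. The following is only a rough sketch under stated assumptions: it uses the com.sun.lwuit.table API (Table/DefaultTableModel) from LWUIT 1.5, assumes Display.init() has already been called in the MIDlet, and the BorrowerTable class name is made up for illustration.
import com.sun.lwuit.Form;
import com.sun.lwuit.table.DefaultTableModel;
import com.sun.lwuit.table.Table;

public class BorrowerTable {

    // split "lastname;firstname;moneyborrowed" into its three fields (CLDC has no String.split)
    private static String[] parse(String record) {
        int first = record.indexOf(';');
        int second = record.indexOf(';', first + 1);
        return new String[] {
            record.substring(0, first),
            record.substring(first + 1, second),
            record.substring(second + 1)
        };
    }

    public static void show(String[] records) {
        String[][] rows = new String[records.length][];
        for (int i = 0; i < records.length; i++) {
            rows[i] = parse(records[i]);
        }
        // simple insertion sort on column 0 (lastname); CLDC has no Arrays.sort with a Comparator
        for (int i = 1; i < rows.length; i++) {
            String[] current = rows[i];
            int j = i - 1;
            while (j >= 0 && rows[j][0].compareTo(current[0]) > 0) {
                rows[j + 1] = rows[j];
                j--;
            }
            rows[j + 1] = current;
        }
        Table table = new Table(new DefaultTableModel(
                new String[] { "Last name", "First name", "Money borrowed" }, rows));
        Form form = new Form("Borrowers");
        form.addComponent(table);
        form.show();
    }
}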