rule identifier failed predicate - groovy

Recently, I tried to run a Pig script from Groovy. Here is my code:
def appID = ['pub000000', 'pub000004', 'pub000001', 'pub000004'] as Object[]
before :appInfo = new Object[4]
now: **def appInfo = ['info1','info2','info3','info4']**
// Register one Pig LOAD statement per index: the alias named by appInfo[i]
// reads the HBase table Information.<appID[i]>.
for (int i = 0; i < appInfo.size(); i++) {
//Load all the related appInfo tables
// Columns meta:number1 and meta:number2 are exposed as chararray fields.
pigServer.registerQuery("${appInfo[i]} = LOAD'hbase://Information.${appID[i]}' " +
"USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('meta:number1 " +
"meta:number2') " +
"AS (number1:chararray, number2:chararray);")
}
// Combine the four aliases registered above into one relation, totalAppinfo.
pigServer.registerQuery("totalAppinfo = UNION ${appInfo[0]},${appInfo[1]},${appInfo[2]},${appInfo[3]};")
I finally worked it out: the appInfo array just needed to be given actual values.

Complete guess (never used piglatin), but does this work?
// Table suffixes to load; each id is also used as the Pig alias of its relation.
// NOTE(review): 'pub000004' appears twice, so that alias is registered twice
// and is listed twice in the UNION below.
def appID = ['pub000000', 'pub000004', 'pub000001', 'pub000004']
appID.each { id ->
    // Register a LOAD of hbase://Information.<id> exposing two chararray columns.
    pigServer.registerQuery( "${id} = LOAD'hbase://Information.${id}' " +
        "USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('meta:number1 " +
        "meta:number2') " +
        "AS (number1:chararray, number2:chararray);")
}
// The list is declared as appID; referencing appId (lower-case d) would fail
// at runtime because no such variable exists.
pigServer.registerQuery("totalAppinfo = UNION ${appID.join(',')};")
Edit after update to question:
// HBase table suffixes, in the same order as the aliases in appInfo.
def appID = ['pub000000', 'pub000004', 'pub000001', 'pub000004']
// Pig alias to use for each table.
def appInfo = ['info1','info2','info3','info4']
// transpose() pairs the two lists element by element, so the closure receives
// one (alias, table suffix) pair per iteration.
[appInfo,appID].transpose().each { info, id ->
pigServer.registerQuery( "${info} = LOAD'hbase://Information.${id}' " +
"USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('meta:number1 " +
"meta:number2') " +
"AS (number1:chararray, number2:chararray);")
}
// UNION all registered aliases; join(',') produces "info1,info2,info3,info4".
pigServer.registerQuery("totalAppinfo = UNION ${appInfo.join(',')};")

Related

Replace series of Unicode characters / Python / Twitter

I am pulling text from a tweet using the Twitter API and Python 3.3 and I'm running into the part of the tweet where the tweeter put three symbols in the tweet. They are shown below.
The two flags and the thumbs up seem to be causing the problem. The following is the plain text tweet.
RT #John_Hunt07: Just voted for #marcorubio is Florida! I am ready for a New American Century!! #FLPrimary \ud83c\uddfa\ud83c\uddf8\ud83c\uddfa\ud83c\uddf8\ud83d\udc4d
The following is the code I'm using.
import json
import mysql.connector
import sys
from datetime import datetime
from MySQLCL import MySQLCL
class Functions(object):
    """Static helpers for cleaning tweet fields before they are stored.

    Every method is a ``staticmethod`` and is meant to be called on the
    class itself, e.g. ``Functions.Clean(value)``.
    """

    # Characters deleted by Clean().
    _CLEAN_TABLE = str.maketrans("", "", "'(),")

    # Code point -> replacement used by UnicodeFilter(); None deletes.
    _UNICODE_TABLE = {
        0x2019: None,            # right single quotation mark
        0x003C: "(lessthan)",    # '<'
        0x003E: "(greaterthan)", # '>'
        # Lone UTF-16 surrogates seen in the sample tweet.
        0xD83C: None,
        0xDDFA: None,
        0xDDF8: None,
        0xD83D: None,
        0xDC4D: None,
    }

    @staticmethod
    def Clean(string):
        """Return ``str(string)`` without quotes, parentheses or commas.

        Surrounding whitespace is stripped after the characters are removed.
        """
        return str(string).translate(Functions._CLEAN_TABLE).strip()

    @staticmethod
    def ParseTweet(string):
        """Print the filtered text of each tweet in ``string``.

        Args:
            string: Sequence of tweet dictionaries. Every tweet must provide
                the keys read below; only ``possibly_sensitive`` is optional
                and defaults to an empty string.

        Raises:
            KeyError: If a tweet lacks one of the required keys.
        """
        for tweet in string:
            # These fields mirror the columns of the (currently disabled)
            # INSERT into the ``tweet`` table; only ``tweettext`` is used
            # for now.
            tweetid = tweet["id_str"]
            tweetcreated = tweet["created_at"]
            tweettext = tweet["text"]
            tweetsource = tweet["source"]
            truncated = tweet["truncated"]
            inreplytostatusid = tweet["in_reply_to_status_id"]
            inreplytouserid = tweet["in_reply_to_user_id"]
            inreplytoscreenname = tweet["in_reply_to_screen_name"]
            geo = tweet["geo"]
            coordinates = tweet["coordinates"]
            place = tweet["place"]
            contributors = tweet["contributors"]
            isquotestatus = tweet["is_quote_status"]
            retweetcount = tweet["retweet_count"]
            favoritecount = tweet["favorite_count"]
            favorited = tweet["favorited"]
            retweeted = tweet["retweeted"]
            possiblysensitive = tweet.get("possibly_sensitive", "")
            language = tweet["lang"]
            print(Functions.UnicodeFilter(tweettext))
            # NOTE: when the INSERT is re-enabled, pass these values as query
            # parameters instead of concatenating them into the SQL text.

    @staticmethod
    def ToBool(variable):
        """Map the strings 'true'/'false' (any case) to a bool.

        Returns ``None`` for any other string.
        """
        lowered = variable.lower()
        if lowered == 'true':
            return True
        if lowered == 'false':
            return False
        return None

    @staticmethod
    def CheckNullNum(var):
        """Return ``"0"`` for ``None``, otherwise ``str(var)``."""
        return "0" if var is None else str(var)

    @staticmethod
    def CheckNull(var):
        """Return ``""`` for ``None``, otherwise ``var`` unchanged."""
        return "" if var is None else var

    @staticmethod
    def ToSQL(var):
        """Return ``var`` with every single quote removed."""
        return str(var.replace("'", ""))

    @staticmethod
    def UnicodeFilter(var):
        """Strip or spell out the characters listed in ``_UNICODE_TABLE``.

        The result is then passed through ``ToSQL`` so single quotes are
        removed as well. Characters not listed in the table are kept.
        """
        # One translate() pass is equivalent to the chained replace() calls:
        # no replacement text contains a character that is itself replaced.
        return Functions.ToSQL(var.translate(Functions._UNICODE_TABLE))

    @staticmethod
    def FormatDate(var):
        """Convert a ``created_at`` timestamp to ``Y-M-DTH:M:S`` text.

        ``var`` must match ``"%a %b %d %H:%M:%S %z %Y"``. The components are
        written without zero padding (e.g. ``2016-3-15T12:5:9``).

        Raises:
            ValueError: If ``var`` does not match the expected format.
        """
        dt = datetime.strptime(var, "%a %b %d %H:%M:%S %z %Y")
        return "{}-{}-{}T{}:{}:{}".format(
            dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second
        )
As you can see, I've been using the function UnicodeFilter in order to try to filter out the unicode characters in hex. The function works when dealing with single unicode characters, but when encountering multiple unicode characters placed together, this method fails and gives the following error:
'charmap' codec can't encode characters in position 107-111: character maps to 'undefined'
Do any of you have any ideas about how to get past this problem?
UPDATE: I have tried Andrew Godbehere's solution and I was still running into the same issues. However, I decided to see if there were any specific characters that were causing a problem, so I decided to print the characters to the console character by character. That gave me the error as follows:
'charmap' codec can't encode character '\U0001f1fa' in position 0: character maps to 'undefined'
Upon seeing this, I added this to the UnicodeFilter function and continued testing. I have run into multiple errors of the same kind while printing the tweets character by character. However, I don't want to have to keep making these exceptions. For example, see the revised UnicodeFilter function:
#staticmethod
# NOTE(review): the line above is presumably the `@staticmethod` decorator - confirm.
# Revised filter: removes an explicit, hand-maintained list of characters from
# the text and then passes the result through Functions.ToSQL.
def UnicodeFilter(var):
temp = var
# encode() targets UTF-8 by default; errors='ignore' drops anything that cannot
# be encoded (lone surrogates), and decode() turns the bytes back into str.
temp = temp.encode(errors='ignore').decode('utf-8')
temp = temp.replace(chr(0x2019), "")
temp = temp.replace(chr(0x003c), "(lessthan)")
temp = temp.replace(chr(0x003e), "(greaterthan)")
# The five lone-surrogate replacements below can no longer match: the
# encode/decode round trip above has already removed them.
temp = temp.replace(chr(0xd83c), "")
temp = temp.replace(chr(0xddfa), "")
temp = temp.replace(chr(0xddf8), "")
temp = temp.replace(chr(0xd83d), "")
temp = temp.replace(chr(0xdc4d), "")
temp = temp.replace(chr(0x2026), "")
# Individual non-BMP characters, added one at a time as they caused errors.
temp = temp.replace(u"\U0001F1FA", "")
temp = temp.replace(u"\U0001F1F8", "")
temp = temp.replace(u"\U0001F44D", "")
temp = temp.replace(u"\U00014F18", "")
temp = temp.replace(u"\U0001F418", "")
temp = temp.replace(u"\U0001F918", "")
temp = temp.replace(u"\U0001F3FD", "")
temp = temp.replace(u"\U0001F195", "")
temp = Functions.ToSQL(temp)
return str(temp)
I don't want to have to add a new line for every character that causes a problem. Through this method, I have been able to pass multiple tweets, but this issue resurfaces with every tweet that contains different symbols. Is there not a solution that will filter out all these characters? Is it possible to filter out all characters not in the utf-8 character set?
Try the built-in unicode encode/decode error handling functionality: str.encode(errors='ignore')
For example:
# Sample tweet; the trailing \ud83c... escapes are lone UTF-16 surrogate code points.
problem_string = """\
RT #John_Hunt07: Just voted for #marcorubio is Florida! I am ready for a New American Century!! #FLPrimary \ud83c\uddfa\ud83c\uddf8\ud83c\uddfa\ud83c\uddf8\ud83d\udc4d
"""
# encode() targets UTF-8 by default; errors='ignore' drops the surrogates it
# cannot encode, so the round trip returns the text without them.
print(problem_string.encode(errors='ignore').decode('utf-8'))
Ignoring errors removes problematic characters.
> RT #John_Hunt07: Just voted for #marcorubio is Florida! I am ready for a New American Century!! #FLPrimary
Other error handling options may be of interest.
xmlcharrefreplace for instance would yield:
> RT #John_Hunt07: Just voted for #marcorubio is Florida! I am ready for a New American Century!! #FLPrimary 🇺🇸🇺🇸👍
If you require custom filtering as implied by your UnicodeFilter function, see Python documentation on registering an error handler.
Python provides a useful stacktrace so you can tell where errors are coming from.
Using it, you would have found that your print is causing the exception.
print() is failing because you're running Python from the Windows console, which, by default, only supports your local 8-bit charmap. You can add support with: https://github.com/Drekin/win-unicode-console
You can also just write your data straight to a text file. Open the file with:
open('output.txt', 'w', encoding='utf-8')
Found the answer. The issue was that there was a range of characters in the tweets that were causing problems. Once I found the correct Unicode range for the characters, I implemented the for loop to replace any occurrence of any Unicode character within that range. After implementing that, I was able to pull thousands of tweets without any formatting or MySQL errors at all.
@staticmethod
def UnicodeFilter(var):
    """Normalise tweet text and pass it through ``MySQLCL.ToSQL``.

    The right single quotation mark (U+2019) becomes an ASCII apostrophe,
    the horizontal ellipsis (U+2026) is removed, and every character in the
    range U+1F195..U+1F918 (inclusive) is removed.

    Returns the ``str()`` of whatever ``MySQLCL.ToSQL`` returns.
    """
    temp = var.replace(chr(0x2019), "'").replace(chr(0x2026), "")
    # Single pass over the text instead of one str.replace() call for each of
    # the ~1,900 code points in range(127381, 129305).
    temp = "".join(ch for ch in temp if not 0x1F195 <= ord(ch) <= 0x1F918)
    temp = MySQLCL.ToSQL(temp)
    return str(temp)

subset of a database using SUM

// Run an aggregate query over table xxxxxx and print the first column of the first row.
rs = s.executeQuery("SELECT Class1_predicted, Class2_predicted, Class3_predicted, Class_predicted"
// NOTE(review): the next line is outside any string literal, so the compiler
// parses SUM(...) AS "Total Profit" as Java code - this is the reported error.
+ SUM(PROFIT_LOSS) AS "Total Profit",
+ "FROM xxxxxx "
// NOTE(review): SQL combines WHERE conditions with AND/OR, not commas; the
// trailing commas here and on the next line also run into the following keyword.
+ "WHERE CLASS1_PREDICTED = curr_class1_predicted, CLASS2_PREDICTED = curr_class2_predicted, CLASS3_PREDICTED = curr_class3_predicted, CLASS_PREDICTED = curr_class_predicted,"
+ "PROFIT_LOSS >= 0,"
+ "GROUP BY Class1_predicted, Class2_predicted, Class3_predicted,Class_predicted");
rs.next();
// NOTE(review): column 1 of this SELECT is Class1_predicted, not a record count - confirm intent.
int recordCount = rs.getInt(1);
myConsole.getOut().println("Number of records in subset of table xxxxx where P/L >= 0: " + recordCount);
I am getting an error on the AS in the second line ?
Not sure how to correct ?
Bob M
You should put the whole query in a String.
It seems like here you are not actually quoting the expression:
+ SUM(PROFIT_LOSS) AS "Total Profit",
It should be something like this:
// Quoted as part of the SQL text. The leading comma separates it from the
// preceding column, and there must be no comma before FROM.
+ ", SUM(PROFIT_LOSS) AS Total_Profit "

Gyroscope and accelerometer output

I am currently working on a project using an Arduino, a gyro, an accelerometer, and a Bluetooth chip to try to model some data. I am trying to gather data, package it up, and send it to a phone via Bluetooth. The issue is that the Bluetooth chip I am using is a low-energy one, so it can only send messages of 20 bytes at a time. I am trying to get past this by storing the data collected over a certain amount of time and then sending it all in 20-byte bursts. I am currently testing this method without sending the data, just printing it to the serial monitor. This is where my issue arises: when printing the data in real time everything works, but when I try to store it in an array I get this:
593,575,567,0,0,0
592,575,567,0,0,0
592,575,567,0,0,0
592,575,567,0,0,0
592,575,567,0,0,0
593,575,567,0,0,0
586,576,568,0,0,0
0,0,0
0,0
0,0
,0,0,0
0,0,0
As you can see it seems to just break. If anyone could help me out it would be great!
Here is the relevant code chunk
// Collect loopVal samples. For each sample the gyro readings are summed for
// `time` milliseconds, converted to angles, combined with the latest
// accelerometer readings into a CSV line, printed, and stored in res[i].
for (int i = 0; i < loopVal; i++)
{
    // Reset the per-sample state.
    yawGyroValDouble = 0;
    pitchGyroValDouble = 0;
    rollGyroValDouble = 0;
    totalClicksY = 0;
    angleY = 0;
    totalClicksP = 0;
    angleP = 0;
    totalClicksR = 0;
    angleR = 0;
    xRe = 0;
    yRe = 0;
    zRe = 0;
    s = "";

    // millis() returns unsigned long; storing it in an int truncates the value
    // on boards where int is 16 bits wide.
    unsigned long starttime = millis(); // get start time
    unsigned long endtime = starttime;  // init end time
    while ((endtime - starttime) < time)
    {
        getGyroValues(); // This will update rollGyroVal, pitchGyroVal, and yawGyroVal with new values

        yawGyroValDouble = yawGyroVal;
        if (abs(yawGyroValDouble) > abs(gyroNoiseThresh)) { // ignore noise
            totalClicksY += yawGyroValDouble; // update runsum
        }

        // Each axis is compared against the threshold using its own reading;
        // previously pitch and roll were gated on the yaw value.
        pitchGyroValDouble = pitchGyroVal;
        if (abs(pitchGyroValDouble) > abs(gyroNoiseThresh)) { // ignore noise
            totalClicksP += pitchGyroValDouble; // update runsum
        }

        rollGyroValDouble = rollGyroVal;
        if (abs(rollGyroValDouble) > abs(gyroNoiseThresh)) { // ignore noise
            totalClicksR += rollGyroValDouble; // update runsum
        }

        // Only the last accelerometer reading of the window is kept.
        xRe = analogRead(pinX);
        yRe = analogRead(pinY);
        zRe = analogRead(pinZ);

        delay(gyroDelayTime);
        endtime = millis();
    }

    // Convert accumulated gyro counts to angles.
    angleY = totalClicksY / clicksPerDegCCW;
    angleP = totalClicksP / clicksPerDegCCW;
    angleR = totalClicksR / clicksPerDegCCW;

    String yawSend = String(angleY);
    String pitchSend = String(angleP);
    String rollSend = String(angleR);
    String xSend = String(xRe);
    String ySend = String(yRe);
    String zSend = String(zRe);

    //s = "Accel - X: " + xSend + " Y: " + ySend + " Z: " + zSend + "\n" + "Gyro - Yaw: " + yawSend + " Pitch: " + pitchSend + " Roll: " + rollSend;
    // CSV layout: x,y,z,yaw,pitch,roll
    s = "" + xSend + "," + ySend + "," + zSend + "," + yawSend + "," + pitchSend + "," + rollSend;
    Serial.println(s);
    res[i] = s;
}
You didn't show where totalClicksY, totalClicksP, totalClicksR, and clicksPerDegCCW are declared, but I'm betting they are declared as integer types (int or long). If so, the result of your maths:
angleY = totalClicksY / clicksPerDegCCW;
angleP = totalClicksP / clicksPerDegCCW;
angleR = totalClicksR / clicksPerDegCCW;
will be integers. And if the results of those divisions are less than 1, they will be truncated to 0.
Try declaring totalClicksY, totalClicksP, totalClicksR and clicksPerDegCCW as double. That, or cast them when you do the math, like this:
angleY = (double)totalClicksY / (double)clicksPerDegCCW;
angleP = (double)totalClicksP / (double)clicksPerDegCCW;
angleR = (double)totalClicksR / (double)clicksPerDegCCW;
(I'm also assuming that angleY, angleP, and angleR are also declared as doubles - if not they definitely should be).

Reading style names in table cell of doc file in Apache-POI

I am able to read the table cells, But I wanted also to read the applied style name of each cell of a row in a table. How can I achieve this?
EDIT
The following is the code snippet I have tried. With it I am able to read the cell text and the applied pstyle (paragraph style), but I am not able to read the rstyles.
/**
 * Prints the paragraph style name and text of every paragraph in a Word
 * (.doc) file, one line per paragraph in the form {@code styleName -> text}.
 *
 * @param path file-system path of the document to read
 * @throws Exception if the file cannot be opened or parsed
 */
private static void processDoc(String path) throws Exception {
    POIFSFileSystem fis = new POIFSFileSystem(new FileInputStream(path));
    HWPFDocument wdDoc = new HWPFDocument(fis);
    // list all style names and indexes in stylesheet
    /*for (int j = 0; j < wdDoc.getStyleSheet().numStyles(); j++) {
        if (wdDoc.getStyleSheet().getStyleDescription(j) != null) {
            System.out.println(j + ": " + wdDoc.getStyleSheet().getStyleDescription(j).getName());
        } else {
            // getStyleDescription returned null
            System.out.println(j + ": " + null);
        }
    }*/
    // set range for entire document
    Range range = wdDoc.getRange();
    for (int i = 0; i < range.numParagraphs(); i++) {
        Paragraph p = range.getParagraph(i);
        // only look the style up when its index lies inside the stylesheet
        if (wdDoc.getStyleSheet().numStyles() > p.getStyleIndex()) {
            //System.out.println(wdDoc.getStyleSheet().numStyles() + " -> " + p.getStyleIndex());
            StyleDescription style = wdDoc.getStyleSheet().getStyleDescription(p.getStyleIndex());
            // getStyleDescription may return null (see the listing above);
            // print "null" as the name instead of throwing a NullPointerException.
            String styleName = (style != null) ? style.getName() : null;
            // write style name and associated text, with control characters removed
            System.out.println(styleName + " -> " + p.text().replaceAll("[\u0000-\u001f]", ""));
        } else {
            // style index is out of range: report the stylesheet size and the index
            System.out.println("\n" + wdDoc.getStyleSheet().numStyles() + " ----> " + p.getStyleIndex());
        }
    }
}

Beginner: concat a string from different primitives

Apologies, as I'm sure this is a stupid question, but...
Please could anyone explain to me why this:
// Demonstrates how + behaves when primitives and Strings are mixed in one expression.
public class java {
public static void main(String[] args) {
byte zero = 0;
short one = 1;
int three = 3;
long one2 = 1;
float onepointnought = 1.0f;
double onedotnone = 1.0;
char letterh = 'H';
char letterw = 'w';
char letterr = 'r';
char letterd = 'd';
boolean bool = true;
// + is evaluated left to right. Until the first String operand (" ") is
// reached, every operand is numeric, so letterh ('H' == 72) is promoted and
// added to 3 + 1 + 1 + 0, giving 77. From " " onwards + is string concatenation.
String output = letterh + three + one + one2 + zero + " " + letterw + zero + letterr + one + letterd + " " + (onepointnought+onedotnone) + " " + bool;
System.out.println(output);
} }
Is outputting:
77 w0r1d 2.0 true
I'm expecting it to say "H3ll0 w0r1d 2.0 true"
It's from the interactive online java tutorials over at http://www.learnjavaonline.org/
Thanks!
Neil.
In this sentence
String output = letterh + three + one + one2 + zero + " " + letterw + zero + letterr + one + letterd + " " + (onepointnought+onedotnone) + " " + bool;
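letterh contains 'H', whose ASCII value is 72, and three + one + one2 + zero adds up to 5. Because none of these operands is a String, they are added numerically, so the result starts with 77 (72 + 5).
To get the expected text, the leading operands must be converted to a String first, for example by starting the expression with an empty string: "" + letterh + three + one + one2 + zero + ...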

Resources