I'm parsing a 2GB+ XML file; initially I process (multi-threaded) records one by one.
But a single-insert SQL query to the DB is too slow (the DB is the bottleneck), hence I need to create a mass-insert query/CSV file after going through the XML document. But unlike Java, in Node.js I can't tell when the document has been fully parsed.
Been working on it for 12hrs+, would be great if anyone can help me out. Perhaps try other xml libraries ? or just use good old java.
var bigXml = require('big-xml');

// Declare the reader — it was previously assigned as an implicit global.
var reader = bigXml.createReader('dblp.xml', /^(article)$/, { gzip: false });

var count = 0;
var temp = [];

// Accumulate every <article> record. Nothing in this callback awaits,
// so it does not need to be async.
reader.on('record', function (record) {
  console.log("Processing article:" + count);
  count++;
  temp.push(importPubBuilder(record));
});

// Stock big-xml never signals end-of-document; the big-xml-notify fork
// re-emits the underlying file stream's 'close' event. Flush `temp` to
// the CSV / mass-insert query here, once the whole file has been parsed.
reader.on('close', function () {
  // take the temp array to CSV now that reading has finished
});

reader.on('error', function (err) {
  console.log(err);
});
/**
 * Build a flat publication object from a big-xml <article> record.
 *
 * @param {{attrs: Object, children: Array<{tag: string, text?: string}>}} record
 *   Parsed record node: `attrs` holds the XML attributes, `children` the
 *   immediate child elements.
 * @returns {{pubkey: *, mdate: *, title: *, ee: *, url: *}}
 *   Missing child tags yield null (matching the original defaults).
 */
function importPubBuilder(record) {
  // One linear scan per tag instead of the original two (`find` to test,
  // `find` again to read).
  const childText = (tag) => {
    const child = record.children.find((c) => c.tag === tag);
    return child != null ? child.text : null;
  };
  return {
    pubkey: record.attrs.key,
    mdate: record.attrs.mdate,
    title: childText("title"),
    ee: childText("ee"),
    url: childText("url"),
  };
}
Update: I added a 'close' event that is emitted when the file stream closes. A fork with this improvement is on npm as big-xml-notify (link).
Related
I have code where I need to upload bulk records (around 50,000) from a CSV. From Angular 6, we make a REST API call that passes the form data to the Node server. In the Node portion we loop over that set of 50,000 records and upload them to our backend in batches of 1,500. A batch is uploaded for every consecutive 1,500 records.
In our local environment it works completely fine, and we have tested uploading the same 50,000 records there. But when we moved the same code to our SIT environment we hit an error: after uploading a certain number of records (about 20,000), it starts uploading from the first record again. This is weird behaviour that we couldn't reproduce locally. Can anyone please suggest what I need to do?
/**
 * Upload records to the backend in batches of 1500 via update.invokeSDK.
 *
 * Bug fix: the original only flushed the trailing partial batch when the
 * LAST element of jsonblob had a non-null PN (the "last iteration" check
 * lived inside the PN guard), so up to 1499 records could be silently
 * dropped. The final flush now runs unconditionally after the loop.
 *
 * @param {Array<Object>} jsonblob - parsed CSV rows (PN, SA, PTy, Otyp).
 * @param {*} res - response object forwarded to update.invokeSDK.
 * @returns {Object} `{ failedRecords: "fail" }` on completion (preserved
 *   from the original contract), or the caught error object on failure.
 */
var bulkUploadData = async function (jsonblob, res) {
  const BATCH_SIZE = 1500;
  let payloadjson = [];
  try {
    for (let i = 0; i < jsonblob.length; i++) {
      const row = jsonblob[i];
      // Rows without a PN are skipped entirely (original behavior).
      if (row.PN == null) continue;
      // The two original branches built the same object; only the key
      // insertion order (and thus the serialized JSON) differed, so it
      // is preserved here.
      const JFB = row.Otyp !== "L"
        ? { srv: row.SA, ptype: row.PTy, ms: row.PN }
        : { ms: row.PN, srv: row.SA, ptype: row.PTy };
      payloadjson.push(JSON.stringify(JFB));
      if (payloadjson.length === BATCH_SIZE) {
        console.log("Inside first loop---------counter--------- ", i);
        await update.invokeSDK("fnname", payloadjson, res);
        payloadjson = [];
        // Brief pause between batches to avoid hammering the backend.
        await sleepTime(20);
        console.log("-----sleeped for 20ms----- ", i);
      }
    }
    // Flush whatever is left — regardless of what the last row looked like.
    if (payloadjson.length > 0) {
      await update.invokeSDK("fnname", payloadjson, res);
      payloadjson = [];
    }
    return { "failedRecords": "fail" };
  } catch (err) {
    console.log('error:----', err);
    return err;
  }
};
I am trying to use cheerio.js to dump out all the tags in an xml document. My attempt is as follows but it seems to iterate not over every tag but every word in the document (or so it seems to me)
// Accumulates every distinct element name seen so far (keys only).
let uniqTags = {};

/**
 * Recursively record the names of all elements reachable from `tagname`.
 *
 * Fixes over the original:
 *  - a visited-guard prevents re-walking repeated tag names and, more
 *    importantly, infinite recursion when tags nest mutually (a > b > a);
 *  - non-tag nodes (text, comments) are filtered out BEFORE mapping, so
 *    .get() no longer contains undefined holes from the map callback.
 *
 * @param {Function} $ - cheerio root (selector function).
 * @param {string} tagname - element name to start the walk from.
 */
const listTags = function ($, tagname) {
  if (Object.prototype.hasOwnProperty.call(uniqTags, tagname)) {
    return; // already visited — every element of this name was walked once
  }
  uniqTags[tagname] = '';
  const childNames = $(tagname)
    .contents()
    .filter((index, element) => element.type === 'tag')
    .map((index, element) => element.name)
    .get();
  for (const name of childNames) {
    listTags($, name);
  }
};
// Read a single XML file (xmldir/xmlfile are defined elsewhere), parse it in
// XML mode, and walk its tag names starting from the <document> root.
const xmlSource = fs.readFileSync(path.join(xmldir, xmlfile), 'utf8');
const $ = cheerio.load(xmlSource, {
  xmlMode: true,
  normalizeWhitespace: true,
});
listTags($, 'document');
thanks to #pguardiario, I was able to figure out the following code that works
// For every element in the document ('*'), collect the names of its element
// children: filter to type === 'tag' FIRST so that .get() returns a clean
// array of names with no undefined entries from non-tag nodes.
$('*').contents()
.filter((index, element) => { return element.type === 'tag' })
.map((index, element) => { return element.name } )
.get();
Running the above on almost 250K XML files inside an fs.readdirSync() loop took only 15 minutes to generate a list of the unique tags used across all the files.
I have an issue with handling parallel requests with cloud functions.
My scenario is to select a driver from the db and update its status.
I do check for that status property before updating it, but when I send multiple requests (database on create triggers to be specific) within a second it doesn't seem to read the updated status property. And it always updates with the information of the last request. I also have noticed that sometimes the requests are processed altogether.
What can I do to fix these issues?
index.js
const db = app.database();
const TripManagementUtil = require('./utils').TripManagementUtil;
exports.triggerNotifications = functions.database.ref('/Trip/{pushId}').onCreate( (snapshot, context) =>
{
var newTrip = snapshot.val();
var tripKey = context.params.pushId;
var tripManagementUtil = new TripManagementUtil();
tripManagementUtil.searchDrivers(tripKey, newTrip, db);
});
utils.js
// Search for candidate drivers near the trip's pickup point and hand the trip
// to one of them. A GeoFire radius query streams nearby driver keys via
// 'key_entered'; after 1.5 s the query is cancelled and the collected
// candidates are examined in order of distance.
//
// NOTE(review): reading a driver's status with once() and acting on it later
// is not atomic — concurrent onCreate triggers can all read the same stale
// status and assign the same driver. A Firebase transaction on the driver
// node is needed to make the check-and-update atomic.
searchDrivers(tripKey, trip, db){
const results = [];
var lat = trip.pickupLocation.lat, long = trip.pickupLocation.lng;
var vehicleTypeID = trip.vehicleTypeID;
// Drivers already offered this trip; they are skipped on re-search.
var alreadyAssigned = trip.alreadyAssigned;
var self = this;
// NOTE(review): this compares against the STRING 'undefined', not the value
// undefined — presumably meant as a null/undefined check; verify.
if(alreadyAssigned == null || alreadyAssigned == 'undefined'){
alreadyAssigned = [];
}
// geofireQuery holds the value returned by .on(); cancel() is called on it
// below — confirm against the GeoFire API that this is the intended handle.
const geofireQuery = new GeoFire(db.ref('vehicleLocation').child(vehicleTypeID + "")).query({
center: [lat, long],
radius: constants.searchRadius
})
.on('key_entered', (key, coords, distance) => {
// Collect each nearby driver not already tried for this trip.
if(alreadyAssigned.indexOf(key) == -1){
var result = {
driverID: key,
distance: distance
}
results.push(result);
}
});
// Stop collecting after 1.5 s, then evaluate candidates closest-first.
setTimeout(() => {
geofireQuery.cancel();
if (results.length === 0) {
self.noDriversHandler(alreadyAssigned, tripKey, db);
} else {
results.sort((a, b) => a.distance - b.distance);
var driversAvailable = false;
var index = 0;
// Examines results[index]; intended to step through candidates until an
// available driver is found.
function checkDriver(){
db.ref().child("driver").child("available").child(results[index].driverID).once('value').then(function(vehicleSnap){
var vehicle = vehicleSnap.val();
if(!driversAvailable){
// Suitable = right vehicle type, not already on/confirmed for a trip,
// and not previously offered this trip.
if(vehicle != null && vehicle.vehicleTypeID == vehicleTypeID
&& (vehicle.tripStatus != TripVehicleActionEnum.DriverConfirmed && vehicle.tripStatus != TripVehicleActionEnum.VehicleAssigned)
&& alreadyAssigned.indexOf(vehicle.driverID +"") === -1){
driversAvailable = true;
self.driverExistsHandler(trip, tripKey, alreadyAssigned, vehicle, db);
}
if(!driversAvailable && index + 1 == results.length){
self.noDriversHandler(alreadyAssigned, tripKey, db);
}
// NOTE(review): when the current driver is unsuitable and no driver has
// been found yet, index is incremented but checkDriver() is NOT called
// again — only the first (closest) candidate is ever examined. The
// recursive call below only runs AFTER a driver was already found,
// which looks inverted. Confirm intended flow; likely a bug.
index++;
}
else{
index++;
checkDriver();
}
});
}
checkDriver();
}
}, 1500);
}
To write data to the database where the value is based on an existing value, you'll want to use Firebase Realtime Database transactions. For more on this, and examples, see save data transactionally in the Firebase documentation.
When a user registers a new account, I want to generate a reference code. I'm using a do-while loop to check whether the reference code already exists in the database, but the loop always runs at least twice even though there are only a few records in the database. When I tried to run the code, it got stuck and kept waiting for the response forever.
Here's my implement:
/**
 * Generate a random 8-character code drawn from an alphanumeric alphabet.
 * The alphabet deliberately omits 'O'/'o' (easily confused with '0').
 *
 * @returns {string} an 8-character mixed-case alphanumeric string.
 */
function makeCode() {
  const possible = "ABCDEFGHIJKLMNPQRSTUVWXYZabcdefghijklmnpqrstuvwxyz0123456789";
  let text = "";
  for (let remaining = 8; remaining > 0; remaining--) {
    const pick = Math.floor(Math.random() * possible.length);
    text += possible[pick];
  }
  console.log("### makeReferenceCode DONE ###")
  return text;
}
/**
 * Generate a reference code that does not yet exist in /ref_codes.
 *
 * Bug fix: the original wrapped an asynchronous `on("value", ...)` listener
 * in a synchronous do/while. Because the callback runs later, `isExisted`
 * never flipped before the `while` test, so the loop spun (and `on`
 * attached persistent listeners) — the observed hang. This version uses a
 * one-shot `once("value")` read and simply retries with a fresh code on
 * collision.
 *
 * @returns {Promise<{success: boolean, data: string}>} resolves with the
 *   unique uppercase code; rejects with
 *   `{success: false, error_code, error_msg}` on a database error.
 */
function genReferenceCode() {
  return new Promise((resolve, reject) => {
    const referenceRef = db.ref("ref_codes");
    const tryCode = () => {
      const code = makeCode().toUpperCase();
      console.log("### Generate Reference Code: " + code);
      referenceRef
        .orderByChild("code")
        .equalTo(code)
        .limitToFirst(1)
        .once("value") // one-shot read; no listener left behind
        .then((snapshots) => {
          if (snapshots.numChildren() > 0) {
            console.log("### code existed ###");
            tryCode(); // collision — generate and check a new code
          } else {
            console.log("### code not existed: " + code);
            resolve({ success: true, data: code });
          }
        })
        .catch(() => {
          reject({ success: false, error_code: "GEN_REF_CODE_00", error_msg: "Can't generate reference code!" });
        });
    };
    tryCode();
  });
}
It is 2016, Node has had nearly full ES6 support since v4, and Promises have been around since 0.12. It's time to leave callbacks in the dust IMO.
I'm working on a commander.js-based CLI util which leverages a lot of async operations - http requests and user input. I want to wrap the Commander actions in async functions so that they can be treated as promises, and also to support generators (useful for the co-prompt library I'm using for user input).
I've tried wrapping the CB with co in two ways:
1)
program.command('myCmd')
.action(program => co(function* (program) {...})
.catch(err => console.log(err.stack)) );
and
2) program.command('myCmd').action(co.wrap(function* (program) { .. }));
The problem with 1) is that the program parameter isn't passed
The problem with 2) is that errors are swallowed...
I'd really like to get this working as it yields much nicer code in my use case - involving a lot of http requests and also waiting for user input using the co-prompt library..
Is it a better option altogether perhaps to wrap program.Command.prototype.action somehow?
thanks!
I've used a bespoke version of something like co to get a db.exec function which uses yield to do database request. You can pass parameters into a generator function (I pass in a connection object - see the comment where I do it).
Here is my db.exec function, which is very similar to what co does.
exec(generator) {
var self = this;
var it;
debug('In db.exec iterator');
return new Promise((accept,reject) => {
debug('In db.exec Promise');
var myConnection;
var onResult = lastPromiseResult => {
debug('In db.exec onResult');
var obj = it.next(lastPromiseResult);
if (!obj.done) {
debug('db.exec Iterator NOT done yet');
obj.value.then(onResult,reject);
} else {
if (myConnection) {
myConnection.release();
debug('db.exec released connection');
}
accept(obj.value);
debug('db.exec Promise Resolved with value %d',obj.value);
}
};
self._connection().then(connection => {
debug('db.exec got a connection');
myConnection = connection;
it = generator(connection); //This passes it into the generator
onResult(); //starts the generator
}).catch(error => {
logger('database', 'Exec Function Error: ' + error.message);
reject(error);
});
});
}
The connection object also wraps my database connection object and gives a generator function the ability to process the rows of the results from the database, but I won't post that here (although the example below uses it to process the rows).
Here is an example of using the exec function to run a sequence of sql
// Log-on sequence run as a db.exec coroutine: look the user up (admin is a
// special case), verify the password via a stored procedure, and create a
// daily UserLog entry on first log-on of the day. `user`, `params`, `reply`,
// ADMIN_USER and ADMIN_DISPLAY come from the enclosing (unseen) scope.
db.exec(function*(connection) {
if (params.name === ADMIN_USER) {
// Admin bypasses the Users table; uid 0 is reserved for admin.
debug('Admin Logon');
user.name = ADMIN_DISPLAY;
user.keys = 'A';
user.uid = 0;
let sql = 'SELECT passwordsalt FROM Admin WHERE AdminID = 0';
connection.request(sql);
// A NULL stored salt means the admin account has no password set.
yield connection.execSql(function*() {
let row = yield;
if (row) {
user.nopass = (row[0].value === null);
} else {
user.nopass = false;
}
debug('Admin Password bypass ' + user.nopass.toString());
});
} else {
debug('Normal User Logon');
let sql = `SELECT u.UserID,PasswordSalt,DisplayName,AccessKey,l.LogID FROM Users u
LEFT JOIN UserLog l ON u.userID = l.userID AND DATEDIFF(D,l.LogDate,GETDATE()) = 0
WHERE u.UserName = #username`;
let request = connection.request(sql);
request.addParameter('username',db.TYPES.NVarChar,params.name);
// NOTE(review): execSql appears to resolve with the row count — confirm
// against the connection wrapper's contract.
let count = yield connection.execSql(function*() {
let row = yield;
if (row) {
// Column order matches the SELECT list above.
user.uid = row[0].value;
user.name = row[2].value;
user.keys = (row[3].value === null) ? '' : row[3].value;
user.nopass = (row[1].value === null) ;
user.lid = (row[4].value === null) ? 0 : row[4].value;
debug('Found User with uid = %d and lid = %d, keys = %s',
user.uid, user.lid, user.keys);
}
});
if (count === 0) {
debug('Not Found User');
// couldn't find name in database
reply(false,false);
return;
}
}
if (!user.nopass) {
debug('Need a Password');
//user has a password so we must check it
// NOTE(review): passGood is never declared — it becomes an implicit
// global (or throws in strict mode); it should be declared locally.
passGood = false; //assume false as we go into this
let request = connection.request('CheckPassword');
request.addParameter('UserID',db.TYPES.Int,user.uid);
request.addParameter('password',db.TYPES.VarChar,params.password);
yield connection.callProcedure(function*() {
let row = yield;
if (row) {
//got a valid row means we have a valid password
passGood = true;
}
});
} else {
passGood = true;
}
if (!passGood) {
debug('Not a Good Pasword');
reply(false,true);
} else {
// Non-admin user with no log entry today: create one and capture its ID.
if (user.uid !== 0 && user.lid === 0) {
let sql = `INSERT INTO UserLog(UserID,LogDate,TimeOn,UserName) OUTPUT INSERTED.logID
VALUES(#uid,GETDATE(),GETDATE(),#username)`;
let request = connection.request(sql);
request.addParameter('uid',db.TYPES.Int,user.uid);
request.addParameter('username',db.TYPES.NVarChar,user.name);
yield connection.execSql(function*() {
let row = yield;
if (row) {
user.lid = row[0].value;
debug('Users Log Entry = %d',user.lid);
}
});
}
reply(true,user);
}
})
.catch((err) => {
// Any database failure in the sequence above lands here.
logger('database','Error on logon: ' + err.message);
reply(false,false);
});
});
There is a quite simple way to do async function in Commander.js
// The command's async handler. Commander (classic versions) does not await
// the action callback, so a rejection from `run` would surface as an
// unhandled promise rejection — the wrapper below catches it explicitly.
async function run() {
  /* code goes here */
}

program
  .command('gettime')
  .action((...args) => run(...args).catch((err) => {
    console.error(err);
    process.exitCode = 1;
  }));

program.parse(process.argv);