Retrieve the last 3200 tweets of a specific user in node.js - node.js

I am new to javascript and node.js and this is my first post, so please bear with me.
I am using ntwitter to get all previous tweets of a specific user.
My problem is that if the user has more than 200 tweets, I need to create a loop, and I am not sure whether I am doing it right.
This is the async function that gets the 200 latest tweets:
exports.getUserTimeline = function(user, callback) {
twit.getUserTimeline({ screen_name: user, count: 200 }, function(err, data) {
if (err) {
return callback(err);
}
callback(err, data);
});
}
I found a solution to do this using a recursive function, but it's quite ugly.. How can I improve it ?
exports.getUserHistory = function(user, callback) {
recursiveSearch(user, callback);
function recursiveSearch(user, callback, lastId, data) {
var data = data || []
, args = {screen_name: user, count: 200};
if(typeof lastId != "undefined") args.max_id = lastId;
twit.getUserTimeline(args, function(err, subdata) {
if (err) {
console.log('Twitter search failed!');
return callback(err);
}
if (data.length !== 0) subdata.shift();
data = data.concat(subdata);
var lastId = parseInt(data[data.length-1].id_str);
if (subdata.length !== 0) {
recursiveSearch(user, callback, lastId, data);
} else {
callback(err, data);
}
});
}
}
Thanks a lot!
Update: This is the improved (refactored) function suggested by hunterloftis with two modifications:
property max_id should not be specified on the first iteration
the case where the user exists but no tweets have been posted must be handled
code:
/**
 * Imports a user's timeline (retweets included) page by page.
 *
 * @param {string} user - Value sent as `screen_name`.
 * @param {Function} done - Called once: (err) on failure, (err) with no data when
 *   the very first page is empty, or (undefined, tweets) when paging is complete.
 */
function getUserHistory(user, done) {
  var data = [];
  search();

  function search(lastId) {
    var args = {
      screen_name: user,
      count: 200,
      include_rts: 1
    };
    // max_id must not be sent on the first request.
    if (lastId) args.max_id = lastId;
    twit.getUserTimeline(args, onTimeline);

    function onTimeline(err, chunk) {
      if (err) {
        console.log('Twitter search failed!');
        return done(err);
      }

      var page = chunk || [];

      // Only an empty FIRST page means "no tweets"; an empty later page simply
      // ends the walk and must not throw away what was already collected.
      if (!page.length && !data.length) {
        console.log('User has not tweeted yet');
        return done(err);
      }

      // The page requested with max_id starts with the tweet we already hold;
      // drop it only when it really is that tweet.
      if (page.length && page[0].id_str === lastId) page = page.slice(1);

      if (page.length) {
        data = data.concat(page);
        // Pass id_str through unchanged: parseInt() rounds ids above 2^53.
        return search(data[data.length - 1].id_str);
      }

      console.log(data.length + ' tweets imported');
      return done(undefined, data);
    }
  }
}
When retrieving tweets I noticed that my tweet count wasn't always the same as the 'statuses_count' property of the user. It took me some time to figure out that this difference corresponds to the number of deleted tweets :)

Does your recursive function work? Doesn't look too bad to me. I might refactor it just a little into something more like this:
/**
 * Walks a user's timeline backwards, 200 tweets per request, and hands the
 * accumulated list to `done`.
 *
 * @param {string} user - Value sent as `screen_name`.
 * @param {Function} done - Called once with (err) on failure or (undefined, tweets);
 *   `tweets` is an empty array when the user has no tweets.
 */
function getUserHistory(user, done) {
  var data = [];
  search();

  function search(lastId) {
    var args = {
      screen_name: user,
      count: 200
    };
    // Leave max_id out entirely on the first request instead of sending undefined.
    if (lastId !== undefined) args.max_id = lastId;
    twit.getUserTimeline(args, onTimeline);

    function onTimeline(err, chunk) {
      if (err) {
        console.log('Twitter search failed!');
        return done(err);
      }

      var fresh = chunk || [];
      // A page requested with max_id begins with the tweet that ended the
      // previous page; skip it, but only if it is really that tweet.
      if (fresh.length && fresh[0].id_str === lastId) fresh = fresh.slice(1);

      // No new tweets: finished (also covers a user with no tweets at all).
      if (!fresh.length) return done(undefined, data);

      data = data.concat(fresh);
      // id_str stays a string: tweet ids do not fit in a double without rounding.
      return search(data[data.length - 1].id_str);
    }
  }
}

Related

Unable to retrive data and push inside loop in node js

I am trying to retrieve attendance list along with user details.
I am using caminte.js(http://www.camintejs.com/) Cross-db ORM for database interaction.
Here is my code sample of model function "attendanceList".
exports.attendanceList = function (req, callback) {
var query = req.query;
var searchfilters = {};
if(!req.user){
callback({ code:400, status:'error', message: 'Invalid Request', data:{}});
}else{
searchfilters["vendor_id"] = parseInt(req.user._id);
}
if(query.location && parseString(query.location) != '') {
searchfilters["location"] = parseString(query.location);
}
if (query.device_details && parseString(query.device_details) != '') {
searchfilters["device_details"] = parseString(query.device_details);
}
if(query.created_on) {
searchfilters["created_on"] = query.created_on;
}
if(query.status) {
searchfilters["status"] = { regex: new RegExp(query.status.toLowerCase(), "i") };
}
var SkipRecord = 0;
var PageSize = 10;
var LimitRecord = PageSize;
var PageIndex = 1;
if(query.pagesize) {
PageSize = parseInt(query.pagesize);
}
if(query.pageindex) {
PageIndex = parseInt(query.pageindex);
}
if (PageIndex > 1) {
SkipRecord = (PageIndex - 1) * PageSize;
}
LimitRecord = PageSize;
var SortRecord = "created_on";
if(query.sortby && query.sorttype) {
var sortingBy = query.sortby;
var sortingType = 'ASC';
if(typeof query.sorttype !== 'undefined') {
sortingType = query.sorttype;
}
SortRecord = sortingBy + ' ' + sortingType;
}
Attendance.find({ where: searchfilters, order: SortRecord, limit: LimitRecord, skip: SkipRecord }, async function (err, result) {
if(err){
callback({ code:400, status:'error', message:'Unable to connect server', errors:err });
} else {
await result.map(function(row, i){
User.findById(parseInt(row.user_id), function(err, data){
if(err){
console.log(err);
} else {
result[i]['userDetails'] = data;
}
});
});
await Attendance.count({ where: searchfilters }, function (err, count) {
callback({ code:200, status:'success', message:'OK', total:count, data:result });
});
}
});
};
I am getting only attendance list without user details. How do I force to push user details into attendance list? Any Help!!
Thank You
This behavior is asynchronous. When you make a request to the DB, your code keeps running while the task that fetches the data is placed on the task queue.
To keep things simple, you need to use promises when handling asynchronous jobs.
Rewrite your code from this:
// Original (non-working) version, kept for comparison with the rewrite below.
Attendance.find({ where: searchfilters, order: SortRecord, limit: LimitRecord, skip: SkipRecord }, async function (err, result) {
if(err){
callback({ code:400, status:'error', message:'Unable to connect server', errors:err });
} else {
// The map callback returns nothing, so this awaits an array of undefined:
// it does not wait for the User.findById callbacks to run.
await result.map(function(row, i){
User.findById(parseInt(row.user_id), function(err, data){
if(err){
console.log(err);
} else {
result[i]['userDetails'] = data;
}
});
});
// The response is sent from the count callback; nothing above guarantees
// that userDetails has been assigned by then. `err` is not checked here.
await Attendance.count({ where: searchfilters }, function (err, count) {
callback({ code:200, status:'success', message:'OK', total:count, data:result });
});
}
});
To this:
/**
 * Promise wrapper around Attendance.find.
 *
 * @returns {Promise} Resolves with the rows passed to the find callback,
 *   rejects with its error.
 */
const findAttendanceFirst = (searchFilters, SortRecord, LimitRecord, SkipRecord) => {
  return new Promise(function (fulfil, fail) {
    const options = {
      where: searchFilters,
      order: SortRecord,
      limit: LimitRecord,
      skip: SkipRecord
    };
    Attendance.find(options, function (findError, rows) {
      if (findError) {
        return fail(findError);
      }
      fulfil(rows);
    });
  });
}
/**
 * Promise wrapper around User.findById.
 *
 * @param {string|number} userId - Parsed as a base-10 integer before the lookup.
 * @returns {Promise} Resolves with the value passed to the findById callback,
 *   rejects with its error.
 */
const findUserByIdForUserDetails = (userId) => {
  return new Promise((resolve, reject) => {
    // Explicit radix: without it a value such as "0x10" would be read as hexadecimal.
    User.findById(Number.parseInt(userId, 10), function(err, data){
      if(err) return reject(err);
      resolve(data);
    })
  });
}
/**
 * Promise wrapper around Attendance.count.
 *
 * @returns {Promise} Resolves with the count passed to the callback,
 *   rejects with its error.
 */
const getAttendanceCount = (searchFilters) => {
  return new Promise(function (fulfil, fail) {
    const criteria = { where: searchFilters };
    Attendance.count(criteria, function (countError, total) {
      if (countError) {
        return fail(countError);
      }
      fulfil(total);
    });
  })
}
So now we can use these separate functions to make the asynchronous code read like synchronous code (the `await` calls below must run inside an `async` function).
// Usage: fetch the page of attendance rows, attach user details to each row,
// then fetch the total count and build the response payload.
try {
  const data = await findAttendanceFirst(searchFilters, SortRecord, LimitRecord, SkipRecord);
  for (const userData of data) {
    try {
      userData.userDetails = await findUserByIdForUserDetails(userData.user_id);
    } catch (e) {
      // The lookup failed for this row: record that explicitly instead of
      // silently leaving the property missing.
      console.error(e);
      userData.userDetails = null;
    }
  }
  let count;
  try {
    count = await getAttendanceCount(searchFilters);
  } catch (e) {
    // The total is left undefined when the count query fails.
    console.error(e);
  }
  // `data` holds the rows fetched above.
  const callBackData = { code:200, status:'success', message:'OK', total:count, data:data };
  // And here you can do whatever you want with callback data. Send to client etc.
} catch (e) {
  // The attendance query itself failed; report it rather than swallowing it.
  console.error(e);
}
NB: I've not tested this code; it will be easier for you to play with your actual data and use Promises and async/await.
Just remember that each request to the db is asynchronous, and you need to make your code wait for the data.

returning Mongoose query result from Async call

I'm working on a problem where I need to query the db for an instance of a Voter, and use that instance to update an Election, returning to the original function whether that update was successful or not. My code currently looks like this:
/**
 * Route handler: tries to add every name in req.body.candidates to the election
 * named req.body.electionName and replies with a success/fail string.
 *
 * NOTE: the flag below only receives the synchronous return value of
 * _addCandidateToElection; anything computed later inside asynchronous
 * callbacks is not visible here. The flag is also overwritten on each
 * iteration, so only the last call's value decides the response.
 */
function addCandidatesToElection(req, res) {
let electionName = req.body.electionName;
let candidates = req.body.candidates;
let addedCandidatesSucessfully = true;
for(let i=0; i<candidates.length; i++) {
addedCandidatesSucessfully = _addCandidateToElection(electionName, candidates[i]);
console.log("added candidates sucessfully:" + addedCandidatesSucessfully);
}
if(addedCandidatesSucessfully) {
res.send("createElection success");
} else {
res.send("createElection fail");
}
}
which calls this function:
/**
 * Looks up the voter named candidateName and pushes it onto the `candidates`
 * array of the election named electionName.
 *
 * NOTE: this function has no return statement of its own. Every `return`
 * below returns from a nested callback, so callers always receive undefined.
 */
function _addCandidateToElection(electionName, candidateName) {
async.parallel(
{
// Single task: fetch the voter document by name.
voter: function(callback) {
Voter.findOne({ 'name' : candidateName }, function(err,voter) {
callback(err, voter);
});
}
},
// Final callback: e is the error (not checked here), r.voter the lookup result.
function(e, r) {
if(r.voter === null){
// Returned to async.parallel, not to the caller of _addCandidateToElection.
return 'Voter not found';
} else {
Election.findOneAndUpdate(
{'name': electionName },
{$push: { candidates: r.voter }},
{new: true},
function(err, election) {
// These values are returned to the update call's callback invoker only.
if(err){ return err; }
return (election) ? true : false;
});
}
}
);
}
I've already tried printing out the Voter instance(r.voter) to check if it exists (it does), and also printing out the election object returned by the mongoose call, which also works. However, I'm getting a null value in the
addedCandidatesSucessfully = _addCandidateToElection(electionName, candidates[i]);
line, regardless of the result of the call. I think it has to do with the mongoose call returning a local value which is never returned to the function that called _addCandidateToElection, but I don't know how I should return that. I've tried putting control flags such as
let foundAndUpdatedElection = false;
on the first line of _addCandidateToElection and updating it inside the Mongoose query's callback, but apparently it doesn't change.
How should I return the result of the query to the addCandidatesToElection function?
You should probably 'promisify' your code to help you better deal with the asynchronous nature of js. Try the following instead of your example:
/**
 * Promise wrapper around Voter.findOne, looking a voter up by name.
 *
 * @param {string} candidateName - Name to search for.
 * @returns {Promise} Resolves with the value passed to the findOne callback,
 *   rejects with its error.
 */
function findVoter(candidateName) {
  return new Promise(function(resolve, reject) {
    Voter.findOne({ 'name' : candidateName }, function(err, voter) {
      // The callback parameter is `err`; the previous code tested an undefined
      // `error` variable and threw a ReferenceError on every call.
      if (err) {
        reject(err);
      } else {
        resolve(voter);
      }
    });
  });
}
/**
 * Adds the voter named candidateName to the candidates of the election named
 * electionName.
 *
 * @returns {Promise<boolean>} true when an election document came back from the
 *   update; false when the voter or the election was not found. Rejects on a
 *   lookup or update error.
 */
function addCandidateToElection(electionName, candidateName) {
  return findVoter(candidateName).then(function(voter) {
    // Do not push a missing voter into the election.
    if (!voter) {
      return false;
    }
    return new Promise(function(resolve, reject) {
      Election.findOneAndUpdate(
        {'name': electionName },
        {$push: { candidates: voter }},
        {new: true},
        function(err, election) {
          if (err) {
            reject(err);
          } else {
            resolve(!!election);
          }
        });
    });
  });
}
/**
 * Route handler: adds every name in req.body.candidates to the election named
 * req.body.electionName and replies once all additions have settled.
 *
 * Replies 'create election success' only when every addition resolved to a
 * truthy value; otherwise (a false result, a rejection, or a missing
 * candidates array) replies 'failed'.
 *
 * @returns {Promise} Settles after the response has been sent.
 */
function addCandidatesToElection(req, res) {
  const electionName = req.body.electionName;
  const candidates = req.body.candidates;

  if (!Array.isArray(candidates)) {
    res.send('failed');
    return Promise.resolve();
  }

  const pending = candidates.map(function(candidate) {
    return addCandidateToElection(electionName, candidate);
  });

  return Promise.all(pending)
    .then(function(results) {
      console.log(results);
      // Each entry is the boolean outcome of one addition.
      res.send(results.every(Boolean) ? 'create election success' : 'failed');
    })
    .catch(function(error) {
      console.error(error);
      res.send('failed');
    });
}
You will also no longer need to use the async library because promises are now native in ES6

How to implement nested query in sqlite3

So I have this two-level query in node.js; each query could return multiple results. My code just ignores that for now. This is the best I could come up with, and it seems to work.
How can I correct it, please? I don't know how to invoke the callback for the second query.
Also, db.close() is always called before the second query finishes, even though I use serialize().
var getInfo1Db = function(callback) {
var db = new sqlite3.Database("DB.sqlite3");
var cnt = 0;
var info1JsonObj = [];
db.all("select * from Info1DB",
function(err, rows) {
db.serialize(function() {
for(var ii=0, len=rows.length; ii<len; ii++) {
var t2 = rows[ii].info1;
var doorId = ...
db.all("select * from DoorDB where ObjectID=" + doorId,
function(err, row2) {
if(err) {
} else {
var doorName = row2[0]...
var info1JsonElem = {
"DoorName" : doorName
};
info1JsonObj.push(info1JsonElem);
cnt++;
if(cnt === rows.length) {
callback(null, info1JsonObj);
}
}
}
); // for the only door info based on door id
} // for each row of info1
db.close(); // why this finishes before the 2nd db.all
} ); // end of serialize
});
};
You can't implement a nested query the usual way in sqlite3. (I mean you can't even do it the callback-hell way, because sqlite3 needs to close the connection before another query is called; otherwise you will always get an error.)
You have to use Promise, async and await to do this.
(It's worth spending 30 minutes to learn these three words.)
Step1. define a async function like this:
async query_1() {
new Promise(resolve => {
db = ...
db.serialize( () => {
db.get('select .. from ... where id = 1', [], (error, row) => {
// here is the KEY: put the result into resolve
// this equals to the "return" statement in non-sync method.
resolve(row)
}
})
db.close()
})
}
and also implement your query_2 function like this:
async query_2() {
let query_1_result = await this.query_1()
db = ...
db.serialize( () => {
db.get('select .. from ... where dependency_id = ' + query_1_result, [], (error, row) => {
// other code here...
}
})
db.close()
}
refer to my answer: https://stackoverflow.com/a/67881159/445908
How about using 2 function to do these ?
function db_query1(your_param,...., callback){
  // database operation
db.run( sql , [param,...] , function(err,rows){
if(err) // return
else{
// get rows with callback
callback(null, rows);
}
});
}
function db_query2(your_param,...., callback){
  // database operation
db.run( sql , [param,...] , function(err,rows){
if(err) // return
else{
// get rows with callback
callback(null, rows);
}
});
}
And call these function:
// Chain the two queries: the second one starts only inside the first one's callback.
db_query1(....,function(err,result1){
if(err) ...// return
// do the things with result1
// And then call query2
db_query2(....,function(err,result2){
if(err) ...// return
// do the things with result2
});
});
Hope this will help :)
You can use Promise.all, an array, and the second (completion) callback of node-sqlite3's db.each(), which is executed once all rows have been fetched, to simplify the nested query.
I cannot really work out the meaning of the variables you are using, so I assume that each row in Info1DB has a one-to-many relationship with DoorDB on the field doorId.
async function getInfo (callback) {
sql = "select * from Info1DB;";
numVersions = 0;
countVersions = 0;
info1JsonObj = [];
db.serialize(function() {
db.each(sql, [], (err, info1Row) => {
sql = "select * from DoorDB where ObjectID=?;";
info1Row.doors = [];
doorId = ...
db.each(sql, [doorId], (err, doorRow) => {
info1Row.doors.push(new Promise((resolve, reject) => {
if (err) {
reject(err);
} else {
resolve(doorRow);
}
}));
}, (err, num) => {
Promise.all(info1Row.doors)
.then((doors) => {
info1Row.doors = doors;
info1JsonObj.push(info1Row);
countVersions++;
if (countVersions == numVersions) {
callback(null, info1JsonObj);
}
}).catch((err) => {
callback(err, null);
});
});
}, (err, versions) => {
numVersions = versions;
});
});
}

Execute a loop on a POST request on Node.js server

I'm trying to count how many people of each gender there are in a JSON list passed by the client with a POST request (on a Node.js server). I have problems understanding JavaScript asynchronous execution, callbacks and closures.
What i want is:
getting a list from the client,
for every entry ask my collection if that is a m, a f or a u,
count how many fs, ms and us there are,
send an array to the client with the three values.
I always get "Cant set headers after they are sent" or similar errors due to async execution. I tried different callback orders and many different options.
This is how the functions on the server looks like:
// POST /genderize: builds a counter and starts counting the submitted names.
// NOTE: `res.send( result )` is an argument expression, so it is evaluated
// immediately rather than after counting; `.then` is called on whatever
// createCounter returns.
app.post('/genderize', function(req, res){
createCounter("conto", req, function(req,contat ){
count(req, contat);
}).then(res.send( result ));
});
/**
 * Builds a counter object and passes it, together with req, to `callback`.
 * The `nome` parameter is not used in the body.
 *
 * NOTE: `result`, `addM`, `addF`, `addU`, `getM` and `getResult` are assigned
 * without var/let/const, so they are not local declarations.
 */
function createCounter( nome, req, callback ) {
result = [0,0,0];
var contatore = function(){
var m = 0;
var f = 0;
var u = 0;
// addM updates result[1]; addF/addU update the locals f and u, which
// getResult does not return.
addM = function(){ console.log( "m++ "+result[1]);result[1]++; };
addF = function(){ f++; };
addU = function(){ u++; };
// Reads this.m, whereas m above is a local variable, not a property.
getM = function(){ return this.m;};
getResult = function(){
console.log( result+ " * "+ getM() + " * " + this.u + " * "+ this.f );
return result;
};
return {
addM: addM,
addF: addF,
addU: addU,
getResult: getResult
};
}
callback( req, contatore() );
}
/**
 * Starts one 'nomi' collection lookup per entry of req.body.data and bumps the
 * matching counter from each lookup's callback: addM when the first document's
 * sesso is "M", addF for any other document, addU when nothing was found.
 */
function count( req, counter ){
  var collection = db.get('nomi');
  var data = req.body.data;

  data.forEach(function(value, i){
    var selector = { nome : req.body.data[i].name.split(" ")[0].toUpperCase() };
    var options = { fields: {_id:0, nome:0}};

    collection.find(selector, options, function (err, docs) {
      if (isEmptyObject(docs)) {
        counter.addU();
        return;
      }
      // Round-trip through JSON to work on plain data.
      var plain = JSON.parse(JSON.stringify(docs));
      if (plain[0].sesso == "M") {
        counter.addM();
        return;
      }
      counter.addF();
    });
  });
}
There are several issues with this example, but the main thing that you missed is that when you perform your database query, the collection.find call will return immediately, but will only execute its callback (function(err, docs)) at some later time after the database has replied.
Here's a working rewrite:
// POST /genderize: expects { data: [{ name: "..." }, ...] } and replies with
// the counts as [M, F, U].
app.post('/genderize', function(req, res) {
  var data = req.body && req.body.data;

  // Require a real array whose entries all carry a string name. A mere
  // `length` check also accepted strings, and entries without a name made the
  // lookup throw later on.
  var isValid = Array.isArray(data) && data.every(function (entry) {
    return Boolean(entry) && typeof entry.name === 'string';
  });
  if (!isValid) {
    return res.status(400).send('Invalid request body.');
  }

  countGenders(db.get('nomi'), data, function (err, genders) {
    if (err) return res.status(500).send('Unable to process request.');
    res.send([genders.M, genders.F, genders.U]);
  });
});
/**
 * Looks up the upper-cased first word of `name` in `collection` and reports a
 * gender code through `next`.
 *
 * @param {Object} collection - Object exposing find(query, options, callback).
 * @param {string} name - Full name; only the part before the first space is used.
 * @param {Function} next - Called with (err), or (null, code) where code is 'M'
 *   when the first document's sesso is "M", 'F' for any other document, and
 *   'U' when no document was returned.
 */
function getGenderFromName(collection, name, next) {
  collection.find(
    {nome : name.split(" ")[0].toUpperCase()},
    {fields: {_id:0, nome:0}},
    function onDocs(err, docs) {
      if (err) return next(err);

      var hasMatch = Boolean(docs) && docs.length > 0;
      var code;
      if (!hasMatch) {
        code = 'U';
      } else if (docs[0].sesso == "M") {
        code = 'M';
      } else {
        code = 'F';
      }
      next(null, code);
    }
  );
}
/**
 * Tallies gender codes for every entry of `data`, one lookup at a time.
 *
 * @param {Object} collection - Passed through to getGenderFromName.
 * @param {Array} data - Entries with a `name` property.
 * @param {Function} next - Called with (err) on the first failed lookup, or
 *   (null, { M, F, U }) once every entry has been processed.
 */
function countGenders(collection, data, next) {
  var tally = { M: 0, F: 0, U: 0 };

  // Handles the entry at `index`, then moves on to the following one.
  function visit(index) {
    if (index == data.length) {
      return next(null, tally);
    }
    getGenderFromName(collection, data[index].name, function onGender(err, code) {
      if (err) {
        return next(err);
      }
      tally[code]++;
      visit(index + 1);
    });
  }

  visit(0);
}
Lets review the changes:
Removed the createCounter structure. No need for a heavy, get/set pattern for this simple example.
Checked for error values in every asynchronous callback
if (err) return next(err);
Within a route handler, typically you will want to end the request with a res.status(500).send(). In most other cases, return next(err) will 'bubble' the error up.
Moved the database query into a new function, getGenderFromName. It mostly retains your original code. This was optional, but substantially improves the readability of the count function.
Finally, rewrote the count function using an appropriate asynchronous iteration pattern, courtesy of http://book.mixu.net/node/ch7.html. Mixu gives a very easy to understand explanation of asynchronous node, give it a read.
An even better option would be use the excellent async module. You could rewrite the count method as
/**
 * Tallies gender codes for every entry of `data` using async.eachSeries.
 *
 * @param {Object} collection - Passed through to getGenderFromName.
 * @param {Array} data - Entries with a `name` property.
 * @param {Function} next - Called with (err) on the first failed lookup, or
 *   (null, { M, F, U }) once every entry has been processed.
 */
function countGenders(collection, data, next) {
  var result = { M: 0, F: 0, U: 0 };
  async.eachSeries(
    data,
    // `done` is the per-item continuation; it is named differently from the
    // outer `next` so the two cannot be confused.
    function (value, done) {
      getGenderFromName(collection, value.name, function(err, gender) {
        if (err) return done(err);
        result[gender]++;
        done();
      });
    },
    function (err) {
      if (err) return next(err);
      // Hand over `result` (the previous code referenced an undefined `results`).
      next(null, result);
    }
  );
}
Async includes lots of different control flow methods to use, not just simple iterations.
Here is a better way to do this. This really cleans up the asynchronous nature of javascript. Checkout the async library that I am using here.
var collection = db.get('nomi');
var async = require('async');
// POST /genderize: counts the genders of the submitted names and replies with
// a JSON object { females, males, unknown }.
app.post('/genderize', function(req, res){
  const countingObject = {
    females: 0,
    males: 0,
    unknown: 0
  };

  async.each(req.body.data, function(entry, callback) {
    // Entries may be plain strings or objects of the form { name: "..." }.
    const fullName = (typeof entry === 'string') ? entry : entry.name;

    collection.findOne({ nome : fullName.split(" ")[0].toUpperCase() }, { fields: {_id:0, nome:0}}, function (err, nameObject) {
      if (err) return callback(err);

      //instead, maybe check if it is male, female, or otherwise mark as unknown?
      if (isEmptyObject(nameObject)) {
        countingObject.unknown++;
      } else if (nameObject.sesso == "M") {
        countingObject.males++;
      } else {
        countingObject.females++;
      }
      callback();
    });
  }, function(err) {
    if (err) {
      return res.status(500).send('Unable to process request.');
    }
    // The standard header name is Content-Type; send the tally built above.
    res.setHeader('Content-Type', 'application/json');
    res.send(JSON.stringify(countingObject));
  });
});

node.js fall through data cache pattern

I am looking for a clean way to structure my node.js code for the following situation. I thought of using EventEmitters to create a "workflow" type of thing. Also I thought of using one of the async libraries out there, that has been less thought out though.
Problem:
Looking for a piece of data
check cache, if found return
check db, if found return (store in cache)
get live data and return, (store in db, cache)
I mocked something up quickly using event emitters below.
var util = require("util");
var events = require('events');
/**
 * Event-driven lookup chain: cache -> datastore -> live source.
 *
 * Call checkForData(key) to start. Listeners receive "found" with the data or
 * "notFound" with the key. Data found further down the chain triggers the
 * storage events for the levels above it ("storeCache", "storeDb").
 * In this mock-up a key only "exists" at the level whose name it equals
 * ('cache', 'db' or 'live').
 */
var CheckForData = function() {
  events.EventEmitter.call(this);

  // Public entry point: kicks off the chain with a cache lookup.
  this.checkForData = function(key) {
    this.emit("checkForDataRequest", key);
  }

  // Level 1: cache.
  function lookupCache(key) {
    if (key !== 'cache') {
      console.log("not found in cache "+key);
      this.emit("checkDatastore", key);
      return;
    }
    this.emit("found", {data:'cached data'});
  }

  // Level 2: datastore; a hit is also pushed into the cache.
  function lookupDatastore(key) {
    if (key !== 'db') {
      console.log("not found in db "+key);
      this.emit("getData", key);
      return;
    }
    this.emit("found", {data:'db data'});
    this.emit("storeCache", key, {data:'db data'});
  }

  // Level 3: live data; a hit is persisted through "storeData".
  function lookupLive(key) {
    if (key !== 'live') {
      console.log("not found in live "+key);
      this.emit("notFound", key);
      return;
    }
    this.emit("found", {data:'live data'});
    this.emit("storeData", key, {data:'live data'});
  }

  // Fans a store request out to the db first, then the cache.
  function persist(key, data) {
    this.emit("storeDb", key, data);
    this.emit("storeCache", key, data);
  }

  function writeDb(key, data) {
    console.log("storing data in db. for "+key);
    console.log(data);
  }

  function writeCache(key, data) {
    console.log("storing data in cache. for "+key);
    console.log(data);
  }

  // Pass-through listeners for the two terminal events.
  function onFound(data) {
    return data;
  }

  function onNotFound(key) {
    return key;
  }

  // Event name -> handler, registered in this order.
  var wiring = [
    ["checkForDataRequest", lookupCache],
    ["checkDatastore", lookupDatastore],
    ["getData", lookupLive],
    ["found", onFound],
    ["notFound", onNotFound],
    ["storeData", persist],
    ["storeDb", writeDb],
    ["storeCache", writeCache]
  ];
  for (var n = 0; n < wiring.length; n++) {
    this.on(wiring[n][0], wiring[n][1]);
  }
};
// Give CheckForData the EventEmitter prototype (emit/on) used by its handlers.
util.inherits(CheckForData, events.EventEmitter);
// Export one ready-made instance: every require() of this module gets the same object.
module.exports = new CheckForData();
To test it...
var checkForData = require('./check-for-data');
// Demo run: report every hit and miss, then try one key per lookup level
// plus one key that exists nowhere.
checkForData.on("found", function(data) {
  console.log("Found data ");
  console.log(data);
});
checkForData.on("notFound", function(key) {
  console.log("NOOO Found data for " + key);
});

var separator = "-------";
console.log(separator);
['cache', 'db', 'live', 'other'].forEach(function(key) {
  checkForData.checkForData(key);
  console.log(separator);
});
Then, with async.js, I made a quick checkSeries, which is basically async.detectSeries, but instead of returning the item in the collection it returns the result. See below...
var async = require('async');
// Cache lookup stub: logs the attempt and reports a miss (null).
function check_cache(key) {
  var miss = null;
  console.log('checking cache');
  return miss;
}
// Datastore lookup stub: logs the attempt and reports a miss (null).
// Return {data: "db data"} instead to simulate a hit.
function check_datastore(key) {
  var miss = null;
  console.log('checking datastore');
  return miss;
}
// Live API stub: logs the call and always returns a payload.
function check_api(options) {
  var payload = {data: "live data"};
  console.log('calling api');
  return payload;
}
// Iterator for the series: runs the given check function with no arguments
// and passes its return value to the callback.
function exists(item, callback) {
  var outcome = item();
  callback(outcome);
}
// Run the checks in order through `exists`. checkSeries is the author's own
// helper (not shown here), described as a detectSeries variant that yields the
// check's result rather than the check function itself.
async.checkSeries([check_cache, check_datastore, check_api], exists, function(result) {
// result is expected to be the data returned by the first check that found something
console.log(result);
});
Any suggestions, hints, tips, ...? Is there a pattern or library that i am missing? Do you think it be possible/easier to do in Step, flow, ...? Memoize?
That seems like a lot of work for the caller and a lot of extra code that doesn't seem to be adding much value. I have something that looks like this in my model.
/**
 * Fetches a Foo by id, trying the cache first and falling back to the database.
 * A database hit is stored in the cache before it is returned.
 *
 * @param {*} id - Identifier; the cache key is 'foo:' + id.
 * @param {Function} cb - Called with (null, foo) on success, with the database
 *   error if the query failed, or with an Error('Foo not found') whose `status`
 *   property is 404 when no record exists.
 */
Foo.get = function (id, cb) {
  var self = this;
  var cacheKey = 'foo:' + id;

  // check the cache first
  cache.get(cacheKey, function (err, cacheFoo) {
    // if found, just return it; a cache error is treated as a miss
    if (!err && cacheFoo) {
      return cb(null, cacheFoo);
    }

    // otherwise get from db
    self.findOne({id: id}, function (err, dbFoo) {
      // A failing query is not the same as a missing record: pass it on as is.
      if (err) {
        return cb(err);
      }
      if (!dbFoo) {
        // The Error constructor ignores a numeric second argument, so the
        // status code is attached as a property instead.
        var notFound = new Error('Foo not found');
        notFound.status = 404;
        return cb(notFound);
        // you could do get live call here
      }

      // and store in cache
      cache.store(cacheKey, dbFoo);
      return cb(null, dbFoo);
    });
  });
};
Callers can then always just call Foo.get(id, callback) and they don't have to care how it is actually retrieved. If it gets more complicated, you could use an async library (such as the aptly named async) to make the code more readable, but this should still be completely hidden from the caller.

Resources