how insert csv data to mongodb with nodejs - node.js

Hi im developing an app with nodeJS, express and a mongoDB, i need to take users data from a csv file and upload it to my database this db has a schema designed with mongoose.
but i don know how to do this, what is the best approach to read the csv file check for duplicates against the db and if the user (one column in the csv) is not here insert it?
are there some module to do this? or i need to build it from scratch? im pretty new to nodeJS
i need a few advices here
Thanks
this app have an angular frontend so the user can upload the file, maybe i should read the csv in the front end and transform it into an array for node, then insert it?

Use one of the several node.js csv libraries like this one, and then you can probably just run an upsert on the user name.
An upsert is an update query with the upsert flag set to true: {upsert: true}. This will insert a new record only if the search returns zero results. So you query may look something like this:
db.collection.update({username: userName}, newDocumentObj, {upsert: true})
Where userName is the current username you're working with and newDocumentObj is the json document that may need to be inserted.
However, if the query does return a result, it performs an update on those records.
EDIT:
I've decided that an upsert is not appropriate for this but I'm going to leave the description.
You're probably going to need to do two queries here, a find and a conditional insert. For this find query I'd use the toArray() function (instead of a stream) since you are expecting 0 or 1 results. Check if you got a result on the username and if not insert the data.
Read about node's mongodb library here.
EDIT in response to your comment:
It looks like you're reading data from a local csv file, so you should be able to structure you program like:
function connect(callback) {
connStr = 'mongodb://' + host + ':' + port + '/' + schema; //command line args, may or may not be needed, hard code if not I guess
MongoClient.connect(connStr, function(err, db) {
if(err) {
callback(err, null);
} else {
colObj = db.collection(collection); //command line arg, hard code if not needed
callback(null, colObj);
}
});
}
connect(function(err, colObj) {
if(err) {
console.log('Error:', err.stack);
process.exit(0);
} else {
console.log('Connected');
doWork(colObj, function(err) {
if(err) {
console.log(err.stack);
process.exit(0);
}
});
}
});
function doWork(colObj, callback) {
csv().from('/path/to/file.csv').on('data', function(data) {
//mongo query(colObj.find) for data.username or however the data is structured
//inside callback for colObj.find, check for results, if no results insert data with colObj.insert, callback for doWork inside callback for insert or else of find query check
});
}

Related

Query data from postgresql functions not working in app, but works when testing manually

I met an annoyance when trying to use simple function to query data for my web app. The idea is to use one function to list the contents of one table; the other function to use the user-selected record_id in this table to query the detailed contents data in another table.
When running, the app ran the two functions without any error while no data got. Checked the console and found the second function query results is null (I console.log the input for the second function, found they got and using the correct query keys). Since I am sure database has the data for query.
I tried:
use psql command line to query data using the same query keys, I have the results without problem;
I run a node command line, and try to run the two functions by providing the query keys, it also gave me the correct results.
So the functions should work. Now my question is why put them in the app and let them run by themselves, they did not get the query results?
I am using pg = require("pg"); and const pool = new pg.Pool(config) for database connection;
Your sharing of your experience will be very appreciated.
(UPDATE)The functions are like below:
function listItemDB(callback){
pool.connect(function(err, client, done) {
if(err) {
return console.error('error fetching client from pool', err);
}
//use the client for executing the query
client.query(`SELECT * FROM basicdb.items`,
function(err, result) {
//call `done(err)` to release the client back to the pool (or destroy it if there is an error)
done(err);
if(err) {
return console.error('error running query', err);
}
// console.log(result.rows);
callback(result.rows);
});
});
}
The above function is only trying to get "item1" and "dataset1" for future use and pass them to below function args. It does its job perfectly.
function getFileName(itemName,datasetName, callback) {
let fileName;
console.log(itemName,datasetName);
pool.connect(function(err, client, done) {
if(err) {
return console.error('error fetching client from pool', err);
}
client.query("SELECT * "+
"FROM basicdb.dataset "+
"INNER JOIN basicdb.items "+
"ON basicdb.dataset.item_id = basicdb.items.item_id "+
"WHERE (basicdb.items.item_name = ($1)) "+
"AND (basicdb.dataset.datasetname = ($2))",[itemName,datasetName],
function (err, result){
done();
if(err) {
return console.error('error running query', err);
}
let records = result.rows;
fileName = records[records.length-1].filename;
callback(fileName);
});
});
}
This above function is trying to get the filename so the main app can use it. The code to call the above function in my main app.js is like below:
db.getFileName("item1","dataset1",function(fileName) {
//do something with the fileName....}
("db" is the module name which include the functions.)
I finally found the problem, which is a low-level mistake and has nothing to do with the database and the queries.
The item names got from the dropdown list in the app, which was feed to the function args, has one " "(space) attached to the end of the name(i dont know why?), which always "!=" the record in the database:-(, so always no query result in the app. But for the function test, I hardcode the item name which is correct "==" the record in the database. Since it is " ", even when I console.log(itemName), I did not find the space at the end.
It turns out to be 'A mistake of space'.

NodeJS writes to MongoDB only once

I have a NodeJS app that is supposed to generate a lot of data sets in a synchronous manner (multiple nested for-loops). Those data sets are supposed to be saved to my MongoDB database to look them up more effectively later on.
I use the mongodb - driver for NodeJS and have a daemon running. The connection to the DB is working fine and according to the daemon window the first group of datasets is being successfully stored. Every ~400-600ms there is another group to store but after the first dataset there is no output in the MongoDB console anymore (not even an error), and as the file sizes doesn't increase i assume those write operations don't work (i cant wait for it to finish as it'd take multiple days to fully run).
If i restart the NodeJS script it wont even save the first key anymore, possibly because of duplicates? If i delete the db folder content the first one will be saved again.
This is the essential part of my script and i wasn't able to find anything that i did wrong. I assume the problem is more in the inner logic (weird duplicate checks/not running concurrent etc).
var MongoClient = require('mongodb').MongoClient, dbBuffer = [];
MongoClient.connect('mongodb://127.0.0.1/loremipsum', function(err, db) {
if(err) return console.log("Cant connect to MongoDB");
var collection = db.collection('ipsum');
console.log("Connected to DB");
for(var q=startI;q<endI;q++) {
for(var w=0;w<words.length;w++) {
dbBuffer.push({a:a, b:b});
}
if(dbBuffer.length) {
console.log("saving "+dbBuffer.length+" items");
collection.insert(dbBuffer, {w:1}, function(err, result) {
if(err) {
console.log("Error on db write", err);
db.close();
process.exit();
}
});
}
dbBuffer = [];
}
db.close();
});
Update
db.close is never called and the connection doesn't drop
Changing to bulk insert doesn't change anything
The callback for the insert is never called - this could be the problem! The MongoDB console does tell me that the insert process was successful but it looks like the communication between driver and MongoDB isn't working properly for insertion.
I "solved" it myself. One misconception that i had was that every insert transaction is confirmed in the MongoDB console while it actually only confirms the first one or if there is some time between the commands. To check if the insert process really works one needs to run the script for some time and wait for MongoDB to dump it in the local file (approx. 30-60s).
In addition, the insert processes were too quick after each other and MongoDB appears to not handle this correctly under Win10 x64. I changed from the Array-Buffer to the internal buffer (see comments) and only continued with the process after the previous data was inserted.
This is the simplified resulting code
db.collection('seedlist', function(err, collection) {
syncLoop(0,0, collection);
//...
});
function syncLoop(q, w, collection) {
batch = collection.initializeUnorderedBulkOp({useLegacyOps: true});
for(var e=0;e<words.length;e++) {
batch.insert({a:a, b:b});
}
batch.execute(function(err, result) {
if(err) throw err;
//...
return setTimeout(function() {
syncLoop(qNew,wNew,collection);
}, 0); // Timer to prevent Memory leak
});
}

Where need it call dropCollection using nodejs with mongodb?

I was doing a server using nodejs, it need get data from mongodb. I retrieve data after require(../db.js). Somebody said the mongodb needn't be close in nodejs,because nodejs is a single process.....
My question: Need I call dropCollection to close the collection after invoked the db function many times;and How to do?Where to do that? Please,Thanks.
You dont need to drop the collection after invoking db functions,simply call db.close() though it is not needed. But if you want to do it , you can do it as follows:
var dropRestaurants = function(db, callback) {
db.collection('restaurants').drop( function(err, response) {
console.log(response)
callback();
});
};

Node Js MongoDB Query Against returned array

I have a mongodb Relationships collection that stores the user_id and the followee_id(person the user is following). If I query for against the user_id I can find all the the individuals the user is following. Next I need to query the Users collection against all of the returned followee ids to get their personal information. This is where I confused. How would I accomplish this?
NOTE: I know I can embed the followees in the individual user's document and use and $in operator but I do not want to go this route. I want to maintain the most flexibility I can.
You can use an $in query without denormalizing the followees on the user. You just need to do a little bit of data manipulation:
Relationship.find({user_id: user_id}, function(error, relationships) {
var followee_ids = relationships.map(function(relationship) {
return relationship.followee_id;
});
User.find({_id: { $in: followee_ids}}, function(error, users) {
// voila
});
};
if i got your problem right(i think so).
you need to query each of the "individuals the user is following".
that means to query the database multiple queries about each one and get the data.
because the queries in node.js (i assume you using mongoose) are asynchronies you need to get your code more asynchronies for this task.
if you not familier with the async module in node.js it's about time to know it.
see npm async for docs.
i made you a sample code for your query and how it needs to be.
/*array of followee_id from the last query*/
function query(followee_id_arr, callback) {
var async = require('async')
var allResults = [];
async.eachSerias(followee_id_arr, function (f_id, callback){
db.userCollection.findOne({_id : f_id},{_id : 1, personalData : 1},function(err, data){
if(err) {/*handel error*/}
else {
allResults.push(data);
callback()
}
}, function(){
callback(null, allResults);
})
})
}
you can even make all the queries in parallel (for better preformance) by using async.map

How do you save multiple records in SailsJS in one action?

I am wondering what the best practice would be for saving multiple records in an action that makes changes, particularly add and remove, to multiple records. The end goal is to have one function have access to all of the changed data in the action. Currently I am nesting the saves in order for the innermost saves to have access to the data in all the updated records. Here is an example of how I am saving:
record1.save(function (error, firstRecord) {
record2.save(function (erro, secondRecord) {
record3.save(function (err, thirdRecord) {
res.send({recordOne: firstRecord, recordTwo: secondRecord, recordThree: thirdRecord});
});
});
});
With this structure of saving, recordOne, recordTwo, and recordThree display the expected values on the server. However, checking localhost/1337/modelName reveals that the models did not properly update and have incorrect data.
You could use the built in promise engine Bluebird to to that.
Promise.then(function() {
return [record1.save(),record2.save(),record3.save()];
}).spread(function(record1_saved,record2_saved,record3_saved){
res.send({recordOne: record1_saved, recordTwo: record2_saved, recordThree: record3_saved});
req.namespamce = this;
}).catch(function(err){
res.badRequest({
error: err.message
});
req.namespamce = this;
});

Resources