How to insert bulk data to postgresql db from CSV file? - node.js

I have to insert more than 100 records from a CSV file into a PostgreSQL database. I have tried the code below; it reads the data from the file but fails to insert it into the PostgreSQL table. Is there another way to do this, for example with csvtojson?
const csv = require('csv');
var csvParser = require('csv-parse');

Controller.uploadCsv = async(data) => {
  fs.createReadStream(data.path)
    .pipe(csvParser({
      delimiter: '\t',
      endLine: '\n',
      escapeChar: '"',
      enclosedChar: '"'
    }))
    .on('data', function(data) {
      console.log(data) // prints the rows shown in the console output below
      console.log(data.name) // is undefined
      const add = {
        name: data.name,
        address: data.address,
        phoneNo: data.phoneNumber,
        email: data.email,
        created_at: new Date(),
        updated_at: new Date()
      };
      const result = await models.table.create(add);
    })
    .on('end', function(data) {
      console.log('reading finished')
    })
}
router.js
router.post('/file', upload.single('file'), (req, res, next) => {
  Controller.uploadCsv(req.file)
    .then((result) => res.json(result))
    .catch(next)
})
Console output:
[ 'name',
  'address',
  'phoneNumber',
  'email',
  'created_at',
  'updated_at' ]
[ 'aaa',
  'delhi',
  '1102558888',
  'test#gmail.com',
  '2017-10-08T06:17:09.922Z',
  '2018-10-08T06:17:09.922Z' ]
[ 'Oreo',
  'bgl',
  '1112589633',
  'test123#gmail.com',
  '2017-10-08T06:17:09.922Z',
  '2018-10-08T06:17:09.922Z' ]

Add the async keyword to the on('data') handler. Remember, execution is not sequential, so the records may be inserted in a completely different order from one program run to another.
Replace:
.on('data', function(data) {
With:
.on('data', async function(data) {
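Note that even with an async handler, the end event will still fire before all the inserts have finished, because the stream does not wait for your promises. A more robust sketch (assuming models.table is a Sequelize model, so bulkCreate is available, and that the first CSV row holds the column names) buffers the parsed rows and inserts them in one go:
const fs = require('fs');
const csvParser = require('csv-parse');

Controller.uploadCsv = (data) => new Promise((resolve, reject) => {
  const rows = [];
  fs.createReadStream(data.path)
    // columns: true makes csv-parse emit keyed objects, so row.name is defined
    .pipe(csvParser({ delimiter: '\t', columns: true }))
    .on('data', (row) => {
      // buffer each parsed row instead of inserting immediately
      rows.push({
        name: row.name,
        address: row.address,
        phoneNo: row.phoneNumber,
        email: row.email,
        created_at: new Date(),
        updated_at: new Date()
      });
    })
    .on('error', reject)
    .on('end', () => {
      // one bulk insert once the whole file has been parsed
      models.table.bulkCreate(rows).then(resolve).catch(reject);
    });
});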

TL;DR: Your code has a minor error that may be causing your problem - you use await, and to run it you'd need to put async before the function in the data handler. That may work for small files, but please read on - it's not the right solution, and I added one of the proper ways below.
ES6 async/await is a language construct that allows you to await the resolution of a Promise and continue executing the code in an async function. In your code you do have an async function declaration, however you added await inside a non-async function. To clarify: the await keyword is only allowed when the closest enclosing function() { is async - in your case it's not.
I actually don't think your code would even compile, and after some changes you'd fall straight into the problem mentioned in this question: you're trying to run an asynchronous operation inside a synchronous event handler in node. The asynchronous inserts into the database will run, but the end event will fire before the operations are completed.
In order to do this correctly you could use a transform stream, or abandon streaming altogether and simply build an array from the CSV (there are more than enough good modules for that). I am, however, the author of the scramjet framework, and I also think this should simply work as you wrote it, or maybe even simpler.
Here's code that will do what you want:
const {StringStream} = require('scramjet');

Controller.uploadCsv = async (data) =>
  fs.createReadStream(data.path)
    .pipe(new StringStream('utf-8'))
    .CSVParse({
      delimiter: '\t',
      newline: '\n',
      escapeChar: '"',
      quoteChar: '"'
    })
    .map(data => ({
      name: data.name,
      address: data.address,
      phoneNo: data.phoneNumber,
      email: data.email,
      created_at: new Date(),
      updated_at: new Date()
    }))
    .each(async entry => await models.table.create(entry))
    .each(result => log(result)) // if it's worth logging
    .run();
Scramjet simply uses streams underneath (all classes extend the built-in node.js streams), but exposes an interface similar to the synchronous ones on Array etc. You can run your async operations, and it returns a Promise from the run operation.
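For completeness, the "abandon streaming" option mentioned above is also perfectly viable for ~100 records. A rough sketch, assuming csv-parse's synchronous API and a Sequelize-style bulkCreate (the require path for the sync API differs between csv-parse versions):
const fs = require('fs');
const parse = require('csv-parse/lib/sync'); // may be 'csv-parse/sync' in newer versions

Controller.uploadCsv = async (data) => {
  const input = fs.readFileSync(data.path, 'utf-8');
  // columns: true turns each row into an object keyed by the header row
  const records = parse(input, { delimiter: '\t', columns: true });
  const rows = records.map(r => ({
    name: r.name,
    address: r.address,
    phoneNo: r.phoneNumber,
    email: r.email,
    created_at: new Date(),
    updated_at: new Date()
  }));
  return models.table.bulkCreate(rows);
};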

Related

mongoose .find() scope: How can I call found data out of the query?

I'm trying to use the found variable outside of the query, but it returns an empty array instead. Can someone explain why the console.log inside the function works but not outside of the function?
// Video Schema
let mongoose = require("mongoose");
let Schema = mongoose.Schema;

var videoSchema = new Schema({
  title: String,
  videoURL: String,
  author: String,
  time: String,
  viewcount: Number,
  categories: [{
    type: Schema.Types.ObjectId,
    ref: "Category"
  }],
  description: String,
})

let Video = mongoose.model("Video", videoSchema);

module.exports = {
  videoSchema: videoSchema,
  Video: Video
}
app.js
let Video = require(__dirname + "/dbs/Video.js").Video;

app.get("/", function(req, res) {
  let videos = []
  Video.find(function(err, foundVideo) {
    if (!err) {
      videos = foundVideo.slice(0)
      console.log(videos) // this returns an object array [{obj1}, {obj2}]
    } else {
      return err
    }
  })
  console.log(videos) // this returns an empty array []
})
How can I store the found videos array in the videos variable so that I can access it globally?
When you do this operation:
Video.find(function(err, data) {
  // something
  console.log("one")
})
// nothing
console.log("two")
The function between parentheses is the callback of the find() operation. This means it will be called back when the find execution ends, and it can use the err and data parameters inside its scope. It will execute the console.log("one").
This way of "waiting" for results is due to the asynchronous nature of js.
The code outside the callback, on the other hand, is triggered right after find is called and does not wait for the find operation to finish. Therefore, in this example, two will be printed before one.
In your example, the variable videos that you print outside the callback (console.log(videos)) is empty because it is printed before the videos are actually there.
You should write all your callback code in the !err case:
if (!err) {
  videos = foundVideo.slice(0)
  console.log(videos) // this returns an object array [{obj1}, {obj2}]
}
Update
As you noticed, you are forced to put the code in the callback method. Other methods or requests that depend on the data from the first request then tend to make the code harder to structure.
const processVideos = function(data) {
  if (!data) {
    // do something when data is empty
  }
  // process videos here
}

const notifyError = function(err) {
  if (!err)
    return
  // do something with the error here
}

Video.find(function(err, data) {
  processVideos(data)
  notifyError(err)
})
Always use your "genius" and programming patterns to avoid complex code, large methods and unreadable sections.
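For completeness, here is a sketch of the same idea with async/await, assuming a Mongoose version whose queries return promises; it keeps the data handling next to where the data is used:
app.get("/", async function(req, res) {
  try {
    // find() returns a thenable query, so it can be awaited
    const videos = await Video.find().exec();
    console.log(videos); // the array is populated here
    res.render("index", { videos: videos });
  } catch (err) {
    return res.status(500).send(err);
  }
});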

How to show Created By "NAME" using MongoDB

I am new to MongoDB, so I was wondering if there is any way to show the name of the user who created or modified something? I want to display in my application the name of whoever created or modified a record.
You need to account for this info when structuring your data model. E.g. let's say you're writing posts to the posts collection, and want to add the post author from the authors collection.
Now, the simplest way to do this is to have this data directly embedded in your post document. E.g. for creation data we use insert, something like this:
// posts.service.js
function save(postData, userData) {
  // We return a Promise here
  return db.posts.insert({
    title: postData.title,
    text: postData.text,
    published: true,
    // now comes your audit data
    createdBy: userData.username,
    createdAt: new Date().toISOString(),
  });
}

module.exports = { save };
You use it like this, e.g. in your /posts API controller:
// ... other stuff, then:
const postsService = require('./posts.service');

route.post('/posts', function(req, res, next) {
  postsService.save({
    title: req.body.title,
    text: req.body.text,
  }, req.user)
    // Handle response and error.
    .then(response => res.json(response))
    .catch(error => next(error));
});
And for updating a post, you'd add this to posts.service.js (using update):
// posts.service.js
// ...after your *save* function above
function update(postId, postData, userData) {
  return db.posts.update({
    id: postId,
  }, {
    title: postData.title,
    text: postData.text,
    published: true,
    // now comes your audit data
    modifiedBy: userData.username,
    modifiedAt: new Date().toISOString(),
  });
}

// update module exports:
module.exports = { save, update };
Now, on your controller, add the route to handle updates:
// ... after the creation route
route.put('/posts/:postId', function(req, res, next) {
  postsService.update(req.params.postId, {
    title: req.body.title,
    text: req.body.text,
  }, req.user)
    // Handle response and error.
    .then(response => res.json(response))
    .catch(error => next(error));
});
Now, other approaches could store only a reference (ObjectId) to whoever modified the data, or maybe more info.
But a smarter and easier thing to do would be to use something like mongoose for handling your data, and then use a plugin that does all this automatically. Something like this: https://github.com/taptaptech/mongoose-audit.
You could look for something similar at npm: https://www.npmjs.com/search?q=mongoose%20audit.
What these packages do is add pre-save hooks to your documents, and in those hooks the audit data is tracked. So you can use something ready-made, or you can look at what these packages do and try to replicate the functionality yourself - the better option if this is a hobby project and you want to learn how things work.
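For illustration, a minimal sketch of such a pre-save hook on a plain Mongoose schema (field names here are just examples, and the username is assumed to be set by the caller, e.g. from req.user, before saving):
const mongoose = require('mongoose');

const postSchema = new mongoose.Schema({
  title: String,
  text: String,
  createdBy: String,
  createdAt: Date,
  modifiedBy: String,
  modifiedAt: Date,
});

// fill in the audit timestamps automatically on every save
postSchema.pre('save', function(next) {
  const now = new Date();
  if (this.isNew) {
    this.createdAt = now;
  }
  this.modifiedAt = now;
  next();
});

module.exports = mongoose.model('Post', postSchema);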
Now, how to show this information in your frontend is probably best asked as a new question, since you haven't specified which software/packages you're using.
You should just write it to your db upon edit/create; there is no built-in functionality for this.

Knex Inserting Same Record Twice when Query Executed Asynchronously

In my /api/v1/chats POST route I make an async call to user_chat_queries.addUserChat for each user_id passed in the request body. The idea is that if lots of user_ids come in, I don't want each insertion to await; instead I'd like to dispatch all the insertions asynchronously, so I do:
Asynchronous Route handler (https://github.com/caseysiebel/lang-exchange/blob/master/src/server/routes/chats.js#L57):
await Promise.all(user_ids.map((user_id) => {
  return user_chat_queries.addUserChat(user_id, chat.id)
}));
As opposed to,
Synchronously:
for (let user_id of user_ids) {
  await user_chat_queries.addUserChat(user_id, chat.id)
}
And in the user_chat_queries (https://github.com/caseysiebel/lang-exchange/blob/master/src/server/db/queries/user_chat.js#L5):
addUserChat: (async (user_id, chat_id) => {
  const user_chat = await userChats
    .insert({ user_id, chat_id })
    .returning('*')
  const data = await db('user_chat').select('*')
  return user_chat;
}),
Now the route is accessed from my test file: (https://github.com/caseysiebel/lang-exchange/blob/master/test/routes.chats.test.js#L83)
it('should add 2 user_chats', (done) => {
  chai.request(server)
    .post('/api/v1/chats')
    .send({
      created_at: Date.now(),
      user_ids: [2, 4]
    })
    .end((err, res) => {
      should.not.exist(err);
      res.status.should.equal(201);
      res.type.should.equal('application/json');
      res.body.status.should.eql('success');
      const chat = res.body.data;
      chat.should.include.keys('id', 'created_at');
      knex('user_chat')
        .select('*')
        .then((data) => console.log('data', data))
      done();
    });
});
The log shows that { user_id: 4, chat_id: 3 } is inserted into the user_chat table twice.
The expected result (and the result when executed synchronously) is one { user_id: 2, chat_id: 3 } record and one { user_id: 4, chat_id: 3 } record are inserted into the user_chat table.
I can't track down what is causing this. It seems to me that addUserChat should insert a record made from the inputs it is passed each time, regardless of when it resolves.
Full code base: https://github.com/caseysiebel/lang-exchange
Debugging console output: https://gist.github.com/caseysiebel/262997efdd6467c72304ee783dadd9af#file-console-L5
You need to use a mapSeries-type mechanism (which is not available in the default Promise API), so you may need to use a third-party library like Bluebird.
More details: Bluebird mapSeries.
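A rough sketch of what that could look like inside the async route handler, assuming Bluebird and the same addUserChat helper:
const Promise = require('bluebird');

// runs the inserts strictly one after another, in order
const inserted = await Promise.mapSeries(user_ids, (user_id) =>
  user_chat_queries.addUserChat(user_id, chat.id)
);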
You don't show what userChats is in your code examples, but it looks like you may have a global instance of knex that you reuse across different queries, such as:
const userChats = knex('user_chats')
This is not safe because each query building operation mutates the query builder. You may get away with it if you're running in series but when you run in parallel, both your addUserChat calls are using the same userChats query builder at the same time and weird stuff happens.
Rule of thumb: call knex() once for every query you build.
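So addUserChat could build a fresh query on each call, something like this sketch (assuming db is your knex instance):
addUserChat: (async (user_id, chat_id) => {
  // a new query builder per call, instead of a shared userChats instance
  const user_chat = await db('user_chat')
    .insert({ user_id, chat_id })
    .returning('*');
  return user_chat;
}),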

Iterating over data from mongoose in node

I have this fiddle that illustrates what I want to do and it works as intended:
https://jsfiddle.net/mbLwsnak/
All it does is loop through an array of objects, and adds a key value pair.
I have this same code in my node app, and it doesn't work. The new pair is not added to the objects, even though the code is practically the same, so I figure it must be the context.
Schema.find()
  .then(function(currencydata) {
    var data = currencydata;
    data.forEach(function(obj) {
      obj.thing = "Thing";
    })
    console.log(data);
    mdata.currencydata = data;
  })
  .then(function(result) {
    res.render('index', mdata);
  })
So the only difference I can think of that matters is that this is all inside an asynchronous function. I even tried putting the log inside a setTimeout in case it was something to do with being async, but no change.
Edit:
currencydata is an array of objects returned by the mongoose schema. It looks like this:
[{
  country: 'Canada',
  currency: 'CAD',
  cost: 5.98,
  exchangerate: 1.33,
  value: -10.9
}, {
  country: 'United States',
  currency: 'USD',
  cost: 5.06,
  exchangerate: 1,
  value: 0
}, {
  country: 'India',
  currency: 'INR',
  cost: 170,
  exchangerate: 68.33,
  value: -50.8
}]
Use lean() to convert the mongoose documents into plain JavaScript objects.
Then, inside the first then(), you need to return a promise so that the next then() will receive the returned value.
Inside the promise, the resolve and reject functions, when called, resolve or reject the promise, respectively. Promise Reference
Schema.find()
  .lean()
  .then((currencydata) => {
    return new Promise((resolve, reject) => {
      currencydata.forEach(data => {
        data.thing = 'Thing';
      });
      resolve(currencydata);
    });
  })
  .then((newdata) => {
    res.render('index', newdata);
  });

How to update with Sequelize with 'NOW()' on a timestamp?

I'm trying to do something like the following:
model.updateAttributes({syncedAt: 'NOW()'});
Obviously, that doesn't work because it just gets passed as a string. I want to avoid passing a timestamp constructed in node, because I later compare it to an 'ON UPDATE CURRENT_TIMESTAMP' field, and the database and application could be running on different clocks.
Is my only option to just make a database procedure and call that?
You can use Sequelize.fn to wrap it appropriately:
instance.updateAttributes({syncedAt: sequelize.fn('NOW')});
Here's a full working example:
'use strict';
var Sequelize = require('sequelize');
var sequelize = new Sequelize(/*database*/'test', /*username*/'test', /*password*/'test',
  {host: 'localhost', dialect: 'postgres'});

var model = sequelize.define('model', {
  syncedAt: {type: Sequelize.DATE}
});

sequelize.sync({force: true})
  .then(function() {
    return model.create({});
  })
  .then(function() {
    return model.find({});
  })
  .then(function(instance) {
    return instance.updateAttributes({syncedAt: sequelize.fn('NOW')});
  })
  .then(function() {
    process.exit(0);
  })
  .catch(function(err) {
    console.log('Caught error! ' + err);
  });
That produces
UPDATE "models" SET "syncedAt"=NOW(),"updatedAt"='2015-02-09 18:05:28.989 +00:00' WHERE "id"=1
Worth mentioning (for people coming here via search) that NOW() isn't standard and doesn't work on SQL Server - so don't do this if you care about portability.
sequelize.literal('CURRENT_TIMESTAMP') may work better.
You can use sequelize.literal('CURRENT_TIMESTAMP').
Example:
await PurchaseModel.update( {purchase_date : sequelize.literal('CURRENT_TIMESTAMP') }, { where: {id: purchaseId} } );
