I've got the following problem: I am looping through API calls using different dates to append the output to a CSV file. However, the output data in the CSV file only contains data from the first date.
When I log the results to the command prompt I do get multiple dates, meaning the problem occurs when writing the output to the CSV.
Moment.js is used to set the start and end dates to loop through, and fast-csv is used to write the output of each API call to a CSV file.
// load and configure
const piwik = require ('piwik').setup ('placeholderurl', 'XXXXX');
// filesystem requirement
var fs = require('fs');
// fast-csv requirement
var csv = require("fast-csv");
// moment.js requirement
var moment = require('moment');
// variables for looping through the dates
var a = moment().format('2016-05-12');
var b = moment().format('2016-05-15');
var stream = fs.createWriteStream ('my.csv', {flags: 'a'})
// assemble the API URL
for (var m = moment(a); m.isBefore(b); m.add(1, 'days')) {
  piwik.api(
    {
      method: 'Live.getLastVisitsDetails',
      idSite: 3,
      period: 'day',
      format: 'csv',
      date: moment(m).format('YYYY-MM-DD')
    },
    function (err, data) {
      if (err) {
        console.log(err);
        return;
      }
      console.log(data);
      csv.writeToStream(fs.createWriteStream("my.csv"), data, {flags: 'a', headers: true});
    }
  );
}
API token and URL removed for privacy reasons.
Solved. I got rid of the Piwik API package and decided to use a plain HTTP GET request to retrieve the URL manually.
The code:
// http requirement
var http = require('http');
var request = require('request');
// filesystem requirement
var fs = require('fs');
// moment.js requirement
var moment = require('moment');
// variables for looping through the dates
var a = moment().format('2016-05-12');
var b = moment().format('2016-05-15');
var m = moment(a);
//var stream = fs.createWriteStream ('my.csv', {flags: 'a'})
// assemble the API URL
for (var m = moment(a); m.isBefore(b); m.add(1, 'days')) {
  request
    .get("http://placeholder.com/?module=API&method=Live.getLastVisitsDetails&idSite=3&period=day&date=" + moment(m).format('YYYY-MM-DD') + "&format=csv&token_auth=placeholdertoken&filter_limit=-1")
    .on('error', function(err) {
      console.log(err)
    })
    .pipe(fs.createWriteStream('data-' + moment(m).format('YYYY-MM-DD') + '.csv'))
  console.log(moment(m).format('YYYY-MM-DD') + " " + "saved")
}
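If you would rather end up with a single CSV instead of one file per day, a minimal sketch (reusing the same placeholder URL and token) is to open each write stream with the append flag. Keep in mind the requests run in parallel, so if two responses arrive at the same time their contents can interleave; fire the requests one after another if strict ordering matters.
// append each day's response to one file instead of writing data-YYYY-MM-DD.csv per day
for (var m = moment(a); m.isBefore(b); m.add(1, 'days')) {
  request
    .get("http://placeholder.com/?module=API&method=Live.getLastVisitsDetails&idSite=3&period=day&date=" + moment(m).format('YYYY-MM-DD') + "&format=csv&token_auth=placeholdertoken&filter_limit=-1")
    .on('error', function(err) { console.log(err) })
    .pipe(fs.createWriteStream('my.csv', { flags: 'a' }));
}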
I have tried this with the npm package json2csv.
It works fine for up to 75,000 records. When the data grows beyond that, I am not getting any response from the callback of the exporttocsv function given below.
const json2csv = require('json2csv').parse;
var today = new Date();
var mongoClient = require('mongodb').MongoClient
, assert = require('assert');
var dd = today.getDate();
var mm = today.getMonth() + 1; //January is 0!
var yyyy = today.getFullYear();
if (dd < 10) {
  dd = '0' + dd;
}
if (mm < 10) {
  mm = '0' + mm;
}
var today = dd + '_' + mm + '_' + yyyy;
router.put('/mass_report', (req, res) => {
  mass_data_download();
  res.json("Mass report download initiated");
});
function exporttocsv(data, name, callback) {
  /* Start: Json to xlsx conversion */
  if (!fs.existsSync('./csv/' + today + '/')) {
    fs.mkdirSync('./csv/' + today + '/');
  }
  var csv = json2csv(data);
  var fname = './csv/' + today + '/' + name + new Date().getTime() + '.csv';
  fs.writeFileSync(fname, csv, 'binary', (error, response) => {
    console.log(error);
    console.log(response);
  });
  callback(fname);
}
function mass_data_download() {
  db.collection('mass_data').aggregate([
    {$match: {
      created_on: {
        $gte: new Date("2017-09-01T00:00:00.000Z"),
      }
    }}
  ]).sort({_id: -1}).toArray(function (error, response) {
    if (error) {
      console.log(error)
    }
    else {
      console.log(response.length);
      exporttocsv(response, 'mass_report', function (fname) {
        console.log('reports download completed');
      })
    }
  })
}
Are there any limitations when exporting data to CSV?
Or how can I achieve this in some other way?
The thing is, you are holding a huge amount of data in memory at the same time. You should avoid that at all costs. Node.js is perfect for using streams, so piggyback on them. Treat Mongo as your readable stream, pipe it into the json2csv transform stream, and do what you want with the result; most likely you will pipe it into a writable stream such as a file or even the HTTP response.
Mongoose supports streaming. You can find more information here.
json2csv also supports a streaming interface. Here is more info about the streaming API of json2csv.
UPDATED: the final pseudocode should look like this:
const csv = fs.createWriteStream('file.csv');

Model.find()
  .cursor()                      // read more in the Mongoose docs on query cursors
  .pipe(json2csvTransformStream) // read more in the json2csv transform stream API
  .pipe(csv);                    // read more about fs.createWriteStream
Piping will handle the whole stream flow, and you will not have to worry about memory leaks or performance.
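Filled in for the collection from the question, a minimal sketch of that pipeline might look like the following. It assumes json2csv v4+ (which exposes a Transform class) and a hypothetical Mongoose model named MassData; adjust the names and the query to your schema.
const fs = require('fs');
const { Transform } = require('json2csv');

const output = fs.createWriteStream('./csv/mass_report.csv');

// objectMode lets the transform consume the documents the cursor emits
const json2csvTransform = new Transform({}, { objectMode: true });

MassData.find({ created_on: { $gte: new Date('2017-09-01T00:00:00.000Z') } })
  .cursor()                  // stream documents one by one instead of buffering with toArray()
  .pipe(json2csvTransform)   // convert each document into a CSV row
  .pipe(output)              // write rows to disk as they are produced
  .on('finish', () => console.log('reports download completed'));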
I have a few huge CSV files that I need to store in a MongoDB database. Because these files are too big, I need to use streams. I pause the stream while the data is being written into the database.
var fs = require('fs');
var csv = require('csv');
var mongo = require('mongodb');
var db = mongo.MongoClient.connect...
var readStream = fs.createReadStream('hugefile.csv');
readStream.on('data', function(data) {
  readStream.pause();
  csv.parse(data.toString(), { delimiter: ','}, function(err, output) {
    db.collection(coll).insert(data, function(err) {
      readStream.resume();
    });
  });
});

readStream.on('end', function() {
  logger.info('file stored');
});
But csv.parse throws an error, because I would need to read the file line by line to handle it as CSV and convert it to JSON for MongoDB. Maybe I should not pause the stream, but use some other interface. I haven't found a solution for this yet.
Any help would be appreciated!
I think you might want to create a stream of lines from your raw data stream.
Here is an example from the split package. https://www.npmjs.com/package/split
fs.createReadStream(file)
  .pipe(split())
  .on('data', function (line) {
    //each chunk now is a separate line!
  })
Adapted to your example, it might look like this:
var readStream = fs.createReadStream('hugefile.csv');
var lineStream = readStream.pipe(split());

lineStream.on('data', function(data) {
  //remaining code unmodified
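Put together, a fuller sketch might look like this. It reuses the csv package and the db, coll, and logger variables from the question, and the document shape passed to insert() is just a placeholder; map the parsed columns to your own fields.
var fs = require('fs');
var csv = require('csv');
var split = require('split');

var readStream = fs.createReadStream('hugefile.csv');
var lineStream = readStream.pipe(split());

lineStream.on('data', function(line) {
  if (!line) return;      // split() can emit a trailing empty line
  lineStream.pause();     // throttle while the insert is in flight
  csv.parse(line, { delimiter: ',' }, function(err, output) {
    if (err) { console.log(err); return lineStream.resume(); }
    // output[0] is the array of column values for this line;
    // map it to your real document shape instead of this placeholder
    db.collection(coll).insert({ row: output[0] }, function(err) {
      if (err) console.log(err);
      lineStream.resume();
    });
  });
});

lineStream.on('end', function() {
  logger.info('file stored');
});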
I'm unsure whether bulk operations were a thing back in '15, but whoever is trying to import items from large sources should consider using them.
var fs = require('fs');
var csv = require('fast-csv');
var mongoose = require('mongoose');
var db = mongoose.connect...

var counter = 0; // to keep count of values in the bulk()
const BULK_SIZE = 1000;
var bulkItem = Item.collection.initializeUnorderedBulkOp();

var readStream = fs.createReadStream('hugefile.csv');
const csvStream = csv.fromStream(readStream, { headers: true });

csvStream.on('data', data => {
  counter++;
  bulkItem.insert(data);
  if (counter === BULK_SIZE) {
    csvStream.pause();
    bulkItem.execute((err, result) => {
      if (err) console.log(err);
      counter = 0;
      bulkItem = Item.collection.initializeUnorderedBulkOp();
      csvStream.resume();
    });
  }
});
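One gap in the snippet above: if the stream ends with a partially filled batch, those last documents are never written. A hedged addition (reusing the same bulkItem and counter variables) flushes them in the 'end' handler:
csvStream.on('end', () => {
  // flush whatever is left in the final, partially filled batch
  if (counter > 0) {
    bulkItem.execute((err, result) => {
      if (err) console.log(err);
      console.log('import finished');
    });
  } else {
    console.log('import finished');
  }
});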
How do we upload an image received from the mobile app to another server using the request module in Node.js?
I have tried using the multipart module to extract the file and send it in the formData attribute with the request module (POST method). This doesn't seem to work.
Please use the following code; it has been tested in Express. You can modify it according to your requirements.
var path = require('path');
var util = require('util');
var fs = require('fs');

if (req.files.profile_image !== undefined) {
  var file = req.files.profile_image;
  var tmp_path = file.path;
  var fileName = file.name;
  var milliseconds = new Date().getTime();
  var file_ext = fileName.substr((Math.max(0, fileName.lastIndexOf(".")) || Infinity) + 1);
  // requestedUser is assumed to come from the surrounding route handler
  var newFileName = requestedUser + '_' + milliseconds + '.' + file_ext;
  var pathToFile = require('path').dirname(require.main.filename);
  var mainPath = path.dirname(pathToFile);
  var target_path = path.join(mainPath, 'public/uploads/users', newFileName);
  var readStream = fs.createReadStream(tmp_path);
  var writeStream = fs.createWriteStream(target_path);
  // util.pump is deprecated in newer Node versions; readStream.pipe(writeStream) does the same job
  util.pump(readStream, writeStream, function(err) {
    if (err) {
      //handle error
    } else {
      //successfully uploaded
    }
  });
} else {
  //file not received
}
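Since the question is about forwarding the image to another server with the request module, here is a hedged sketch of that part as well. It assumes the file has already been saved locally as target_path (as above), and the URL and the profile_image field name are placeholders for whatever the receiving API expects.
var fs = require('fs');
var request = require('request');

// forward the saved file to the other server as multipart/form-data
request.post({
  url: 'http://example.com/upload', // placeholder target URL
  formData: {
    profile_image: fs.createReadStream(target_path) // field name expected by the receiving API
  }
}, function(err, httpResponse, body) {
  if (err) {
    return console.error('upload failed:', err);
  }
  console.log('upload successful, server responded with:', body);
});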
Thanks
I have a program that gets values from a request sent with curl, stores them in a file, and serves the stored content back. The decision to overwrite or append the contents of the file is based on a query parameter appendFlag.
Now when I run this program, what I get in the console is "true" and "appending". This suggests that it does read the flag and enters the if branch, but somehow the appendFile function is not working.
var http = require('http');
var url = require('url');
var querystring = require('querystring');
var fs = require('fs');

http.createServer(function(request, response) {
  var str = request.url.split('?')[1];
  var query = querystring.parse(str);
  var writeStream = fs.createWriteStream(query['fileName']);
  console.log("query - ");
  console.log(query["appendFlag"]);
  request.on('data', function(chunk) {
    if (query["appendFlag"] == "true") {
      console.log("appending");
      fs.appendFile(query['fileName'], chunk.toString(), function(err) {
        if (err) throw err;
      });
    } else {
      var bufferGood = writeStream.write(chunk);
      if (!bufferGood) request.pause();
    }
  });
  request.on('end', function() {
    response.writeHead(200);
    response.write("\n Content with this url is - \n");
    var readStream = fs.createReadStream(query['fileName'], {bufferSize: 64*1024});
    readStream.on('data', function(chunk) {
      response.write(chunk.toString());
    });
    readStream.on('end', function() {
      response.write("\n");
      response.end();
    });
  });
  writeStream.on('drain', function() {
    request.resume();
  });
}).listen(8080);
Then, after reading an answer on SO (How to create appending writeStream in Node.js), I tried:
// Program to extract the url from the request and write to that particular file
var http = require('http');
var url = require('url');
var querystring = require('querystring');
var fs = require('fs');

http.createServer(function(request, response) {
  var str = request.url.split('?')[1];
  var query = querystring.parse(str);
  var writeStream = fs.createWriteStream(query['fileName']);
  options = {
    'flags': 'a',
    'encoding': null,
    'mode': 0666
  }
  var appendStream = fs.createWriteStream(query['fileName'], [options]);
  console.log("query - ");
  console.log(query["appendFlag"]);
  request.on('data', function(chunk) {
    if (query["appendFlag"] == "true") {
      console.log("appending");
      var bufferGood = appendStream.write(chunk.toString());
      if (!bufferGood) request.pause();
    } else {
      var bufferGood = writeStream.write(chunk.toString());
      if (!bufferGood) request.pause();
    }
  });
  request.on('end', function() {
    response.writeHead(200);
    response.write("\n Content with this url is - \n");
    var readStream = fs.createReadStream(query['fileName'], {bufferSize: 64*1024});
    readStream.on('data', function(chunk) {
      response.write(chunk.toString());
    });
    readStream.on('end', function() {
      response.write("\n");
      response.end();
    });
  });
  writeStream.on('drain', function() {
    request.resume();
  });
}).listen(8080);
That is, I changed the flag to 'a', and it still did not append the data.
You can use your first variant. But before appendFile() you have already opened a writeStream for the same query['fileName']; the stream is already open.
var writeStream = fs.createWriteStream(query['fileName']);
options = {
  'flags': 'a',
  'encoding': null,
  'mode': 0666
}
var appendStream = fs.createWriteStream(query['fileName'], [options]);
Maybe it's better to do something like:
var options = {
  flags: query.appendFlag === 'true' ? 'a' : 'w'
  ...
Next: why [options]? You should remove the brackets.
Next: there is no guarantee you'll have a fileName param in the query string. Please handle that case.
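Putting those points together, a minimal sketch (reusing the appendFlag and fileName query parameters from the question) that opens a single stream with the right flag might look like this:
var http = require('http');
var querystring = require('querystring');
var fs = require('fs');

http.createServer(function(request, response) {
  var query = querystring.parse(request.url.split('?')[1] || '');
  if (!query.fileName) { // no fileName param: fail fast
    response.writeHead(400);
    return response.end('fileName query parameter is required\n');
  }
  // open one stream, in append or overwrite mode depending on the flag
  var outStream = fs.createWriteStream(query.fileName, {
    flags: query.appendFlag === 'true' ? 'a' : 'w'
  });
  request.pipe(outStream); // pipe handles pause/resume (backpressure) for you
  outStream.on('finish', function() {
    response.writeHead(200);
    response.write('\n Content with this url is - \n');
    fs.createReadStream(query.fileName).pipe(response); // echo the stored content back
  });
}).listen(8080);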
I have a MongoDB collection that I am querying based on a time frame and address number. If the query is successful, then the server will return a CSV file with a number of attributes stored in each query entry. This part is working fine.
The problem is that one of the attributes in my CSV file is the timestamp. I'd like it to return the local date time (i.e. "Time":"2014-02-09T06:00:02.000Z"), but the CSV file contains the date in milliseconds (i.e. "Time":1392040717774). Is there an easy way to transform the query entry into a local date string before it's written to the CSV file? Here's my code snippet:
var JSONStream = require('JSONStream');

var httpserver = http.createServer(function(req, res) {
  var pathname = url.parse(req.url).pathname;
  if (pathname == "/DownloadUCData") {
    var requestData = '';
    req.on('data', function (data) {
      requestData += data;
    });
    req.on('end', function () {
      var json = JSON.parse(requestData);
      var st = new Date(json.startDate);
      var et = new Date(json.endDate);
      st.setHours(st.getHours() - 4); //compensate for GMT offset
      et.setHours(et.getHours() - 4);
      st = st.getTime();
      et = et.getTime();
      var proj = JSON.parse('{"state":1, "temperature":1, "mode":1, "speed":1, "time":1}');
      var cursor = userControlCollection.find({"addr": json.addr.toString(), "time": {$gte: st, $lte: et}}, proj);
      var dbstream = cursor.stream();
      var tempname = json.type + ".csv";
      var wstream = fs.createWriteStream(tempname);
      wstream.on('error', function(e) { console.error(e); });
      dbstream.on("end", function() {
        wstream.end();
        console.log("write end");
        res.writeHead(200, {"Content-Type": "application/json"});
        res.write(JSON.stringify({fname: tempname}));
        res.end();
        return;
      });
      var jsonToStrings = JSONStream.stringify(false);
      dbstream.pipe(jsonToStrings).pipe(wstream);
    });
  }
So, I figured out one way to solve this problem (although there may be others). Basically, I had to add a transform stream to the pipeline to convert the .getTime() milliseconds back into a Date object. Here's the code snippet that seemed to resolve the issue:
var Transform = require('stream').Transform;

var parser = new Transform({objectMode: true});
parser._transform = function(data, encoding, done) {
  if (data.time) data.time = new Date(data.time);
  this.push(data);
  done();
};
var jsonToStrings = JSONStream.stringify(false);
dbstream.pipe(parser).pipe(jsonToStrings).pipe(wstream);
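One caveat: JSONStream serializes the Date with toJSON(), which yields the UTC ISO string (the "2014-02-09T06:00:02.000Z" form from the question). If you want a genuinely local-time string instead, a hedged variant of the same transform (the formatting choice is an assumption) converts it explicitly:
parser._transform = function(data, encoding, done) {
  if (data.time) {
    // toLocaleString() uses the server's locale and timezone; swap in your own formatting if needed
    data.time = new Date(data.time).toLocaleString();
  }
  this.push(data);
  done();
};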