Node.js CSV data export system for users

I need to allow users to export their data in CSV format. I have written the app in Node.js. The exported data can be huge, so I was wondering how to handle such a situation in Node.js. Should I use process.nextTick or the child process API of Node.js? Also, are there any good modules available for Node.js to convert data from MySQL to CSV?

Read line by line from your MySQL database, and append line by line to your file.
I don't know that much about the mysql module, so I'm assuming here that each row is just an array, hence the row.join(';'). If that's not the case (maybe it's an object), you should adjust that.
var fs = require('fs');
var connection = require('mysql').createConnection({ /* your db settings here */ });

// append one row to the file, then signal completion
function processRow(row, done) {
  fs.appendFile('your-file.csv', row.join(';'), function (err) {
    if (done) done(err);
  });
}

var query = connection.query('SELECT * FROM WHATEVER');
query
  .on('error', function (err) {
    // do something when an error happens
  })
  .on('fields', function (fields) {
    processRow(fields);
  })
  .on('result', function (row) {
    // Pausing the connection is useful if your processing involves I/O
    connection.pause();
    processRow(row, function (err) {
      connection.resume();
    });
  })
  .on('end', function () {
    // now you can mail your user
  });
If you have a lot of requests, you could use the compute-cluster module to distribute your workload.
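For very large exports you can also keep everything streaming end to end. A minimal sketch, assuming the mysql module's query stream and the csv-stringify package (csv-stringify is left to infer the columns from the first row object):

var fs = require('fs');
var mysql = require('mysql');
var stringify = require('csv-stringify'); // npm install csv-stringify

var connection = mysql.createConnection({ /* your db settings here */ });

connection.query('SELECT * FROM WHATEVER')
  .stream({ highWaterMark: 100 })        // emits one row object at a time, keeps memory flat
  .pipe(stringify({ header: true }))     // turns row objects into CSV lines
  .pipe(fs.createWriteStream('your-file.csv'))
  .on('finish', function () {
    connection.end();
    // now you can mail your user
  });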

The accepted answer is not working because CSV files are separated by , not ;. There is also no newline character \n after the end of each row, and the fields object contains information about the column attributes, not the data rows; results contains the rows returned by the query. Hence I wrote my own code for generating CSV files. If you need more explanation, please comment and I will provide it.
pool.query('SELECT * FROM category', function (error, results, fields) {
  var reportFile = Date.now();
  fs.closeSync(fs.openSync(__dirname + '/../reports/' + reportFile + '.csv', 'w'));
  var attributes = [];
  var row = [];
  for (var x = 0; x < fields.length; x++) attributes.push(fields[x].name);
  fs.appendFile(__dirname + '/../reports/' + reportFile + '.csv', attributes.join(','), function (err) {
    if (err) console.log('Error appending fields', err);
    fs.appendFileSync(__dirname + '/../reports/' + reportFile + '.csv', '\n');
    for (var x = 0; x < results.length; x++) {
      row = [];
      for (var y = 0; y < attributes.length; y++) {
        row.push(results[x][attributes[y]]);
      }
      fs.appendFileSync(__dirname + '/../reports/' + reportFile + '.csv', row.join(','));
      fs.appendFileSync(__dirname + '/../reports/' + reportFile + '.csv', '\n');
    }
    req.reportFile = reportFile;
    next();
  });
});
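One caveat with joining raw values: fields that themselves contain commas, quotes, or newlines will break the output. A small RFC 4180-style quoting helper (hypothetical name escapeCsv, not part of the original answer) can be applied to each value before joining:

function escapeCsv(value) {
  var s = value === null || value === undefined ? '' : String(value);
  // quote the field if it contains a comma, a double quote, or a newline
  if (/[",\n\r]/.test(s)) {
    return '"' + s.replace(/"/g, '""') + '"';
  }
  return s;
}

// usage: row.push(escapeCsv(results[x][attributes[y]]));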

Related

Leaflet.js draw line along rivers between two points

I am looking to draw a line along a few rivers (they merge so the river name technically changes) with Leaflet.js. I am currently using a Mapbox custom map style to display the map but I'm at a loss of how to "draw" a line along these rivers, from one marker to another.
EDIT
Thanks to @ghybs, who pointed me in the right direction (below).
I now have this code, which works perfectly for getting the data. However, the problem is that the nodes are not "in order". I'd like the nodes to be ordered along the river so that I can draw the line. Currently, because they are not sequential, the line is all over the place.
The code is utilising Request to get the data, hence the calls are asynchronous. I think this is what's leading to the ordering issue.
var request = require("request");
var parseString = require("xml2js").parseString;
var fs = require("fs");

var results = [];

request("https://www.openstreetmap.org/api/0.6/relation/5806846", function (error, response, body) {
  // var body = fs.readFileSync("relation.xml");
  var total_requests = 0;
  var completed_requests = 0;
  parseString(body, function (err, result) {
    var ways = result.osm.relation[0].member;
    console.log("Initial requests: " + ways.length);
    total_requests += ways.length;
    for (var i = 0; i < ways.length; i++) {
      var way = ways[i].$.ref;
      (function (way, i) {
        setTimeout(function () {
          request("https://www.openstreetmap.org/api/0.6/way/" + way, function (error, response, body) {
            completed_requests++;
            if (error) {
              console.log(error);
              console.log("https://www.openstreetmap.org/api/0.6/way/" + way + " failed");
            }
            else {
              parseString(body, function (err, result) {
                var nodes = result.osm.way[0].nd;
                console.log("Total requests " + nodes.length);
                total_requests += nodes.length;
                for (var i2 = 0; i2 < nodes.length; i2++) {
                  var node = nodes[i2].$.ref;
                  (function (node, i) {
                    setTimeout(function () {
                      request("https://www.openstreetmap.org/api/0.6/node/" + node, function (error, response, body) {
                        completed_requests++;
                        if (error) {
                          console.log(error);
                          console.log("https://www.openstreetmap.org/api/0.6/node/" + node + " failed");
                        }
                        else {
                          parseString(body, function (err, result) {
                            var lat = result.osm.node[0].$.lat;
                            var long = result.osm.node[0].$.lon;
                            results.push([lat, long]);
                          });
                          console.log(total_requests + "/" + completed_requests);
                          if (completed_requests == total_requests) {
                            console.log("Done");
                            console.log("Got " + results.length + " results");
                            fs.writeFile("little_ouse.json", JSON.stringify(results), function (err) {
                              if (err) {
                                return console.log(err);
                              }
                              console.log("The file was saved");
                            });
                          }
                        }
                      });
                    }, i * 1000);
                  })(node, i2);
                }
              });
            }
          });
        }, i * 1000);
      })(way, i)
    }
  });
});
It sounds like you would like to extract your rivers' paths from the OSM database (which is what Mapbox Studio uses to let you customize your basemap style).
On the OpenStreetMap main website, there is a big "Export" button at the top. You can use it to extract all the data contained in a given bounding box, including the coordinates of the paths for your rivers.
Then you can use other tools to convert to GeoJSON and keep only the data related to your rivers (e.g. http://geojson.io/).
Once you have your data as GeoJSON, you can easily display it in Leaflet using L.geoJson(myGeoJSONdata).addTo(map).
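Regarding the node-ordering issue in the edited question: because the way and node responses arrive asynchronously, pushing coordinates as they come in scrambles the order. One way to keep them sequential (a sketch against the code above, not a tested drop-in) is to write each coordinate into a slot indexed by its position and flatten at the end:

var coordsByWay = [];  // coordsByWay[wayIndex][nodeIndex] = [lat, long]

// pass both indices through the IIFEs, e.g. (function (node, wayIndex, nodeIndex) { ... })(node, i, i2),
// then instead of results.push([lat, long]) write:
//   coordsByWay[wayIndex] = coordsByWay[wayIndex] || [];
//   coordsByWay[wayIndex][nodeIndex] = [lat, long];

// when completed_requests == total_requests, flatten in order and save:
var ordered = [].concat.apply([], coordsByWay).filter(Boolean);
fs.writeFile("little_ouse.json", JSON.stringify(ordered), function (err) {
  if (err) return console.log(err);
  console.log("The file was saved");
});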

Nodejs download multiple files

I need to download ~26k images. The image list and URLs are stored in a CSV file. I'm reading the CSV file and trying to download the images while looping through the list.
If I'm using a small set (~1-2k) it works fine, but when I switch to the full set I'm getting an EMFILE error.
Error: EMFILE, open 'S:\images_download\Images\189900008.jpg'
I've noticed that Node tries to create all the files at once, and this might be the issue, but I'm unable to force it to create them one by one. My understanding is that the code below should work like this, but obviously it does not.
(Just to mention that this code is executed on Windows.)
Code:
var csv = require("fast-csv");
var fs = require('fs');
var request = require('request');
var async = require('async');

fs.writeFile('errors.txt', '', function(){})

var downloaded = 0;
var totalImages = 0;
var files = [];

csv
  .fromPath("Device_Images_List.csv")
  .on("data", function (data) {
    files.push({device: data[0], url: data[1]})
  })
  .on("end", function () {
    totalImages = files.length;
    async.each(files, function (file, callback) {
      var deviceId = file.device;
      var deviceUrl = file.url;
      if (deviceId != 'DEVICE_TYPE_KEY') {
        try {
          writeStream = fs.createWriteStream('./Images/' + deviceId + '.jpg');
          proxiedRequest = request.defaults({proxy: "http://proxy:8080"});
          proxiedRequest(deviceUrl).pipe(writeStream);
          writeStream.on('open', function (fd) {
            var rem = proxiedRequest.get(deviceUrl);
            rem.on('data', function (chunk) {
              writeStream.write(chunk);
            });
            rem.on('end', function () {
              downloaded++;
              console.log('Downloaded: ' + deviceId + '; ' + (downloaded + 1) + ' of ' + totalImages);
              writeStream.end();
            });
          });
          writeStream.on('close', function () {
            callback();
          });
        } catch (ex) {
          fs.appendFile('errors.txt', deviceId + ' failed to download', function (err) {
            callback();
          });
        }
      }
    }, function (err) {
      if (err) {
        console.log(err);
      } else {
      }
    });
  });
As @slebetman commented, the issue can be solved by using async.eachSeries to process the files one by one, or async.eachLimit to limit the number of parallel downloads:
async.eachLimit(files, 5, function (file, callback) {
  // ... Process 5 files at the same time
}, function (err) {
});
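A fuller sketch of what the eachLimit body could look like for this download case, reusing the variables from the question (the proxy URL and error file are kept as in the original; the done flag is only a guard so callback isn't fired twice):

var proxiedRequest = request.defaults({ proxy: "http://proxy:8080" });

async.eachLimit(files, 5, function (file, callback) {
  if (file.device === 'DEVICE_TYPE_KEY') return callback();   // skip the header row

  var done = false;
  function finish(err) {
    if (done) return;
    done = true;
    callback(err);
  }

  var writeStream = fs.createWriteStream('./Images/' + file.device + '.jpg');
  proxiedRequest(file.url)
    .on('error', function () {
      fs.appendFile('errors.txt', file.device + ' failed to download\n', function () {
        finish();
      });
    })
    .pipe(writeStream);

  writeStream.on('finish', function () {
    downloaded++;
    console.log('Downloaded: ' + file.device + '; ' + downloaded + ' of ' + totalImages);
    finish();
  });
}, function (err) {
  if (err) console.log(err);
});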

Request makes the process run out of memory when downloading big media files

I wrote a simple script to download video files from a CDN, where the direct URLs are simple to generate, for example http://something.com/N.mp4, where N is a number.
The problem is, when downloading files larger than ~300 MB, the file appears perfectly on the hard drive, but before the request(...) callback fires, a memory allocation failure happens:
FATAL ERROR: CALL_AND_RETRY_0 Allocation failed - process out of memory
Does this happen because of some serious bad practice? Can request download media files of this size?
Environment: Win7, 4GB+ free RAM, Node v0.10.31
var request = require('request');
var async = require('async');
var fs = require('fs');

var start = +process.argv[2] || 1;
var end = +process.argv[3] || 50;
var url = 'http://something.com/';

try {
  fs.mkdirSync(__dirname + '/videos/');
} catch (e) {}

var index = start;

async.whilst(
  function () { return index <= end; },
  function (callback) {
    var fileName = index + '.mp4';
    console.log('Started: ' + fileName);
    console.time('Done (' + fileName + ')');
    request(url + fileName, function () {
      console.timeEnd('Done (' + fileName + ')');
      index++;
      callback(null);
    }).pipe(fs.createWriteStream(__dirname + '/videos/' + fileName));
  },
  function (err) {
    if (err) {
      return console.error(err);
    }
    console.log('Script finished.');
  }
);
Example console output:
> node index.js 3
Started: 3.mp4
Done (3.mp4): 296592ms
Started: 4.mp4
Done (4.mp4): 369718ms
Started: 5.mp4
FATAL ERROR: CALL_AND_RETRY_0 Allocation failed - process out of memory
If you use the request module with a callback, it buffers the whole response body in memory. Try omitting the callback and using the finish event of the fs stream instead.
var writer = fs.createWriteStream(__dirname + '/videos/' + fileName);
writer.on('finish', function () {
  // ...
  index++;
  callback(null);
});
request(url + fileName).pipe(writer);
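Applied to the async.whilst loop from the question, the worker function might then look like this (a sketch; console.time/timeEnd kept from the original):

function (callback) {
  var fileName = index + '.mp4';
  console.log('Started: ' + fileName);
  console.time('Done (' + fileName + ')');

  var writer = fs.createWriteStream(__dirname + '/videos/' + fileName);
  writer.on('finish', function () {
    console.timeEnd('Done (' + fileName + ')');
    index++;
    callback(null);
  });

  // no callback passed to request, so the body is streamed straight to disk
  request(url + fileName)
    .on('error', callback)
    .pipe(writer);
}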
It looks like you're trying to download videos 3 to 50 all in parallel, so that might be what's causing you to run out of memory. You could try doing them in series and see if that fixes the problem. With async.waterfall your code might look something like this:
var tasks = [];
for (; index <= end; index++) {
  (function (fileName) {              // capture the current file name; with var, every task
    tasks.push(function (callback) {  // would otherwise see the same final value of index
      console.log('Started: ' + fileName);
      console.time('Done (' + fileName + ')');
      request(url + fileName, function () {
        console.timeEnd('Done (' + fileName + ')');
        callback(null);
      }).pipe(fs.createWriteStream(__dirname + '/videos/' + fileName));
    });
  })(index + '.mp4');
}
async.waterfall(tasks, function (err) {
  if (err) {
    return console.error(err);
  }
  console.log('Script finished.');
});

Encoding and saving images

I am a beginner with Node.js and I would like to ask you for advice. Using a selection from the database I get the required data (ID, building, floor, map). The map is stored in JPG format as a byte array 0x89504E470D0A1A0A0000000D4948... . I save to a new database the id, building, floor and a reduced map (the quality is reduced with https://www.npmjs.com/package/images), which is now Unicode-encoded (http://puu.sh/g6UZ2/90408c7b12.png). Could you please advise me how to convert the reduced-size map to a byte array and then store it in the database as a BLOB? Thank you, and sorry for my English.
var myQueryMaps2 = ' SELECT ' +
    'id, budova, patro, mapa ' +
    'FROM dbo.cis_mapyBudov';

app.get('/originalMaps', function (req, res) {
  var request = new sql.Request(connection);
  request.query(' DELETE FROM dbo.cis_mapyNewBudov', function (err) {
    if (err) {
      console.log(err);
    }
  });
  request.query(myQueryMaps2, function (err, rows, recordset) {
    if (!err && res.statusCode === 200) {
      for (var i = 0; i < rows.length; i++) {
        var insertSQL = "INSERT INTO dbo.cis_mapyNewBudov (id, budova, patro, mapa) VALUES ('" + rows[i].id +
            "', '" + rows[i].budova + "', '" + rows[i].patro + "', '" + images(rows[i].mapa).toBuffer('jpg', {quality: 50}) +
            "')";
        request.query(insertSQL, function (err) {
          if (err)
            console.log(err);
          else
            console.log('save');
        });
      }
    } else {
      console.log('error original maps', err);
      throw new Error('Can not download original maps');
    }
  });
});
Use input parameters; it's almost always a bad idea to concatenate values into T-SQL.
var request = new sql.Request(connection);
request.input('id', rows[i].id);
request.input('budova', rows[i].budova);
request.input('patro', rows[i].patro);
request.input('mapa', sql.VarBinary(sql.MAX), images(rows[i].mapa).toBuffer('jpg', {quality: 50}));
request.query('INSERT INTO dbo.cis_mapyNewBudov (id, budova, patro, mapa) VALUES (@id, @budova, @patro, @mapa)', function (err) {
  // ...
});
Hint: always create a new Request for each query; that's how the driver is designed. Reusing a Request object might cause unexpected behavior.
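Putting the two hints together, the insert loop from the question might be restructured like this (a sketch assuming the mssql driver's @-parameter syntax and sql.MAX constant; toBuffer already returns a Node Buffer, which is what VarBinary expects):

request.query(myQueryMaps2, function (err, rows) {
  if (err) {
    return console.log('error original maps', err);
  }
  rows.forEach(function (row) {
    var insertRequest = new sql.Request(connection);   // a fresh Request per query
    insertRequest.input('id', row.id);
    insertRequest.input('budova', row.budova);
    insertRequest.input('patro', row.patro);
    // the reduced image as a Buffer, stored as VARBINARY(MAX) (a BLOB)
    insertRequest.input('mapa', sql.VarBinary(sql.MAX), images(row.mapa).toBuffer('jpg', {quality: 50}));
    insertRequest.query(
      'INSERT INTO dbo.cis_mapyNewBudov (id, budova, patro, mapa) VALUES (@id, @budova, @patro, @mapa)',
      function (err) {
        if (err) console.log(err);
        else console.log('save');
      });
  });
});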

Node.js CSV module

I'm using a CSV module as in the example below.
I was wondering if there is a way to make sure that the fields aren't all inserted as strings, e.g. so that the number 1.23 is not inserted as the string "1.23".
It seems to make everything a string.
var csv = require('csv');
var q = async.queue(myCollection.insert.bind(myCollection), 50);

csv()
  .from.path(req.files.myCSV.path, {columns: true})
  .transform(function (data, index, cb) {
    q.push(data, function (err, res) {
      if (err) return cb(err);
      cb(null, res[0]);
    });
  })
  .on('end', function () {
    q.drain = function() {};
  })
In csv.transform(), before q.push(), you can convert fields using e.g. parseInt or parseFloat:
data.fieldX = parseInt(data.fieldX);
data.fieldY = parseFloat(data.fieldY);
You could also delete fields (delete data.fieldY;) or add fields (data.fullName = data.first + ' ' + data.last;).
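If you'd rather not list every column by hand, a generic pass over the row can coerce anything that looks numeric before it is queued. A sketch reusing the csv and q setup from the question (column names are whatever your CSV headers happen to be):

csv()
  .from.path(req.files.myCSV.path, {columns: true})
  .transform(function (data, index, cb) {
    Object.keys(data).forEach(function (key) {
      // convert values that parse cleanly as numbers; leave everything else as a string
      if (data[key] !== '' && !isNaN(data[key])) {
        data[key] = parseFloat(data[key]);
      }
    });
    q.push(data, function (err, res) {
      if (err) return cb(err);
      cb(null, res[0]);
    });
  })
  .on('end', function () {
    q.drain = function () {};
  })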
