How to send a response for a large Node.js request? I have an Excel sheet containing 10k items with updated prices. I upload the Excel file from the UI and send all of the JSON data to the API to update the prices in the database. The request times out because of the processing time. How can I handle this properly? How can I send the response after processing all the items?
Use fs.createReadStream to process large files.
For example, if you're working with CSV files you can process each line like this:
const fs = require("fs")
const csv = require("csv-parser")

function processFile() {
  fs.createReadStream("./my-file.csv")
    .pipe(csv())
    .on("data", (data) => {
      // data is a parsed row object, e.g. { item: "...", price: "..." }
      // processing
    })
    .on("end", () => {
      console.log('Completed.')
      // send response
    })
}

// watch for file changes, e.g. a new upload
fs.watch('somedir', (eventType, filename) => {
  if (filename === 'my-file.csv' && eventType === 'change') {
    processFile();
  }
});
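If the file arrives via an HTTP upload rather than by watching a directory, the same streaming approach can be wired directly into the endpoint so the response is only sent once every row has been processed. A minimal sketch, assuming Express, multer with its default in-memory storage, and csv-parser (the route and field names here are hypothetical):

const express = require("express")
const multer = require("multer")
const csv = require("csv-parser")
const { Readable } = require("stream")

const app = express()
const upload = multer() // no options: multer keeps the upload in memory as req.file.buffer

app.post("/prices/import", upload.single("prices"), (req, res) => {
  let processed = 0
  Readable.from([req.file.buffer]) // wrap the buffer so it is emitted as a single chunk
    .pipe(csv())
    .on("data", (row) => {
      processed++
      // update the price for this row here (e.g. queue a DB update)
    })
    .on("end", () => {
      // respond only after every row has been handled
      res.json({ processed })
    })
    .on("error", (err) => res.status(500).json({ error: err.message }))
})

For a very long-running import it may still be better to respond immediately with 202 Accepted and report progress separately, so the HTTP request itself never hits a timeout.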
I am streaming some data from my Node backend to a React frontend, and then am using the streamsaver library to take the stream and save it as a PDF. I was getting the correct number of documents downloaded at the end of the process. However, the pages are blank, suggesting the data is not being encoded properly. So I am trying to use base64 encoding.
My backend code looks like this:
async getNotesPDFStream(args, session, ctx) {
  try {
    const data = await this.getDataToStream(args, session, ctx);
    fs.writeFileSync("./test.pdf", data); // This test works on the backend - creates a PDF
    const encodedData = btoa(data); // encoding as a base64 string
    const readable = new Readable();
    readable.push(encodedData);
    readable.push(null);
    readable.pipe(ctx.stream);
    return Response.Success("PDF now streaming!");
  } catch (err) {
    console.log("Error 167: ", err);
    return Response.Error("Error retrieving progress notes PDF.", err);
  }
}
And on the React frontend I do this:
let buffer = '';
dataStream.on('data', (data) => {
  buffer += data;
});
dataStream.on('end', () => {
  console.log('STREAM FINISHED');
  const fileStream = streamSaver.createWriteStream('notes.pdf');
  const decodedData = atob(buffer); // Decode the final buffer
  console.log('decodedData: ', decodedData);
  const pdfBlob = new Blob([decodedData], { type: 'application/pdf' });
  const readableStream = pdfBlob.stream();
  if (window.WritableStream && readableStream.pipeTo) {
    return readableStream
      .pipeTo(fileStream)
      .then(() => console.log('Done writing'));
  }
  // Write (pipe) manually
  const writer = fileStream.getWriter();
  const reader = readableStream.getReader();
  const pump = () =>
    reader
      .read()
      .then(res =>
        res.done ? writer.close() : writer.write(res.value).then(pump)
      );
  pump();
});
After trying this I have the correct number of pages downloaded with the PDF document, but the pages are still blank. What am I missing here? How can I ensure this is encoded properly?
By the way, I tried passing some basic HTML the same way, using the same encoding methods, and it reconstructed the HTML correctly. Is there something specific about constructing a PDF that I need to be aware of?
In the end I realized I was making it more complicated than necessary. Using the writer.write() functionality I was able to come up with this terse working solution:
dataStream.on('data', (data) => {
  writer.write(data) // should be a Uint8Array
})
dataStream.on('end', () => {
  console.log('STREAM FINISHED');
  writer.close();
});
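For reference, writer in this snippet is presumably the same writer obtained from streamsaver earlier in the question, i.e. something like:

// assumed setup, mirroring the question's earlier code
const fileStream = streamSaver.createWriteStream('notes.pdf');
const writer = fileStream.getWriter();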
I'm learning how to do large-file manipulation with Node and streams. I'm stuck in the middle of a file change: when I pass the result down to another module, I think the file is still being processed in memory by the time it reaches that module.
I get a zip from an S3 bucket locally and unzip the contents:
try {
  const stream = fs.createReadStream(zipFile).pipe(unzipper.Extract({ path }))
  stream.on('error', err => console.error(err))
  stream.on('close', async () => {
    fs.removeSync(zipFile)
    try {
      const neededFile = await dir(path) // delete files not needed from zip, rename and return named file
      await mod1(neededFile) // review file, edit and return info
      await mod2(neededFile, data) // pass down data for further changes
      return
    } catch (err) {
      console.log('error')
    }
  })
} catch (err) {
  console.log('stream error')
}
During the initial unzip I learned that there is a difference between the stream's 'close' and 'finish' events: I could pass the file to the first module and start the manipulation, but (I guess due to the size) the output and the file never matched. After cleaning out the files I don't need, I pass the renamed file to mod1 for changes and run a writeFileSync:
mod1.js:
const fs = require('fs-extra')

module.exports = file => {
  fs.readFile(file, 'utf8', (err, data) => {
    if (err) return console.log(err)
    try {
      const result = data.replace(/: /gm, `:`).replace(/(?<=location:")foobar(?=")/gm, '')
      fs.writeFileSync(file, result)
    } catch (err) {
      console.log(err)
      return err
    }
  })
}
When I tried to do the above with:
const readStream = fs.createReadStream(file)
const writeStream = fs.createWriteStream(file)

readStream.on('data', chunk => {
  const data = chunk.toString().replace(/: /gm, `:`).replace(/(?<=location:")foobar(?=")/gm, '')
  writeStream.write(data)
})
readStream.on('end', () => {
  writeStream.close()
})
the file would always be blank. After the writeFileSync, I proceed to the next module to search for a line reference:
mod2.js:
const fs = require('fs-extra')

module.exports = (file, data) => {
  const parseFile = fs.readFileSync(file, 'utf8')
  parseFile.split(/\r?\n/).map((line, idx) => {
    if (line.includes(data)) console.log(idx + 1)
  })
}
but the line number returned is that of the initial unzipped file, not the file that was modified by the first module. Since I thought the sync process would operate on the file on disk, it would appear the file being referenced is still in memory? My search results from when I was learning about streams:
Working with Node.js Stream API
Stream
How to use stream.pipe
Understanding Streams in Node.js
Node.js Streams: Everything you need to know
Streams, Piping, and Their Error Handling in Node.js
Writing to Files in Node.js
Error handling with node.js streams
Node.js Readable file stream not getting data
Node.js stream 'end' event not firing
NodeJS streams not awaiting async
stream-handbook
How should a file be manipulated after an unzip stream, and why does the second module reference the file as it was after unzipping rather than after it had already been manipulated? Is it possible to write multiple streams synchronously?
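For comparison, a stream-based variant of mod1 that avoids reading and writing the same path at the same time might look roughly like the sketch below. This is only an illustration, not from the original post: it streams the transformed output into a temporary file and swaps it into place once the write stream finishes. Note that a pattern could still be split across chunk boundaries, so a line-buffered approach would be more robust.

const fs = require('fs-extra')

// Sketch only: stream from the original file into a temporary file,
// then replace the original once writing has finished.
module.exports = file =>
  new Promise((resolve, reject) => {
    const tmp = `${file}.tmp`
    const readStream = fs.createReadStream(file, 'utf8')
    const writeStream = fs.createWriteStream(tmp)

    readStream.on('data', chunk => {
      const data = chunk
        .replace(/: /gm, `:`)
        .replace(/(?<=location:")foobar(?=")/gm, '')
      writeStream.write(data)
    })
    readStream.on('error', reject)
    readStream.on('end', () => writeStream.end())

    writeStream.on('error', reject)
    writeStream.on('finish', () => {
      // fs-extra's move can overwrite the original with the transformed copy
      fs.move(tmp, file, { overwrite: true }, err => (err ? reject(err) : resolve()))
    })
  })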
I have a front end react app and a backend node/express app. I want to allow a user to upload a csv file, then parse the file and instantiate a model for each row. However, I am somewhat confused about how to do this, since I am used to simply posting to a route in the API, and persisting the thing from the request body. In this case, the thing from the request body is the file, and I don't want to save the file, just the data inside it. How can I parse the file without saving it to the database? I have tried to use multer to process the upload and csv-parse to parse the contents, but I am not sure this makes sense. Nonetheless, here is the code (app/index):
...
const multer = require('multer');
const upload = multer().single();
const parse = require('csv-parse');
...
router.post('/distributor/:id/files', (req, res) => {
  upload(req, res, function (err) {
    if (err) {
      console.error("An error occurred when uploading. Please try again. Note that you may only upload one file at a time, and we only support .csv files.")
      return
    }
    console.log("We have received your file")
  })
});
...
// router.get('/distributor/:id/files/:id', (req, res) => {
//   File
//     .forge({id: req.params.id})
//     .fetch()
//     .then((file) => {
//       if (_.isEmpty(file))
//         return res.sendStatus(404);
//       return parseJson(file)
//     })
//     .then((jsonData) => {
//       for (var i in jsonData) {
//         // save instance of model
//       }
//     })
//     .catch((error) => {
//       console.error(error);
//       return res.sendStatus(500);
//     });
// })
// function parseJson(file) {
//   var output = [];
//   // Create the parser
//   var parser = parse({delimiter: ':'});
//   // Use the writable stream api
//   parser.on('readable', function(){
//     while(record = parser.read()){
//       output.push(record);
//     }
//   });
//   // Catch any error
//   parser.on('error', function(err){
//     console.log(err.message);
//   });
//   parser.end();
// }
I know this doesn't make sense, since I don't actually want to save the file as a model and table in the database, I just want to save each item inside the file, so I know I cannot make a route called '/distributor/:id/files/:id'. But I am lost as to what to do instead. I hope that what I am trying to do is clear! I am fairly new to node, and programming in general, and I have never come across a situation in which I needed to handle file upload.
You can use this node module to parse the csv file. https://www.npmjs.com/package/csvtojson
For example, suppose the uploaded file is available as users on the request object:
const csv = require('csvtojson');

csv()
  .fromString(req.files.users.data.toString('utf8'))
  .on('json', (user) => {
    console.log(user);
  })
  .on('done', () => {
    console.log('done parsing');
  });
You will be able to get every row as a JSON object.
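Tying that back to the multer route from the question, a rough sketch using csvtojson's promise interface might look like this (it assumes multer's default in-memory storage, so the upload is available as req.file.buffer, and Item.create is a placeholder for whatever model/ORM you use):

const multer = require('multer');
const csv = require('csvtojson');

const upload = multer().single('file'); // 'file' is the form field name (assumed)

router.post('/distributor/:id/files', (req, res) => {
  upload(req, res, (err) => {
    if (err) return res.status(400).send('Upload failed');
    csv()
      .fromString(req.file.buffer.toString('utf8'))
      .then(async (rows) => {
        for (const row of rows) {
          await Item.create(row); // hypothetical model call - replace with your own persistence
        }
        res.status(201).json({ imported: rows.length });
      })
      .catch(() => res.sendStatus(500));
  });
});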
I have a Node app that collects vote submissions and stores them in Cassandra. The votes are stored as base64-encoded encrypted strings. The API has an endpoint called /export that should get all of these vote strings (possibly more than a million), convert them to binary, and append them one after another to a votes.egd file. That file should then be zipped and sent to the client. My idea is to stream the rows from Cassandra, converting each vote string to binary and writing to a WriteStream.
I want to wrap this functionality in a Promise for easy use. I have the following:
streamVotesToFile(query, validVotesFileBasename) {
  return new Promise((resolve, reject) => {
    const writeStream = fs.createWriteStream(`${validVotesFileBasename}.egd`);

    writeStream.on('error', (err) => {
      logger.error(`Writestream ${validVotesFileBasename}.egd error`);
      reject(err);
    });

    writeStream.on('drain', () => {
      logger.info(`Writestream ${validVotesFileBasename}.egd drained`);
    })

    db.client.stream(query)
      .on('readable', function() {
        let row = this.read();
        while (row) {
          const envelope = new Buffer(row.vote, 'base64');
          if (!writeStream.write(envelope + '\n')) {
            logger.error(`Couldn't write vote`);
          }
          row = this.read()
        }
      })
      .on('end', () => { // No more rows from Cassandra
        writeStream.end();
        writeStream.on('finish', () => {
          logger.info(`Stream done writing`);
          resolve();
        });
      })
      .on('error', (err) => { // err is a response error from Cassandra
        reject(err);
      });
  });
}
When I run this it appends all the votes to the file and downloads fine, but I have a bunch of problems/questions:
If I make a request to the /export endpoint and this function runs, all other requests to the app are extremely slow or simply don't finish until the export request is done. I'm guessing this is because the event loop is being hogged by all of these events from the Cassandra stream (thousands per second)?
All the votes seem to write to the file fine, yet I get false from almost every writeStream.write() call and see the corresponding logged message (see code). Why?
I understand that I need to consider backpressure and the 'drain' event for the WritableStream, so ideally I would use pipe() and pipe the votes to a file because that has built-in backpressure support (right?). But since I need to process each row (convert it to binary and possibly add data from other row fields in the future), how would I do that with pipe?
This is the perfect use case for a Transform stream:
const { Transform } = require('stream');

const myTransform = new Transform({
  writableObjectMode: true, // the writable side receives row objects from the driver
  transform(row, encoding, callback) {
    // Transform the row into something else
    const item = Buffer.from(row['vote'], 'base64');
    callback(null, item);
  }
});

client.stream(query, params, { prepare: true })
  .pipe(myTransform)
  .pipe(fileStream); // fileStream: the fs.createWriteStream() from the question
See more information on how to implement a TransformStream in the Node.js API Docs.
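Wired back into the original streamVotesToFile, the piped version might look roughly like this sketch. pipe() handles the backpressure, and the promise settles on the file stream's 'finish' event or on any 'error':

// assumes: const { Transform } = require('stream') at the top of the module
streamVotesToFile(query, validVotesFileBasename) {
  return new Promise((resolve, reject) => {
    const fileStream = fs.createWriteStream(`${validVotesFileBasename}.egd`);

    const toBinary = new Transform({
      writableObjectMode: true, // rows from the Cassandra driver arrive as objects
      transform(row, encoding, callback) {
        // convert the base64 vote to binary; other row fields could be added here later
        callback(null, Buffer.from(row.vote, 'base64'));
      }
    });

    db.client.stream(query)
      .on('error', reject) // Cassandra error
      .pipe(toBinary)
      .on('error', reject)
      .pipe(fileStream)
      .on('error', reject)
      .on('finish', resolve); // file fully flushed to disk
  });
}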
I am basically trying, in Node.js, to:
open a CSV file as a stream
make some operation on each line
stream the result into a second CSV file
Here is my code:
var fs = require("fs");
var csv = require("csv");

var readStream = fs.createReadStream("input.csv");
var writeStream = fs.createWriteStream("output.csv");

var csvStream = csv
  .parse()
  .on("data", function(data) {
    // do some stuff with data
    return(JSON.stringify(data));
  })
  .on("end", function() {
    console.log("done");
  })
  .on("error", function(error) {
    console.log(error)
  });

(readStream.pipe(csvStream)).pipe(writeStream);
I am getting "TypeError: Invalid non-string/buffer chunk". What am I doing wrong? I am totally new to Node.js, so please explain your answer in detail.
You are reading the data correctly; however, using return is not the way to transform your data. The CSV stream cannot simultaneously output untransformed data (which you are reading in your data event handler) and transformed data that you would pipe to writeStream.
To use pipe with the writeStream, you would need a readable stream outputting your transformed data. That would mean creating a read/write (transform) stream around your transform function and piping fileReader > csvReader > transformStream > writeStream, as sketched below.
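A minimal sketch of that transform-based pipeline, using the built-in stream.Transform (here each record is still emitted as a JSON string, to match the code below):

var fs = require("fs");
var csv = require("csv");
var stream = require("stream");

// Transform stream: receives parsed records (objects/arrays) from csv.parse()
// and emits one transformed line of text per record.
var transformStream = new stream.Transform({
  writableObjectMode: true, // accept the parsed records
  transform: function(record, encoding, callback) {
    // do some stuff with the record, then emit it as a string
    callback(null, JSON.stringify(record) + "\n");
  }
});

fs.createReadStream("input.csv")
  .pipe(csv.parse())
  .pipe(transformStream)
  .pipe(fs.createWriteStream("output.csv"));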
It is much simpler to attach a function to the data event of the CSV reader, as you did, but then you need to write to the file manually.
The corrected code may be clearer this way:
var fs = require("fs");
var csv = require("csv");

var readStream = fs.createReadStream("input.csv"); // readStream is a read-only stream with the raw text content of the CSV file
var writeStream = fs.createWriteStream("output.csv"); // writeStream is a write-only stream to write on the disk

var csvStream = csv.parse(); // csvStream is a read and write stream: it reads raw text in CSV and outputs untransformed records

csvStream.on("data", function(data) {
  //console.log(data)
  writeStream.write(JSON.stringify(data));
})
.on("end", function() {
  console.log("done");
})
.on("error", function(error) {
  console.log(error)
});

readStream.pipe(csvStream)
I have basically done the following:
parsed (read CSV strings and write objects and arrays)
stringified (read objects and arrays and write CSV strings)
piped all that to a writeStream to my file
function readingAppendingAndWritingToCSVFile(readStream, writeStream) {
  const results = [];
  const p = new Promise((resolve, reject) => {
    readStream.pipe(csv.parse({ columns: true }))
      .on('data', (data) => {
        console.log('data --------------', data);
        data.name = 'somename'; // will add new column with same data in all rows
        console.log('data after pushing ----------', data);
        results.push(data);
      })
      .on('error', (err) => {
        console.log('error ------------', err);
        reject();
      })
      .on('finish', () => {
        console.log();
        console.log('all the csv strings parsed to objects -------------', results);
      })
      .pipe(csv.stringify({ header: true }))
      .pipe(writeStream);
  });
  return p;