Stream read returning empty in Node.js

I am using Express to create a web service that will read string data from a stream and respond to the HTTP POST request with that value. Here is the code for the S3Store.js file that defines the readFileFromS3(.) function:
S3Store.js
S3Store.prototype.readFileFromS3 = function(filename, callback) {
    var readConfig = {
        'Bucket': 'shubham-test',
        'Key': filename
    };
    var readStream = this.s3.getObject(readConfig).createReadStream();
    var allData = '';
    readStream.on('data', function(data) {
        //data = Buffer.concat([allData, data]);
        data = allData + data;
        console.log("data: " + data);
    });
    readStream.on('error', function(err) {
        callback(err, null);
    });
};
Now, if I call this method from a terminal like this:
s3Instance.readFileFromS3('123.json', function(err, data) {
    console.log(data);
});
I see the appropriate string for data logged to the console. However, when I call the same method from inside one of the Express routes for HTTP POST requests, the service responds with data set to an empty string. Code for the POST request:
router.post('/resolve', function(req, res) {
    var commandJson = req.body;
    var appId = commandJson['appId'];
    var command = commandJson['text'];
    if (appId == undefined || command == undefined) {
        res.status(400).send("Malformed Request: appId: " + appId + ", command: " + command);
    };
    s3Store.readFileFromS3('123.json', function(err, data) {
        res.send(data);
    });
});
Why does it return an empty string when I call readFileFromS3(.) from the HTTP POST route, but not when I run the same method directly from the node console?

You're logging the data but you're not passing anything to the completion callback (see below for some more explanation):
S3Store.prototype.readFileFromS3 = function(filename, callback) {
    var readConfig = {
        'Bucket': 'shubham-test',
        'Key': filename
    };
    var readStream = this.s3.getObject(readConfig).createReadStream();
    var allData = [];
    // Keep collecting data.
    readStream.on('data', function(data) {
        allData.push(data);
    });
    // Done reading, concatenate and pass to completion callback.
    readStream.on('end', function() {
        callback(null, Buffer.concat(allData));
    });
    // Handle any stream errors.
    readStream.on('error', function(err) {
        callback(err, null);
    });
};
I took the liberty of rewriting the data collection to use Buffers instead of strings, but this obviously isn't a requirement.
The callback argument is a completion function, meant to be called when either reading the S3 stream is done, or when it has thrown an error. The error handling was already in place, but not the part where you call back once all the data from the stream has been read, which is why I added the end handler.
At that point, the readStream is exhausted (everything from it has been read into allData), and you call the completion callback with the collected data as the second argument.
The common idiom throughout Node is that completion callbacks take (at least) two arguments: the first is either an error, or null when there is no error, and the second is the data you want to pass back to the caller (in your case, the anonymous function in your route handler that calls res.send()).
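Applied to the route from the question, the handler would then check the error argument before sending. A minimal sketch (the early return after the 400 response is an addition here, so the handler doesn't fall through and try to respond twice):
router.post('/resolve', function(req, res) {
    var commandJson = req.body;
    var appId = commandJson['appId'];
    var command = commandJson['text'];
    if (appId == undefined || command == undefined) {
        // return so we don't fall through and respond a second time
        return res.status(400).send("Malformed Request: appId: " + appId + ", command: " + command);
    }
    s3Store.readFileFromS3('123.json', function(err, data) {
        if (err) {
            // first argument is the (possible) error, per the convention above
            return res.status(500).send(err.message);
        }
        res.send(data); // data is a Buffer here; Express sends it as-is
    });
});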

Related

Node.js function not running in order. Error: Unhandled stream error in pipe

I updated the function to create the CSV file but now I'm getting an error:
In upload function
internal/streams/legacy.js:57
throw er; // Unhandled stream error in pipe.
^
Error: ENOENT: no such file or directory, open 'C:\Users\shiv\WebstormProjects\slackAPIProject\billingData\CSV\1548963844106output.csv'
var csvFilePath = '';
var JSONFilePath = '';

function sendBillingData(){
    var message = '';
    axios.get(url, {
        params: {
            token: myToken
        }
    }).then(function (response) {
        message = response.data;
        fields = billingDataFields;
        // saveFiles(message, fields, 'billingData/');
        saveFilesNew(message, fields, 'billingData/');
        var file = fs.createReadStream(__dirname + '/' + csvFilePath); // <-- make sure this path is correct
        console.log(__dirname + '/' + csvFilePath);
        uploadFile(file);
    })
    .catch(function (error) {
        console.log(error);
    });
}
The saveFilesNew function is:
function saveFilesNew(message, options, folder){
    try {
        const passedData = message;
        var relevantData = '';
        if (folder == 'accessLogs/'){
            const loginsJSON = message.logins;
            relevantData = loginsJSON;
            console.log(loginsJSON);
        }
        if (folder == 'billingData/'){
            relevantData = passedData.members;
            const profile = passedData.members[0].profile;
        }
        // Save JSON to the output folder
        var date = Date.now();
        var directoryPath = folder + 'JSON/' + date + "output";
        JSONFilePath = directoryPath + '.json';
        fs.writeFileSync(JSONFilePath, JSON.stringify(message, null, 4), function(err) {
            if (err) {
                console.log(err);
            }
        });
        // Parse JSON onto the CSV
        const json2csvParser = new Json2csvParser({ fields });
        const csv = json2csvParser.parse(relevantData);
        // console.log(csv);
        // Function to process the CSV onto the file
        var directoryPath = folder + 'CSV/' + date + "output";
        csvFilePath = directoryPath + '.csv';
        let data = [];
        let columns = {
            real_name: 'real_name',
            display_name: 'display_name',
            email: 'email',
            account_type: 'account_type'
        };
        var id = passedData.members[0].real_name;
        console.log(id);
        console.log("messageLength is " + Object.keys(message.members).length);
        for (var i = 0; i < Object.keys(message.members).length; i++) {
            console.log("value of i is " + i);
            var display_name = passedData.members[i].profile.display_name;
            var real_name = passedData.members[i].profile.real_name_normalized;
            var email = passedData.members[i].profile.email;
            var account_type = 'undefined';
            console.log("name: " + real_name);
            if (passedData.members[i].is_owner){
                account_type = 'Org Owner';
            } else if (passedData.members[i].is_admin){
                account_type = 'Org Admin';
            } else if (passedData.members[i].is_bot){
                account_type = 'Bot';
            } else {
                account_type = 'User';
            }
            data.push([real_name, display_name, email, account_type]);
        }
        console.log(data);
        stringify(data, { header: true, columns: columns }, (err, output) => {
            if (err) throw err;
            fs.writeFileSync(csvFilePath, output, function(err) {
                console.log(output);
                if (err) {
                    console.log(err);
                }
                console.log('my.csv saved.');
            });
        });
    } catch (err) {
        console.error(err);
    }
}
The uploadFile function is:
function uploadFile(file){
    console.log("In upload function");
    const form = new FormData();
    form.append('token', botToken);
    form.append('channels', 'testing');
    form.append('file', file);
    axios.post('https://slack.com/api/files.upload', form, {
        headers: form.getHeaders()
    }).then(function (response) {
        var serverMessage = response.data;
        console.log(serverMessage);
    });
}
So I think the error is caused because Node is trying to upload the file before it has been created. I feel like this has something to do with the asynchronous nature of Node.js, but I fail to comprehend how to rectify the code. Please let me know how to correct this, and mention any improvements to the code structure/design too.
Thanks!
You don't wait for the callback provided to stringify to be executed, and that's where you create the file. (Assuming this stringify function really does accept a callback.)
Using callbacks (you could make this cleaner with promises and the neat async/await syntax, but let's stick to callbacks here), it should look more like:
function sendBillingData() {
    ...
    // this callback we'll use to know when the file writing is done, and to get the file path
    saveFilesNew(message, fields, 'billingData/', function(err, csvFilePathArgument) {
        // this we will execute when saveFilesNew calls it, not when saveFilesNew returns, see below
        uploadFile(fs.createReadStream(__dirname + '/' + csvFilePathArgument));
    });
}

// let's name this callback... "callback".
function saveFilesNew(message, options, folder, callback) {
    ...
    var csvFilePath = ...; // local variable only, instead of your global
    ...
    stringify(data, { header: true, columns: columns }, (err, output) => {
        if (err) throw err; // or: return callback(err);
        fs.writeFile(csvFilePath, output, function(err) { // NOT writeFileSync, otherwise no callback would be needed
            console.log(output);
            if (err) {
                console.log(err);
                // callback(err); may be a useful approach for error-handling at a higher level
            }
            console.log('my.csv saved.'); // yes, NOW the CSV is saved, not before this executes! Hence:
            callback(null, csvFilePath); // no error, clean process, pass the file path
        });
    });
    console.log("This line is executed before stringify's callback is called!");
    return; // implicit, yes, yet still synchronous, and that's why your version crashes
}
Using callbacks that are called only when the expected events happen (a file is done writing, a buffer/string is done transforming...) allows JS to keep executing code in the meantime. And it does keep executing code, so when you need data from async code, you need to tell JS that you need it done before executing your next piece.
Also, since you can pass data when calling back (it's just a function), here I could avoid relying on a global csvFilePath. Relying on higher-level variables makes things monolithic: for instance, you could not move saveFilesNew to a dedicated file where you keep your toolkit of file-related functions.
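Since promises came up above as the cleaner option, here is a minimal sketch of the same flow with async/await, assuming a Node version that supports it (and fs.promises for the file write); the callback-based stringify is wrapped in a Promise by hand:
async function saveFilesNew(message, options, folder) {
    // ... build csvFilePath and data exactly as before ...
    const output = await new Promise(function (resolve, reject) {
        stringify(data, { header: true, columns: columns }, function (err, out) {
            if (err) reject(err); else resolve(out);
        });
    });
    await fs.promises.writeFile(csvFilePath, output); // resolves once the CSV is on disk
    return csvFilePath;
}

async function sendBillingData() {
    const response = await axios.get(url, { params: { token: myToken } });
    const csvFilePath = await saveFilesNew(response.data, billingDataFields, 'billingData/');
    uploadFile(fs.createReadStream(__dirname + '/' + csvFilePath)); // file exists by now
}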
Finally, if your global process is like:
function aDayAtTheOffice() {
    sendBillingData();
    getCoffee();
}
then you don't need to wait for the billing data to be processed before starting to make coffee. However, if your boss told you that you could NOT get a coffee until the billing data was settled, then your process would look like:
function aDayAtTheOffice() {
    sendBillingData(function (err) {
        // if (err) let's do nothing here: you wanted a coffee anyway, right?
        getCoffee();
    });
}
(Note that having callbacks take a potential error as the first argument and data as the second is a convention, nothing mandatory.)
IMHO you should read about scope (the callback argument can be accessed at a time when the call to saveFilesNew is already done and forgotten!), and about the asynchronous nature of No... JavaScript. ;) (Sorry, probably not the best links, but they contain the meaningful keywords, and then Google is your buddy, your friend, your Big Brother.)

Piping transformed read stream into request.post()

I'd like to parse a log file and POST what is read to a request endpoint. I've managed to build a solution that generates a request for every log line read. However, it doesn't create any back pressure, so it just floods the server, and I'd like to slow it down.
This led me to investigate using stream pipes to see if I could route data from a file directly into request.post(). I can't get the post call to post a body object, though.
var stream = require('stream');
var request = require('request');

var liner = new stream.Transform({ objectMode: true });

liner._transform = function (chunk, encoding, done) {
    var data = chunk.toString();
    if (this._lastLineData) data = this._lastLineData + data;
    var lines = data.split('\n');
    this._lastLineData = lines.splice(lines.length - 1, 1)[0];
    var that = this;
    lines.forEach(function(line) {
        var line_obj = JSON.parse(line);
        if (line_obj.url === "/api/usages" && line_obj.method === 'POST') {
            var req_body = line_obj.body.body;
            that.push.bind(req_body);
        }
    });
    done();
};
var file_name = process.argv[2];
console.log('Reading from ' + file_name);

var fs = require('fs');
var liner = require('./liner');
var source = fs.createReadStream(file_name);

source.pipe(liner).pipe(request
    .post("http://localhost:8081/api/usages")
    .on('response', function(response) {
        console.log(response.statusCode); // 200
    })
    .on('error', function(err) {
        console.log(err);
    }));
The push call in the transform function is working correctly, but it's not posting that object via the body in request.post().
What am I missing?
Will this provide the back pressure I'm looking for to throttle the POST calls before all of the file reads are completed?
I've discovered that you cannot pipe a stream to an HTTP request because you would need the Content-Length known beforehand (as per the spec). The less pleasant alternative is a multipart upload: as chunks are read from your transform, they would be marshalled as parts to the receiving API. This also means the receiving API needs to be able to receive multipart uploads and reassemble the whole file after all parts have been received and confirmed. AWS S3 has multipart uploads and it might be a nice example: http://docs.aws.amazon.com/AmazonS3/latest/dev/mpuoverview.html
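For reference, a rough sketch of what that would look like against S3's multipart API, assuming the classic aws-sdk v2 client; with multipart you only need a length per part, not for the whole stream. The bucket, key, and firstChunk buffer are hypothetical, and the buffering of parts from the transform stream is left out:
var AWS = require('aws-sdk');
var s3 = new AWS.S3();

s3.createMultipartUpload({ Bucket: 'my-bucket', Key: 'my-key' }, function(err, mp) {
    if (err) return console.log(err);
    // For each buffered chunk (each part except the last must be >= 5 MB):
    s3.uploadPart({
        Bucket: 'my-bucket',
        Key: 'my-key',
        UploadId: mp.UploadId,
        PartNumber: 1,
        Body: firstChunk // hypothetical Buffer collected from the transform stream
    }, function(err, part) {
        if (err) return console.log(err);
        // ...repeat for the remaining parts, collecting each returned ETag, then:
        s3.completeMultipartUpload({
            Bucket: 'my-bucket',
            Key: 'my-key',
            UploadId: mp.UploadId,
            MultipartUpload: { Parts: [{ ETag: part.ETag, PartNumber: 1 }] }
        }, function(err, done) {
            if (err) return console.log(err);
            console.log('reassembled by S3 at: ' + done.Location);
        });
    });
});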
I wanted to pipe my transform data to another API that I manage but it seems the effort is not likely worth it considering my files really aren't that big. I'll update this answer if I change my mind :)
Although I wasn't able to find a solution to the streaming question, I found a simple solution to the back pressure question.
I used async.queue to push work into a simple task queue.
// build the send queue
var pool = new http.Agent({ keepAlive: true, keepAliveMsecs: 10000, maxSockets: Math.floor(send_queue_concurrency * 1.5) });

var q = async.queue(function(task, callback){
    request({
        url    : 'http://localhost:8081/xxxxxx',
        method : 'POST',
        json   : task.req_body,
        gzip   : true,
        pool   : pool,
        timeout: 30000
    }, function(error, response, body){
        if (error) {
            console.log('request error : ' + error);
            post_status.fail++;
        } else {
            if (response.statusCode === 400) {
                console.dir(body);
            }
        }
        callback();
    });
}, send_queue_concurrency);

q.drain = done;
send_queue_concurrency is the primary lever for controlling request pressure.
I'm pushing work into the queue with a file-parsing routine:
rl.on('line', function(line) {
    line_count++;
    try {
        var line_object = JSON.parse(line);
        var req_body = line_object.body.body;
        q.push({ req_body: req_body, line_object: line_object }, function(err){
            if (err) {
                console.log('queue error! ' + JSON.stringify(err));
            }
        });
    } catch (e) {
        console.dir(e);
    }
});

var done = function() {
    // print out some reporting stats...
    // console.log('xxxxxx');
    console.log('\ndone.');
    process.exit(0);
};

response is in junked format in node.js

I am developing an API using Node.js. In my application, when I hit a URL through the browser, I get the JSON response in my browser perfectly. But when I get the response through my Node.js code, it comes back as junk.
Consider I am hitting the following URL in the browser:
localhost:2000/xxxxx/rrrrr/ggggg
I am receiving perfect output.
The following is the node.js code:
proxyReq.on("response", function(proxyRes) {
    var body = '';
    try {
        proxyRes.on("data", function(chunk) { // Capture API response here --- revisit
            body += new Buffer(chunk, 'binary').toString();
            //console.log("cccc=" + chunk)
            /*zlib.unzip(chunk.toString(), function(err, chunk){
                console.log("Inside zliib");
                if (!err){
                    console.log('Response' + chunk.toString())
                } else {
                    console.log("Inside zlib error");
                }
            });*/
            //body = chunk.toString();
            //console.log(chunk.toString('utf-8'));
            console.log('cccccccc=' + body);
        });
    } catch(err) {
        console.log("errrr=" + err.stack);
    }
});
Here body prints as junked data. I tried "utf-8" and "binary"; nothing works. Help me to solve this. Thanks in advance.
My junked data:
Ys��ǿJ����}񛃉/c�؎}�%�ld� a�T��mIL�Z� ]�T��I�r����ߕ�h4��ϕ�7t{�Q4��8���\�L�N؛T��VWM�r
?W&Q��N&/Q����|�W9??W��t�b�n:t>:��(t��G��K��w��=��r\���_��W�N�c��{���u�ۺU���m�^���z�'�ǫ��LFQ�uc���s�>��f
I didn't install using npm:
Third party code regarding ProxyReq:
var ended, mod, proxyReq, req_options;
if (err) {
    return _this.error(err, req, res);
}
mod = req.api.data.protocol === "https" ? https : http;
req_options = _this.getHttpProxyOptions(req);
req_options.agent || (req_options.agent = new mod.Agent({
    maxSockets: 100,
    rejectUnauthorized: req.api.data.strictSSL
}));
_this.logger.debug(("Backend: " + req_options.method + " to ") + ("'" + req.api.data.protocol + "://") + ("" + req_options.host + ":" + req_options.port + req_options.path));
proxyReq = mod.request(req_options);
proxyReq.setTimeout(req.api.data.endPointTimeout * 1000, function() {
    var e;
    e = new Error("ETIMEDOUT");
    e.code = "ETIMEDOUT";
    proxyReq.emit("error", e);
    return proxyReq.abort();
});
ended = false;
I found your problem. This is the documentation of the response event for http/https:
Emitted when a response is received to this request. This event is emitted only once. The response argument will be an instance of http.IncomingMessage.
And http.IncomingMessage is a ReadableStream.
After that, you should use these two events of the stream to convert it into a String:
Event: 'data'
Event: 'end'
For stream.on('data', ...) you should collect your data into either a Buffer (if it is binary) or a string.
For on('end', ...) you should call a callback with your completed buffer, or, if you can inline it and use return, use a Promises library.
For example, you can replace the content of your proxyReq.on("response") callback with this:
var Buffer = require('buffer').Buffer;

proxyReq.on("response", function(proxyRes) {
    //proxyRes.setEncoding('utf8');
    var body = '';
    proxyRes.on('data', function(data) {
        body += new Buffer(data, 'binary').toString();
    });
    proxyRes.on('end', function () {
        // callback
    });
    proxyRes.on('error', function (err) {
        // catchable error
    });
});
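Given the zlib attempt commented out in the question, one more note: the "junk" output above is exactly what a compressed body looks like when stringified, and decompression only works on the complete body, not on individual chunks. A hedged sketch combining both ideas, collecting raw Buffers and gunzipping once on 'end' when the Content-Encoding header says so:
var zlib = require('zlib');

proxyReq.on('response', function(proxyRes) {
    var chunks = [];
    proxyRes.on('data', function(data) {
        chunks.push(data); // keep raw Buffers; don't stringify compressed bytes
    });
    proxyRes.on('end', function() {
        var buffer = Buffer.concat(chunks);
        if (proxyRes.headers['content-encoding'] === 'gzip') {
            zlib.gunzip(buffer, function(err, decoded) {
                if (err) return console.log(err);
                console.log(decoded.toString('utf-8'));
            });
        } else {
            console.log(buffer.toString('utf-8'));
        }
    });
});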
To simplify the request part, I recommend you use the node-request package: https://github.com/mikeal/request

How can I make the results of a csv file-to-multidimensional array available globally?

If I have a global array:
var people = [];
And I have the following function:
function readFile() {
    var IN = require('ya-csv');
    var filePath = 'data.csv';
    var reader = IN.createCsvFileReader(filePath, {
        'separator': ','
    });
    reader.on('data', function(item) {
        people.push(item);
    });
}
The people array only seems to be populated inside reader.on. How can I use the people array globally?
Your code is perfectly right if the people variable is declared outside the readFile function, which seems to be the case.
I guess that your problem is something like this:
var people = [];

function readFile() {
    var IN = require('ya-csv');
    var filePath = 'data.csv';
    var reader = IN.createCsvFileReader(filePath, {
        separator: ',' // quotes around property name are optional
    });
    reader.on('data', function(item) {
        people.push(item);
    });
}

readFile();
console.log(people); // <- people is empty
This behaviour is absolutely normal. As ya-csv processes incoming data asynchronously, you have to wait for the processing to be finished.
That's the purpose of the end event, triggered by CsvReader when it has finished (unfortunately not documented in the ya-csv documentation).
Refactoring like this will work better:
// make filePath a parameter, and use a callback function
function readFile(filePath, callback) {
    // make people scoped to readFile()
    var people = [];
    var IN = require('ya-csv');
    var reader = IN.createCsvFileReader(filePath, {
        separator: ',' // quotes around property name are optional
    });
    // data is emitted for each processed line
    reader.on('data', function(item) {
        // closure magic: people is accessible because the current function is nested inside readFile()
        people.push(item);
    });
    // end event
    reader.on('end', function() {
        // return results to the caller, simply by invoking the callback.
        // by convention, the first argument is an error, which is null if no problem occurred
        callback(null, people);
    });
    // error handling
    reader.on('error', function(err) {
        // stop listening on events, to avoid continuing to queue data
        reader.removeAllListeners();
        // report the error to the caller.
        callback(err);
    });
}
readFile('data.csv', function(err, results) {
    if (err) {
        // error handling
        return ...
    }
    // nominal case: use results, which contains the people!
    console.dir(results);
});
Please ask questions in the comments if something is not clear.
== EDIT ==
Alternatively, you can use a variable outside readFile()
// notice: people is declared outside readFile
var people = [];

// make filePath a parameter, and use a callback function
function readFile(filePath, callback) {
    var IN = require('ya-csv');
    var reader = IN.createCsvFileReader(filePath, {
        separator: ',' // quotes around property name are optional
    });
    // data is emitted for each processed line
    reader.on('data', function(item) {
        // closure magic: people is accessible because the current function is nested inside readFile()
        people.push(item);
    });
    // end event: directly invoke callback
    reader.on('end', callback);
    // error handling
    reader.on('error', function(err) {
        // stop listening on events, to avoid continuing to queue data
        reader.removeAllListeners();
        // report the error to the caller.
        callback(err);
    });
}
readFile('data.csv', function(err) {
    if (err) {
        // error handling
        return ...
    }
    // you cannot use people before here, because you have no guarantee that the read process is finished.
    console.dir(people);
});
The drawback of this code is that calling readFile() multiple times will append to the same array, which is neither modular nor reliable.

node.js server handle request callback function ending before writing response

I have an HTTP server with a handleRequest callback that runs another script in vm.runInNewContext for each request. The script that runs inside vm.runInNewContext makes some asynchronous HTTP POST requests and writes the server response only after getting the responses from those posts.
As a result, the code of the handleRequest callback ends before the server response is written.
Is it safe? Or is there a way to avoid this situation?
Here is some code:
var server = http.createServer(handleRequest);
server.listen(8080);

var handleRequest = function (request, response) {
    // get request data...
    var context = {
        ServerRequest : request,
        ServerResponse : response
    };
    var stringScript = // a string with the script that posts data
    var script = vm.createScript(stringScript);
    script.runInNewContext({ context: context });
}
The script string does this:
var request = require('request');
var options = {....}

var req = request.get(options);
req.on('response', function (res) {
    var chunks = [];
    res.on('data', function(chunk) {
        chunks.push(chunk);
    });
    res.on('end', function() {
        var buffer = Buffer.concat(chunks);
        var encoding = res.headers['content-encoding'];
        if (encoding == 'gzip') {
            zlib.gunzip(buffer, function(err, decoded) {
                // set response headers and write the response
                context.ServerResponse.end(decoded.toString());
            });
        } else if (encoding == 'deflate') {
            zlib.inflate(buffer, function(err, decoded) {
                // set response headers and write the response
                context.ServerResponse.end(decoded.toString());
            });
        } else {
            // set response headers and write the response
            context.ServerResponse.end(buffer.toString());
        }
    });
});
Simple solution: return a promise (e.g. using the Q library) from the VM script.
script.runInNewContext will return whatever the VM script evaluates to last, so that gives you a "callback" for when the VM code finishes.
// Script for VM
// I simplified it. Just resolve or reject the promise whenever you are done with your work
'use strict';

var defer = q.defer();

doABarrelRoll(function() {
    defer.resolve('RESULT');
});

defer.promise; // This line will return the promise.
When returning a value from a VM script, you do not need any return construct. Just write the expression you want as the last statement and let the magic happen.
// Script for current context
'use strict';

// define the handler before handing it to createServer, so it isn't undefined
var handleRequest = function (request, response) {
    // get request data...
    var context = {
        ServerRequest : request,
        ServerResponse : response
    };
    var stringScript = // a string with the script that posts data
    var script = vm.createScript(stringScript);
    var prom = script.runInNewContext({
        context: context,
        q: require('q'),
    });
    prom.done(function ($result) {
        console.log('VM finished with result: ' + $result);
    });
};

var server = http.createServer(handleRequest);
server.listen(8080);
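On newer Node versions the same trick works without the Q library. A sketch under the assumption that you hand the host's Promise constructor to the sandbox (so the returned promise belongs to the caller's realm and .then() behaves as usual), reusing the hypothetical doABarrelRoll helper from above:
var vm = require('vm');

// The VM script's last expression is what runInNewContext returns.
var stringScript =
    "new Promise(function(resolve) {" +
    "    doABarrelRoll(function() { resolve('RESULT'); });" +
    "});";

var prom = vm.createScript(stringScript).runInNewContext({
    context: context,             // as in the answer above
    doABarrelRoll: doABarrelRoll, // hypothetical async helper
    Promise: Promise              // host Promise, so the result is same-realm
});

prom.then(function (result) {
    console.log('VM finished with result: ' + result);
});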
