Using promises with download module - node.js

I am using bluebird for promises.
I am trying to promisify the download module.
Here is my implementation:
Promise = require('bluebird'),
download = require('download');
var methodNameToPromisify = ["download"];
function EventEmitterPromisifier(originalMethod) {
// return a function
return function promisified() {
var args = [].slice.call(arguments);
// Needed so that the original method can be called with the correct receiver
var self = this;
// which returns a promise
return new Promise(function(resolve, reject) {
// We call the originalMethod here because if it throws,
// it will reject the returned promise with the thrown error
var emitter = originalMethod.apply(self, args);
emitter
.on("response", function(data) {
resolve(data);
})
.on("data ", function(data) {
resolve(data);
})
.on("error", function(err) {
reject(err);
})
.on("close", function() {
resolve();
});
});
};
};
download = { download: download };
Promise.promisifyAll(download, {
filter: function(name) {
return methodNameToPromisify.indexOf(name) > -1;
},
promisifier: EventEmitterPromisifier
});
Then using it:
return download.downloadAsync(fileURL, copyTo, {});
My problem is that it doesn't download all of the files (I have a list sent to this function), what am I doing wrong?

An emitter does emit multiple data events, one for every chunk it receives. However, a promise represents only one future value; in your case you want that to be the complete response.
resolve is supposed to be called only once, to fulfill the promise with the passed value, which is then settled. Further calls will have no effect - and that's why you get only the first parts of your list.
Instead, you will need to accumulate all the data, and when the stream ends you can fulfill the promise with all of it.
var Promise = require('bluebird'),
download = require('download'),
Buffer = require('buffer'); // should be global anyway
exports = {
downloadAsync: function promisifiedDownload() {
var args = arguments, self = this;
return new Promise(function(resolve, reject) {
// We call the originalMethod here because if it throws,
// it will reject the returned promise with the thrown error
var emitter = download.apply(self, args);
var buffers = [];
emitter.on("data", function(data) {
buffers.push(data);
}).on("error", function(err) {
reject(err);
}).on("close", function() {
resolve(Buffer.concat(buffers));
});
});
};
};
Notice it's quite nonsensical to use promisifyAll when you only want to promisify a single method. I've omitted it for simplicity
You might also listen for the incoming response object, and attach the data listener directly to it. You can then use the end event instead of close.

Related

Need to confirm how callbacks and errors work in async.each with try-catch blocks

I've been tinkering with this for a few days now and have seen a number of different patterns. In some ways I feel more confused than I did when I began!
itemsArr is a list of item objects (itemObj) with summary information about each item. Each itemObj contains an itemId which doubles as the API slug directory. So, I need to iterate through the itemsArr, make an API call for each item, and return the updated array with all of the details that were retrieved from each API call. When this is finished, I want to log the enriched array, enrichedItemsArr, to persistent storage.
It does not matter in what order the API calls return, hence using async.each. I also don't want to interrupt the execution if an error occurs. My questions:
'Done enriching array' is printing before execution of enrichArr() -> why is await async.each... in enrichArr() not blocking?
I am getting TypeError: callback is not a function in the inner try-catch. Not sure why.
If I pass err to callback() in the inner try-catch, will that halt execution?
Should I pass itemsArr to processDone as the 2nd argument? Is there a way to return itemsArr to main() from the processDone() method?
Does err passed to the final callback contain an array of errors?
const main = async () => {
const itemsArr = items.getArr(); // --> retrieves locally cached itemsArr
const enrichedItemsArr = await enrichArr(itemsArr); // --> handling the async iterator stuff below
await logToDB(enrichedItemsArr); // --> helper function to log enriched info to database
console.log('Done enriching array');
};
const enrichArr = async (itemsArr) => {
// Outer try-catch
try {
const processItem = async (item, callback) => {
// Inner try-catch
try {
const res = await doSomethingAsync(itemID);
item.res = res;
callback(); // --> currently getting `TypeError: callback is not a function`
} catch (err) {
item.err = err;
callback(err); // --> not 100% sure what passing err here does...
}
};
const processDone = (err, itemsArr) => {
if (err) console.error(err); // --> Is err an array of errors or something?
return itemsArr; // --> how do I return this to main()?
};
await async.each(itemsArr, processItem, processDone);
} catch (err) {
throw err; // --> if async.each errors, throw
}
};
Hope this is a good answer for you.
why is await async.each... in enrichArr() not blocking?
Based on the docs, the async.each will return a promise only if the callback is omitted
each
You're including the callback, the async.each won't return a promise and won't block your code using async/await
I am getting TypeError: callback is not a function in the inner try-catch. Not sure why.
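Your processItem should be a plain (non-async) function. When async.each is handed an ES2017 async function it does not pass a callback argument at all; it waits on the promise that function returns instead, which is why callback is undefined in your version. With a plain function I was able to use callback: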
/**
 * Iteratee for async.each: enriches one item in place.
 *
 * On success the value handed back by doSomethingAsync is stored on
 * `item.res` and `callback()` is called with no arguments. If starting the
 * work throws synchronously, the error is stored on `item.err` and passed
 * to `callback(err)`.
 *
 * @param {object} item - entry of the array being iterated; mutated in place
 * @param {function} callback - node-style completion callback `(err) => void`
 */
const processItem = (item, callback) => {
  try {
    doSomethingAsync((res) => {
      item.res = res;
      callback();
    });
  } catch (err) {
    // Only synchronous throws from doSomethingAsync reach this handler.
    item.err = err;
    callback(err);
  }
};
If I pass err to callback() in the inner try-catch, will that halt execution?
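Yes. Passing err to callback tells async.each that the whole operation failed: the final callback is called immediately with that error (or, when the final callback is omitted, the returned promise rejects, so await throws). Iterations that are already running are not cancelled, but their outcome is ignored.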
Should I pass itemsArr to processDone as the 2nd argument? Is there a way to return itemsArr to main() from the processDone() method?
If you want to let know the main method that needs to wait, you won't be able to use processDone.
itemsArr is an array, and arrays are objects: you can mutate its items in place and the main method will see those changes. There is no other way if you want to use async.each.
Maybe there is another method in the async library that allows you to return a new array.
Maybe Map is a good option map
Does err passed to the final callback contain an array of errors?
No, it is a single error: the first one that any iteratee passed to its callback. Passing it is how you tell the library that the whole operation should fail.
I created a snippet to allow you to play with the code
const async = require('async');
const logToDB = async (items) => {
items.forEach((item) => console.log(JSON.stringify(item)))
}
const doSomethingAsync = (callback) => {
setTimeout(() => {
console.log('processing data')
callback()
}, 1000);
}
const main = async () => {
const itemsArr = [
{
itemId: '71b13422-2582-4975-93c9-447b66764daf'
},
// {
// errorFlag: true
// },
{
itemId: '8ad24197-7d30-4514-bf00-8068e216e90c'
}
]; // --> retrieves locally cached itemsArr
const enrichedItemsArr = await enrichArr(itemsArr); // --> handling the async iterator stuff below
await logToDB(enrichedItemsArr); // --> helper function to log enriched info to database
console.log('Done enriching array');
};
/**
 * Enriches every item of `itemsArr` in place and returns the same array.
 *
 * async.each is called without a final callback so that it returns a
 * promise that can be awaited. The first error passed to an iteratee
 * callback rejects that promise; the rejection is logged and re-thrown to
 * the caller.
 *
 * @param {object[]} itemsArr - items to enrich; each gets a `res` property
 * @returns {Promise<object[]>} the same `itemsArr` instance
 */
const enrichArr = async (itemsArr) => {
  // Plain (non-async) iteratee so that async.each supplies `callback`.
  const processItem = (item, callback) => {
    console.log('item: ', item);
    try {
      if (item.errorFlag) {
        return callback('Test error');
      }
      doSomethingAsync((res) => {
        item.res = res;
        callback();
      });
    } catch (err) {
      // Only synchronous throws from doSomethingAsync reach this handler.
      item.err = err;
      callback(err);
    }
  };

  try {
    await async.each(itemsArr, processItem);
    return itemsArr;
  } catch (err) {
    console.log('Error occurred');
    throw err; // let the caller decide how to react
  }
};
main();

Question about end of request for node/JS request package

I'm trying to understand what .on('end', ...) does in the node package request.
My code:
const fs = require('fs');
const request = require('request');
function downloadAsset(relativeAssetURL, fileName) {
return new Promise((resolve, reject) => {
try {
let writeStream = fs.createWriteStream(fileName);
var remoteImage = request(`https:${relativeAssetURL}`);
remoteImage.on('data', function(chunk) {
writeStream.write(chunk);
});
remoteImage.on('end', function() {
let stats = fs.statSync(fileName);
resolve({ fileName: fileName, stats: stats });
});
} catch (err) {
reject(err);
}
});
}
What I'm trying to do is download a remote image, get some file statistics, and then resolve the promise so my code can do other things.
What I'm finding is that the promise doesn't always resolve after the file has been downloaded; it may resolve a little before then. I thought that's what .on('end', ... ) was for.
What can I do to have this promise resolve after the image has been downloaded in full?
As the docs say:
The writable.write() method writes some data to the stream, and calls the supplied callback once the data has been fully handled.
So, writable.write() is asynchronous. Just because your last writeStream.write has been called does not necessarily mean that all write operations have been completed. You probably want to call the .end method, which means:
Calling the writable.end() method signals that no more data will be written to the Writable. The optional chunk and encoding arguments allow one final additional chunk of data to be written immediately before closing the stream. If provided, the optional callback function is attached as a listener for the 'finish' event.
So, try calling writeStream.end when the remoteImage request ends, and pass a callback to writeStream.end that resolves the Promise once the writing is finished:
/**
 * Downloads a remote asset into `fileName`.
 *
 * The promise fulfils only after the write stream has finished, i.e. after
 * `writeStream.end()` has invoked its callback, so the reported stats
 * describe the complete file. It rejects when the request or the write
 * stream emits "error", or when reading the stats fails. A partially
 * written file may be left behind after a failure.
 *
 * @param {string} relativeAssetURL - protocol-relative URL ("//host/path")
 * @param {string} fileName - path of the file to create
 * @returns {Promise<{fileName: string, stats: fs.Stats}>}
 */
function downloadAsset(relativeAssetURL, fileName) {
  return new Promise((resolve, reject) => {
    // A synchronous throw in the executor already rejects the promise, so
    // no try/catch is needed around the setup calls.
    const writeStream = fs.createWriteStream(fileName);
    const remoteImage = request(`https:${relativeAssetURL}`);

    // Stream failures arrive asynchronously as "error" events; a try/catch
    // around the setup code would never see them.
    writeStream.on('error', reject);
    remoteImage.on('error', (err) => {
      writeStream.destroy();
      reject(err);
    });

    remoteImage.on('data', (chunk) => {
      writeStream.write(chunk);
    });
    remoteImage.on('end', () => {
      // write() is asynchronous; end() calls back once everything is flushed.
      writeStream.end(() => {
        try {
          const stats = fs.statSync(fileName);
          resolve({ fileName: fileName, stats: stats });
        } catch (err) {
          reject(err);
        }
      });
    });
  });
}
(also try not to mix var and let/const - in an ES6+ environment, prefer const, which is generally easier to read and has fewer problems, like hoisting)

Having callback within Node Promises

Using child_process, I execute a Python script that does something and spits data back. I used a Node promise to wait until I get the Python data.
The problem I am facing is there is a callback for an anonymous function, the callback takes two parameters one of which is the python data. Code below explains. How do I call the promise, wait until it resolves then call the callback.
Node Promise
var spawn = require("child_process").spawn;
function sensorData()
{
return new Promise(function(resolve, reject)
{
var pythonProcess = spawn ("python",[pythonV1.py"]);
pythonProcess.stdout.on("data", function(data)
{
resolve(data);
});
});
}
Anonymous Function
...
onReadRequest : function(offest, callback)
{
#============DOES NOT WORK=========================
sensorData()
.then(function(data)
{
callback(this.RESULT_SUCCESS, data);
})
#===================================================
#call promise, wait and then call callback passing the python data
callback(this.RESULT_SUCCESS, new Buffer(#python data)
}
...
Many thanks
Unless you know that your pythonProcess will only return one line of data, it's bad practice to call resolve() on every stdout data call. It would be much better to collect data until the process closes, and return it all at once.
I'm also not used to dealing with buffers, so I'm casting stuff to strings here...
var spawn = require("child_process").spawn;
/**
 * Runs `python pythonV1.py` and collects everything it prints to stdout.
 *
 * The promise fulfils with the accumulated output, converted to a string,
 * when the child emits "close". It rejects if the child emits "error".
 *
 * @returns {Promise<string>} the script's complete stdout
 */
function sensorData()
{
    return new Promise(function(resolve, reject)
    {
        var output = '';
        var pythonProcess = spawn("python", ["pythonV1.py"]);
        pythonProcess.stdout.on("data", function(data)
        {
            // stdout may arrive in several chunks; keep appending.
            output += data.toString();
        });
        // Settle on "close" only. Per the child_process docs, "exit" can
        // fire while stdio streams are still open, so resolving there
        // could cut the output short.
        pythonProcess.on('close', function(code, signal)
        {
            resolve(output);
        });
        // Without this listener a failure to start the process would leave
        // the promise pending forever.
        pythonProcess.on('error', function(err)
        {
            reject(err);
        });
    });
}
...
onReadRequest : function(offest, callback)
{
#call promise, wait and then call callback passing the python data
sensorData()
.then(function(data)
{
callback(this.RESULT_SUCCESS, data);
})
.catch(function(err)
{
// Do something, presumably like:
callback(this.RESULT_FAILURE, err);
});
}
...

Array.push is not working with promisified function but callback is not retaining the message

Note - Message variable is not retaining data after calling promisified functions. Callback is giving null array.
Code -
'use strict';
const Promise = require('bluebird');
let _connectResolve, _connectReject, onConnected = new Promise((resolve, reject) => {
_connectResolve = resolve;
_connectReject = reject;
}),
redis = require("redis"),
redisClient = redis.createClient({
host: 'localhost',
port: 6379
});
Promise.promisifyAll(redis.RedisClient.prototype);
redisClient.on('connect', _connectResolve);
const results = Promise.all([
'it/0I0g2I3D312s192u0U3k/10es.zip',
'items/25210B0c0Q1L3u0X462g/10ges.zip',
'items/2x0n440V1A1n3x1y0f1K/Fs.zip',
'items/2l023931u0w1S2a3j/es.zip',
'items/2O2x212i3t0B2h/es.zip',
]);
var message = [];
var a = Promise.promisify(function(callback) {
results.map(function(result) {
redisClient.getAsync(result).then(function(reply) {
if (reply == null) {
message.push({
"key": result,
"bucket_name": 'dsdds'
});
}
//console.log(message);
});
callback(null, message);
});
});
onConnected.then(() => {
Promise.resolve(a()).then(function(message) {
console.log(message);
});
});
Output - message is undefined
There are quite a few things wrong with how you've coded this. Asynchronous operations run on their own schedule and finish some indeterminate time in the future. As such, you can't do something like use a .map() loop with asynchronous operations in it and then expect the results to be ready right after the .map() loop. Instead, you have to use tools to keep track of when all the async operations in the .map() loop have completed and look at the result only when that tool tells you all the operations are done.
In addition, there are some very weird uses of Promise.promisify() which makes it look like you think promisifying a plain function will somehow magically manage the async operations inside it. It will not. You can only use Promise.promisify() on an async function that has a specific calling convention.
Fortunately, since you have the Bluebird promise library, you can use its tools to help you to do something like this:
/**
 * Looks up every key in `results` and reports the ones Redis has no value
 * for.
 *
 * Promise.map waits for all lookups to finish before the final `.then`
 * runs, so `message` is complete by the time it is handed to the caller.
 *
 * @returns {Promise<Array<{key: string, bucket_name: string}>>} one entry
 *     per key whose lookup replied with null or undefined
 */
function a() {
  let message = [];
  return Promise.map(results, function(result) {
    // Returning the lookup promise is what lets Promise.map track it.
    return redisClient.getAsync(result).then(function(reply) {
      if (reply == null) {
        message.push({
          "key": result,
          "bucket_name": 'dsdds'
        });
      }
    });
  }).then(function() {
    // make the message array be the resolved value of the returned promise
    return message;
  });
}
onConnected.then(() => {
a().then(function(message) {
console.log(message);
});
});

Use promises for multiple node requests

With the request library, is there a way to use promises to simplify this callback?
var context = {};
request.get({
url: someURL,
}, function(err, response, body) {
context.one = JSON.parse(body);
request.get({
url: anotherURL,
}, function(err, response, body) {
context.two = JSON.parse(body);
// render page
res.render('pages/myPage');
});
});
Here's a solution using the Bluebird promises library. This serializes the two requests and accumulates the results in the context object and rolls up error handling all to one place:
var Promise = require("bluebird");
// multiArgs makes each *Async call fulfil with an array of the callback's
// success arguments, which .spread() then unpacks into parameters.
var request = Promise.promisifyAll(require("request"), {multiArgs: true});
var context = {};
// The second request starts only after the first body has been parsed. A
// failure in either request or either JSON.parse ends up in the single
// .catch at the bottom.
request.getAsync(someURL).spread(function(response, body) {
  context.one = JSON.parse(body);
  return request.getAsync(anotherURL);
}).spread(function(response, body) {
  context.two = JSON.parse(body);
  // render page
  res.render('pages/myPage');
}).catch(function(err) {
  // error here
});
And, if you have multiple URLs, you can use some of Bluebirds other features like Promise.map() to iterate an array of URLs:
var Promise = require("bluebird");
var request = Promise.promisifyAll(require("request"), {multiArgs: true});
var urlList = ["url1", "url2", "url3"];
// Issue one request per URL and parse each body as JSON.
Promise.map(urlList, function(url) {
return request.getAsync(url).spread(function(response,body) {
// pair every parsed body with the URL it came from
return [JSON.parse(body),url];
});
}).then(function(results) {
// results is an array of [parsedBody, url] pairs, in order
}).catch(function(err) {
// handle error here
});
Or, you could create a helper function to do this for you:
/**
 * Fetches every URL in `array` and parses each response body as JSON.
 *
 * @param {string[]} array - URLs to request
 * @returns {Promise<Array>} the parsed bodies; rejects if a request fails
 *     or a body is not valid JSON
 */
function getBodies(array) {
  // Iterate the argument, not the outer `urlList`, so that the helper
  // works for whatever list the caller passes in.
  return Promise.map(array, function(url) {
    return request.getAsync(url).spread(function(response, body) {
      return JSON.parse(body);
    });
  });
}
// sample usage of helper function
getBodies(["url1", "url2", "url3"]).then(function(results) {
// process results array here
}).catch(function(err) {
// process error here
});
Here is how I would implement chained Promises.
var request = require("request");
var someURL = 'http://ip.jsontest.com/';
var anotherURL = 'http://ip.jsontest.com/';
/**
 * Requests `someURL` and `anotherURL` in parallel and stores each parsed
 * JSON body on `context` under the keys 1 and 2 respectively.
 *
 * @param {object} context - receives the parsed bodies; mutated in place
 * @returns {Promise<number[]>} fulfils with `[1, 1]` once both bodies are
 *     stored; rejects on a transport error or an unparsable body
 */
function combinePromises(context) {
  const urls = [someURL, anotherURL];
  return Promise.all(urls.map((url, i) => {
    return new Promise(function(resolve, reject) {
      // A synchronous throw from request.get rejects via the executor.
      request.get({
        url: url,
      }, function(err, response, body) {
        if (err) {
          reject(err);
          return;
        }
        // This callback runs later, outside the executor's call stack, so
        // a JSON.parse failure has to be caught here; otherwise it becomes
        // an uncaught exception and the promise never settles.
        try {
          context[i + 1] = JSON.parse(body);
        } catch (parseError) {
          reject(parseError);
          return;
        }
        resolve(1); // you can send back anything you want here
      });
    });
  }));
}
var context = {"1": "", "2": ""};
combinePromises(context)
.then(function(response){
console.log(context);
//render page
res.render('pages/myPage');
}, function(error){
//do something with error here
});
Doing this with native Promises. It's good to understand the guts.
This here is known as the "Promise Constructor Antipattern", as pointed out by @Bergi in the comments. Don't do this. Check out the better method below.
var contextA = new Promise(function(resolve, reject) {
request('http://someurl.com', function(err, response, body) {
if(err) reject(err);
else {
resolve(body.toJSON());
}
});
});
var contextB = new Promise(function(resolve, reject) {
request('http://contextB.com', function(err, response, contextB) {
if(err) reject(err);
else {
contextA.then(function(contextA) {
res.render('page', contextA, contextB);
});
}
});
});
The nifty trick here, and I think by using raw promises you come to appreciate this, is that contextA resolves once and then we have access to its resolved result. That is, we never repeat the above request to someurl.com, but still have access to contextA's JSON.
So I can conceivably create a contextC and reuse the JSON without having to make another request. Promises always only resolve once. You would have to take that anonymous executor function out and put it in a new Promise to refresh that data.
Bonus note:
This executes contextA and contextB in parallel, but will do the final computation that needs both contexts when both A & B are resolved.
Here's my new stab at this.
The main problem with the above solution is none of the promises are reusable and they are not chained which is a key feature of Promises.
However, I still recommend promisifying your request library yourself and abstaining from adding another dependency to your project. Another benefit of promisifying yourself is you can write your own rejection logic. This is important if you're working with a particular API that sends error messages in the body. Let's take a look:
/**
 * Promise-returning wrapper around `request`.
 *
 * Returns a fresh promise on every call, so it can be reused and chained.
 *
 * @param {object|string} options - passed straight through to `request`
 * @returns {Promise<object>} fulfils with the response object passed to
 *     the request callback; rejects with the transport error, or with
 *     `toError(body)` when `hasError(body)` reports an application-level
 *     error in the body
 */
const asyncReq = function(options) {
  return new Promise(function(resolve, reject) {
    request(options, function(err, response, body) {
      // Rejected promises can be dealt with in a `catch` block.
      if (err) {
        return reject(err);
      }
      // Custom error handling logic for your application.
      if (hasError(body)) {
        return reject(toError(body));
      }
      // Resolve with the response object handed to this callback.
      return resolve(response);
    });
  });
};
// Fetch A, then B, and render once both responses are available.
asyncReq(urlA)
  .then(function(resA) {
    // Promise.all is the preferred method for managing nested context: it
    // carries resA forward alongside the pending request for B.
    return Promise.all([resA, asyncReq(urlB)]);
  })
  .then(function(resAB) {
    return render('page', resAB[0], resAB[1]);
  })
  .catch(function(e) {
    // console has no `err` method; `error` is the one that logs to stderr.
    console.error(e);
  });
You can use the request-promise library to do this. In your case, you could have something like this, where you chain your requests.
request
.get({ url: someURL })
.then(body => {
context.one = JSON.parse(body);
// Resolves the promise
return request.get({ url: anotherURL });
})
.then(body => {
context.two = JSON.parse(body);
res.render('pages/myPage');
})
.catch(e => {
//Catch errors
console.log('Error:', e);
});
By far the easiest is to use the request-promise library. You can also use a promise library like bluebird and use its promisify functions to convert the request callback API to a promise API, though you may need to write your own promisify function as request does not use the standard callback semantics. Lastly, you can just make your own promise wrapper, using either native promises or bluebird.
If you're starting fresh, just use request-promise. If you're refactoring existing code, I would just write a simple wrapper for request using bluebird's spread function.

Resources