Why is my upload incomplete in a NodeJS express app

I need to upload a v8 heap dump into an AWS S3 bucket after it's generated, but the file that is uploaded is either 0KB or 256KB. The file on the server is over 70MB in size, so it appears that the request isn't waiting until the heap dump is completely flushed to disk. I'm guessing the readable stream being piped into fs.createWriteStream runs asynchronously, so the await on the function call isn't actually waiting. I'm using the v3 version of the AWS NodeJS SDK. What am I doing incorrectly?
Code
async function createHeapSnapshot (fileName) {
    const snapshotStream = v8.getHeapSnapshot();
    // It's important that the filename end with `.heapsnapshot`,
    // otherwise Chrome DevTools won't open it.
    const fileStream = fs.createWriteStream(fileName);
    snapshotStream.pipe(fileStream);
}

async function pushHeapSnapshotToS3(fileName) {
    const heapDump = fs.createReadStream(fileName);
    const s3Client = new S3Client();
    const putCommand = new PutObjectCommand({
        Bucket: "my-bucket",
        Key: `heapdumps/${fileName}`,
        Body: heapDump
    });
    return s3Client.send(putCommand);
}

app.get('/heapdump', asyncMiddleware(async (req, res) => {
    const currentDateTime = Date.now();
    const fileName = `${currentDateTime}.heapsnapshot`;
    await createHeapSnapshot(fileName);
    await pushHeapSnapshotToS3(fileName);
    res.send({
        heapdumpFileName: `${currentDateTime}.heapsnapshot`
    });
}));

Your guess is correct. createHeapSnapshot() returns a promise, but that promise has no connection at all to when the stream is done, so when the caller awaits it, the promise resolves long before the stream actually finishes. async functions have no magic in them to somehow know when a non-promisified asynchronous operation like .pipe() is done.
Since streams don't have very much native support for promises, you can manually promisify the completion and errors of the streams:
function createHeapSnapshot (fileName) {
    return new Promise((resolve, reject) => {
        const snapshotStream = v8.getHeapSnapshot();
        // It's important that the filename end with `.heapsnapshot`,
        // otherwise Chrome DevTools won't open it.
        const fileStream = fs.createWriteStream(fileName);
        fileStream.on('error', reject).on('finish', resolve);
        snapshotStream.on('error', reject);
        snapshotStream.pipe(fileStream);
    });
}
Alternatively, you could use the newer pipeline() function, which replaces .pipe(), supports promises (the stream/promises module was added in nodejs v15), and has built-in error monitoring that rejects the promise:
const { pipeline } = require('stream/promises');

function createHeapSnapshot (fileName) {
    const snapshotStream = v8.getHeapSnapshot();
    // It's important that the filename end with `.heapsnapshot`,
    // otherwise Chrome DevTools won't open it.
    return pipeline(snapshotStream, fs.createWriteStream(fileName));
}
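If you're on a Node version older than 15 (where stream/promises isn't available), the same promise-based pipeline can be had by promisifying the callback form, which has existed since Node 10; a minimal sketch:

const { promisify } = require('util');
const pipeline = promisify(require('stream').pipeline);

// usage is identical to the stream/promises version above:
// await pipeline(snapshotStream, fs.createWriteStream(fileName));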

Related

AWS Lambda not executing Promise outside Handler until Handler is invoked

I am working on an aws lambda that requires a puppeteer browser to be launched for each new s3 object in a bucket. The browser launch code was taking a very long time on the initial invocation, so I thought I would put the launch code outside the handler and use Provisioned Concurrency to have the browser ready to go when a new file is inserted into the bucket.
It does seem to start the promise, because before any actual invocations are made I'm getting logs saying "Getting executable path" from the provisioned concurrency instances. However, it never outputs the message "Launching browser" until an actual invocation of the lambda is made. Why would the promise chromium.executablePath not complete until an invocation is made if it is outside the handler?
let startTime = Date.now();
const chromium = require("chrome-aws-lambda");
const AWS = require("aws-sdk");
const s3 = new AWS.S3();
const { createSSRApp } = require("vue");
const { renderToString } = require("vue/server-renderer");
const path = require("path");
const fs = require("fs");
const manifest = require("../../compiled/ssr-manifest.json");
console.log("Load packages: " + (Date.now() - startTime));

const browserPromise = new Promise((res) => {
    const browserStartTime = Date.now();
    console.log("Getting executable path");
    chromium.executablePath.then((executablePath) => {
        console.log("Launching browser");
        chromium.puppeteer
            .launch({
                args: chromium.args,
                defaultViewport: chromium.defaultViewport,
                executablePath: executablePath,
                headless: true,
            })
            .then((browser) => {
                res(browser);
                console.log("Start headless browser: " + (Date.now() - browserStartTime));
            });
    });
});
browserPromise.then(() => console.log("Started Headless Browser"));

/**
 * A Lambda function that logs the payload received from S3.
 */
exports.handler = async (event, context) => {
    const bucketName = event.Records[0].s3.bucket.name;
    const objectKey = event.Records[0].s3.object.key;
    const browser = await browserPromise;
    ... //use browser code
}
If I require this file locally in another node file it runs the promise fine without calling the handler function, so it must be some lambda environment specific thing I'm not understanding. Does anyone have any insight into this? Thanks in advance.
The issue you describe results from poor control of the async code.
Delay instantiation of browser promise
When you instantiate a Promise using the new keyword, execution of the function you provide starts immediately.
const x = new Promise(res => console.log('test'))
This will print 'test' immediately, without needing a .then or await. This is why your code prints 'Getting executable path' right away instead of waiting for a request event from lambda.
To solve this, don't instantiate this promise until a request actually happens. Move your promise construction to a function that you can call from the handler when a request occurs.
async function startBrowser () {
    // code to start browser
    return browser
}

exports.handler = async (event, context) => {
    const bucketName = event.Records[0].s3.bucket.name;
    const objectKey = event.Records[0].s3.object.key;
    const browser = await startBrowser();
    // use browser
}
Fixing async return flow
Secondly, you need to make your startBrowser function actually return a browser. Because you haven't awaited any of the promises created inside your browserPromise, it will trigger the code to start chromium but resolve immediately. It will take some time for the browser to start, which is why you don't see 'Launching browser' until much later.
To fix this, make sure your browser promise doesn't resolve until the browser is ready, and then return the browser object so it can be used.
async function startBrowser () {
    const browserStartTime = Date.now();
    console.log("Getting executable path");
    const executablePath = await chromium.executablePath;
    console.log("Launching browser");
    // await the launch so this function doesn't resolve until
    // the browser is actually ready
    const browser = await chromium.puppeteer.launch({
        args: chromium.args,
        defaultViewport: chromium.defaultViewport,
        executablePath: executablePath,
        headless: true,
    });
    console.log("Start headless browser: " + (Date.now() - browserStartTime));
    return browser;
}

exports.handler = async (event, context) => {
    const bucketName = event.Records[0].s3.bucket.name;
    const objectKey = event.Records[0].s3.object.key;
    const browser = await startBrowser();
    // use browser
}
Improving performance by sharing browser across requests
You can make further performance improvements by saving the browser in a singleton so that a new one doesn't need to be instantiated every request cycle.
let browser; // singleton/global browser object

// start a browser to be used for all requests
// this will take a little time, so hold a reference to the promise so we
// can know when it is ready to use
const browserPromise = startBrowser().then(newBrowser => { browser = newBrowser });

exports.handler = async (event, context) => {
    const bucketName = event.Records[0].s3.bucket.name;
    const objectKey = event.Records[0].s3.object.key;
    // if the first request comes before the browser is ready, we should
    // wait for the promise to resolve
    if (!browser) await browserPromise;
    // use browser
}
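As a side note: newer Node runtimes on Lambda (14 and up) also support ES-module handlers with top-level await, which makes the init-phase launch explicit; a sketch, assuming the handler file is an ES module (e.g. index.mjs):

// index.mjs - on a Node 14+ Lambda runtime, top-level await runs during
// the init phase, so provisioned concurrency can pre-warm the browser
const browser = await startBrowser();

export const handler = async (event) => {
    // the browser is guaranteed to be started before any event arrives
    const bucketName = event.Records[0].s3.bucket.name;
    // ... use browser
};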
Notes about memory usage
Browsers can use a lot of memory and can also leak memory. If fixing your async code still does not make the lambda work, consider that loading a webpage can consume gigabytes of memory, especially for large and complex pages (Google Maps, for example). I'm not experienced with lambda, but you may find yourself running into memory limits.

Why is it not going inside the function? It only output "finish" and "start" on AWS Lambda

I'm writing an HTML-to-PDF function using phantom-html-to-pdf on AWS Lambda with NodeJS, but I've hit a problem where execution never goes inside the callback. The output only shows Start and Finish; Done is never printed, which means the callback isn't being reached. What is the issue here?
var fs = require('fs')
var conversion = require("phantom-html-to-pdf")();

exports.handler = async (event) => {
    console.log("Start")
    conversion({ html: "<h1>Hello World</h1>" },
        async (err, pdf) => {
            var output = fs.createWriteStream('output.pdf')
            console.log(pdf.logs);
            console.log(pdf.numberOfPages);
            // since pdf.stream is a node.js stream you can use it
            // to save the pdf to a file (like in this example) or to
            // respond an http request.
            pdf.stream.pipe(output);
            console.log("Done")
        });
    console.log("Finish")
};
The problem is that you have marked your lambda function as async, which means it returns a promise that resolves as soon as the function body finishes; Lambda then completes the invocation without waiting for the conversion callback to run. So you have two choices here:
1. Convert the conversion function from callback-based to promise-based (a sketch follows the code below).
2. Instead of marking the function async, accept the callback parameter and invoke it when the work is done, something like this:
const fs = require("fs");
const conversion = require("phantom-html-to-pdf")();

exports.handler = (event, context, callback) => {
    console.log("Start");
    conversion({"html": "<h1>Hello World</h1>"},
        // eslint-disable-next-line handle-callback-err
        async (err, pdf) => {
            const output = fs.createWriteStream("output.pdf");
            console.log(pdf.logs);
            console.log(pdf.numberOfPages);
            // since pdf.stream is a node.js stream you can use it
            // to save the pdf to a file (like in this example) or to
            // respond an http request.
            pdf.stream.pipe(output);
            console.log("Done");
            callback(null, "done");
        });
};
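For the first choice, a minimal sketch using util.promisify, assuming conversion follows the standard Node (err, result) callback convention. It writes to /tmp, the only writable path in a Lambda container:

const fs = require("fs");
const util = require("util");
const conversion = util.promisify(require("phantom-html-to-pdf")());

exports.handler = async (event) => {
    console.log("Start");
    const pdf = await conversion({ html: "<h1>Hello World</h1>" });
    // wait for the write stream to finish before letting the handler resolve
    await new Promise((resolve, reject) => {
        pdf.stream.pipe(fs.createWriteStream("/tmp/output.pdf"))
            .on("finish", resolve)
            .on("error", reject);
    });
    console.log("Done");
    return "done";
};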

AWS Lambda function flow

I'm having some issues with how my functions flow in lambda. I'm trying to grab a value stored in S3, increment it, and put it back. However, my program doesn't flow the way I expect. I'm using async.waterfall to run the flow of my functions.
Here's my code:
let AWS = require('aws-sdk');
let async = require('async');

let bucket = "MY_BUCKET";
let key = "MY_FILE.txt";

exports.handler = async (event) => {
    let s3 = new AWS.S3();
    async.waterfall([
        download,
        increment,
        upload
    ], function (err) {
        if (err) {
            console.error(err);
        } else {
            console.log("Increment successful");
        }
        console.log("test4");
        return null;
    });
    console.log("test5");

    function download(next) {
        console.log("test");
        s3.getObject({
            Bucket: bucket,
            Key: key
        }, next);
    }

    function increment(response, next) {
        console.log("test2");
        console.log(response.Body);
        let newID = parseInt(response.Body, 10) + 1;
        next(response.ContentType, newID);
    }

    function upload(contentType, data, next) {
        console.log("test3");
        s3.putObject({
            Bucket: bucket,
            Key: key,
            Body: data,
            ContentType: contentType
        }, next);
    }
};
I'm only getting test and test5 in my log. I was under the impression that after the download function, increment should run if it succeeded, or the callback at the end of the waterfall should run if there was an error. The program doesn't error on execution, and it doesn't appear to reach either function.
Could someone guide me to what I'm missing in my understanding?
EDIT: So it seems my issue was related to my function declaration. The default template declared it as async (event). I thought this was odd, as usually they are declared as (event, context, callback). Switching to the latter (or even just (event) without the async) fixed this. It looks like my issue was with declaring the function as asynchronous. Did this block the waterfall async calls?? Can anyone elaborate on this?
Your problem is that your handler is declared as an async function, which creates a promise for you automatically, but since you never await anything, your function essentially ends synchronously.
There are a couple of ways to solve this, all of which we'll go over.
1. Do not use promises; use callbacks, as the async library is designed to.
2. Do not use the async library or callbacks and instead use async/await.
3. Mix both together and make your own promise, resolving/rejecting it manually.
1. Do not use promises
In this solution, you would remove the async keyword and add the callback parameter lambda is passing to you. Simply calling it will end the lambda; passing it an error will signal that the function failed.
// Include the callback parameter ────┐
exports.handler = (event, context, callback) => {
    const params = [
        download,
        increment,
        upload
    ];
    async.waterfall(params, (err) => {
        // To end the lambda call the callback here ──────┐
        if (err) return callback(err);  // error case   ──┤
        callback(null, { ok: true });   // success case ──┘
    });
};
2. Use async/await
The idea here is not to use the callback style but instead the Promise-based async/await keywords. If you return a promise, lambda will use that promise to handle completion instead of the callback.
If you have a function with the async keyword, it will automatically return a promise transparently to your code.
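A quick illustration of that:

// an async function always returns a promise, even when the body
// returns a plain value
async function fortyTwo() {
    return 42;
}

fortyTwo().then(value => console.log(value)); // logs 42
console.log(fortyTwo() instanceof Promise);   // true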
To do this we need to modify your code to no longer use the async library and to make your other functions async as well.
const AWS = require('aws-sdk');
const s3 = new AWS.S3();

const Bucket = "MY_BUCKET";
const Key = "MY_FILE.txt";

async function download() {
    const params = {
        Bucket,
        Key
    };
    return s3.getObject(params).promise(); // You can await or return a promise
}

function increment(response) {
    // This function is synchronous, no need for promises or callbacks
    const { ContentType: contentType, Body } = response;
    const newId = parseInt(Body, 10) + 1;
    return { contentType, newId };
}

async function upload({ contentType: ContentType, newId: Body }) {
    const params = {
        Bucket,
        Key,
        Body: String(Body), // S3 expects a string/Buffer body, not a number
        ContentType
    };
    return s3.putObject(params).promise();
}

exports.handler = async (event) => {
    const obj = await download(); // await the promise completion
    const data = increment(obj);  // call synchronously without await
    await upload(data);
    // The handler's promise will be resolved after the above are
    // all completed; the return result will be the lambda's return value.
    return { ok: true };
};
3. Mix promises and callbacks
In this approach we are still using the async library which is callback based but our outer function is promised based. This is fine but in this scenario we need to make our own promise manually and resolve or reject it in the waterfall handler.
exports.handler = async (event) => {
    // In an async function you can either use one or more `await`'s or
    // return a promise, or both.
    return new Promise((resolve, reject) => {
        const steps = [
            download,
            increment,
            upload
        ];
        async.waterfall(steps, function (err) {
            // Instead of a callback we are calling resolve or reject
            // given to us by the promise we are running in.
            if (err) return reject(err);
            resolve({ ok: true });
        });
    });
};
Misc
In addition to the main callbacks-vs.-promises problem you're encountering, there are a few minor issues I noticed:
Misc 1
You should be using const rather than let most of the time. The only time you should use let is if you intend to reassign the variable, and most of the time you shouldn't do that. I would challenge you to find ways to write code that never requires let; it will help improve your code in general.
Misc 2
You have an issue in one of your waterfall steps where you are passing response.ContentType as the first argument to next. This is a bug because async will interpret that as an error. The signature for the callback is next(err, result), so you should be doing this in your increment and upload functions:
function increment(response, next) {
    const { ContentType: contentType, Body: body } = response;
    const newId = parseInt(body, 10) + 1;
    next(null, { contentType, newId }); // pass null for err
}

function upload(result, next) {
    const { contentType, newId } = result;
    s3.putObject({
        Bucket: bucket,
        Key: key,
        Body: String(newId), // S3 expects a string/Buffer body
        ContentType: contentType
    }, next);
}
If you don't pass null or undefined for err when calling next, async will interpret that as an error, skip the rest of the waterfall, and go right to the completion handler, passing in that error.
Misc 3
What you need to know about context.callbackWaitsForEmptyEventLoop is that even if you complete the function correctly in one of the ways discussed above, your lambda may still hang open and eventually time out rather than complete successfully. Based on your code sample you probably won't need to worry about that, but it can happen when something isn't closed properly, such as a persistent connection to a database or a websocket. Setting this flag to false at the beginning of your lambda execution will cause the process to exit regardless of anything keeping the event loop alive, forcing those things to close ungracefully.
In the case below your lambda can do the work successfully and even return a success result, but it will hang open until it times out and be reported as an error. It can even be re-invoked over and over, depending on how it's triggered.
exports.handler = async (event) => {
    const db = await connect();
    await db.write(data);
    // await db.close() // Whoops, forgot to close my connection!
    return { ok: true };
};
In that case simply calling db.close() would solve the issue, but sometimes it's not obvious what is hanging around in the event loop and you just need a sledgehammer-type solution to close the lambda, which is what context.callbackWaitsForEmptyEventLoop = false is for!
exports.handler = async (event, context) => { // note: context must be accepted as a parameter
    context.callbackWaitsForEmptyEventLoop = false;
    const db = await connect();
    await db.write(data);
    return { ok: true };
};
The above will complete the lambda as soon as the function returns, killing all connections or anything else still living in the event loop.
Your function terminates before the waterfall is resolved; that is, the asynchronous calls aren't executed at all. That is why you don't see any of the console.log calls from within the waterfall functions, and only see the one that is called synchronously immediately after the call to async.waterfall.
Not sure how well async.waterfall is supported by AWS Lambda, but since promises are natively supported and provide the same functionality (with fewer lines of code), you could use promises instead. Your code would look something like this:
module.exports.handler = (event, context) =>
    s3.getObject({
        Bucket: bucket,
        Key: key
    }).promise()
        .then(response => ({
            Body: String(parseInt(response.Body, 10) + 1), // S3 expects a string/Buffer body
            ContentType: response.ContentType, // note: S3 returns `ContentType`, capitalized
        }))
        .then(modifiedResponse => s3.putObject({
            Bucket: bucket,
            Key: key,
            Body: modifiedResponse.Body,
            ContentType: modifiedResponse.ContentType
        }).promise())
        .catch(err => console.error(err));

Question about end of request for node/JS request package

I'm trying to understand what .on('end', ...) does in the node package request.
My code:
const fs = require('fs');
const request = require('request');

function downloadAsset(relativeAssetURL, fileName) {
    return new Promise((resolve, reject) => {
        try {
            let writeStream = fs.createWriteStream(fileName);
            var remoteImage = request(`https:${relativeAssetURL}`);
            remoteImage.on('data', function(chunk) {
                writeStream.write(chunk);
            });
            remoteImage.on('end', function() {
                let stats = fs.statSync(fileName);
                resolve({ fileName: fileName, stats: stats });
            });
        } catch (err) {
            reject(err);
        }
    });
}
What I'm trying to do is download a remote image, get some file statistics, and then resolve the promise so my code can do other things.
What I'm finding is that the promise doesn't always resolve after the file has been downloaded; it may resolve a little before then. I thought that's what .on('end', ... ) was for.
What can I do to have this promise resolve after the image has been downloaded in full?
As the docs say:
The writable.write() method writes some data to the stream, and calls the supplied callback once the data has been fully handled.
So, writable.write() is asynchronous. Just because your last writeStream.write has been called does not necessarily mean that all write operations have been completed. You probably want to call the .end method, which means:
Calling the writable.end() method signals that no more data will be written to the Writable. The optional chunk and encoding arguments allow one final additional chunk of data to be written immediately before closing the stream. If provided, the optional callback function is attached as a listener for the 'finish' event.
So, try calling writeStream.end when the remoteImage request ends, and pass a callback to writeStream.end that resolves the Promise once the writing is finished:
function downloadAsset(relativeAssetURL, fileName) {
    return new Promise((resolve, reject) => {
        try {
            const writeStream = fs.createWriteStream(fileName);
            const remoteImage = request(`https:${relativeAssetURL}`);
            remoteImage.on('data', function(chunk) {
                writeStream.write(chunk);
            });
            remoteImage.on('end', function() {
                writeStream.end(() => {
                    const stats = fs.statSync(fileName);
                    resolve({ fileName: fileName, stats: stats });
                });
            });
        } catch (err) {
            reject(err);
        }
    });
}
(also try not to mix var and let/const - in an ES6+ environment, prefer const, which is generally easier to read and has fewer problems, like hoisting)
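A quick illustration of the hoisting difference mentioned above:

console.log(a); // undefined: `var` declarations are hoisted and initialized
var a = 1;

console.log(b); // ReferenceError: `let`/`const` aren't usable before their declaration
let b = 2;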

Refactor synchronous code to unleash the power of node.js asynchronicity

We long-term Python and PHP coders have a tidy bit of synchronous code (sample below). Most of the functions have asynchronous counterparts. We really want to 'get' the power of Javascript and Node, and believe this is an ideal case where asynchronous node.js would speed things up and blow our socks off.
What is the textbook way to refactor the following to utilize asynchronous Node? Async/await and Promise.all? How? (Using Node 8.4.0. Backwards compatibility is not a concern.)
var fs = require('fs');
var crypto = require('crypto'); // needed by getMd5 below

// This could list over 10,000 files of various size
const fileList = ['file1', 'file2', 'file3'];

const getCdate = file => fs.statSync(file).ctime; // Has async method
const getFSize = file => fs.statSync(file).size;  // Has async method

// Can be async through file streams (see resources below)
const getMd5 = (file) => {
    const fileData = fs.readFileSync(file);
    const hash = crypto.createHash('md5');
    hash.update(fileData);
    return hash.digest('hex');
};

let filesObj = fileList.map(file => [file, {
    datetime: getCdate(file),
    filesize: getFSize(file),
    md5hash: getMd5(file),
}]);

console.log(filesObj);
Notes:
We need to keep the functions modular and re-usable.
There are more functions getting things for filesObj than listed here
Most functions can be re-written to be async; some cannot.
Ideally we need to keep the original order of fileList.
Ideally we want to use latest Node and JS features -- not rely on external modules.
Assorted file stream methods for getting md5 asynchronously:
Obtaining the hash of a file using the stream capabilities of crypto module (ie: without hash.update and hash.digest)
How to calculate md5 hash of a file using javascript
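For reference, a minimal stream-based md5 along the lines those links describe, which hashes the file chunk by chunk instead of buffering it all in memory:

const fs = require('fs');
const crypto = require('crypto');

function getMd5Stream(file) {
    return new Promise((resolve, reject) => {
        const hash = crypto.createHash('md5');
        fs.createReadStream(file)
            .on('error', reject)
            .on('data', chunk => hash.update(chunk))
            .on('end', () => resolve(hash.digest('hex')));
    });
}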
There are a variety of different ways you could handle this code asynchronously. You could use the node async library to handle all of the callbacks more elegantly. If you don't want to dive into promises then that's the "easy" option. I put easy in quotes because promises are actually easier once you understand them well enough. The async lib is helpful, but it still leaves much to be desired in the way of error propagation, and there is a lot of boilerplate code you'll have to wrap all your calls in.
The better way is to use promises. Async/await is still pretty new; it only landed natively in Node 7.6, so anything older needs a preprocessor like Babel or TypeScript. Also, async/await uses promises under the hood anyway.
Here is how I would do it using promises, even included a file stats cache for maximum performance:
const fs = require('fs');
const crypto = require('crypto');
const Promise = require('bluebird');

const fileList = ['file1', 'file2', 'file3'];

// Use Bluebird's Promise.promisifyAll utility to turn all of fs'
// async functions into promise returning versions of them.
// The new promise-enabled methods will have the same name but with
// a suffix of "Async". Ex: fs.stat will be fs.statAsync.
Promise.promisifyAll(fs);

// Create a cache to store the file stats if we're planning to get
// multiple stats from the same file.
let cache = {
    fileName: null,
    fileStats: null
};

const getFileStats = (fileName, prop) => {
    if (cache.fileName === fileName) {
        return cache.fileStats[prop];
    }
    // Return a promise that eventually resolves to the data we're after
    // but also stores fileStats in our cache for future calls.
    return fs.statAsync(fileName).then(fileStats => {
        cache.fileName = fileName;
        cache.fileStats = fileStats;
        return fileStats[prop];
    });
};

const getMd5Hash = file => {
    // Return a promise that eventually resolves to the hash we're after.
    return fs.readFileAsync(file).then(fileData => {
        const hash = crypto.createHash('md5');
        hash.update(fileData);
        return hash.digest('hex');
    });
};

// Create a promise that immediately resolves with our fileList array.
// Use Bluebird's Promise.map utility. Works very similar to Array.map
// except it expects all array entries to be promises that will
// eventually be resolved to the data we want.
let results = Promise.resolve(fileList).map(fileName => {
    return Promise.all([
        // This first gets a promise that starts resolving file stats
        // asynchronously. When the promise resolves it will store file
        // stats in a cache and then return the stats value we're after.
        // Note that the final return is not a promise, but returning raw
        // values from promise handlers implicitly does
        // Promise.resolve(rawValue)
        getFileStats(fileName, 'ctime'),
        // This one will not return a promise. It will see cached file
        // stats for our file and return the stats value from the cache
        // instead. Since it's being returned into a Promise.all, it will
        // be implicitly wrapped in Promise.resolve(rawValue) to fit the
        // promise paradigm.
        getFileStats(fileName, 'size'),
        // First returns a promise that begins resolving the file data for
        // our file. A promise handler in the function will then perform
        // the operations we need to do on the file data in order to get
        // the hash. The raw hash value is returned in the end and
        // implicitly wrapped in Promise.resolve as well.
        getMd5Hash(fileName)
    ])
        // .spread is a bluebird shortcut that replaces .then. If the value
        // being resolved is an array (which it is because Promise.all will
        // resolve an array containing the results in the same order as we
        // listed the calls in the input array) then .spread will spread the
        // values in that array out and pass them in as individual function
        // parameters.
        .spread((dateTime, fileSize, md5Hash) => [fileName, { dateTime, fileSize, md5Hash }]);
}).catch(error => {
    // Any errors returned by any of the Async functions in this promise
    // chain will be propagated here.
    console.log(error);
});
Here's the code again but without comments to make it easier to look at:
const fs = require('fs');
const crypto = require('crypto');
const Promise = require('bluebird');

const fileList = ['file1', 'file2', 'file3'];

Promise.promisifyAll(fs);

let cache = {
    fileName: null,
    fileStats: null
};

const getFileStats = (fileName, prop) => {
    if (cache.fileName === fileName) {
        return cache.fileStats[prop];
    }
    return fs.statAsync(fileName).then(fileStats => {
        cache.fileName = fileName;
        cache.fileStats = fileStats;
        return fileStats[prop];
    });
};

const getMd5Hash = file => {
    return fs.readFileAsync(file).then(fileData => {
        const hash = crypto.createHash('md5');
        hash.update(fileData);
        return hash.digest('hex');
    });
};

let results = Promise.resolve(fileList).map(fileName => {
    return Promise.all([
        getFileStats(fileName, 'ctime'),
        getFileStats(fileName, 'size'),
        getMd5Hash(fileName)
    ]).spread((dateTime, fileSize, md5Hash) => [fileName, { dateTime, fileSize, md5Hash }]);
}).catch(console.log);
In the end, results will be an array like the one below, which should match the results of your original code but perform much better in a benchmark:
[
    ['file1', { dateTime: 'data here', fileSize: 'data here', md5Hash: 'data here' }],
    ['file2', { dateTime: 'data here', fileSize: 'data here', md5Hash: 'data here' }],
    ['file3', { dateTime: 'data here', fileSize: 'data here', md5Hash: 'data here' }]
]
Apologies in advance for any typos. Didn't have the time or ability to actually run any of this. I looked over it quite extensively though.
After discovering that async/await is in node as of 7.6 I decided to play with it a bit last night. It seems most useful for recursive async tasks that don't need to be done in parallel, or for nested async tasks that you might wish you could write synchronously. For what you needed here there isn't any mind-blowing way to use async/await that I can see but there are a few places where the code would read more cleanly. Here's the code again but with a few little async/await conveniences.
const fs = require('fs');
const crypto = require('crypto');
const Promise = require('bluebird');

const fileList = ['file1', 'file2', 'file3'];

Promise.promisifyAll(fs);

let cache = {
    fileName: null,
    fileStats: null
};

async function getFileStats (fileName, prop) {
    if (cache.fileName === fileName) {
        return cache.fileStats[prop];
    }
    let fileStats = await fs.statAsync(fileName);
    cache.fileName = fileName;
    cache.fileStats = fileStats;
    return fileStats[prop];
}

async function getMd5Hash (file) {
    let fileData = await fs.readFileAsync(file);
    const hash = crypto.createHash('md5');
    hash.update(fileData);
    return hash.digest('hex');
}

let results = Promise.resolve(fileList).map(fileName => {
    return Promise.all([
        getFileStats(fileName, 'ctime'),
        getFileStats(fileName, 'size'),
        getMd5Hash(fileName)
    ]).spread((dateTime, fileSize, md5Hash) => [fileName, { dateTime, fileSize, md5Hash }]);
}).catch(console.log);
I would make getCdate, getFSize, and getMd5 all asynchronous and promisified, then wrap them in another asynchronous promise-returning function, here called statFile.
function statFile(file) {
    return Promise.all([
        getCdate(file),
        getFSize(file),
        getMd5(file)
    ]).then(([datetime, filesize, md5hash]) => ({ datetime, filesize, md5hash })) // Promise.all resolves with a single array, so destructure it
        .catch(/*handle error*/);
}
Then you could change your mapping function to
const promises = fileList.map(statFile);
Then it's simple to use Promise.all:
Promise.all(promises)
    .then(filesObj => /*do something*/)
    .catch(err => /*handle error*/);
This leaves things modular, doesn't require async/await, allows you to plug extra functions into statFile, and preserves your file order.
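Since the question targets Node 8.4.0 and prefers built-ins over external modules, here's a sketch of the same flow using only util.promisify instead of Bluebird:

const fs = require('fs');
const util = require('util');
const crypto = require('crypto');

const stat = util.promisify(fs.stat);
const readFile = util.promisify(fs.readFile);

const fileList = ['file1', 'file2', 'file3'];

const getCdate = file => stat(file).then(stats => stats.ctime);
const getFSize = file => stat(file).then(stats => stats.size);
const getMd5 = file => readFile(file).then(data =>
    crypto.createHash('md5').update(data).digest('hex'));

const statFile = file =>
    Promise.all([getCdate(file), getFSize(file), getMd5(file)])
        .then(([datetime, filesize, md5hash]) => [file, { datetime, filesize, md5hash }]);

// Promise.all preserves input order, so the original fileList order is kept
Promise.all(fileList.map(statFile))
    .then(filesObj => console.log(filesObj))
    .catch(err => console.error(err));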
