I'm trying to understand what .on('end', ...) does in the node package request.
My code:
const fs = require('fs');
const request = require('request');
function downloadAsset(relativeAssetURL, fileName) {
return new Promise((resolve, reject) => {
try {
let writeStream = fs.createWriteStream(fileName);
var remoteImage = request(`https:${relativeAssetURL}`);
remoteImage.on('data', function(chunk) {
writeStream.write(chunk);
});
remoteImage.on('end', function() {
let stats = fs.statSync(fileName);
resolve({ fileName: fileName, stats: stats });
});
} catch (err) {
reject(err);
}
});
}
What I'm trying to do is download a remote image, get some file statistics, and then resolve the promise so my code can do other things.
What I'm finding is that the promise doesn't always resolve after the file has been downloaded; it may resolve a little before then. I thought that's what .on('end', ... ) was for.
What can I do to have this promise resolve after the image has been downloaded in full?
As the docs say:
The writable.write() method writes some data to the stream, and calls the supplied callback once the data has been fully handled.
So, writable.write() is asynchronous. Just because your last writeStream.write has been called does not necessarily mean that all write operations have been completed. You probably want to call the .end method, which means:
Calling the writable.end() method signals that no more data will be written to the Writable. The optional chunk and encoding arguments allow one final additional chunk of data to be written immediately before closing the stream. If provided, the optional callback function is attached as a listener for the 'finish' event.
So, try calling writeStream.end when the remoteImage request ends, and pass a callback to writeStream.end that resolves the Promise once the writing is finished:
function downloadAsset(relativeAssetURL, fileName) {
return new Promise((resolve, reject) => {
try {
const writeStream = fs.createWriteStream(fileName);
const remoteImage = request(`https:${relativeAssetURL}`);
remoteImage.on('data', function(chunk) {
writeStream.write(chunk);
});
remoteImage.on('end', function() {
writeStream.end(() => {
const stats = fs.statSync(fileName);
resolve({ fileName: fileName, stats: stats });
});
});
} catch (err) {
reject(err);
}
});
}
(also, try not to mix var and let/const - in an ES6+ environment, prefer const, which is easier to reason about and avoids the pitfalls of var, such as hoisting)
Related
OK, I saw this example of reading a stream and returning a promise using new Promise.
function readStream(stream, encoding = "utf8") {
stream.setEncoding(encoding);
return new Promise((resolve, reject) => {
let data = "";
stream.on("data", chunk => data += chunk);
stream.on("end", () => resolve(data));
stream.on("error", error => reject(error));
});
}
const text = await readStream(process.stdin);
My question is: why "new Promise"? Can I do it like in this 2nd version instead:
function readStream(stream, encoding = "utf8") {
stream.setEncoding(encoding);
let data = "";
stream.on("data", chunk => data += chunk);
stream.on("end", () => Promise.resolve(data));
stream.on("error", error => Promise.reject(error));
}
const text = await readStream(process.stdin);
Haven't tried it yet, but basically want to avoid the new keyword.
Some updates on the 2nd version, since async functions always return a Promise.
A function/method will return a Promise under the following circumstances:
You explicitly created and returned a Promise from its body.
You returned a Promise that exists outside the method.
You marked it as async.
const readStream = async (stream, encoding = "utf8") => {
stream.setEncoding(encoding);
let data = "";
stream.on("data", chunk => data += chunk);
stream.on("end", () => Promise.resolve(data));
stream.on("error", error => Promise.reject(error));
}
const text = await readStream(process.stdin);
How's this 3rd version?
If you want readStream to return a promise, you'll have to ... return a promise for readStream (returning a promise in some callback is not doing that).
What the first code is doing, is promisifying the stream API. And that's exactly how it should be done.
The second version of the code is based on a misunderstanding: it seems to hope that returning a promise in the callback passed to the stream.on method will somehow make readStream return that promise. But when the on callback is called, readStream has already returned. Since readStream has no return statement, it already returned undefined, not a promise.
As a side note, when the stream API calls the callback you passed to the on method, it does not even look at the returned value -- that is ignored.
The third version is an async function, so it now is guaranteed the function will return a promise. But as the function still does not execute a return statement, that promise is immediately resolved with value undefined. Again, the returned values in the callbacks are unrelated to the promise that the async function has already returned.
new keyword
If you want to avoid the new keyword, then realise that anything that can be done with promises can also be done without them. In the end promises are "only" a convenience.
For instance, you could do:
function readStream(stream, success, failure, encoding="utf8") {
let data = "";
stream.setEncoding(encoding);
stream.on("data", chunk => data += chunk);
stream.on("end", () => success(data));
stream.on("error", failure);
}
function processText(text) {
// ... do something with text
}
function errorHandler(error) {
// ... do something with the error
}
readStream(process.stdin, processText, errorHandler);
In typical Node style you would pass one callback, for both purposes, as last argument:
function readStream(stream, encoding="utf8", callback) {
let data = "";
stream.setEncoding(encoding);
stream.on("data", chunk => data += chunk);
stream.on("end", () => callback?.(null, data));
stream.on("error", err => callback?.(err, null));
}
function processText(err, text) {
if (err) {
// do something with err
return;
}
// ... do something with text
}
readStream(process.stdin, "utf8", processText);
And then you could use the util package to turn that into a promise-returning function:
const util = require('util');
const readStream = util.promisify(function (stream, encoding="utf8", callback) {
let data = "";
stream.setEncoding(encoding);
stream.on("data", chunk => data += chunk);
stream.on("end", () => callback?.(null, data));
stream.on("error", err => callback?.(err, null));
});
(async () => {
try {
const text = await readStream(stream, "utf8");
// do something with text
} catch(err) {
// do something with err
}
})();
Of course, under the hood the promisify function performs new Promise and we're back to where we started.
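As an aside, if the goal is simply to avoid writing new Promise yourself, one option is to let an async function consume the stream directly. This is a minimal sketch, assuming a Node version in which readable streams are async iterables (roughly Node 10 and later); the promise the async function returns then settles when the stream ends and rejects if the stream errors:
async function readStream(stream, encoding = "utf8") {
    stream.setEncoding(encoding);
    let data = "";
    // Readable streams are async iterables, so for await pulls in every chunk
    // and the loop only finishes once the stream has ended (or thrown).
    for await (const chunk of stream) {
        data += chunk;
    }
    return data;
}
const text = await readStream(process.stdin);
No promise constructor appears in your own code, but one is still created for you by the async function machinery.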
You need to construct and return a Promise so that the consumer of the function has something to hook into the asynchronous action being performed. (Another option would be to define the function to also take a callback as an argument.)
If you try to do it the way you're doing with the second snippet, readStream will not return anything, so await readStream(process.stdin); will resolve immediately, and it'll resolve to undefined.
Doing
stream.on("end", () => Promise.resolve(data));
and
stream.on("error", error => Promise.reject(error));
constructs new Promises at that point in the code, but you need the consumer of the function to have access to the Promise that resolves (or rejects) - and so you must have return new Promise at the top level of the function.
I am trying to create a script to download pages from multiple URLs using Node.js, but the loop doesn't wait for the request to finish and just keeps going. I also got a hint to use an async for loop, but it still didn't work.
Here's my code:
function GetPage(url){
console.log(` Downloading page ${url}`);
request({
url: `${url}`
},(err,res,body) => {
if(err) throw err;
console.log(` Writing html to file` );
fs.writeFile(`${url.split('/').slice(-1)[0]}`,`${body}`,(err) => {
if(err) throw err;
console.log('saved');
});
});
}
var list = [ 'https://www.someurl.com/page1.html', 'https://www.someurl.com/page2.html', 'https://www.someurl.com/page3.html' ]
const main = async () => {
for(let i = 0; i < list.length; i++){
console.log(` processing ${list[i]}`);
await GetPage(list[i]);
}
};
main().catch(console.error);
Output :
processing https://www.someurl.com/page1.html
Downloading page https://www.someurl.com/page1.html
processing https://www.someurl.com/page2.html
Downloading page https://www.someurl.com/page2.html
processing https://www.someurl.com/page3.html
Downloading page https://www.someurl.com/page3.html
Writing html to file
Writing html to file
saved
saved
Writing html to file
saved
There are a couple of problems with your code.
You are mixing callback-style code with code that should be using promises. Also, your getPage function is not async (it doesn't return a promise), so you cannot await it.
You just have to return a promise from your getPage() function, and correctly resolve it or reject it.
function getPage(url) {
  return new Promise((resolve, reject) => {
    console.log(` Downloading page ${url}`);
    request({ url: `${url}` }, (err, res, body) => {
      // Bail out early so we don't try to write a file after a failed request
      if (err) return reject(err);
      console.log(` Writing html to file`);
      fs.writeFile(`${url.replace(/\//g, '-')}.html`, `${body}`, (writeErr) => {
        if (writeErr) return reject(writeErr);
        console.log("saved");
        resolve();
      });
    });
  });
}
You don't have to change your main() function; its loop will now await the promise returned by getPage().
A for loop doesn't wait for a callback to finish; it just keeps executing. You need to either turn the getPage function into one that returns a promise, or use Promise.all, as shown below.
var list = [
"https://www.someurl.com/page1.html",
"https://www.someurl.com/page2.html",
"https://www.someurl.com/page3.html",
];
function getPage(url) {
  return new Promise((resolve, reject) => {
    console.log(` Downloading page ${url}`);
    request({ url: `${url}` }, (err, res, body) => {
      // Return after rejecting so we don't also attempt the file write
      if (err) return reject(err);
      console.log(` Writing html to file`);
      fs.writeFile(`${url}.html`, `${body}`, (writeErr) => {
        if (writeErr) return reject(writeErr);
        console.log("saved");
        resolve();
      });
    });
  });
}
const main = async () => {
  // Start all downloads in parallel and wait for every one of them to finish
  return Promise.all(list.map((path) => getPage(path)));
};
main().catch(console.error);
GetPage() is not built around promises and doesn't even return a promise so await on its result does NOTHING. await has no magic powers. It awaits a promise. If you don't give it a promise that properly resolves/rejects when your async operation is done, then the await does nothing. Your GetPage() function returns nothing so the await has nothing to do.
What you need is to fix GetPage() so it returns a promise that is properly tied to your asynchronous result. Because the request() library has been deprecated and is no longer recommended for new projects and because you need a promise-based solution anyway so you can use await with it, I'd suggest you switch to one of the alternative promise-based libraries recommended here. My favorite from that list is got(), but you can choose whichever one you like best. In addition, you can use fs.promises.writeFile() for promise-based file writing.
Here's how that code would look like using got():
const got = require('got');
const { URL } = require('url');
const path = require('path');
const fs = require('fs');
function getPage(url) {
console.log(` Downloading page ${url}`);
return got(url).text().then(data => {
// can't just use an URL for your filename as it contains potentially illegal
// characters for the file system
// so, add some code to create a sanitized filename here
// find just the root filename in the URL
let urlObj = new URL(url);
let filename = path.basename(urlObj.pathname);
if (!filename) {
filename = "index.html";
}
let extension = path.extname(filename);
if (!extension) {
filename += ".html";
} else if (extension === ".") {
filename += "html";
}
console.log(` Writing file ${filename}`)
return fs.promises.writeFile(filename, data);
});
}
const list = ['https://www.someurl.com/page1.html', 'https://www.someurl.com/page2.html', 'https://www.someurl.com/page3.html'];
async function main() {
for (let url of list) {
console.log(` processing ${url}`);
await getPage(url);
}
}
main().then(() => {
console.log("all done");
}).catch(console.error);
If you put real URLs in the array, this is directly runnable in nodejs. I ran it myself with my own URLs.
Summary of Changes and Improvements:
Switched from request() to got() because it's promise-based and not deprecated.
Modified getPage() to return a promise that represents the asynchronous operations in the function.
Switched to fs.promises.writeFile() so we are using only promises for asynchronous control-flow.
Added legal filename generation from the base path of the URL since you can't just use a full URL as a filename (at least in some file systems).
Switched to a simpler for/of loop.
I am piping a download into a file, but wanting to make sure the file doesn't already exist. I've put the code up here for an easier exploration: https://tonicdev.com/tolmasky/streaming-piping-on-open-tester <-- this will show you the outputs (code also below inline).
So the thing is, it seems to work fine except for the done (end) event. The file ends up on the hard drive fine, each step is followed correctly (the structure is to ensure no "parallel" steps happen that aren't necessary -- if I do got.stream(url).pipe(fs.createWriteStream({ flags: ... })), then the download will actually get kicked off even if the createWriteStream returns an error because the file is already there -- undesirable for the network).
The code is the following:
var fs = require("fs");
var got = require("got");
await download("https://www.apple.com", "./index.html");
function download(aURL, aDestinationFilePath)
{
return new Promise(function(resolve, reject)
{
fs.createWriteStream(aDestinationFilePath, { flags: "wx" })
.on("open", function()
{
const writeStream = this;
console.log("SUCCESSFULLY OPENED!");
got.stream(aURL)
.on("response", function(aResponse)
{
const contentLength = +aResponse.headers["content-length"] || 0;
console.log(aResponse.headers);
console.log("STARTING DOWNLOAD! " + contentLength);
this.on("data", () => console.log("certainly getting data"))
this.pipe(writeStream)
.on("error", reject)
.on("end", () => console.log("DONE!"))
.on("end", resolve);
})
})
.on("error", function(anError)
{
if (anError.code === "EEXIST") { console.log("oh");
resolve();}
else
reject(anError);
});
});
}
According to the stream docs, readable.pipe() returns the destination Writable stream, and the correct event to listen for when a Writable is done is 'finish', not 'end'.
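Applied to the download function in the question, that means resolving on the write stream's 'finish' event; a rough sketch of just the piping portion, using the same variable names as above:
got.stream(aURL)
    .on("error", reject)        // errors from the download itself
    .pipe(writeStream)          // pipe() returns the destination Writable
    .on("error", reject)        // errors while writing to disk
    .on("finish", function()    // fires once all data has been flushed to the file
    {
        console.log("DONE!");
        resolve();
    });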
How can I avoid using a recursion-like structure when I have several streams to open and I need one absolute end event to finish the logic?
var someArray = ['file1', 'file2', 'file3'];
someArray.forEach(function( file ) {
fs
.createReadStream( file )
.pipe( /* do some stuff */ )
.on('data', function( usageInfo ) {
// done?
});
});
I got several files I have to pipe through to some processes.
Currently what I'm getting is each end event individually.
I can absolutely start each stream at the same time. I just need to somehow collect the end?
I could invoke a function call for each end event and count it... that sounds hacky though?...
I feel like there is a way to do this with promises but I don't know how.
I feel like there is a way to do this with promises but I don't know how.
Yes, there is. As promises do represent asynchronous values, you'd get a promise for the end of one stream:
var p = new Promise(function(resolve, reject) {
fs.createReadStream(file)
.on('error', reject)
.pipe(/* do some stuff */)
.on('end', resolve)
.on('error', reject); // call reject(err) when something goes wrong
});
It could be used like p.then(function(usageInfo) { console.log("stream ended"); }).
Now if you create multiple promises, one for each filename in your array, all the streams will run in parallel and resolve their respective promise when done. You then can use Promise.all to collect - read "await" - all the results from each of them into a new promise for an array of results.
var promises = ['file1', 'file2', 'file3'].map(function(file) {
return new Promise(function(resolve, reject) {
…
});
});
Promise.all(promises).then(function(usageInfos) {
    console.log("all of them done", usageInfos);
}, function(err) {
    console.error("(At least) one of them failed", err);
});
Use a counter:
var someArray = ['file1', 'file2', 'file3'];
var still_processing = someArray.length;
someArray.forEach(function( file ) {
fs.createReadStream( file )
.pipe( /* do some stuff */ )
.on('end', function() {
still_processing--;
if (!still_processing) {
// done
}
});
});
This is the basic mechanism. This control flow pattern is encapsulated by the async.parallel() function in async.js:
var async = require('async');

var someArray = ['file1', 'file2', 'file3'];
var streams_to_process = [];
someArray.forEach(function( file ) {
streams_to_process.push(function(callback) {
var result = "";
fs.createReadStream( file )
.pipe( /* do some stuff */ )
.on('end', function() {
callback(null, result);
});
});
});
async.parallel(streams_to_process, function(err, results) {
// all done
});
Internally, async.parallel uses a counter captured in a closure to keep track of when all async processes (in this case the 'end' events) are done.
There are other libraries for this. Most promise libraries for example provide an .all() method that works the same way - internally keeping track of a counter value and firing the .then() callback when all is done.
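To make that counter idea concrete, here is a rough sketch of an .all()-style helper built on it (a hypothetical allWithCounter, not how any particular library actually implements it, and it skips edge cases such as an empty input array):
function allWithCounter(promises) {
    return new Promise(function(resolve, reject) {
        var remaining = promises.length;
        var results = new Array(remaining);
        promises.forEach(function(p, i) {
            Promise.resolve(p).then(function(value) {
                results[i] = value;        // keep results in their original order
                remaining--;
                if (remaining === 0) {     // the counter hits zero only once every
                    resolve(results);      // promise has fulfilled
                }
            }, reject);                    // any single failure rejects the whole thing
        });
    });
}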
I am using bluebird for promises.
I am trying to promisify the download module.
Here is my implementation:
var Promise = require('bluebird'),
    download = require('download');
var methodNameToPromisify = ["download"];
function EventEmitterPromisifier(originalMethod) {
// return a function
return function promisified() {
var args = [].slice.call(arguments);
// Needed so that the original method can be called with the correct receiver
var self = this;
// which returns a promise
return new Promise(function(resolve, reject) {
// We call the originalMethod here because if it throws,
// it will reject the returned promise with the thrown error
var emitter = originalMethod.apply(self, args);
emitter
.on("response", function(data) {
resolve(data);
})
.on("data ", function(data) {
resolve(data);
})
.on("error", function(err) {
reject(err);
})
.on("close", function() {
resolve();
});
});
};
};
download = { download: download };
Promise.promisifyAll(download, {
filter: function(name) {
return methodNameToPromisify.indexOf(name) > -1;
},
promisifier: EventEmitterPromisifier
});
Then using it:
return download.downloadAsync(fileURL, copyTo, {});
My problem is that it doesn't download all of the files (I have a list that gets passed to this function). What am I doing wrong?
An emitter does emit multiple data events, one for every chunk it receives. However, a promise represents only one future value; in your case you want that to be the complete response.
resolve is supposed to be called only once, to fulfill the promise with the passed value, which is then settled. Further calls will have no effect - and that's why you get only the first parts of your list.
Instead, you will need to accumulate all the data, and when the stream ends you can fulfill the promise with all of it.
var Promise = require('bluebird'),
download = require('download'),
    Buffer = require('buffer').Buffer; // should be global anyway
module.exports = {
downloadAsync: function promisifiedDownload() {
var args = arguments, self = this;
return new Promise(function(resolve, reject) {
// We call the originalMethod here because if it throws,
// it will reject the returned promise with the thrown error
var emitter = download.apply(self, args);
var buffers = [];
emitter.on("data", function(data) {
buffers.push(data);
}).on("error", function(err) {
reject(err);
}).on("close", function() {
resolve(Buffer.concat(buffers));
});
});
    }
};
Notice it's quite nonsensical to use promisifyAll when you only want to promisify a single method, so I've omitted it for simplicity.
You might also listen for the incoming response object, and attach the data listener directly to it. You can then use the end event instead of close.
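That variant could look roughly like this (a sketch, assuming the emitter's "response" event hands you the underlying response stream, as in your original code):
return new Promise(function(resolve, reject) {
    var emitter = download.apply(self, args);
    emitter.on("error", reject);
    emitter.on("response", function(response) {
        var buffers = [];
        response.on("data", function(chunk) {
            buffers.push(chunk);
        });
        response.on("error", reject);
        // 'end' fires on the response once the whole body has been received
        response.on("end", function() {
            resolve(Buffer.concat(buffers));
        });
    });
});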