No end event when piping inside "open" - node.js

I am piping a download into a file, but wanting to make sure the file doesn't already exist. I've put the code up here for an easier exploration: https://tonicdev.com/tolmasky/streaming-piping-on-open-tester <-- this will show you the outputs (code also below inline).
So the thing is, it seems to work fine except for the done (end) event. The file ends up on the hard drive fine, each step is followed correctly (the structure is to ensure no "parallel" steps happen that aren't necessary -- if I do got.stream(url).pipe(fs.createWriteStream({ flags: ... })), then the download will actually get kicked off even if the createWriteStream returns an error because the file is already there -- undesirable for the network).
The code is the following:
var fs = require("fs");
var got = require("got");
await download("https://www.apple.com", "./index.html");
function download(aURL, aDestinationFilePath)
{
return new Promise(function(resolve, reject)
{
fs.createWriteStream(aDestinationFilePath, { flags: "wx" })
.on("open", function()
{
const writeStream = this;
console.log("SUCCESSFULLY OPENED!");
got.stream(aURL)
.on("response", function(aResponse)
{
const contentLength = +aResponse.headers["content-length"] || 0;
console.log(aResponse.headers);
console.log("STARTING DOWNLOAD! " + contentLength);
this.on("data", () => console.log("certainly getting data"))
this.pipe(writeStream)
.on("error", reject)
.on("end", () => console.log("DONE!"))
.on("end", resolve);
})
})
.on("error", function(anError)
{
if (anError.code === "EEXIST") { console.log("oh");
resolve();}
else
reject(anError);
});
});
}

According to the stream docs, readable.pipe returns the destination Writable stream, and the correct event emitted when a Writable is done would be Event: 'finish'.

Related

Node JS - createWriteStream

I am going crazy trying to fix this bug so please help :-)
I am using https://pdfkit.org/
This creates a stream that when finished is piped to fs.createWriteStream
My issue is the first time the code runs this works and the PDF is generated.
The next time the Code runs a file with Zero Bytes is created.
I am calling the function from an API running on express.
The issue appears to be the async nature of fs.createWriteStream.
The stream finishes after the API has returned. I cannnot seem to find a way to block while confirming the file has been created.
What is odd is that the first time the code works run again it fails:
Here is the Pipe Function;
async function _writeFile(fileObj) {
let fileStream = fs.createWriteStream(fileObj.fileName)
pipeline(
doc,
fileStream,
async (err) => {
if (err) {
console.error('PDF failed', err);
return ('Pipeline failed', err)
} else {
console.log('PDF succeeded');
}
}
)
}
This is called from:
exports.drawReport = async (payload) => {
var date = new Date();
const timeStamp = date.toJSON();
let path = './controllers/tmp/'
var fileName = path + timeStamp + '.' + payload.type + '.pdf'
try {
// Start Report
await _startReport(payload)
// Check Starting position on page & add status box header
if (device_card_reference == 260) {
await _deviceTitle(payload);
}
// Add Devices
await _reportDevice(payload);
// Call Footer for final page
await _reportFooter()
console.log("PDF Done - Writing File")
// File Meta Data
let fileObj = {
type: payload.type,
siteId: payload.siteId,
fileName: fileName,
timeStamp: timeStamp
}
// Create file to store PDF
await _writeFile(fileObj)
doc.end()
console.log("PDF MADE?")
return (fileObj)
} catch (err) {
console.error('MakePDF ERROR: ' + err.message);
return (err.message)
}
}
pipeline runs asynchronously, so it's not awaited, which is why doc.end() runs before the file is done
try wrapping pipeline in a promise, and then resolve when the stream is done:
// function that returns a promise
function _writeFile(fileObj) {
return new Promise((resolve, reject) => {
const fileStream = fs.createWriteStream(fileObj.fileName);
pipeline(
doc,
fileStream,
async(err) => {
if (err) {
console.error('PDF failed', err);
// err, handle in `.catch`
reject({res:'Pipeline failed', err});
} else {
console.log('PDF succeeded');
// done, resolve, to move to doc.end
resolve('PDF succeeded');
}
}
)
});
}
add .catch() to handle error:
// Create file to store PDF
await _writeFile(fileObj).catch(err => console.log(err));
or even better, use stream promises API
const {pipeline } = require('stream/promises');
async function _writeFile(fileObj) {
const fileStream = fs.createWriteStream(fileObj.fileName);
await pipeline(doc, fileStream);
console.log('PDF succeeded');
}

why the for loop doesn't want to wait for the process to finish on a function at node js?

I am trying to create a script to download pages from multiple urls using node js but the loop didn't want to wait for the request to finish and continued printing, I also got a hint to use the async for loop, but still it didn't work.
here's my code
function GetPage(url){
console.log(` Downloading page ${url}`);
request({
url: `${url}`
},(err,res,body) => {
if(err) throw err;
console.log(` Writing html to file` );
fs.writeFile(`${url.split('/').slice(-1)[0]}`,`${body}`,(err) => {
if(err) throw err;
console.log('saved');
});
});
}
var list = [ 'https://www.someurl.com/page1.html', 'https://www.someurl.com/page2.html', 'https://www.someurl.com/page3.html' ]
const main = async () => {
for(let i = 0; i < list.length; i++){
console.log(` processing ${list[i]}`);
await GetPage(list[i]);
}
};
main().catch(console.error);
Output :
processing https://www.someurl.com/page1.html
Downloading page https://www.someurl.com/page1.html
processing https://www.someurl.com/page2.html
Downloading page https://www.someurl.com/page2.html
processing https://www.someurl.com/page3.html
Downloading page https://www.someurl.com/page3.html
Writing html to file
Writing html to file
saved
saved
Writing html to file
saved
There are a couple of problems with your code.
You are mixing code that uses the callback style programming and code that should be using promises. Also, your getPage function is not async (it doesn't return a promise) so you cannot await on it.
You just have to return a promise from your getPage() function, and correctly resolve it or reject it.
function getPage(url) {
return new Promise((resolve, reject) => {
console.log(` Downloading page ${url}`);
request({ url: `${url}` }, (err, res, body) => {
if (err) reject(err);
console.log(` Writing html to file`);
fs.writeFile(`${url.replace(/\//g,'-')}.html`, `${body}`, (writeErr) => {
if (writeErr) reject(writeErr);
console.log("saved");
resolve();
});
});
});
}
You don't have to change your main() function loop will await for the getPage() function.
For loop doesn't wait for callback to be finished, it will continue executing it. You need to turn either getPage function to promise or use Promise.all as shown below.
var list = [
"https://www.someurl.com/page1.html",
"https://www.someurl.com/page2.html",
"https://www.someurl.com/page3.html",
];
function getPage(url) {
return new Promise((resolve, reject) => {
console.log(` Downloading page ${url}`);
request({ url: `${url}` }, (err, res, body) => {
if (err) reject(err);
console.log(` Writing html to file`);
fs.writeFile(`${url}.html`, `${body}`, (writeErr) => {
if (writeErr) reject(writeErr);
console.log("saved");
resolve();
});
});
});
}
const main = async () => {
return new Promise((resolve, reject) => {
let promises = [];
list.map((path) => promises.push(getPage(path)));
Promise.all(promises).then(resolve).catch(reject);
});
};
main().catch(console.error);
GetPage() is not built around promises and doesn't even return a promise so await on its result does NOTHING. await has no magic powers. It awaits a promise. If you don't give it a promise that properly resolves/rejects when your async operation is done, then the await does nothing. Your GetPage() function returns nothing so the await has nothing to do.
What you need is to fix GetPage() so it returns a promise that is properly tied to your asynchronous result. Because the request() library has been deprecated and is no longer recommended for new projects and because you need a promise-based solution anyway so you can use await with it, I'd suggest you switch to one of the alternative promise-based libraries recommended here. My favorite from that list is got(), but you can choose whichever one you like best. In addition, you can use fs.promises.writeFile() for promise-based file writing.
Here's how that code would look like using got():
const got = require('got');
const { URL } = require('url');
const path = require('path');
const fs = require('fs');
function getPage(url) {
console.log(` Downloading page ${url}`);
return got(url).text().then(data => {
// can't just use an URL for your filename as it contains potentially illegal
// characters for the file system
// so, add some code to create a sanitized filename here
// find just the root filename in the URL
let urlObj = new URL(url);
let filename = path.basename(urlObj.pathname);
if (!filename) {
filename = "index.html";
}
let extension = path.extname(filename);
if (!extension) {
filename += ".html";
} else if (extension === ".") {
filename += "html";
}
console.log(` Writing file ${filename}`)
return fs.promises.writeFile(filename, data);
});
}
const list = ['https://www.someurl.com/page1.html', 'https://www.someurl.com/page2.html', 'https://www.someurl.com/page3.html'];
async function main() {
for (let url of list) {
console.log(` processing ${url}`);
await getPage(url);
}
}
main().then(() => {
console.log("all done");
}).catch(console.error);
If you put real URLs in the array, this is directly runnable in nodejs. I ran it myself with my own URLs.
Summary of Changes and Improvements:
Switched from request() to got() because it's promise-based and not deprecated.
Modified getPage() to return a promise that represents the asynchronous operations in the function.
Switched to fs.promises.writeFile() so we are using only promises for asynchronous control-flow.
Added legal filename generation from the base path of the URL since you can't just use a full URL as a filename (at least in some file systems).
Switched to simpler for/of loop

Promise around event-stream mapSync() not working

Currently i am trying to create a CSV reader that can handle very large CSV files. I chose for a streaming implementation with the event-stream NPM package.
I have created a function getNextp() that should return a promise and give me the next piece of data every time i call it.
"use strict";
const fs = require('fs');
const es = require('event-stream');
const csv = require('csv-parser');
class CsvFileReader {
constructor(file) {
this.file = file;
this.isStreamReading = false;
this.stream = undefined;
}
getNextP() {
return new Promise( (resolve) => {
if (this.isStreamReading === true) {
this.stream.resume();
} else {
this.isStreamReading = true;
// Start reading the stream.
this.stream = fs.createReadStream(this.file)
.pipe(csv())
.pipe(es.mapSync( (row) => {
this.stream.pause();
resolve(row);
}))
.on('error', (err) => {
console.error('Error while reading file.', err);
})
.on("end", () => {
resolve(undefined);
})
}
});
}
}
I call this then with this code.
const csvFileReader = new CsvFileReader("small.csv");
setInterval( () => {
csvFileReader.getNextP().then( (frame) => {
console.log(frame);
})
}, 1000);
However every time when i try this out i only get the first row and the subsequent rows i do not get. I can not figure out why this it not working. I have tried the same with a good old callback function and then it works without any problem.
Update:
So what i basically want is a function (getNext()) that returns me every time when i call it the next row of the CSV. Some rows can be buffered, but yeah until now i could not figure out how to do this with streams. So if somebody could give me a pointer on how to create a correct getNext() function that would be great.
I would like to ask if somebody understands what is going wrong here, and ask kindly to share his/hers knowledge.
Thank you in advance.

Nodejs download binary octet-stream

I am trying to download (meaning create an instance of the file on the server) a .pdf file from a server that returns it to me in binary format, with:
Content-Type = application / octet-stream.
After a bit of online research I came to write:
http.get(url.parse(pdfURL), res => {
let data = [];
console.log(res.statusCode);
res.on('data', chunk => {
data.push(chunk);
}).on('end', () => {
let buffer = Buffer.concat(data);
console.log(buffer.toString('base64'));
fs.open(path, 'w', (e, fd) => {
if (e) throw e;
fs.write(fd, buffer, 0, buffer.length, null, e => {
if (e) throw e;
fs.close(fd, () => console.log('Wrote successfully'));
});
});
});
});
Everything works properly, but when I try to open the generated pdf, it tells me that the file is corrupt and not readable.
Any idea what might have been wrong?
Thanks
Edit:
I noticed that with postman everything works as it should, so I think the way I treat the binary is wrong
Ok, i got it,
I wasn't de-gzipping the response, now works properly
This didn't work for me, tried so many different ways until I found got, an npm library that handles http requests, here's what worked for me:
const stream = require('stream');
const { promisify } = require('util');
const fs = require('fs');
const got = require('got');
const pipeline = promisify(stream.pipeline);
async function downloadImage(url, name) {
await pipeline(
got.stream(url),
fs.createWriteStream(name)
);
}
More info here: https://bleext.com/post/downloading-images-with-nodejs

Question about end of request for node/JS request package

I'm trying to understand what .on('end', ...) does in the node package request.
My code:
const fs = require('fs');
const request = require('request');
function downloadAsset(relativeAssetURL, fileName) {
return new Promise((resolve, reject) => {
try {
let writeStream = fs.createWriteStream(fileName);
var remoteImage = request(`https:${relativeAssetURL}`);
remoteImage.on('data', function(chunk) {
writeStream.write(chunk);
});
remoteImage.on('end', function() {
let stats = fs.statSync(fileName);
resolve({ fileName: fileName, stats: stats });
});
} catch (err) {
reject(err);
}
});
}
What I'm trying to do is download a remote image, get some file statistics, and then resolve the promise so my code can do other things.
What I'm finding is that the promise doesn't always resolve after the file has been downloaded; it may resolve a little before then. I thought that's what .on('end', ... ) was for.
What can I do to have this promise resolve after the image has been downloaded in full?
As the docs say:
The writable.write() method writes some data to the stream, and calls the supplied callback once the data has been fully handled.
So, writable.write() is asynchronous. Just because your last writeStream.write has been called does not necessarily mean that all write operations have been completed. You probably want to call the .end method, which means:
Calling the writable.end() method signals that no more data will be written to the Writable. The optional chunk and encoding arguments allow one final additional chunk of data to be written immediately before closing the stream. If provided, the optional callback function is attached as a listener for the 'finish' event.
So, try calling writeStream.end when the remoteImage request ends, and pass a callback to writeStream.end that resolves the Promise once the writing is finished:
function downloadAsset(relativeAssetURL, fileName) {
return new Promise((resolve, reject) => {
try {
const writeStream = fs.createWriteStream(fileName);
const remoteImage = request(`https:${relativeAssetURL}`);
remoteImage.on('data', function(chunk) {
writeStream.write(chunk);
});
remoteImage.on('end', function() {
writeStream.end(() => {
const stats = fs.statSync(fileName);
resolve({ fileName: fileName, stats: stats });
});
});
} catch (err) {
reject(err);
}
});
}
(also try not to mix var and let/const - in an ES6+ environment, prefer const, which is generally easier to read and has fewer problems, like hoisting)

Resources