Node JS - createWriteStream - node.js

I am going crazy trying to fix this bug so please help :-)
I am using https://pdfkit.org/
This creates a stream that when finished is piped to fs.createWriteStream
My issue is the first time the code runs this works and the PDF is generated.
The next time the Code runs a file with Zero Bytes is created.
I am calling the function from an API running on express.
The issue appears to be the async nature of fs.createWriteStream.
The stream finishes after the API has returned. I cannot seem to find a way to block while confirming the file has been created.
What is odd is that the code works the first time, but fails when it is run again:
Here is the Pipe Function;
// Starts piping `doc` into a new file at fileObj.fileName and logs the outcome.
// `doc`, `fs` and `pipeline` are defined outside this snippet (presumably the PDFKit
// document, Node's fs module and stream.pipeline — confirm against the full file).
// NOTE: pipeline() is neither awaited nor returned, so although this function is async
// its promise settles as soon as the pipeline has been started, not when the file is written.
async function _writeFile(fileObj) {
let fileStream = fs.createWriteStream(fileObj.fileName)
pipeline(
doc,
fileStream,
// Completion callback: `err` is set when the pipeline failed.
async (err) => {
if (err) {
console.error('PDF failed', err);
// The comma expression evaluates to `err`; the value is returned from this callback only,
// not from _writeFile.
return ('Pipeline failed', err)
} else {
console.log('PDF succeeded');
}
}
)
}
This is called from:
// Builds a PDF report for `payload` and returns metadata describing the output file.
// Returns the fileObj on success, or the error message string if any awaited step throws.
// `doc`, `device_card_reference` and the underscore-prefixed helpers are defined outside this snippet.
exports.drawReport = async (payload) => {
var date = new Date();
// Date#toJSON timestamp; it becomes part of the output file name.
const timeStamp = date.toJSON();
let path = './controllers/tmp/'
var fileName = path + timeStamp + '.' + payload.type + '.pdf'
try {
// Start Report
await _startReport(payload)
// Check Starting position on page & add status box header
if (device_card_reference == 260) {
await _deviceTitle(payload);
}
// Add Devices
await _reportDevice(payload);
// Call Footer for final page
await _reportFooter()
console.log("PDF Done - Writing File")
// File Meta Data
let fileObj = {
type: payload.type,
siteId: payload.siteId,
fileName: fileName,
timeStamp: timeStamp
}
// Create file to store PDF
// NOTE: _writeFile is awaited first and doc.end() is only called afterwards.
await _writeFile(fileObj)
doc.end()
console.log("PDF MADE?")
return (fileObj)
} catch (err) {
// Errors are reported to the caller as a plain message string, not rethrown.
console.error('MakePDF ERROR: ' + err.message);
return (err.message)
}
}

pipeline runs asynchronously, so it's not awaited, which is why doc.end() runs before the file is done
try wrapping pipeline in a promise, and then resolve when the stream is done:
// function that returns a promise
/**
 * Pipes the PDF document stream `doc` into a file and reports completion.
 *
 * NOTE(review): the pipeline only finishes once `doc` has ended, so `doc.end()`
 * presumably has to be called before (not after) the returned promise is
 * awaited — confirm against the caller.
 *
 * @param {{fileName: string}} fileObj - File metadata; only `fileName` is read.
 * @returns {Promise<string>} Resolves with 'PDF succeeded' once the pipeline
 *   has completed. Rejects with an Error whose `res` is 'Pipeline failed' and
 *   whose `err` / `cause` is the underlying stream error.
 */
function _writeFile(fileObj) {
  return new Promise((resolve, reject) => {
    const fileStream = fs.createWriteStream(fileObj.fileName);
    // Plain (non-async) callback: pipeline ignores the callback's return value,
    // so an async callback would only create a promise nobody observes.
    pipeline(doc, fileStream, (err) => {
      if (err) {
        console.error('PDF failed', err);
        // Reject with a real Error (stack trace, instanceof checks) while
        // keeping the `res` / `err` fields of the former plain-object rejection.
        const failure = new Error('Pipeline failed', { cause: err });
        failure.res = 'Pipeline failed';
        failure.err = err;
        reject(failure);
        return;
      }
      console.log('PDF succeeded');
      resolve('PDF succeeded');
    });
  });
}
add .catch() to handle error:
// Create file to store PDF
await _writeFile(fileObj).catch(err => console.log(err));
or even better, use stream promises API
const {pipeline } = require('stream/promises');
/**
 * Streams the document `doc` into the file named by `fileObj.fileName` and
 * waits for the promise-based pipeline to complete before logging success.
 *
 * @param {{fileName: string}} fileObj - File metadata; only `fileName` is read.
 * @returns {Promise<void>} Settles when the pipeline settles.
 */
async function _writeFile(fileObj) {
  const { fileName } = fileObj;
  const output = fs.createWriteStream(fileName);

  await pipeline(doc, output);

  console.log('PDF succeeded');
}

Related

multiple IF conditions execute one by one in nodejs using Async/await

How can I run these if statements synchronously? I have tried many times but have been unable to fix this (I am a Node.js beginner).
I am trying to use async/await here, but it does not work.
How can I make sure the first if block has completed before the second if statement runs?
Please help.
here is my dummy codes:
record1='10';
record2='20';
// Entry point of the example: for each truthy record, logs a message and starts firstJob.
// NOTE: firstJob is async, but its promise is neither awaited nor chained here, so the
// second `if` is reached before the first job has finished.
function main(){
if(record1){
console.log('-------------------------');
console.log('I am record 1')
// The trailing comma joins this assignment and the firstJob call into one comma expression.
val='John',
firstJob(val)
}
if(record2){
console.log('I am record 2')
// Same comma-expression form as above.
val='Rahul',
firstJob(val)
}
}
async function firstJob(val){
console.log('Hello, I am ' + val)
await secondJob(val);
}
async function secondJob(val){
console.log(val+ ' is a nodeJs beginner!')
await listFiles()
}
// Logs the end-of-condition message.
// NOTE: `arg='pass'` is an assignment, not a comparison, so the condition is always
// truthy regardless of the argument that was passed in.
function thirdJob(arg){
if (arg='pass'){
console.log('This is end of the one if condition')
}
}
// Returns a promise and, after a 2 second delay, logs the file entries found in the
// parent directory of __dirname.
// NOTE: neither resolve nor reject is ever called, so the returned promise never settles.
function listFiles (){
return new Promise((resolve, reject) => {
setTimeout(() => {
const path = require('path');
const fs = require('fs');
// `exit` is not used anywhere in this function.
const { exit } = require('process');
const directoryPath = path.join(__dirname, '../');
console.log('List of Avaialble Files :');
fs.readdir(directoryPath, { withFileTypes: true },function (err, files) {
if (err) {
return console.log('Unable to scan directory: ' + err);
}
// Only entries for which isFile() is true are logged.
files.forEach(function (file) {
if (file.isFile()){
console.log(file);
}
});
});
// Sets `arg` and calls thirdJob without passing it; this runs right after readdir
// has been started, without waiting for its callback.
arg='pass'
thirdJob();
}, 2000)
})
}
main();
The short answer is you can't "make them run synchronously".
You have to patiently wait until they're done to get the answer.
So, without making main async, you have to use the promises the old fashioned way, and sequence the actions using then.
record1='10';
record2='20';
/**
 * Runs the job for record 1 and then the job for record 2, one after the other.
 *
 * Each step is a `.then` handler that returns the promise from firstJob only
 * when its record is truthy, so the next step waits for it. The chain itself
 * is not returned to the caller.
 */
function main() {
  const runFirstRecord = () => {
    if (!record1) {
      return undefined;
    }
    console.log('-------------------------');
    console.log('I am record 1');
    val = 'John';
    return firstJob(val);
  };

  const runSecondRecord = () => {
    if (!record2) {
      return undefined;
    }
    console.log('I am record 2');
    val = 'Rahul';
    return firstJob(val);
  };

  Promise.resolve().then(runFirstRecord).then(runSecondRecord);
}
async function firstJob(val){
console.log('Hello, I am ' + val)
await secondJob(val);
}
async function secondJob(val){
console.log(val+ ' is a nodeJs beginner!')
await listFiles()
}
main();
I've just included the snippet for the if and promise stuff. The gist here is that you conditionally chain together your calls to firstJob.
Each call to then allows you to (potentially, it's not required) attach another promise to the execution of the one that just finished. In the snippet above, we're doing that only if the condition is truthy by returning the promise from the calls to firstJob.
By the way, your implementation of listFiles isn't ever going to finish because you never invoke resolve to the promise you made inside the function. This solves the problem by resolving your promise once the looping is done.
/**
 * Logs the regular files in the parent directory of this module after a
 * 2 second delay.
 *
 * @returns {Promise<void>} Resolves once every file entry has been logged;
 *   rejects with the readdir error if the directory cannot be scanned.
 */
function listFiles() {
  return new Promise((resolve, reject) => {
    setTimeout(() => {
      const path = require('path');
      const fs = require('fs');
      const directoryPath = path.join(__dirname, '../');
      console.log('List of Avaialble Files :');
      fs.readdir(directoryPath, { withFileTypes: true }, (err, files) => {
        if (err) {
          console.log('Unable to scan directory: ' + err);
          reject(err);
          // Stop here: `files` is undefined on failure, so iterating it would
          // throw inside the callback after the promise was already rejected.
          return;
        }
        files.forEach((file) => {
          if (file.isFile()) {
            console.log(file);
          }
        });
        resolve();
      });
      // Kept from the original flow: this runs right after readdir has been
      // started, without waiting for its callback.
      arg = 'pass';
      thirdJob();
    }, 2000);
  });
}
Note the added call to resolve once you've completed your loop.
I also added a call to reject in the case that readdir returned an error, since that is the proper way to propagate it with your manual promise.
A few more pointers, generally modules are required once at the top of the file, instead of dynamically inside of a function. The penalty for doing it that way you have is not bad, there is a cache for required modules. It's just not idiomatic.
if (arg='pass'){
Doesn't do any equality check, that's an assignment, you need ==, or === if you want to check for equality.

Using fs.read inside promise does not work

I am trying to do a fs.read after the promise job is done by using the .then()
Here is how my code looks like
(async () => {
const feed = await parser.parseURL('https://www.nasa.gov/rss/dyn/breaking_news.rss');
console.log(feed.title);
const items = [];
await Promise.all(feed.items.map(async (currentItem) => {
// some code here to create data
items.push(data);
})).then(
items.forEach((element) => {
const file = downloadFile(element.url);
let checksumValue;
try {
fs.readFileSync(file, (_err, data) => {
checksumValue = generateChecksum(data);
console.log(`The checksum is: ${checksumValue}`);
// Delete the downloaded file
deleteFile(file);
});
} catch (error) {
console.error(error);
// expected output: ReferenceError: nonExistentFunction is not defined
// Note - error messages will vary depending on browse
}
})(),
);
})();
But it doesn't operate this piece of code :
fs.readFileSync(file, (_err, data) => {
checksumValue = generateChecksum(data);
console.log(`The checksum is: ${checksumValue}`);
// Delete the downloaded file
deleteFile(file);
});
How should I read the file?
fs.readFileSync is sync, so it doesn't take a callback.
Either use the non-sync version:
// Asynchronous variant: the checksum is computed inside the completion callback.
fs.readFile(file, (err, data) => {
  // Without this guard a failed read would hand `undefined` to
  // generateChecksum and the actual error would be lost.
  if (err) {
    console.error(err);
    return;
  }
  checksumValue = generateChecksum(data);
  console.log(`The checksum is: ${checksumValue}`);
  // Delete the downloaded file
  deleteFile(file);
});
or use it as intended:
const data = fs.readFileSync(file);
checksumValue = generateChecksum(data);
console.log(`The checksum is: ${checksumValue}`);
// Delete the downloaded file
deleteFile(file);

Not always getting the expected data from the file

Using node.js, I have created a 'search(string)' function which sends API request, captures the response payload object and write it to a file.
In my test file I call that function in two different tests with different parameters and then read the saved response. My assertion sometimes fails, and I can see that the data read from the file is getting mixed up between the first call and the second call (or vice versa).
It looks like the file is sometimes read before it has been written. Any thoughts?
Code example:
client.js:
const search = function(t)
{
fetch(`http://www.example.com/?t=${t}&apikey=${apiKey}`)
.then(function (response) {
return response.json();
})
.then( ( function(data){
writeData(data)
}))
.catch(function (err) {
console.log('error: ' + err);
});
function writeData(data)
{
let result = JSON.stringify(data);
fs.writeFileSync('./filepath/resultData.json', result)
}
--------------------------------------------------------------------------
test.js
function 1()
{
search('A')
rawData = fs.readFileSync('./filepath/resultData.json')
r = JSON.parse(rawData);
console.log(r.Title)
}
function 2()
{
search('B')
rawData = fs.readFileSync('./filepath/resultData.json')
r = JSON.parse(rawData);
console.log(r.Title)
}
sometimes I get the same value for r.title and sometimes different.
Your search function makes an asynchronous fetch call, which resolves long after your test has already read the file. Have your function return the promise and use .then to check the validity of the data.
/**
 * Fetches the search result for `t` and persists it through writeData.
 *
 * @param {string} t - Search term, interpolated into the `t` query parameter.
 * @returns {Promise<object>} Resolves with the parsed JSON payload after
 *   writeData has been called with it. Rejects with the original
 *   fetch / parse / write error.
 */
const search = function (t) {
  return fetch(`http://www.example.com/?t=${t}&apikey=${apiKey}`)
    .then((response) => response.json())
    .then((data) => {
      writeData(data);
      // Not strictly needed by the callers shown, but keeps the chain value-bearing.
      return data;
    })
    .catch((err) => {
      console.log('error: ' + err);
      // The handler used to `return error`, an undefined name, so every failure
      // surfaced as a ReferenceError. Rethrow the real error instead, so callers
      // still see a rejection and do not go on to read a stale file.
      throw err;
    });
};
/**
 * Serializes `data` as JSON and synchronously writes it to the shared
 * result file './filepath/resultData.json'.
 *
 * @param {*} data - Value to serialize with JSON.stringify.
 */
function writeData(data) {
  const targetFile = './filepath/resultData.json';
  fs.writeFileSync(targetFile, JSON.stringify(data));
}
--------------------------------------------------------------------------
test.js
function 1() {
search('A').then(() => {
rawData = fs.readFileSync('./filepath/resultData.json')
r = JSON.parse(rawData);
console.log(r.Title);
});
}
function 2() {
search('B').then(() => {
rawData = fs.readFileSync('./filepath/resultData.json')
r = JSON.parse(rawData);
console.log(r.Title)
});
}
The search method calls fetch, which is executed asynchronously. This means that the code reading the data might be executed before the result of the fetch has been written to the file.
This is a race condition, it is not predictable which happens first, writing or reading the data.
function 1()
{
search('A') // Fetch data is executed, data gets written in `then` block
// which means that the data gets written once fetching the
// data has been completed.
// The time this operation needs depends on the network
// speed and server load for example.
// The code below might get executed before the fetch has
// finished and the data has been written to disk
rawData = fs.readFileSync('./filepath/resultData.json')
r = JSON.parse(rawData);
console.log(r.Title)
}
One way to solve this is to re-structure the flow so that function 1 and function 2 are passed as callback to the search function as follows (in fact, it is not necessary to have function 2 in this case). Additionally writing/reading to/from disk can be omitted as well by passing the data directly to function1 when calling it as callback inside the search function:
/**
 * Fetches the search result for `t` and hands the parsed JSON to `callback`.
 *
 * Nothing is returned; the result is only delivered through the callback.
 * Fetch or parse failures, and errors thrown by `callback` itself, are logged
 * by the trailing catch handler.
 *
 * @param {string} t - Search term, interpolated into the `t` query parameter.
 * @param {function(object): void} callback - Receives the parsed response body.
 */
const search = function (t, callback) {
  fetch(`http://www.example.com/?t=${t}&apikey=${apiKey}`)
    .then((response) => response.json())
    .then((data) => {
      callback(data);
    })
    .catch((err) => {
      console.log('error: ' + err);
    });
}; // the function body was previously left unclosed, which is a syntax error
--------------------------------------------------------------------------
test.js
/**
 * Callback for search: logs the title of the result.
 *
 * `data` is the value produced by `response.json()`, i.e. it is already a
 * parsed object. The previous body called JSON.parse on `rawData`, a name
 * that does not exist in this function, so it could never succeed.
 *
 * @param {{Title: string}} data - Parsed search response.
 */
function function1(data) {
  console.log(data.Title);
}
search('A', function1)
search('B', function1)

How to wait for a stream to finish piping? (Nodejs)

I have a for loop array of promises, so I used Promise.all to go through them and called then afterwards.
let promises = [];
promises.push(promise1);
promises.push(promise2);
promises.push(promise3);
Promise.all(promises).then((responses) => {
for (let i = 0; i < promises.length; i++) {
if (promise.property === something) {
//do something
} else {
let file = fs.createWriteStream('./hello.pdf');
let stream = responses[i].pipe(file);
/*
I WANT THE PIPING AND THE FOLLOWING CODE
TO RUN BEFORE NEXT ITERATION OF FOR LOOP
*/
stream.on('finish', () => {
//extract the text out of the pdf
extract(filePath, {splitPages: false}, (err, text) => {
if (err) {
console.log(err);
} else {
arrayOfDocuments[i].text_contents = text;
}
});
});
}
}
promise1, promise2, and promise3 are some HTTP requests, and if one of them is an application/pdf, then I write it to a stream and parse the text out of it. But this code runs the next iteration before parsing the text out of the PDF. Is there a way to make the code wait until the piping to the stream and the extraction are finished before moving on to the next iteration?
Without async/await, it's quite nasty. With async/await, just do this:
Promise.all(promises).then(async (responses) => {
for (...) {
await new Promise(fulfill => stream.on("finish", fulfill));
//extract the text out of the PDF
}
})
Something like the following would also work. I use this pattern fairly often:
let promises = [];
promises.push(promise1);
promises.push(promise2);
promises.push(promise3);
function doNext(){
if(!promises.length) return;
promises.shift().then((resolved) =>{
if(resolved.property === something){
...
doNext();
}else{
let file = fs.createWriteStream('./hello.pdf');
let stream = resolved.pipe(file);
stream.on('finish', () =>{
...
doNext();
});
}
})
}
doNext();
or break up the handler to a controller and Promisified handler:
/**
 * Handles one resolved response: either nothing needs streaming, or the
 * response is piped to disk and the promise settles when the write finishes.
 *
 * The original sketch was not valid JavaScript (`new Promise(resolve, reject){`
 * and elided statements). The elided parts are filled in from the surrounding
 * example ('./hello.pdf', `.pipe(file)`); `something` is defined by the caller's code.
 *
 * @param {object} obj - Resolved response; must be pipeable unless
 *   `obj.property === something`.
 * @returns {Promise<void>} Resolves when no streaming is needed or when the
 *   write stream emits 'finish'; rejects if the write stream emits 'error'.
 */
function streamOrNot(obj) {
  return new Promise((resolve, reject) => {
    if (obj.property === something) {
      resolve();
      return;
    }
    const file = fs.createWriteStream('./hello.pdf');
    const stream = obj.pipe(file);
    // Without an error listener a failed write would leave the promise pending.
    stream.on('error', reject);
    stream.on('finish', () => {
      // ... per-document work (e.g. extracting the text) goes here ...
      resolve();
    });
  });
}
function doNext(){
if(!promises.length) return;
return promises.shift().then(streamOrNot).then(doNext);
}
doNext()
Use await with stream.pipeline() instead of stream.pipe():
import * as StreamPromises from "stream/promises";
...
await StreamPromises.pipeline(sourceStream, destinationStream);
You can write the else part inside a self invoked function. So that the handling of stream will happen in parallel
(function(i) {
let file = fs.createWriteStream('./hello.pdf');
let stream = responses[i].pipe(file);
/*
I WANT THE PIPING AND THE FOLLOWING CODE
TO RUN BEFORE NEXT ITERATION OF FOR LOOP
*/
stream.on('finish', () => {
//extract the text out of the pdf
extract(filePath, {splitPages: false}, (err, text) => {
if (err) {
console.log(err);
}
else {
arrayOfDocuments[i].text_contents = text;
}
});
});
})(i)
Else you can handle the streaming part as part of the original/individual promise itself.
As of now you are creating the promise and adding it to array, instead of that you add promise.then to the array(which is also a promise). And inside the handler to then you do your streaming stuff.

Async/Await not waiting

I'm running into an issue which I don't fully understand. I feel like there are likely concepts which I haven't grasped, code that could be optimized, and possibly a bug thrown in for good measure.
To greatly simplify the overall flow:
A request is made to an external API
The returned JSON object is parsed and scanned for link references
If any link references are found, additional requests are made to populate/replace link references with real JSON data
Once all link references have been replaced, the original request is returned and used to build content
Here, is the original request (#1):
await Store.get(Constants.Contentful.ENTRY, Contentful[page.file])
Store.get is represented by:
async get(type, id) {
return await this._get(type, id);
}
Which calls:
// Returns a promise for the entry/asset with the given id, reusing the _json cache when
// it already holds a value for that id. Entries are passed through _scan before the
// promise resolves; any other response rejects with an error string.
_get(type, id) {
// NOTE: the executor is async, so an exception raised by one of the awaits below rejects
// the executor's own (ignored) promise and never reaches reject().
return new Promise(async (resolve, reject) => {
// Use the cached value if present, otherwise fetch it and store it in the cache.
var data = _json[id] = _json[id] || await this._api(type, id);
console.log(data)
if(isAsset(data)) {
resolve(data);
} else if(isEntry(data)) {
// Wait for _scan to finish with this entry before resolving.
await this._scan(data);
resolve(data);
} else {
const error = 'Response is not entry/asset.';
console.log(error);
reject(error);
}
});
}
The API call is:
_api(type, id) {
return new Promise((resolve, reject) => {
Request('http://cdn.contentful.com/spaces/' + Constants.Contentful.SPACE + '/' + (!type || type === Constants.Contentful.ENTRY ? 'entries' : 'assets') + '/' + id + '?access_token=' + Constants.Contentful.PRODUCTION_TOKEN, (error, response, data) => {
if(error) {
console.log(error);
reject(error);
} else {
data = JSON.parse(data);
if(data.sys.type === Constants.Contentful.ERROR) {
console.log(data);
reject(data);
} else {
resolve(data);
}
}
});
});
}
When an entry is returned, it is scanned:
// Walks data.fields and injects the referenced data in place of link references.
// Rejects with an error string when data or data.fields is missing.
_scan(data) {
return new Promise((resolve, reject) => {
if(data && data.fields) {
const keys = Object.keys(data.fields);
// NOTE: forEach does not wait for the async callback, so the callbacks for all keys
// run concurrently.
keys.forEach(async (key, i) => {
var val = data.fields[key];
if(isLink(val)) {
var child = await this._get(val.sys.linkType.toUpperCase(), val.sys.id);
// index is undefined here, so _inject replaces the whole field.
this._inject(data.fields, key, undefined, child);
} else if(isLinkArray(val)) {
// `await*` is not standard JavaScript; presumably the Babel 5 "await all" syntax —
// confirm against the build setup.
var children = await* val.map(async (link) => await this._get(link.sys.linkType.toUpperCase(), link.sys.id));
children.forEach((child, index) => {
this._inject(data.fields, key, index, child);
});
} else {
await new Promise((resolve) => setTimeout(resolve, 0));
}
// NOTE: resolves as soon as the callback for the LAST key finishes, even if callbacks
// for earlier keys are still waiting on their requests.
if(i === keys.length - 1) {
resolve();
}
});
} else {
const error = 'Required data is unavailable.';
console.log(error);
reject(error);
}
});
}
If link references are found, additional requests are made and then the resulting JSON is injected into the original JSON in place of the reference:
_inject(fields, key, index, data) {
if(isNaN(index)) {
fields[key] = data;
} else {
fields[key][index] = data;
}
}
Notice, I'm using async, await, and Promises in, I believe, their intended manner. What ends up happening: the calls for referenced data (the gets resulting from _scan) end up occurring after the original request is returned. This ends up providing incomplete data to the content template.
Additional information concerning my build setup:
npm#2.14.2
node#4.0.0
webpack#1.12.2
babel#5.8.34
babel-loader#5.4.0
I believe the issue is in your forEach call in _scan. For reference, see this passage in Taming the asynchronous beast with ES7:
However, if you try to use an async function, then you will get a more subtle bug:
let docs = [{}, {}, {}];
// WARNING: this won't work
docs.forEach(async function (doc, i) {
await db.post(doc);
console.log(i);
});
console.log('main loop done');
This will compile, but the problem is that this will print out:
main loop done
0
1
2
What's happening is that the main function is exiting early, because the await is actually in the sub-function. Furthermore, this will execute each promise concurrently, which is not what we intended.
The lesson is: be careful when you have any function inside your async function. The await will only pause its parent function, so check that it's doing what you actually think it's doing.
So each iteration of the forEach call is running concurrently; they're not executing one at a time. As soon as the one that matches the criteria i === keys.length - 1 finishes, the promise is resolved and _scan returns, even though other async functions called via forEach are still executing.
You would need to either change the forEach to a map to return an array of promises, which you can then await* from _scan (if you want to execute them all concurrently and then call something when they're all done), or execute them one-at-a-time if you want them to execute in sequence.
As a side note, if I'm reading them right, some of your async functions can be simplified a bit; remember that, while awaiting an async function call returns a value, simply calling it returns another promise, and returning a value from an async function is the same as returning a promise that resolves to that value in a non-async function. So, for example, _get can be:
async _get(type, id) {
var data = _json[id] = _json[id] || await this._api(type, id);
console.log(data)
if (isAsset(data)) {
return data;
} else if (isEntry(data)) {
await this._scan(data);
return data;
} else {
const error = 'Response is not entry/asset.';
console.log(error);
throw error;
}
}
Similarly, _scan could be (assuming you want the forEach bodies to execute concurrently):
async _scan(data) {
if (data && data.fields) {
const keys = Object.keys(data.fields);
const promises = keys.map(async (key, i) => {
var val = data.fields[key];
if (isLink(val)) {
var child = await this._get(val.sys.linkType.toUpperCase(), val.sys.id);
this._inject(data.fields, key, undefined, child);
} else if (isLinkArray(val)) {
var children = await* val.map(async (link) => await this._get(link.sys.linkType.toUpperCase(), link.sys.id));
children.forEach((child, index) => {
this._inject(data.fields, key, index, child);
});
} else {
await new Promise((resolve) => setTimeout(resolve, 0));
}
});
await* promises;
} else {
const error = 'Required data is unavailable.';
console.log(error);
throw error;
}
}

Resources