Node JS - Cannot get stream data without pipe on promise-ftp

Hi guys, I'm facing a problem with my Node.js API with Express when I'm trying to get files from FTP and then send them over my API as base64.
I'm using promise-ftp (https://www.npmjs.com/package/promise-ftp).
This is what the endpoint looks like:
getData = (req, res, next) => {
  const ftp = new PromiseFtp();
  let data = [];
  ftp.connect({host: 'xxxl', user: 'xxx', password: 'xxx'})
    .then(() => {
      return ftp.get('xxx.pdf');
    }).then((stream) => {
      return new Promise((resolve, reject) => {
        stream.once('close', resolve);
        stream.once('error', reject);
        stream.pipe(fs.createReadStream('test.pdf'));
        stream
          .on('error', (err) => {
            return res.send({errorMessage: err});
          })
          .on('data', (chunk) => data.push(chunk))
          .on('end', () => {
            const buffer = Buffer.concat(data);
            label = buffer.toString('base64');
            return res.send(label);
          });
      });
    }).then(() => {
      return ftp.end();
    });
}
The problem is that I don't want to save this file locally next to the API files, and when I remove the line stream.pipe(fs.createReadStream('test.pdf')); it doesn't work.
I'm not sure what pipe is doing here.
Could you please help me?

readable.pipe(writable) is part of Node's Stream API; it transparently writes the data read from the readable into the writable stream, handling backpressure for you. Piping the data to the filesystem is unnecessary here (incidentally, your code pipes into fs.createReadStream, which is a readable stream; saving a file would require fs.createWriteStream). The Express Response object implements the writable stream interface, so you can pipe the stream returned by the FTP promise directly to the res object.
getData = async (req, res) => {
  const ftp = new PromiseFtp();
  try {
    await ftp.connect({host: 'xxxl', user: 'xxx', password: 'xxx'});
    const stream = await ftp.get('xxx.pdf');
    res.type('pdf');
    await new Promise((resolve, reject) => {
      res.on('finish', resolve);
      stream.once('error', reject);
      stream.pipe(res);
    });
  } catch (e) {
    console.error(e);
  } finally {
    await ftp.end();
  }
}
If you don't have a Node version that supports async/await, here's a Promise-only version:
getData = (req, res) => {
  const ftp = new PromiseFtp();
  ftp
    .connect({host: 'xxxl', user: 'xxx', password: 'xxx'})
    .then(() => ftp.get('xxx.pdf'))
    .then(stream => {
      res.type('pdf');
      return new Promise((resolve, reject) => {
        res.on('finish', resolve);
        stream.once('error', reject);
        stream.pipe(res);
      });
    })
    .catch(e => {
      console.error(e);
    })
    .finally(() => ftp.end());
}
This is a good use case for a Promise's finally() method or a try/catch/finally block, which ensures that ftp.end() is called whether or not an error occurs.
Note that I've deliberately left out sending the error back to clients, as doing so could leak sensitive information. A better solution is to set up proper server-side logging with request context.
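For completeness, a minimal sketch of how the handler above might be wired into an Express app (the /label route name is made up for illustration):
const express = require('express');
const app = express();

// `getData` is the handler defined above
app.get('/label', getData);

app.listen(3000, () => console.log('listening on 3000'));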

Related

In node.js, why is my data not getting passed back after an asynchronous file read using a Promise

I know for sure that my pullData module is getting the data back from the file read, but the function calling it, even though it uses await, is not getting the data.
This is the module (./initialise.js) that reads the data:
const fs = require('fs');

const getData = () => {
  return new Promise((resolve, reject) => {
    fs.readFile('./Sybernika.txt',
      { encoding: 'utf8', flag: 'r' },
      function (err, data) {
        if (err)
          reject(err);
        else
          resolve(data);
      });
  });
};

module.exports = {getData};
And this is where it gets called (app.js):
const init = require('./initialise');

const pullData = async () => {
  init.getData().then((data) => {
    return data;
  }).catch((err) => {
    console.log(err);
  });
};

const start = async () => {
  let data = await pullData();
  console.log(data);
}

start();
Putting console.log(data) just before return data in the resolve part of the call shows the data, so I know it's being read OK. However, that final console.log shows my data variable as undefined.
Any suggestions?
Your pullData function never returns anything, so awaiting it yields undefined. It's either
const pullData = async () => {
  return init.getData().then((data) => {
    return data;
  }).catch((err) => {
    console.log(err);
  });
};
or
const pullData = async () =>
  init.getData().then((data) => {
    return data;
  }).catch((err) => {
    console.log(err);
  });
Both versions make sure the promise returned by then/catch is passed back to the caller.
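Or, since pullData is already declared async, you can skip the then/catch chain entirely; a minimal equivalent sketch:
const pullData = async () => {
  try {
    // returning the awaited value resolves the async function with the data
    return await init.getData();
  } catch (err) {
    console.log(err);
  }
};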

Waiting for a write stream to complete before returning a function

I'm trying to grab several JSON files from Google Drive, then export them into objects within my code. I am using a write stream to create the file locally and then parse it with JSON.parse(). This is my current implementation:
function setDB(dbId, dbName) {
  let database = {};
  drive.files.get({
    fileId: dbId,
    alt: 'media'
  }, {responseType: 'stream'}).then(res => {
    console.log(`Writing to ${dbName}.`);
    const db = fs.createWriteStream(dbName);
    res.data.on('end', () => {
      console.log(`Done downloading file ${dbName}.`);
    }).on('error', err => {
      console.error(`Error downloading file ${dbName}.`);
      throw err;
    }).pipe(db).on('finish', () => {
      database = JSON.parse(fs.readFileSync(`./${dbName}`));
    });
  });
  return database;
}
let variable1 = setDB('variable1Id', 'variable1.json');
let variable2 = setDB('variable2Id', 'variable2.json');
let variable3 = setDB('variable3Id', 'variable3.json');
The issue here is that setDB returns undefined, since the function returns its value before the file has finished downloading and the stream has finished writing to the file. I understand that I should be using some kind of async/await, but I couldn't find how or where to place it. This might also not be the best way to go about this, and I'll gladly take any advice about making a better implementation, or making the code cleaner or more elegant. However, the main issue is: how do I make setDB return JSON.parse(fs.readFileSync(`./${dbName}`))?
I rewrote your code to return a promise that resolves on the stream's finish event and rejects on a stream error. Your original code doesn't wait for that promise.
async function setDB(dbId, dbName) {
  const db = fs.createWriteStream(dbName);
  const res = await drive.files.get({
    alt: 'media',
    fileId: dbId,
  }, {responseType: 'stream'});
  console.log(`Writing to ${dbName}.`);
  return new Promise((resolve, reject) => {
    res.data
      .on('end', () => {
        console.log(`Done downloading file ${dbName}.`);
      })
      .on('error', (err) => {
        console.error(`Error downloading file ${dbName}.`);
        reject(err);
      })
      .pipe(db)
      .on('finish', async () => {
        try {
          // use the promise-based fs API so the file contents can be awaited
          const fileContent = await fs.promises.readFile(`./${dbName}`);
          resolve(JSON.parse(fileContent));
        } catch (err) {
          reject(err);
        }
      });
  });
}
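Because setDB now returns a promise, the callers have to await it as well; for example (wrapped in an async IIFE here purely for illustration):
(async () => {
  const variable1 = await setDB('variable1Id', 'variable1.json');
  const variable2 = await setDB('variable2Id', 'variable2.json');
  const variable3 = await setDB('variable3Id', 'variable3.json');
  // variable1/2/3 now hold the parsed JSON objects
})();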

How to break out of promise after number of requests-then chain in nodejs

I was trying to read a CSV file (900,000 rows) and want to pass it to a POST API. But I am unable to pass the complete data because it is huge. So I want to pause the stream after every 100k rows, make the axios call, and then resume the stream, so that I can reduce the load on the server.
Please correct me if something is wrong as I am very new to nodejs.
async function readcsv(path) {
  return new Promise((resolve, reject) => {
    let csvdata = [];
    fs.createReadStream(path)
      .on('error', () => {
        // handle error
      })
      .pipe(csv())
      .on('data', (row) => {
        csvdata.push([evname, uid, data])
        console.log(csvdata)
      })
      .on('end', async () => {
        resolve(csvdata)
      })
  })
}
readcsv('filename.csv').then(data => {
  let token = jwt.sign({payload: data}, secrete_key)
  axios.get(`${URL}?c=${comp_id}&jwt=${token}`).then(function (res) {
    console.log(res.data)
  }).catch(e => {
    console.log(e)
  })
})
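A minimal sketch of the pause-and-resume batching described above, assuming the rows should be POSTed in chunks. The batch size, URL, comp_id and secrete_key are placeholders taken from the question, and sending the token in the request body rather than the query string is an assumption:
const fs = require('fs');
const csv = require('csv-parser');
const axios = require('axios');
const jwt = require('jsonwebtoken');

const BATCH_SIZE = 100000; // flush to the API every 100k rows

function processCsv(path) {
  return new Promise((resolve, reject) => {
    let batch = [];
    const sendBatch = rows => {
      const token = jwt.sign({ payload: rows }, secrete_key);
      return axios.post(`${URL}?c=${comp_id}`, { jwt: token });
    };
    const stream = fs.createReadStream(path)
      .pipe(csv())
      .on('error', reject)
      .on('data', async row => {
        batch.push(row);
        if (batch.length >= BATCH_SIZE) {
          stream.pause(); // stop reading while this batch is sent
          try {
            await sendBatch(batch);
            batch = [];
          } catch (err) {
            reject(err);
          } finally {
            stream.resume(); // carry on with the remaining rows
          }
        }
      })
      .on('end', async () => {
        try {
          if (batch.length) {
            await sendBatch(batch); // send the leftover rows
          }
          resolve();
        } catch (err) {
          reject(err);
        }
      });
  });
}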

nodejs async await inside createReadStream

I am reading a CSV file line by line and inserting/updating the rows in MongoDB. The expected output per row is:
1. console.log(row);
2. console.log(cursor);
3. console.log("stream");
But I am getting output like:
1. console.log(row); console.log(row); console.log(row); console.log(row); ...
2. console.log(cursor);
3. console.log("stream");
(i.e. all the rows are logged first, before any of the cursor or "stream" logs appear.)
Please let me know what I am missing here.
const csv = require('csv-parser');
const fs = require('fs');
var mongodb = require("mongodb");
var client = mongodb.MongoClient;
var url = "mongodb://localhost:27017/";
var collection;

client.connect(url, { useUnifiedTopology: true }, function (err, client) {
  var db = client.db("UKCompanies");
  collection = db.collection("company");
  startRead();
});

var cursor = {};

async function insertRec(row) {
  console.log(row);
  cursor = await collection.update({CompanyNumber: 23}, row, {upsert: true});
  if (cursor) {
    console.log(cursor);
  } else {
    console.log('not exist')
  }
  console.log("stream");
}

async function startRead() {
  fs.createReadStream('./data/inside/6.csv')
    .pipe(csv())
    .on('data', async (row) => {
      await insertRec(row);
    })
    .on('end', () => {
      console.log('CSV file successfully processed');
    });
}
In your startRead() function, the await insertRec() does not stop more data events from flowing while the insertRec() is processing. So, if you don't want the next data event to run until the insertRec() is done, you need to pause, then resume the stream.
async function startRead() {
  const stream = fs.createReadStream('./data/inside/6.csv')
    .pipe(csv())
    .on('data', async (row) => {
      try {
        stream.pause();
        await insertRec(row);
      } finally {
        stream.resume();
      }
    })
    .on('end', () => {
      console.log('CSV file successfully processed');
    });
}
FYI, you also need some error handling if insertRec() fails.
That is expected behavior in this case, because your 'data' listener triggers insertRec asynchronously as and when data is available in the stream; that is why the first line of the insert method runs effectively in parallel for every row. If you want to control this behavior you can use the highWaterMark option (https://nodejs.org/api/stream.html#stream_readable_readablehighwatermark) when creating the read stream to make it deliver smaller chunks (for fs streams it is measured in bytes), but I am not sure what your use case is.
Something like this:
fs.createReadStream(`somefile.csv`, {
  "highWaterMark": 1
})
Also, you are not awaiting your startRead method. I would wrap it inside a promise and resolve it in the 'end' listener, otherwise you will not know when the processing has finished. Something like:
function startRead() {
  return new Promise((resolve, reject) => {
    fs.createReadStream(`somepath`)
      .pipe(csv())
      .on("data", async row => {
        await insertRec(row);
      })
      .on("error", err => {
        reject(err);
      })
      .on("end", () => {
        console.log("CSV file successfully processed");
        resolve();
      });
  });
}
Since Node 10, readable streams have a Symbol.asyncIterator property, which allows processing a stream with for await...of:
async function startRead() {
  const readStream = fs.createReadStream('./data/inside/6.csv');
  for await (const row of readStream.pipe(csv())) {
    await insertRec(row);
  }
  console.log('CSV file successfully processed');
}

Is it safe to assume the read stream 'end' event gets triggered after the write stream is done writing and closing a file after a pipe?

I'm hesitating over which of these snippets is the better solution:
simple_client({
  method: 'get',
  path: key,
  id: 'download'
})
  .then(req => new Promise((resolve, reject) => {
    const write_stream = fs.createWriteStream(path);
    req.on('result', (err, res) => {
      if (err) {
        reject(err);
      }
      res.pipe(write_stream);
      res.on('end', () => {
        console.log('Done!');
        resolve();
      });
    });
    req.end();
  }))
  .then(() => {
    console.log(`Is it safe to touch ${path} file here?`);
  });
or
simple_client({
  method: 'get',
  path: key,
  id: 'download'
})
  .then(req => new Promise((resolve, reject) => {
    const write_stream = fs.createWriteStream(path);
    req.on('result', (err, res) => {
      if (err) {
        reject(err);
      }
      res.pipe(write_stream);
      write_stream.on('finish', () => {
        console.log('Done!');
        resolve();
      });
    });
    req.end();
  }))
  .then(() => {
    console.log(`Is it safe to touch ${path} file here?`);
  });
The Readable.pipe() documentation says:
By default, stream.end() is called on the destination Writable stream when the source Readable stream emits 'end'
So am I right to assume the file is fully written and closed when the 'end' event has been emitted from the readable stream? In that case the first snippet would be good enough. Thanks for your help!
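For what it's worth, stream.pipeline (available since Node 10) sidesteps the 'end' versus 'finish' question, because its callback fires only after the destination has been flushed and closed; a sketch using the same hypothetical simple_client request as above:
const fs = require('fs');
const { pipeline } = require('stream');
const { promisify } = require('util');

const pipelineAsync = promisify(pipeline);

simple_client({
  method: 'get',
  path: key,
  id: 'download'
})
  .then(req => new Promise((resolve, reject) => {
    req.on('result', (err, res) => {
      if (err) {
        return reject(err);
      }
      // resolves only once the write stream has been flushed and closed
      pipelineAsync(res, fs.createWriteStream(path)).then(resolve, reject);
    });
    req.end();
  }))
  .then(() => {
    console.log(`${path} is fully written and closed here.`);
  });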
