readable stream on data event losing first/last character from chunk - node.js

I am using ssh2-sftp-client for Node.js to connect to an SFTP server and get a file.
But I've hit a problem: when there is more than one chunk, the data is not received correctly; one character is lost between chunks.
For example, the file on the SFTP server contains 'some string from sftp file',
and if it is split into chunks, the received data looks like:
first chunk: 'some string f'
second chunk: 'om sftp file'
In this example the 'r' is lost.
const getFile = readable => new Promise((resolve, reject) => {
  let file = '';
  readable.on('data', (chunk) => { file += chunk; });
  readable.on('end', () => resolve(file));
  readable.on('error', reject);
});
const readable = await sftp.get(fileName, false);
sftp.get() returns a NodeJS.ReadableStream.
Has anyone run into the same problem?

It is more advisable to store the chunks in an array, concatenate them into a Buffer, and only take the string value on end:
const fs = require('fs');
const getFile = readable => new Promise((resolve, reject) => {
  const file = [];
  readable.on('data', chunk => file.push(Buffer.from(chunk, 'base64')));
  readable.on('end', () => resolve(Buffer.concat(file).toString()));
  readable.on('error', reject);
});
const readable = fs.createReadStream('package.json');
getFile(readable).then(file => console.log('file', file));
Ran it locally and got the content of my file as expected.

After long research I found the problem: in the sftp-stream module, the readable stream's highWaterMark is set to 64*1024, and the bug is that if a chunk is exactly 64*1024 bytes, one byte is lost. I just set the highWaterMark to 64*1024 - 1.
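For anyone who wants to try that workaround, here is a minimal sketch. It assumes you can reach the low-level ssh2 SFTP wrapper (how you obtain it from ssh2-sftp-client varies by version), and that its createReadStream() forwards stream options such as highWaterMark; treat it as illustrative rather than a verified fix:

// Sketch only: `sftp` is assumed to be the low-level ssh2 SFTP wrapper,
// not the ssh2-sftp-client instance. Chunks are collected as Buffers and
// concatenated at the end, so no characters can be lost at chunk boundaries.
const getFileWithSmallerChunks = (sftp, remotePath) => new Promise((resolve, reject) => {
  const chunks = [];
  const readable = sftp.createReadStream(remotePath, { highWaterMark: 64 * 1024 - 1 });
  readable.on('data', chunk => chunks.push(chunk));
  readable.on('end', () => resolve(Buffer.concat(chunks).toString()));
  readable.on('error', reject);
});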

Related

How to get a readStream to point at a different file in a looped promise iteration

I'm trying to parse multiple CSV files into one JSON file.
My problem is that I cannot reuse the read stream, as it does not update the file it reads from, so on each iteration of the loop I keep reading the first CSV file passed to it.
Here is the code I'm using: it reads all CSV files in a directory, wraps an instance of createReadStream in a Promise, awaits the result, and then moves on to parse the next CSV file. (It does not read the values of the next CSV file, though; it just returns the result of the first CSV file passed to it each time.)
const path = require("path");
const fs = require("fs");
const parse = require("csv-parse");

const parser = parse({
  delimiter: ","
});

function parseCSVFile(file) {
  return new Promise((resolve, reject) =>
    fs
      .createReadStream(file)
      .pipe(parser)
      .on("data", (data) => {
        console.log(data);
      })
      .on("end", () => {
        resolve();
      })
      .on("error", (error) => reject(error))
  );
}

(async () => {
  const csvFiles = fs.readdirSync(path.join(__dirname, "/csvFiles"));
  for (let file of csvFiles) {
    await parseCSVFile(path.join(__dirname, "/csvFiles", file));
  }
})();
Any help/advice is much appreciated!
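A likely culprit (offered as a guess, not a verified fix): parse() creates a single transform stream, and a stream only runs to completion once. After the first file finishes, the shared parser has already ended, so piping later files into it produces nothing new. A minimal sketch that creates a fresh parser per file:

const path = require("path");
const fs = require("fs");
const parse = require("csv-parse");

function parseCSVFile(file) {
  return new Promise((resolve, reject) => {
    const rows = [];
    fs.createReadStream(file)
      .pipe(parse({ delimiter: "," })) // new parser instance for every file
      .on("data", (row) => rows.push(row))
      .on("end", () => resolve(rows))
      .on("error", reject);
  });
}

(async () => {
  const dir = path.join(__dirname, "csvFiles");
  for (const file of fs.readdirSync(dir)) {
    console.log(await parseCSVFile(path.join(dir, file)));
  }
})();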

How do I decode a base64 file when reading from GridFS via Node?

I'm trying to read a file encoded in base64 from a MongoDB GridFS collection using Node. I have been able to get the file saved from MongoDB to my local machine, but it's in base64 format and I want to save it unencoded.
Ideally I would like to decode the file "on the fly", without having to save it once and then read > decode > write it back to the filesystem.
My code currently looks like this...
return new Promise(async (resolve, reject) => {
  let bucket = new mongodb.GridFSBucket(db, {bucketName: 'Binaries'});
  let objectID = new mongodb.ObjectID(fileID);
  // create the download stream
  bucket.openDownloadStream(objectID)
    .once('error', async (error) => {
      reject(error);
    })
    .once('end', async () => {
      resolve(downloadPath);
    })
    // pipe the file to the stream
    .pipe(fs.createWriteStream(downloadPath));
});
Any ideas?
Just in case anyone else is looking at this, here's where I landed...
return new Promise(async (resolve, reject) => {
  let bucket = new mongodb.GridFSBucket(db, {
    bucketName: 'Binaries'
  });
  let objectID = new mongodb.ObjectID(fileInformation._id);
  // temporary variable to hold image
  var data = [];
  // create the download stream
  let downloadStream = bucket.openDownloadStream(objectID);
  downloadStream.on('data', (chunk) => {
    data.push(chunk);
  });
  downloadStream.on('error', async (error) => {
    reject(error);
  });
  downloadStream.on('end', async () => {
    // convert from base64 and write to file system
    let bufferBase64 = Buffer.concat(data);
    let bufferDecoded = Buffer.from(bufferBase64.toString(), 'base64');
    fs.writeFileSync(fileName, bufferDecoded, 'binary');
    resolve(fileName);
  });
});
Node has a built-in buffer parser, Buffer.from(string[, encoding]): pass it the base64-encoded string and it gives you the decoded bytes, which you can easily convert with .toString() afterwards.
Ex.
let whatYouNeed = Buffer.from(gridFsData, 'base64').toString();
More about Buffer.from() function here.

Nodejs download binary octet-stream

I am trying to download (meaning save a copy of the file on my server) a .pdf file from a server that returns it to me in binary format, with:
Content-Type: application/octet-stream
After a bit of online research I came up with:
http.get(url.parse(pdfURL), res => {
  let data = [];
  console.log(res.statusCode);
  res.on('data', chunk => {
    data.push(chunk);
  }).on('end', () => {
    let buffer = Buffer.concat(data);
    console.log(buffer.toString('base64'));
    fs.open(path, 'w', (e, fd) => {
      if (e) throw e;
      fs.write(fd, buffer, 0, buffer.length, null, e => {
        if (e) throw e;
        fs.close(fd, () => console.log('Wrote successfully'));
      });
    });
  });
});
Everything works properly, but when I try to open the generated pdf, it tells me that the file is corrupt and not readable.
Any idea what might have been wrong?
Thanks
Edit:
I noticed that with Postman everything works as it should, so I think the way I'm treating the binary data is wrong.
OK, I got it:
I wasn't de-gzipping the response. It works properly now.
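For anyone hitting the same thing, a minimal sketch of de-gzipping the response before buffering it (assuming the server sets Content-Encoding: gzip; pdfURL and path are the same placeholders as in the question):

const http = require('http');
const fs = require('fs');
const zlib = require('zlib');

http.get(pdfURL, res => {
  // only decompress when the server actually gzipped the body
  const gzipped = res.headers['content-encoding'] === 'gzip';
  const source = gzipped ? res.pipe(zlib.createGunzip()) : res;
  const data = [];
  source.on('data', chunk => data.push(chunk));
  source.on('end', () => fs.writeFile(path, Buffer.concat(data), e => {
    if (e) throw e;
    console.log('Wrote successfully');
  }));
});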
This didn't work for me; I tried many different ways until I found got, an npm library that handles HTTP requests. Here's what worked for me:
const stream = require('stream');
const { promisify } = require('util');
const fs = require('fs');
const got = require('got');

const pipeline = promisify(stream.pipeline);

async function downloadImage(url, name) {
  await pipeline(
    got.stream(url),
    fs.createWriteStream(name)
  );
}
More info here: https://bleext.com/post/downloading-images-with-nodejs

Node download and save gif

I want to download a gif from the net and save it to my local drive.
So far I have:
1) A getFile function that returns the gif as a Buffer:
import fs from 'fs'
import https from 'https'
let url = '//some gif url'
const getFile = function(url) {
  return new Promise((res, rej) => {
    https.get(url, (resp) => {
      let data = [];
      resp.on('data', (chunk) => {
        data.push(chunk);
      });
      resp.on('end', () => {
        var buffer = Buffer.concat(data);
        res(buffer);
      });
    }).on("error", (err) => {
      rej(err);
    });
  });
};
2) Saving function:
async function save() {
  let data = await getFile(url);
  fs.writeFile('./test.gif', data, (e, r) => {
    console.log(e, r); // null, undefined
  });
}
save();
The file is saved and has contents, but when I try to open it, it says: Could not load image....
I tried converting the output with utf8 encoding, but it gives me the same result.
NOTE: I want to make this work with raw Node.js, without third-party tools or "pipe", in order to understand the working parts.
I guess the problem is with encoding, but I couldn't figure out what exactly it is.
Thanks
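One thing worth ruling out (a guess, not a confirmed cause): https.get does not follow redirects, so if the gif URL answers with a 301/302 or an error page, the code above happily saves that response body as test.gif. A minimal check before buffering, using the same raw-Node approach:

const https = require('https');
const fs = require('fs');

const getFile = url => new Promise((res, rej) => {
  https.get(url, resp => {
    // bail out early if the server did not return the image itself
    if (resp.statusCode !== 200) {
      return rej(new Error('Unexpected status ' + resp.statusCode +
        (resp.headers.location ? ' -> ' + resp.headers.location : '')));
    }
    const data = [];
    resp.on('data', chunk => data.push(chunk));
    resp.on('end', () => res(Buffer.concat(data)));
  }).on('error', rej);
});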

No end event when piping inside "open"

I am piping a download into a file, but I want to make sure the file doesn't already exist first. I've put the code up here for easier exploration: https://tonicdev.com/tolmasky/streaming-piping-on-open-tester <-- this will show you the outputs (code also below inline).
So the thing is, it seems to work fine except for the done (end) event. The file ends up on the hard drive fine, and each step is followed correctly (the structure is there to ensure no unnecessary "parallel" steps happen: if I just do got.stream(url).pipe(fs.createWriteStream({ flags: ... })), the download actually gets kicked off even when createWriteStream returns an error because the file is already there, which is undesirable for the network).
The code is the following:
var fs = require("fs");
var got = require("got");

await download("https://www.apple.com", "./index.html");

function download(aURL, aDestinationFilePath)
{
    return new Promise(function(resolve, reject)
    {
        fs.createWriteStream(aDestinationFilePath, { flags: "wx" })
            .on("open", function()
            {
                const writeStream = this;
                console.log("SUCCESSFULLY OPENED!");
                got.stream(aURL)
                    .on("response", function(aResponse)
                    {
                        const contentLength = +aResponse.headers["content-length"] || 0;
                        console.log(aResponse.headers);
                        console.log("STARTING DOWNLOAD! " + contentLength);
                        this.on("data", () => console.log("certainly getting data"));
                        this.pipe(writeStream)
                            .on("error", reject)
                            .on("end", () => console.log("DONE!"))
                            .on("end", resolve);
                    });
            })
            .on("error", function(anError)
            {
                if (anError.code === "EEXIST") {
                    console.log("oh");
                    resolve();
                }
                else
                    reject(anError);
            });
    });
}
According to the stream docs, readable.pipe returns the destination Writable stream, and the correct event to listen for when a Writable is done is 'finish', not 'end'.
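In other words, the relevant part of the question's code would become something like the sketch below (same variables as above, trimmed to the pipe step): pipe() returns writeStream, and writeStream emits 'finish' once all data has been flushed.

got.stream(aURL)
    .on("response", function(aResponse)
    {
        this.pipe(writeStream)           // pipe returns the destination writable
            .on("error", reject)
            .on("finish", resolve);      // Writables emit 'finish', not 'end'
    });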
