Nodejs download binary octet-stream - node.js

I am trying to download a .pdf file (i.e. save a copy of it on my server) from a server that returns it in binary format, with:
Content-Type: application/octet-stream.
After a bit of online research I came up with:
// pdfURL and path are defined elsewhere
const http = require('http');
const url = require('url');
const fs = require('fs');

http.get(url.parse(pdfURL), res => {
  let data = [];
  console.log(res.statusCode);
  res.on('data', chunk => {
    data.push(chunk);
  }).on('end', () => {
    let buffer = Buffer.concat(data);
    console.log(buffer.toString('base64'));
    fs.open(path, 'w', (e, fd) => {
      if (e) throw e;
      fs.write(fd, buffer, 0, buffer.length, null, e => {
        if (e) throw e;
        fs.close(fd, () => console.log('Wrote successfully'));
      });
    });
  });
});
Everything seems to work, but when I try to open the generated PDF it tells me the file is corrupt and unreadable.
Any idea what might be wrong?
Thanks
Edit:
I noticed that with Postman everything works as it should, so I think the way I handle the binary response is wrong.

OK, I got it:
I wasn't de-gzipping the response; it works properly now.
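For reference, here is a minimal sketch of that fix (an illustration, not the exact code I used): it checks the Content-Encoding header and gunzips the concatenated buffer with Node's built-in zlib before writing it out.
const zlib = require('zlib');

res.on('end', () => {
  const raw = Buffer.concat(data);
  const write = buffer => fs.writeFile(path, buffer, e => {
    if (e) throw e;
    console.log('Wrote successfully');
  });
  if (res.headers['content-encoding'] === 'gzip') {
    // the server gzipped the body, so decompress before saving
    zlib.gunzip(raw, (e, buffer) => {
      if (e) throw e;
      write(buffer);
    });
  } else {
    write(raw);
  }
});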

This didn't work for me; I tried many different approaches until I found got, an npm library that handles HTTP requests. Here's what worked for me:
const stream = require('stream');
const { promisify } = require('util');
const fs = require('fs');
const got = require('got');

const pipeline = promisify(stream.pipeline);

async function downloadImage(url, name) {
  await pipeline(
    got.stream(url),
    fs.createWriteStream(name)
  );
}
More info here: https://bleext.com/post/downloading-images-with-nodejs
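For example (a usage sketch; the URL and file name are placeholders):
downloadImage('https://example.com/picture.jpg', './picture.jpg')
  .then(() => console.log('saved'))
  .catch(console.error);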

Using fs.read inside promise does not work

I am trying to do an fs.read after the promise job is done, using .then().
Here is what my code looks like:
(async () => {
  const feed = await parser.parseURL('https://www.nasa.gov/rss/dyn/breaking_news.rss');
  console.log(feed.title);
  const items = [];
  await Promise.all(feed.items.map(async (currentItem) => {
    // some code here to create data
    items.push(data);
  })).then(
    items.forEach((element) => {
      const file = downloadFile(element.url);
      let checksumValue;
      try {
        fs.readFileSync(file, (_err, data) => {
          checksumValue = generateChecksum(data);
          console.log(`The checksum is: ${checksumValue}`);
          // Delete the downloaded file
          deleteFile(file);
        });
      } catch (error) {
        console.error(error);
        // expected output: ReferenceError: nonExistentFunction is not defined
        // Note - error messages will vary depending on browse
      }
    })(),
  );
})();
But it doesn't execute this piece of code:
fs.readFileSync(file, (_err, data) => {
  checksumValue = generateChecksum(data);
  console.log(`The checksum is: ${checksumValue}`);
  // Delete the downloaded file
  deleteFile(file);
});
How should I read the file?
fs.readFileSync is sync, so it doesn't take a callback.
Either use the non-sync version:
fs.readFile(file, (_err, data) => {
  checksumValue = generateChecksum(data);
  console.log(`The checksum is: ${checksumValue}`);
  // Delete the downloaded file
  deleteFile(file);
});
or use it as intended:
const data = fs.readFileSync(file);
checksumValue = generateChecksum(data);
console.log(`The checksum is: ${checksumValue}`);
// Delete the downloaded file
deleteFile(file);
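If the surrounding code is already async, the promise-based API reads even more cleanly (a sketch; generateChecksum and deleteFile are the question's own helpers and are assumed to exist):
const fsp = require('fs').promises;

async function checksumAndClean(file) {
  const data = await fsp.readFile(file);
  const checksumValue = generateChecksum(data);
  console.log(`The checksum is: ${checksumValue}`);
  // Delete the downloaded file
  deleteFile(file);
  return checksumValue;
}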

Converting HTML to PDF buffer in Nodejs

I am trying to convert the HTML returned by the "returnDefaultOfferLetter" function into a PDF buffer (which I will use for sending attachments in a mail) using the html-pdf package. The problem is that it works on localhost, but on the AWS Elastic Beanstalk server it throws an ASSERTION ERROR. After some research I learned that I need to specify phantomPath. I have tried everything I could, but haven't found a solution.
By the way, a week ago it was working on AWS, so I don't know what's wrong now. Help me find a solution, or suggest another method or package to convert HTML into a PDF buffer. (Please, don't ignore the buffer requirement.)
const htmlToBase64Pdf = (req, res) => {
  const promise = new Promise((resolve, reject) => {
    const offerLetterHTML = returnDefaultOfferLetter(req.body).toString(
      "utf8"
    );
    const pdfOptions = {
      format: "A3",
      phantomPath: "../../node_modules/phantomjs-prebuilt/bin/phantomjs",
    };
    pdf.create(offerLetterHTML, pdfOptions).toBuffer(function (
      err,
      buffer
    ) {
      if (err) {
        // console.log("err", err);
        reject(err);
      } else {
        // console.log("buffer", buffer);
        const base64Attachment = buffer.toString("base64");
        resolve(base64Attachment);
      }
    });
  });
  promise
    .then((resp) => res.send(resp))
    .catch((e) => {
      res.send(e);
    });
};
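One thing worth trying (a sketch, not a confirmed fix for this deployment): resolve the PhantomJS binary through the phantomjs-prebuilt package itself rather than a relative node_modules path, which can break when the directory layout on Elastic Beanstalk differs from localhost.
const phantomjs = require("phantomjs-prebuilt");

const pdfOptions = {
  format: "A3",
  // phantomjs-prebuilt exposes the absolute path of its bundled binary
  phantomPath: phantomjs.path,
};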

Node download and save gif

I want to download a gif from the net and save it to the local drive.
So far I have:
1) a getFile function that returns the gif as a Buffer:
import fs from 'fs'
import https from 'https'

let url = '//some gif url'

const getFile = function(url) {
  return new Promise((res, rej) => {
    https.get(url, (resp) => {
      let data = [];
      resp.on('data', (chunk) => {
        data.push(chunk);
      });
      resp.on('end', () => {
        var buffer = Buffer.concat(data);
        res(buffer)
      });
    }).on("error", (err) => {
      rej(err);
    });
  })
}
2) Saving function:
async function save() {
  let data = await getFile(url)
  fs.writeFile('./test.gif', data, (e, r) => {
    console.log(e, r) // null, undefined
  })
}
save()
The file is saved and has contents, but when I try to open it it says: Could not load image....
I tried converting the output to utf8 encoding, but that gives the same result.
NOTE: I want to make this work with raw Node.js, without third-party tools or "pipe", in order to understand the working parts.
I guess the problem is with encoding, but I couldn't figure out what exactly it is.
Thanks
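No accepted fix is recorded here, but given the gzip issue in the first question above, one diagnostic sketch (the suspected causes are assumptions, not a confirmed answer) is to log the status code and headers inside getFile before concatenating, since a redirect or a compressed body would both yield an unreadable file:
https.get(url, (resp) => {
  // a 3xx status means the body is a redirect page, not the gif
  console.log(resp.statusCode);
  // "gzip" here means the buffer must be decompressed (e.g. with zlib) before saving
  console.log(resp.headers['content-type'], resp.headers['content-encoding']);
  // ...collect chunks as above...
});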

No end event when piping inside "open"

I am piping a download into a file, but I want to make sure the file doesn't already exist. I've put the code up here for easier exploration: https://tonicdev.com/tolmasky/streaming-piping-on-open-tester <-- this will show you the outputs (the code is also inline below).
The thing is, it seems to work fine except for the done (end) event. The file ends up on the hard drive fine and each step runs in order (the structure is there to avoid unnecessary "parallel" steps: if I just do got.stream(url).pipe(fs.createWriteStream({ flags: ... })), the download actually gets kicked off even when createWriteStream errors because the file already exists, which is undesirable for the network).
The code is the following:
var fs = require("fs");
var got = require("got");

await download("https://www.apple.com", "./index.html");

function download(aURL, aDestinationFilePath)
{
    return new Promise(function(resolve, reject)
    {
        fs.createWriteStream(aDestinationFilePath, { flags: "wx" })
            .on("open", function()
            {
                const writeStream = this;
                console.log("SUCCESSFULLY OPENED!");
                got.stream(aURL)
                    .on("response", function(aResponse)
                    {
                        const contentLength = +aResponse.headers["content-length"] || 0;
                        console.log(aResponse.headers);
                        console.log("STARTING DOWNLOAD! " + contentLength);
                        this.on("data", () => console.log("certainly getting data"))
                        this.pipe(writeStream)
                            .on("error", reject)
                            .on("end", () => console.log("DONE!"))
                            .on("end", resolve);
                    })
            })
            .on("error", function(anError)
            {
                if (anError.code === "EEXIST") {
                    console.log("oh");
                    resolve();
                }
                else
                    reject(anError);
            });
    });
}
According to the stream docs, readable.pipe() returns the destination Writable stream, and the event emitted when a Writable is done is 'finish', not 'end'.
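Applied to the code above, that means resolving on 'finish' from the write stream returned by pipe (a minimal sketch of the change):
this.pipe(writeStream)
    .on("error", reject)
    // 'finish' fires once the Writable has flushed everything to disk
    .on("finish", () => console.log("DONE!"))
    .on("finish", resolve);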

How to untar file in node.js

There are some untar libraries, but I cannot get them working.
My idea would be something like
untar(bufferStreamOrFilePath).extractToDirectory("/path", function(err){})
Is something like this available?
Just an update on this answer: instead of node-tar, consider using tar-fs, which yields a significant performance boost as well as a neater interface.
var tar = require('tar-fs');
var fs = require('fs');

var tarFile = 'my-other-tarball.tar';
var target = './my-other-directory';

// extracting a directory
fs.createReadStream(tarFile).pipe(tar.extract(target));
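tar-fs operates on a plain tar stream, so for a .tar.gz you would decompress first (a sketch using Node's built-in zlib):
var zlib = require('zlib');

fs.createReadStream('my-other-tarball.tar.gz')
  .pipe(zlib.createGunzip())
  .pipe(tar.extract(target));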
The tar-stream module is a pretty good one:
var tar = require('tar-stream')
var fs = require('fs')

var extract = tar.extract();

extract.on('entry', function(header, stream, callback) {
  // make directories or files depending on the header here...
  // call callback() when you're done with this entry
});

fs.createReadStream("something.tar").pipe(extract)

extract.on('finish', function() {
  console.log('done!')
});
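Inside the 'entry' handler each entry's stream has to be consumed before calling the callback. A sketch that writes regular files and skips everything else (it assumes the output directory './output' already exists and the archive has a flat layout):
var path = require('path');

extract.on('entry', function(header, stream, callback) {
  if (header.type === 'file') {
    // write the entry, then move on once the file is fully flushed
    stream.pipe(fs.createWriteStream(path.join('./output', header.name)))
      .on('finish', callback);
  } else {
    // drain and skip directories, symlinks, etc.
    stream.resume();
    callback();
  }
});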
A function to extract a base64 encoded tar fully in memory, with the assumption that all the files in the tar are utf-8 encoded text files.
const tar = require('tar');
let Duplex = require('stream').Duplex;

function bufferToStream(buffer) {
  let stream = new Duplex();
  stream.push(buffer);
  stream.push(null);
  return stream;
}

module.exports = function(base64EncodedTar) {
  return new Promise(function(resolve, reject) {
    const buffer = Buffer.from(base64EncodedTar, "base64");
    let files = {};
    try {
      bufferToStream(buffer).pipe(new tar.Parse())
        .on('entry', entry => {
          let file = {
            path: entry.path,
            content: ""
          };
          files[entry.path] = file;
          entry.on('data', function (tarFileData) {
            file.content += tarFileData.toString('utf-8');
          });
          // resolve(entry);
        }).on('close', function() {
          resolve(files);
        });
    } catch (e) {
      reject(e);
    }
  });
};
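A usage sketch (the archive name and module path are placeholders):
const fs = require('fs');
const untarBase64 = require('./untar-base64'); // the module exported above

const base64Tar = fs.readFileSync('archive.tar').toString('base64');
untarBase64(base64Tar).then(files => {
  // files maps each entry path to { path, content }
  console.log(Object.keys(files));
});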
Expanding on the tar-stream answer, which seemed to be the simplest to use in the browser with jest-tested code. Comparing the Node libraries, based on my attempt to implement a project:
tar: Fairly easy to use on files or buffers, as in Gudlaugur Egilsson's answer. It had some annoying webpack polyfill issues that I had trouble with when putting it into a React app.
js-untar: This was pretty annoying to set up for jest testing because it uses web workers and blob URLs, which jest does not directly support. I didn't get as far as making it work in the browser, though it may work fine there. In order to get the jest tests working, I had to use jsdom-worker-fix, and it was very slow in that environment. (It may be faster in-browser.)
tar-stream combined with gunzip-maybe seems to be fairly performant in the browser and has no issues being used in jest tests. It worked fine on the multi-hundred-megabyte tarballs I tried.
This code extracts a tar or tar.gz stream:
var tar = require("tar-stream");
const gunzip = require("gunzip-maybe");

exports.unTar = (tarballStream) => {
  const results = {};
  return new Promise((resolve, reject) => {
    var extract = tar.extract();
    extract.on("entry", async function (header, stream, next) {
      const chunks = [];
      for await (let chunk of stream) {
        chunks.push(chunk);
      }
      results[header.name] = Buffer.concat(chunks);
      next();
    });
    extract.on("finish", function () {
      resolve(results);
    });
    tarballStream.pipe(gunzip()).pipe(extract);
  });
};
Then, in the browser, you can use readable-web-to-node-stream to process a tarball fetched with fetch():
const { ReadableWebToNodeStream } = require("readable-web-to-node-stream");
const response = await fetch(url, headers);
const extracted = await unTar(new ReadableWebToNodeStream(response.body));
