Downloading images with node.js [closed]

I'm trying to write a script to download images using node.js. This is what I have so far:
var http = require('http'),
    fs = require('fs');

var maxLength = 10 // 10mb

var download = function(uri, callback) {
  http.request(uri)
    .on('response', function(res) {
      if (res.headers['content-length'] > maxLength * 1024 * 1024) {
        callback(new Error('Image too large.'))
      } else if (!~[200, 304].indexOf(res.statusCode)) {
        callback(new Error('Received an invalid status code.'))
      } else if (!res.headers['content-type'].match(/image/)) {
        callback(new Error('Not an image.'))
      } else {
        var body = ''
        res.setEncoding('binary')
        res
          .on('error', function(err) {
            callback(err)
          })
          .on('data', function(chunk) {
            body += chunk
          })
          .on('end', function() {
            // What about Windows?!
            var path = '/tmp/' + Math.random().toString().split('.').pop()
            fs.writeFile(path, body, 'binary', function(err) {
              callback(err, path)
            })
          })
      }
    })
    .on('error', function(err) {
      callback(err)
    })
    .end();
}
I, however, want to make this more robust:
Are there libraries that do this and do it better?
Is there a chance that response headers lie (about length, about content type)?
Are there any other status codes I should care about? Should I bother with redirects?
I think I read somewhere that binary encoding is going to be deprecated. What do I do then?
How can I get this to work on Windows?
Any other ways you can make this script better?
Why: for a feature similar to imgur where users can give me a URL, I download that image, and rehost the image in multiple sizes.
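On the Windows question in particular, Node's built-in os module exposes a portable temp directory; a minimal sketch, purely for illustration:

var os = require('os'),
    path = require('path');

// os.tmpdir() resolves to the platform temp directory
// (/tmp on Linux, %TEMP% on Windows), so the random name is portable.
var tmpPath = path.join(os.tmpdir(), Math.random().toString().split('.').pop());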

I'd suggest using the request module. Downloading a file is as simple as the following code:
var fs = require('fs'),
    request = require('request');

var download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){
    console.log('content-type:', res.headers['content-type']);
    console.log('content-length:', res.headers['content-length']);

    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
};

download('https://www.google.com/images/srpr/logo3w.png', 'google.png', function(){
  console.log('done');
});
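If you also want the size and type checks from the question, you can act on the HEAD response before piping. A sketch along those lines (maxLength as defined in the question; remember that headers can lie, so this is only a first line of defense):

var download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){
    if (err) return callback(err);
    // Enforce the question's limits using the HEAD response.
    if (res.headers['content-length'] > maxLength * 1024 * 1024)
      return callback(new Error('Image too large.'));
    if (!/image/.test(res.headers['content-type']))
      return callback(new Error('Not an image.'));

    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
};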

I ran into this problem some days ago. For a pure NodeJS answer, I would suggest using a Stream to merge the chunks together.
var http = require('http'),
    Stream = require('stream').Transform,
    fs = require('fs');

var url = 'http://www.google.com/images/srpr/logo11w.png';

http.request(url, function(response) {
  var data = new Stream();

  response.on('data', function(chunk) {
    data.push(chunk);
  });

  response.on('end', function() {
    fs.writeFileSync('image.png', data.read());
  });
}).end();
The newest Node versions won't work well with binary strings, so merging chunks with strings is not a good idea when working with binary data.
Just be careful when using data.read(): it will empty the stream for the next read() operation. If you want to use it more than once, store the result somewhere.
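An equivalent way to merge binary chunks without the Transform-stream trick is to collect them in an array and join them with Buffer.concat; a minimal sketch of that variant:

var http = require('http'),
    fs = require('fs');

http.request('http://www.google.com/images/srpr/logo11w.png', function(response) {
  var chunks = [];

  response.on('data', function(chunk) {
    chunks.push(chunk); // each chunk is already a Buffer
  });

  response.on('end', function() {
    // Buffer.concat avoids binary-string encoding issues entirely.
    fs.writeFileSync('image.png', Buffer.concat(chunks));
  });
}).end();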

You can use Axios (a promise-based HTTP client for Node.js) to download images in the order of your choosing in an asynchronous environment:
npm i axios
Then, you can use the following basic example to begin downloading images:
const fs = require('fs');
const axios = require('axios');

/* ============================================================
  Function: Download Image
============================================================ */

const download_image = (url, image_path) =>
  axios({
    url,
    responseType: 'stream',
  })
    .then(
      response =>
        new Promise((resolve, reject) => {
          response.data
            .pipe(fs.createWriteStream(image_path))
            .on('finish', () => resolve({ status: true, error: '' }))
            .on('error', e => reject(e));
        }),
    )
    // Resolve with a status object so callers can inspect failures
    // (e.g. a 404) without wrapping every await in try/catch.
    .catch(e => ({ status: false, error: String(e) }));

/* ============================================================
  Download Images in Order
============================================================ */

(async () => {
  let example_image_1 = await download_image('https://example.com/test-1.png', 'example-1.png');
  console.log(example_image_1.status); // true
  console.log(example_image_1.error); // ''

  let example_image_2 = await download_image('https://example.com/does-not-exist.png', 'example-2.png');
  console.log(example_image_2.status); // false
  console.log(example_image_2.error); // 'Error: Request failed with status code 404'

  let example_image_3 = await download_image('https://example.com/test-3.png', 'example-3.png');
  console.log(example_image_3.status); // true
  console.log(example_image_3.error); // ''
})();
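If you would rather buffer the whole image in memory instead of streaming it to disk, axios also accepts responseType: 'arraybuffer'; a small sketch of that variant:

const download_image_buffered = async (url, image_path) => {
  // 'arraybuffer' tells axios to buffer the entire body before resolving.
  const response = await axios.get(url, { responseType: 'arraybuffer' });
  fs.writeFileSync(image_path, Buffer.from(response.data));
};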

var fs = require('fs'),
    http = require('http'),
    https = require('https');

var Stream = require('stream').Transform;

var downloadImageToUrl = (url, filename, callback) => {
  var client = http;
  if (url.toString().indexOf("https") === 0) {
    client = https;
  }

  client.request(url, function(response) {
    var data = new Stream();

    response.on('data', function(chunk) {
      data.push(chunk);
    });

    response.on('end', function() {
      fs.writeFileSync(filename, data.read());
      if (callback) callback(filename); // report completion to the caller
    });
  }).end();
};

downloadImageToUrl('https://www.google.com/images/srpr/logo11w.png', 'public/uploads/users/abc.jpg');

If you want download progress, try this:
var fs = require('fs');
var request = require('request');
var progress = require('request-progress');

module.exports = function (uri, path, onProgress, onResponse, onError, onEnd) {
  progress(request(uri))
    .on('progress', onProgress)
    .on('response', onResponse)
    .on('error', onError)
    .on('end', onEnd)
    .pipe(fs.createWriteStream(path))
};
How to use it:
var download = require('../lib/download');

download("https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png", "~/download/logo.png", function (state) {
  console.log("progress", state);
}, function (response) {
  console.log("status code", response.statusCode);
}, function (error) {
  console.log("error", error);
}, function () {
  console.log("done");
});
Note: you should install both the request and request-progress modules first:
npm install request request-progress --save

This is an extension to Cezary's answer. If you want to download to a specific directory, use this. Also, use const instead of var; it's safer that way.
const fs = require('fs');
const request = require('request');

const download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){
    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
};

download('https://www.google.com/images/srpr/logo3w.png', './images/google.png', function(){
  console.log('done');
});

Building on the above, if anyone needs to handle errors in the write/read streams, I used this version. Note the stream.read() in case of a write error; it's required so we can finish reading and trigger 'close' on the read stream.
var download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){
    if (err) callback(err, filename);
    else {
      var stream = request(uri);
      stream.pipe(
        fs.createWriteStream(filename)
          .on('error', function(err){
            callback(err, filename);
            stream.read(); // drain the read stream so 'close' still fires
          })
      )
      .on('close', function() {
        callback(null, filename);
      });
    }
  });
};

Related

node.js save http response as pdf file

I have an AWS Lambda function that returns a PDF file as an array of bytes.
I want to call the function and save the PDF file, but after saving I can't open it; it's broken. I can't understand why. I tried different ways to create the PDF. I can take the array and convert it to a file with an online converter, and that works, but when I create the PDF file using Node code it's always broken. I tried internal Node modules like fs, and external ones like pdfkit.
const https = require('https');
const fs = require('fs');
const PDFDocument = require('pdfkit');

const options = {
  host: 'uek9w0hztc.execute-api.eu-north-1.amazonaws.com',
  path: '/pdfmaker',
  method: 'POST',
  headers: {
    url: 'https://www.linkedin.com'
  }
}

const req = https.request(options, res => {
  let data = [];

  const headerDate = res.headers && res.headers.date ? res.headers.date : 'no response date';
  console.log('Status Code:', res.statusCode);
  console.log('Date in Response header:', headerDate);

  res.on('data', chunk => {
    data.push(chunk);
  });

  res.on('end', () => {
    console.log('Response ended: ');
    // fs.writeFileSync('index.pdf', Buffer.from(data));
    // fs.writeFileSync("index_v2.pdf", Buffer.from(data), 'binary', (err) => {
    //   if(err) {
    //     return console.log(err);
    //   }
    //   console.log("The file was saved!");
    // });
    // const doc = new PDFDocument();
    // doc.pipe(fs.createWriteStream('output.pdf'));

    let writeStream = fs.createWriteStream('pdf123.pdf')
    writeStream.once('open', (fd) => {
      writeStream.write(new Buffer.from(data, 'binary'))
      writeStream.on('finish', () => {
        console.log('wrote all data to file');
      });
      writeStream.end()
    })
  });
}).on('error', err => {
  console.log('Error: ', err.message);
});

req.end();
I expect someone can give me a hint about where the problem is.
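For reference, a minimal sketch of the chunk-handling pattern used in other answers on this page: data here is an array of Buffer chunks, so Buffer.concat is the usual way to join the raw bytes before writing them out:

res.on('end', () => {
  // data is an array of Buffer chunks received from the response;
  // concatenate the raw bytes and write them in one go.
  const pdfBuffer = Buffer.concat(data);
  fs.writeFile('pdf123.pdf', pdfBuffer, err => {
    if (err) return console.log(err);
    console.log('wrote all data to file');
  });
});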

NodeJS - prompt download CSV file

I'm trying to get, after some promises have been executed, a CSV result back together with a status response containing details.
The response does get me the data for the CSV, but I cannot seem to get the browser to download this data into a CSV file.
router.post('/getSalesOrders', function (request, response) {
  var data = request.body;
  salesOrderActions.retrieveSalesOrders(data) //
    .then(function (result) {
      response.setHeader('Content-disposition', 'attachment; filename=testing.csv');
      response.set('Content-Type', 'text/csv');
      response.json(result[0].message).send(result[0].file);
    })
    .catch(function (err) {
      console.log(err);
      if (err.statusCode) {
        response.json(err);
      }
      else {
        var error = output.getCriticalErrorResult(c.titles.SERVICE_CRITICAL_ERROR, c.messages.UNKNOWN_ERROR, err.message);
        response.json(error);
      }
    });
});
My result object gets created in the salesOrderActions (I am using the npm package json2csv here):
var fields = ['id', .....];
var csv = csvParser({ data: unmatchedLines, fields: fields });

return {
  file: csv,
  message: output.getSuccessResult(
    titles.SALES_ORDER_SUCCESS_RETRIEVAL,
    salesDataForModel.identifier
  )
}
Looking at the response in the browser (screenshot omitted), my message isn't sent, it seems, and I do get the CSV data, but not as a file to download.
How can I manage that?
As a side note, my front-end is React.
EDIT
Response with octet-stream headers (screenshot omitted):
Try:
- sending Content-Type before Content-Disposition
- quoting the filename: filename="testing.csv"
Also, HTTP headers are case-insensitive, so it should not make a difference, but you should write Content-Disposition (capital D).
response.set('Content-Type', 'text/csv');
response.setHeader('Content-Disposition', 'attachment; filename="testing.csv"');
If this does not work, you can change the Content-Type to application/octet-stream; this always forces the browser to download the data sent from the server.
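A minimal sketch of that fallback, reusing the handler from the question (result[0].file as the CSV body):

response.set('Content-Type', 'application/octet-stream');
response.setHeader('Content-Disposition', 'attachment; filename="testing.csv"');
response.send(result[0].file); // send only the file body; the headers carry the metadata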
Try this code:
router.post('/getSalesOrders', function (request, response) {
  var data = request.body;
  var fs = require('fs');
  salesOrderActions.retrieveSalesOrders(data) //
    .then(function (result) {
      //**********
      var file = "testing.csv";
      response.setHeader('Content-disposition', 'attachment; filename=testing.csv');
      response.set('Content-Type', 'text/csv');
      var filestream = fs.createReadStream(file);
      filestream.pipe(response); // stream the file into the Express response
      //*********
    })
    .catch(function (err) {
      console.log(err);
      if (err.statusCode) {
        response.json(err);
      }
      else {
        var error = output.getCriticalErrorResult(c.titles.SERVICE_CRITICAL_ERROR, c.messages.UNKNOWN_ERROR, err.message);
        response.json(error);
      }
    });
});
So it turns out it is because I'm doing an Ajax request, which doesn't, by default, prompt the browser to download any files.
What I did in the end:
router.post('/getSalesOrders', function (request, response) {
  var data = request.body;
  salesOrderActions.retrieveSalesOrders(data)
    .then(function (result) {
      response.json(result);
    })
    .catch(function (err) {
      //...
    });
});
And then in my front-end, when receiving the result:
salesOrderService.retrieveSalesOrderData()
  .then(function (result) {
    self.convertAndDownloadCsv(result.unmatchedLines);
  });
convertAndDownloadCsv: function (data) {
  if (data && data.length > 0) {
    var csvData = csvProcessor({ // using the json2csv node package
      data: data,
      quotes: '',
      del: ';'
    });
    var filename = "unmatchedLinesFromSalesOrders.csv";
    var blob = new Blob([csvData], { type: 'text/csv;charset=utf-8;' });
    if (navigator.msSaveBlob) { // IE 10+
      navigator.msSaveBlob(blob, filename);
    } else {
      var link = document.createElement("a");
      if (link.download !== undefined) { // feature detection
        // Browsers that support HTML5 download attribute
        var url = URL.createObjectURL(blob);
        link.setAttribute("href", url);
        link.setAttribute("download", filename);
        link.style.visibility = 'hidden';
        document.body.appendChild(link);
        link.click();
        document.body.removeChild(link);
      }
    }
  }
}

Download Image with Node-Request with URL scan from CSV

Pardon me, as the code is messy; I'm still learning.
I need to download images with URLs scanned from a CSV file. However, I have 2000+ URLs on the same domain, and I don't think the server will let me pull everything in one go, hence I always get an error after some images. Problems I need to solve: 1) how to make sure an image is downloaded completely before the code moves on to the next URL; 2) how to write better code.
Your help is appreciated. Thank you.
var csv = require('fast-csv');
var Promise = require('bluebird');
var fs = require('fs');
var request = require('request');

var path = "test.csv";

var promiseCSV = Promise.method(function(path, options) {
  return new Promise(function(resolve, reject) {
    var records = [];
    csv
      .fromPath(path, options)
      .on('data', function(record) {
        records.push(record);
      })
      .on('end', function() {
        resolve(records);
        console.log('done');
      });
  });
});

var download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){
    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
};

promiseCSV(path).then(function (records) {
  for (i = 0; i < records.length; i++) {
    download(records[i][0], 'img/' + records[i][1], function(){
    });
  }
});
This will throttle your requests to one at a time. Another option is to use throttled-request to limit by requests per unit time.
var i = 0;
promiseCSV(path).then(function (records) {
  next();
  function next(){
    download(records[i][0], 'img/' + records[i][1], function(){
      i++;
      if (i < records.length) next();
    });
  }
});
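For comparison, the same one-at-a-time behavior in async/await style; a sketch that assumes the download helper above (util.promisify treats its last callback argument as error-first):

const { promisify } = require('util');
const downloadAsync = promisify(download); // download(uri, filename, callback)

promiseCSV(path).then(async records => {
  for (const record of records) {
    // awaiting each download keeps the requests strictly sequential
    await downloadAsync(record[0], 'img/' + record[1]);
  }
});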
Also, your records variable is out of scope; you need to move it out in order to access it elsewhere:
var records = []; // moved out to global scope to access it from elsewhere

var promiseCSV = Promise.method(function(path, options) {
  return new Promise(function(resolve, reject) {
    csv
      .fromPath(path, options)
      .on('data', function(record) {
        records.push(record);
      })
      .on('end', function() {
        resolve(records);
        console.log('done');
      });
  });
});

Get File from external URL with fs.readFile

I have links on a page that, when clicked, should open an external docx file. Unfortunately, fs.readFile only reads local paths.
I tried:
app.get('/getfile', function (req, res) {
  var externalURL = 'http://www.examplesite.com/example.docx';
  // var externalURL = req.query.external;
  fs.readFile(externalURL, function(err, data) {
    var fileData = new Buffer(data).toString('base64');
    res.send(fileData);
  });
});
Try this:
const fs = require("fs");
const http = require("http");

const file = fs.createWriteStream("file.docx");
http.get("http://www.example.com/test.docx", response => {
  response.pipe(file);
});
Try node-fetch.
It follows regular client syntax for the fetch command (MDN).
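A small sketch of what that looks like (node-fetch v2 API; res.buffer() collects the body into a Buffer):

const fetch = require('node-fetch');
const fs = require('fs');

fetch('http://www.examplesite.com/example.docx')
  .then(res => res.buffer()) // buffer() is node-fetch-specific
  .then(buf => fs.writeFileSync('example.docx', buf));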
You can use get from the http package provided by the Node.js standard library.
In my case, I created a function that returns a promise that is fulfilled when the file is completely fetched:
getFileFromURL(pathFile: string): Promise<Buffer> {
  return new Promise(function(resolve, reject) { // create a promise
    http.get(pathFile.replace('https', 'http'), function(res) {
      let bufferImage = Buffer.from(''); // create an empty buffer
      res.on('data', function(chunk) { // concatenate each chunk as it is received
        bufferImage = Buffer.concat([bufferImage, chunk]);
      });
      res.on('end', function() {
        resolve(bufferImage); // fulfil the promise
      });
      res.on('error', function(err) {
        reject(err); // reject the promise
      })
    })
  })
}
and finally you can use the function like this:
async function() {
  const fileBuffer = await this.getFileFromURL('your external url');

  // here you can do what you want with your fileBuffer
  // you can for example convert it to a string like this:
  const fileString = fileBuffer.toString('utf-8');

  // or maybe send it in the response like this:
  yourResponse.end(fileBuffer);
  // ....
}
I'd recommend using request to do it. I don't really know if this is what you mean... Try:
const request = require('request');

request.get('http://examplesite.com/example.docx', function(err, res, body) {
  // body is the example.docx data.
});

fs.createWriteStream does not immediately create file?

I have made a simple download-from-HTTP function as below (error handling is omitted for simplification):
function download(url, tempFilepath, filepath, callback) {
  var tempFile = fs.createWriteStream(tempFilepath);
  http.request(url, function(res) {
    res.on('data', function(chunk) {
      tempFile.write(chunk);
    }).on('end', function() {
      tempFile.end();
      fs.renameSync(tempFile.path, filepath);
      return callback(filepath);
    })
  }).end(); // the request isn't sent until end() is called
}
However, as I call download() tens of times asynchronously, it sometimes reports an error on fs.renameSync, complaining that it cannot find the file at tempFile.path.
Error: ENOENT, no such file or directory 'xxx'
I used the same list of URLs to test it, and it failed about 30% of the time. The same list of URLs worked when downloaded one by one.
Testing some more, I found out that the following code
fs.createWriteStream('anypath');
console.log(fs.existsSync('anypath'));
console.log(fs.existsSync('anypath'));
console.log(fs.existsSync('anypath'));
does not always print true; sometimes the first call prints false.
I suspect that too many asynchronous fs.createWriteStream calls cannot guarantee file creation. Is this true? Are there any methods to guarantee file creation?
You shouldn't call write on your tempFile write stream until you've received the 'open' event from the stream. The file won't exist until you see that event.
For your function:
function download(url, tempFilepath, filepath, callback) {
  var tempFile = fs.createWriteStream(tempFilepath);
  tempFile.on('open', function(fd) {
    http.request(url, function(res) {
      res.on('data', function(chunk) {
        tempFile.write(chunk);
      }).on('end', function() {
        tempFile.end();
        fs.renameSync(tempFile.path, filepath);
        return callback(filepath);
      });
    }).end(); // the request isn't sent until end() is called
  });
}
For your test:
var ws = fs.createWriteStream('anypath');
ws.on('open', function(fd) {
  console.log(fs.existsSync('anypath'));
  console.log(fs.existsSync('anypath'));
  console.log(fs.existsSync('anypath'));
});
The accepted answer didn't download some of the last bytes for me.
Here's a Q version that works correctly (but without the temp file).
'use strict';

var fs = require('fs'),
    http = require('http'),
    path = require('path'),
    Q = require('q');

function download(url, filepath) {
  var fileStream = fs.createWriteStream(filepath),
      deferred = Q.defer();

  fileStream.on('open', function () {
    http.get(url, function (res) {
      res.on('error', function (err) {
        deferred.reject(err);
      });

      res.pipe(fileStream);
    });
  }).on('error', function (err) {
    deferred.reject(err);
  }).on('finish', function () {
    deferred.resolve(filepath);
  });

  return deferred.promise;
}

module.exports = {
  'download': download
};
Note that I'm listening for finish on the file stream instead of end on the response.
Here is what I use to get it done:
function download(url, dest) {
  return new Promise((resolve, reject) => {
    http.get(url, (res) => {
      if (res.statusCode !== 200) {
        var err = new Error('File couldn\'t be retrieved');
        err.status = res.statusCode;
        return reject(err);
      }
      var chunks = [];
      res.on('data', (chunk) => {
        chunks.push(chunk); // collect Buffer chunks; no string encoding needed
      }).on('end', () => {
        var stream = fs.createWriteStream(dest);
        stream.on('finish', () => {
          resolve('File Saved !');
        });
        stream.end(Buffer.concat(chunks)); // write everything and close the stream
      });
    }).on('error', (e) => {
      console.log("Error: " + e);
      reject(e.message);
    });
  });
};
I am working on uploading and downloading files (docx, pdf, text, etc.) through the Node.js request-promise and request libraries.
The problem with request-promise is that it doesn't promisify the pipe method from the request package, hence we need to do it the old way.
I was able to come up with a hybrid solution where I can use async/await and new Promise() at the same time. Here is the example:
/**
 * Downloads the file.
 * @param {string} fileId : File id to be downloaded.
 * @param {string} downloadFileName : File name to be downloaded.
 * @param {string} downloadLocation : File location where it will be downloaded.
 * @param {number} version : [Optional] version of the file to be downloaded.
 * @returns {string}: Downloaded file's absolute path.
 */
const getFile = async (fileId, downloadFileName, downloadLocation, version = undefined) => {
  try {
    const url = version ? `http://localhost:3000/files/${fileId}?version=${version}` :
      `${config.dms.url}/files/${fileId}`;
    const fileOutputPath = path.join(downloadLocation, downloadFileName);
    const options = {
      method: 'GET',
      url: url,
      headers: {
        'content-type': 'application/json',
      },
      resolveWithFullResponse: true
    }

    // Download the file and return the full downloaded file path.
    const downloadedFilePath = await writeTheFileIntoDirectory(options, fileOutputPath);
    return downloadedFilePath;
  } catch (error) {
    console.log(error);
  }
};
As you can see in the above getFile method, we are using the latest ES-supported async/await functionality for asynchronous programming. Now, let's look into the writeTheFileIntoDirectory method.
/**
 * Makes the REST API request and writes the file to the location provided.
 * @param {object} options : Request options to make the REST API request.
 * @param {string} fileOutputPath : Downloaded file's absolute path.
 */
const writeTheFileIntoDirectory = (options, fileOutputPath) => {
  return new Promise((resolve, reject) => {
    // Get file downloaded.
    const stream = fs.createWriteStream(fileOutputPath);
    return request
      .get(options.url, options, (err, res, body) => {
        if (res.statusCode < 200 || res.statusCode >= 400) {
          const bodyObj = JSON.parse(body);
          const error = bodyObj.error;
          error.statusCode = res.statusCode;
          return reject(error);
        }
      })
      .on('error', error => reject(error))
      .pipe(stream)
      .on('close', () => resolve(fileOutputPath));
  });
}
The beauty of Node.js is that it supports interoperability between different asynchronous styles. If a method returns a promise, await will kick in and wait for the method to complete.
The above writeTheFileIntoDirectory method will download the file and resolve when the stream is closed successfully; otherwise it will reject with an error.
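A possible usage of the two methods together (the file id and paths here are made-up placeholders):

(async () => {
  // getFile awaits writeTheFileIntoDirectory internally and
  // returns the absolute path of the downloaded file.
  const savedPath = await getFile('12345', 'report.docx', './downloads');
  console.log('saved to', savedPath);
})();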
