Download Image with Node-Request with URL scan from CSV - node.js

Pardon me if the code is messy; I'm still learning.
I need to download images from URLs read out of a CSV file. However, I have 2000+ URLs on the same domain, and I don't think the server will let me pull everything in one go, so I always get an error after some of the images. Problems I need to solve: 1) How do I make sure each image is downloaded completely before the code moves on to the next URL? 2) How can I write this code better?
Your help is appreciated. Thank you.
var csv = require('fast-csv');
var Promise = require('bluebird');
var fs = require('fs');
var request = require('request');

var path = "test.csv";

var promiseCSV = Promise.method(function(path, options) {
  return new Promise(function(resolve, reject) {
    var records = [];
    csv
      .fromPath(path, options)
      .on('data', function(record) {
        records.push(record);
      })
      .on('end', function() {
        resolve(records);
        console.log('done');
      });
  });
});

var download = function(uri, filename, callback) {
  request.head(uri, function(err, res, body) {
    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
};

promiseCSV(path).then(function(records) {
  for (i = 0; i < records.length; i++) {
    download(records[i][0], 'img/' + records[i][1], function() {
    });
  }
});

This will throttle your requests to one at a time. Another option is to use the throttled-request module to limit the number of requests per unit of time.
var i = 0;
promiseCSV(path).then(function(records) {
  next();
  function next() {
    download(records[i][0], 'img/' + records[i][1], function() {
      i++;
      if (i < records.length) next();
    });
  }
});
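If the server still rejects bursts of requests even when they run one at a time, here is a minimal sketch of the same sequential loop with a fixed pause between downloads; the 500 ms delay is an assumed value, so tune it for your server (or use throttled-request as mentioned above):
var DELAY_MS = 500; // assumed pause between downloads; adjust as needed
var i = 0;
promiseCSV(path).then(function(records) {
  next();
  function next() {
    download(records[i][0], 'img/' + records[i][1], function() {
      i++;
      if (i < records.length) {
        setTimeout(next, DELAY_MS); // wait before starting the next download
      }
    });
  }
});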
Also, your records variable is scoped inside the promise executor; if you need to access it from somewhere other than the .then callback, move it out:
var records = []; // moved out to a wider scope so it can be accessed from elsewhere

var promiseCSV = Promise.method(function(path, options) {
  return new Promise(function(resolve, reject) {
    csv
      .fromPath(path, options)
      .on('data', function(record) {
        records.push(record);
      })
      .on('end', function() {
        resolve(records);
        console.log('done');
      });
  });
});

Related

Calling an API in parallel using Node

I want to stress test my API and call it multiple times in parallel to get an idea of performance.
I am almost there with the code, and as someone new to JS, I am hoping to fix my code so that it will:
Call each request in parallel
Log when each individual request is finished
Log the time taken by each individual request
At the moment, the function waits until all calls have finished before it logs the data. I assume this is because I am using Promise.all()?
import request from 'request';

var requestAsync = function(url) {
  return new Promise((resolve, reject) => {
    var req = request(url, (err, response, body) => {
      if (err) return reject(err);
      resolve(JSON.parse(body));
    });
  });
};

const urls = ["url1", "url2", "url3", "url4"];

var getParallelData = async function() {
  // transform requests into Promises, await all
  var startTime = performance.now();
  try {
    var data = await Promise.all(urls.map(requestAsync));
  } catch (err) {
    console.error("ERROR: " + err);
  }
  var endTime = performance.now();
  console.log(data);
  console.log(`Call took ${(endTime - startTime) / 1000} seconds`);
};

getParallelData();
Edit: I have been continually trying to evolve this to suit my needs, and this is what I have now. I think it's doing what I expect, as long as it's not waiting for the first request to finish before starting the next.
import request from 'request';

var requestAsync = function(url) {
  return new Promise((resolve, reject) => {
    var req = request.get({
      url: url,
      time: true
    }, function(err, response) {
      if (err) return reject(err);
      console.log('Request time in seconds', (response.elapsedTime / 1000));
      resolve(response);
    });
  });
};

const urls = ["url1", "url2", "url3", "url4"];

var getParallelData = async function() {
  for (const url of urls) {
    requestAsync(url);
  }
};

getParallelData();
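For the original goal (fire all requests in parallel, log each one's own timing as soon as it finishes, and still know when everything is done), a minimal sketch along these lines should work, assuming the same request module and the same placeholder URLs:
import request from 'request';

const urls = ["url1", "url2", "url3", "url4"];

var requestAsync = function(url) {
  return new Promise((resolve, reject) => {
    request.get({ url: url, time: true }, (err, response, body) => {
      if (err) return reject(err);
      // log as soon as this individual request completes
      console.log(`${url} finished in ${response.elapsedTime / 1000} seconds`);
      resolve(body);
    });
  });
};

var getParallelData = async function() {
  // all requests start immediately; Promise.all only waits for them to settle
  const results = await Promise.all(urls.map(requestAsync));
  console.log('All requests finished:', results.length);
};

getParallelData();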

promise gives undefined result in callback

fileUpload.js
var Uploader = require('s3-image-uploader');
var express = require('express');
var config = require('./../config.js');
var Promise = require('promise');

var uploader = new Uploader({
  aws: {
    key: config.awsKey,
    secret: config.awsSecret
  },
  websockets: false
});

exports.fileUpload = function(fileName) {
  new Promise(function(resolve, reject) {
    uploader.upload({
      fileId: 'someUniqueIdentifier',
      bucket: 'quflip',
      source: './public/images/' + fileName,
      name: fileName
    },
    function(data) { // success
      console.log('upload success:', resolve);
      resolve(data);
    },
    function(errMsg, errObject) { // error
      reject(false);
    });
  });
};
logintest.js
var awsUpload = require('./../config/fileUpload.js');
var userData = function(req, res) {
  console.log("true/false" + awsUpload.fileUpload(profile_image));
What is happening here is that I am getting an undefined value in the console.log, which means the callback is not passing the data back. How can I pass the data along effectively for further use?
You need to chain onto the promise with .then and do the logging there. First, make sure you return the promise:
return new Promise(function(resolve, reject) {
Then, you can do:
var userData = function (req, res) {
  awsUpload.fileUpload(profile_image).then(data => {
    console.log("true/false" + data);
  });
}
If you are working in an environment that supports async/await, you can do this a bit more simply:
var userData = async function (req, res) {
  console.log("true/false" + await awsUpload.fileUpload(profile_image));
};
Keep in mind that this will return a Promise.
I don't see where profile_image gets set, though.
If you want your function to actually return the promise then change this:
exports.fileUpload = function (fileName) {
  new Promise(function(resolve, reject) {
    // ...
  });
};
to this:
exports.fileUpload = function (fileName) {
  return new Promise(function(resolve, reject) {
    // ...
  });
};
Without the return statement the promise that you're creating is not going to get returned by your function.
But even then you won't be able to use it like this:
console.log("true/false" + awsUpload.fileUpload(profile_image));
You will have to use something like this:
awsUpload.fileUpload(profile_image).then(result => {
  console.log("true/false" + result);
});
Or this, if you're using async/await and you're inside async function:
console.log("true/false" + await awsUpload.fileUpload(profile_image));

Get File from external URL with fs.readFile

I have links on a page that, when clicked, should open an external .docx file. Unfortunately, fs.readFile only reads local paths.
I tried:
app.get('/getfile', function (req, res) {
  var externalURL = 'http://www.examplesite.com/example.docx';
  // var externalURL = req.query.external;
  fs.readFile(externalURL, function(err, data) {
    var fileData = new Buffer(data).toString('base64');
    res.send(fileData);
  });
});
Try this:
const http = require("http");
const fs = require("fs"); // needed for createWriteStream below

const file = fs.createWriteStream("file.docx");
http.get("http://www.example.com/test.docx", response => {
  response.pipe(file);
});
Try node-fetch.
It follows regular client syntax for the fetch command (MDN).
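For example, a minimal sketch with node-fetch (v2, where res.body is a Node readable stream) that saves the remote file to disk might look like this; the URL and file name are just placeholders:
const fetch = require('node-fetch');
const fs = require('fs');

fetch('http://www.examplesite.com/example.docx')
  .then(res => {
    // stream the response body straight into a local file
    const dest = fs.createWriteStream('example.docx');
    res.body.pipe(dest);
  })
  .catch(err => console.error(err));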
You can use get from the http package provided by the Node.js standard library.
In my case I created a function that returns a promise, which is fulfilled when the file has been completely fetched:
getFileFromURL(pathFile: string): Promise<Buffer> {
  return new Promise(function(resolve, reject) { // create a promise
    http.get(pathFile.replace('https', 'http'), function(res) {
      let bufferImage = Buffer.from(''); // create an empty buffer

      res.on('data', function(chunk) { // listen to the 'data' event and concatenate each chunk as it is received
        bufferImage = Buffer.concat([bufferImage, chunk]);
      });

      res.on('end', function() {
        resolve(bufferImage); // fulfil the promise
      });

      res.on('error', function(err) {
        reject(err); // reject the promise
      });
    });
  });
}
And finally you can use the function like this:
async function() {
  const fileBuffer = await this.getFileFromURL('your external url');
  // here you can do whatever you want with your **fileBuffer**
  // you can, for example, convert it to a string like this:
  const fileString = fileBuffer.toString('utf-8');
  // or maybe send it in the response like this:
  yourResponse.end(fileBuffer);
  // ....
}
I'd recommend using request to do it.
I'm not entirely sure this is what you mean, but try:
const request = require('request');

request.get('http://examplesite.com/example.docx', function(err, res, body) {
  // body is the example.docx data.
});
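Since the original goal was to serve the external .docx through an Express route, one option (a sketch only, assuming the request module from the answers above and the same example URL) is to pipe the remote response straight into the Express response instead of reading a file from disk:
const express = require('express');
const request = require('request');

const app = express();

app.get('/getfile', function (req, res) {
  const externalURL = 'http://www.examplesite.com/example.docx';
  // stream the remote file directly to the client
  request.get(externalURL)
    .on('error', err => res.status(500).send(err.message))
    .pipe(res);
});

app.listen(3000);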

process out of memory on big file load to mongo in node

I'm trying to parse large .csv files and save the data in MongoDB, keeping the values as strings. So I'm piping the .csv file data through a parser and then writing the data to MongoDB.
I tried parsing the .csv to a JSON file and using mongoimport to upload it to MongoDB, which worked fine, but the values weren't kept as strings and you can't set values when using mongoimport.
I also don't want to raise Node's memory limit; I'd rather use as little memory as possible.
My problem at the moment is that the program runs out of memory and throws:
FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - process out of memory
var fs = require('fs');
var parse = require('csv-parse');
var async = require('async');
var MongoClient = require('mongodb').MongoClient;

// fileData and url are assumed to be defined elsewhere
var queue, stream;
var headers = fileData.subText.meta.fields;

MongoClient.connect(url, function (err, db) {
  if (err) throw err;

  var collection = db.collection(fileData.collectionName);
  var parser = parse({columns: fileData.subText.meta.fields, delimiter: fileData.delimiter});

  stream = fs.createReadStream("filepath" + fileData.name).pipe(parser);

  var data;
  queue = async.queue(function (task, next) {
    data = task.data;
    collection.insert(data, function (err, result) {
      if (err) {
        db.close();
        console.log(err);
      } else {
        next();
      }
    });
  }, 50);

  stream.on('data', function (data) {
    stream.pause();
    queue.push({
      data: data
    });
  });

  queue.drain = function () {
    stream.resume();
  };

  stream.on('end', function () {
    return queue.drain = function () {
      db.close();
      return console.log('Process Done');
    };
  });
});
I got the idea from this link: https://bassnutz.wordpress.com/2012/09/09/processing-large-files-with-nodejs/
Any help would be appreciated.
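For reference, a common way to keep memory bounded in this kind of pipeline is to pause the parser stream while a batch of rows is being written and resume it afterwards. The sketch below is an illustration only, not a drop-in fix; it reuses the fileData, url, and parser setup from above and assumes insertMany with a batch size of 1000 rows:
MongoClient.connect(url, function (err, db) {
  if (err) throw err;

  var collection = db.collection(fileData.collectionName);
  var parser = parse({columns: fileData.subText.meta.fields, delimiter: fileData.delimiter});
  var stream = fs.createReadStream("filepath" + fileData.name).pipe(parser);

  var batch = [];
  var BATCH_SIZE = 1000; // assumed batch size; tune it for your documents

  stream.on('data', function (row) {
    batch.push(row);
    if (batch.length >= BATCH_SIZE) {
      stream.pause(); // stop reading until this batch is persisted
      collection.insertMany(batch, function (err) {
        if (err) { db.close(); return console.log(err); }
        batch = [];
        stream.resume();
      });
    }
  });

  stream.on('end', function () {
    if (batch.length === 0) {
      db.close();
      return console.log('Process Done');
    }
    // flush whatever is left in the final partial batch
    collection.insertMany(batch, function (err) {
      if (err) console.log(err);
      db.close();
      console.log('Process Done');
    });
  });
});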

Downloading images with node.js [closed]

I'm trying to write a script to download images using node.js. This is what I have so far:
var maxLength = 10 // 10mb

var download = function(uri, callback) {
  http.request(uri)
    .on('response', function(res) {
      if (res.headers['content-length'] > maxLength*1024*1024) {
        callback(new Error('Image too large.'))
      } else if (!~[200, 304].indexOf(res.statusCode)) {
        callback(new Error('Received an invalid status code.'))
      } else if (!res.headers['content-type'].match(/image/)) {
        callback(new Error('Not an image.'))
      } else {
        var body = ''
        res.setEncoding('binary')
        res
          .on('error', function(err) {
            callback(err)
          })
          .on('data', function(chunk) {
            body += chunk
          })
          .on('end', function() {
            // What about Windows?!
            var path = '/tmp/' + Math.random().toString().split('.').pop()
            fs.writeFile(path, body, 'binary', function(err) {
              callback(err, path)
            })
          })
      }
    })
    .on('error', function(err) {
      callback(err)
    })
    .end();
}
I, however, want to make this more robust:
Are there libraries that do this and do this better?
Is there a chance that response headers lie (about length, about content type)?
Are there any other status codes I should care about? Should I bother with redirects?
I think I read somewhere that binary encoding is going to be deprecated. What do I do then?
How can I get this to work on Windows?
Any other ways you can make this script better?
Why: for a feature similar to imgur where users can give me a URL, I download that image, and rehost the image in multiple sizes.
I'd suggest using the request module. Downloading a file is as simple as the following code:
var fs = require('fs'),
    request = require('request');

var download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){
    console.log('content-type:', res.headers['content-type']);
    console.log('content-length:', res.headers['content-length']);

    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
};

download('https://www.google.com/images/srpr/logo3w.png', 'google.png', function(){
  console.log('done');
});
I ran into this problem a few days ago. For a pure Node.js answer, I would suggest using Stream to merge the chunks together.
var http = require('http'),
    Stream = require('stream').Transform,
    fs = require('fs');

var url = 'http://www.google.com/images/srpr/logo11w.png';

http.request(url, function(response) {
  var data = new Stream();

  response.on('data', function(chunk) {
    data.push(chunk);
  });

  response.on('end', function() {
    fs.writeFileSync('image.png', data.read());
  });
}).end();
The newest Node versions won't work well with binary strings, so merging chunks with strings is not a good idea when working with binary data.
Just be careful when using data.read(): it empties the stream for the next read() operation. If you want to use the data more than once, store it somewhere.
You can use Axios (a promise-based HTTP client for Node.js) to download images in the order of your choosing in an asynchronous environment:
npm i axios
Then, you can use the following basic example to begin downloading images:
const fs = require('fs');
const axios = require('axios');
/* ============================================================
Function: Download Image
============================================================ */
const download_image = (url, image_path) =>
  axios({
    url,
    responseType: 'stream',
  })
    .then(
      response =>
        new Promise((resolve, reject) => {
          response.data
            .pipe(fs.createWriteStream(image_path))
            .on('finish', () => resolve({ status: true, error: '' }))
            .on('error', e => reject(e));
        }),
    )
    .catch(error => ({ status: false, error: error.toString() }));
/* ============================================================
Download Images in Order
============================================================ */
(async () => {
  let example_image_1 = await download_image('https://example.com/test-1.png', 'example-1.png');
  console.log(example_image_1.status); // true
  console.log(example_image_1.error); // ''

  let example_image_2 = await download_image('https://example.com/does-not-exist.png', 'example-2.png');
  console.log(example_image_2.status); // false
  console.log(example_image_2.error); // 'Error: Request failed with status code 404'

  let example_image_3 = await download_image('https://example.com/test-3.png', 'example-3.png');
  console.log(example_image_3.status); // true
  console.log(example_image_3.error); // ''
})();
var fs = require('fs'),
    http = require('http'),
    https = require('https');

var Stream = require('stream').Transform;

var downloadImageToUrl = (url, filename, callback) => {
  var client = http;
  if (url.toString().indexOf("https") === 0) {
    client = https;
  }

  client.request(url, function(response) {
    var data = new Stream();

    response.on('data', function(chunk) {
      data.push(chunk);
    });

    response.on('end', function() {
      fs.writeFileSync(filename, data.read());
      if (callback) callback(); // notify the caller once the file has been written
    });
  }).end();
};

downloadImageToUrl('https://www.google.com/images/srpr/logo11w.png', 'public/uploads/users/abc.jpg');
If you want to track download progress, try this:
var fs = require('fs');
var request = require('request');
var progress = require('request-progress');

module.exports = function (uri, path, onProgress, onResponse, onError, onEnd) {
  progress(request(uri))
    .on('progress', onProgress)
    .on('response', onResponse)
    .on('error', onError)
    .on('end', onEnd)
    .pipe(fs.createWriteStream(path));
};
How to use it:
var download = require('../lib/download');

download("https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png", "~/download/logo.png", function (state) {
  console.log("progress", state);
}, function (response) {
  console.log("status code", response.statusCode);
}, function (error) {
  console.log("error", error);
}, function () {
  console.log("done");
});
Note: you should install both the request and request-progress modules using:
npm install request request-progress --save
This is an extension to Cezary's answer. If you want to download the image to a specific directory, use this. Also, use const instead of var; it's safer that way.
const fs = require('fs');
const request = require('request');

const download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){
    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
};

download('https://www.google.com/images/srpr/logo3w.png', './images/google.png', function(){
  console.log('done');
});
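One caveat: the write stream will fail with ENOENT if the ./images directory does not already exist, so you may want to create it first. A minimal sketch:
const fs = require('fs');

// create the target directory (and any missing parents) before downloading
fs.mkdirSync('./images', { recursive: true });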
Building on the above, if anyone needs to handle errors in the write/read streams, I used this version. Note the stream.read() in case of a write error; it's required so we can finish reading and trigger close on the read stream.
var download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){
    if (err) callback(err, filename);
    else {
      var stream = request(uri);
      stream.pipe(
        fs.createWriteStream(filename)
          .on('error', function(err){
            callback(err, filename);
            stream.read();
          })
      )
      .on('close', function() {
        callback(null, filename);
      });
    }
  });
};
