Downloads are not complete while creating write stream - node.js

I have a function with a callback to make sure the image download is finished.
If I have multiple images to download (around 11), it downloads all of them, but the file sizes are not correct, so some are corrupted.
I thought using the callback would make sure the downloads are completely done before moving on.
This is the code I am using:
imageExtractor(imageId, function () {
  zipmaker(imageId, function () {
  });
});

imageIndex = 0;
function imageExtractor(imageId, callback) {
  images.ImageIds.forEach(function (image) {
    imageIndex++;
    // Here I call the download image function
    downloadImageFromURLAndSave(imageURL, imageId, category, callback);
    if (images.length === seriesIndex) {
      callback();
    }
  });
}
function downloadImageFromURLAndSave(imageURL, imageId, category, callback) {
  console.log("Download has started.");
  console.log(imageURL);
  request
    .get(imageURL)
    .pipe(fs.createWriteStream(__dirname + category + '/digital/' + imageId)
      .on('finish', function () {
        console.log("Download has finished for " + imageId + " Congratulations.");
        callback();
      }));
}
It seems the next function (zipmaker) is called before the images are finished downloading, even though I have a callback, as you can see.

If I were you, here is how I would structure your code:
index.js:
var fs = require('fs'),
    request = require('request');
var config = require('./test.json');

var download = function (uri, filename, callback) {
  request.head(uri, function (err, res, body) {
    console.log('content-type:', res.headers['content-type']);
    console.log('content-length:', res.headers['content-length']);
    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
};

for (var key in config) {
  if (config.hasOwnProperty(key)) {
    download(key, config[key], function () {
      console.log('done');
    });
  }
}
test.json:
{
  "https://www.google.com/images/srpr/logo3w.png": "google.png",
  "http://www.jqueryscript.net/images/Simplest-Responsive-jQuery-Image-Lightbox-Plugin-simple-lightbox.jpg": "panda.jpg"
}
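The index.js loop above fires a per-file callback, but it does not tell you when every download has finished, which is what the original question needs before zipping. A minimal sketch of a completion counter, assuming the same download helper and config object (the zipping step itself is left as a placeholder):

var pending = Object.keys(config).length;

for (var key in config) {
  if (config.hasOwnProperty(key)) {
    download(key, config[key], function () {
      console.log('done');
      pending--;
      if (pending === 0) {
        // Every file is fully written to disk; it is safe to build the zip here.
        console.log('all downloads finished');
      }
    });
  }
}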
Hope this helps!

Related

How to force all stream done before we continue with other task?

I'm using Node.js to write a function that downloads an image from repository A and then uploads it to repository B. I want to force all streams to complete before it continues with other tasks. I have tried this way, but I have not been successful.
Example: When I run it, execution enters getImages, but before getImages has completed it loops through tasks A -> B -> C, and only then does getImages finish. How can I force all streams to complete before continuing with the other tasks? In other words, I want getImages to be finished before running A -> B -> C.
PS: I am using pkgCloud to upload the image to IBM Object Storage.
function parseImage(imgUrl) {
  var loopCondition = true;
  while (loopCondition) {
    getImages(imgUrl, imgName);
    // Do task A
    // Do task B
    // Do task C
  }
}
function getImages(imgUrl, imgName) {
  // Download image from A repository
  const https = require('https');
  var imgSrc;
  var downloadStream = https.get(imgUrl, function (response) {
    // Upload image to B repository.
    var uploadStream = storageClient.upload({ container: 'images', remote: imgName });
    uploadStream.on('error', function (error) {
      console.log(error);
    });
    uploadStream.on('success', function (file) {
      console.log("upload Stream>>>>>>>>>>>>>>>>>Done");
      console.log(file.toJSON());
      imgSrc = "https://...";
    });
    response.pipe(uploadStream);
  });
  downloadStream.on('error', function (error) {
    console.log(error);
  });
  downloadStream.on('finish', function () {
    console.log("download Stream>>>>>>>>>>>>>>>>>Done");
  });
  return imgSrc;
}
You should understand the difference between sync and async functions. getImages executes async code, so if you want to use its result you have to pass a callback that will be called when the streaming finishes. Something like this:
function parseImage(imgUrl) {
  getImages(imgUrl, imgName, function (err, imgSrc) {
    if (imgSrc) {
      // Do task A
    } else {
      // Do task B
    }
  });
}

function getImages(imgUrl, imgName, callback) {
  // Download image from A repository
  const https = require('https');
  var imgSrc;
  var downloadStream = https.get(imgUrl, function (response) {
    // Upload image to B repository.
    var uploadStream = storageClient.upload({ container: 'images', remote: imgName });
    uploadStream.on('error', function (error) {
      console.log(error);
      return callback(error);
    });
    uploadStream.on('success', function (file) {
      console.log("upload Stream>>>>>>>>>>>>>>>>>Done");
      console.log(file.toJSON());
      imgSrc = "https://...";
      return callback(null, imgSrc);
    });
    response.pipe(uploadStream);
  });
  downloadStream.on('error', function (error) {
    console.log(error);
    return callback(error);
  });
  downloadStream.on('finish', function () {
    console.log("download Stream>>>>>>>>>>>>>>>>>Done");
  });
}
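If you later want to consume getImages with async/await, you can wrap the callback version above in a promise. A minimal sketch using util.promisify (Node 8+), assuming the Node-style (err, imgSrc) callback shown above:

const util = require('util');
const getImagesAsync = util.promisify(getImages); // getImages(imgUrl, imgName, callback)

async function parseImage(imgUrl, imgName) {
  try {
    const imgSrc = await getImagesAsync(imgUrl, imgName);
    // Do task A with imgSrc
  } catch (err) {
    // Do task B on failure
    console.log(err);
  }
}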

S3 upload sending events is working locally with deferred.notify() but not on server

I am sending an object to S3 for upload, and locally I get back an 'httpUploadProgress' event. This triggers a .progress call on the function, which then updates a record with its progress. It works fine locally but not on the server. Any ideas would be appreciated; I've included two snippets of code. I'm using Google servers, and I'm not sure if it's an issue with the code or the server settings :/
s3.uploadVideo = function (filepath, videoName, publisher_id) {
  var deferred = q.defer();
  var body = fs.createReadStream(filepath);
  var s3obj = new AWS.S3({
    params: {
      Bucket: transcodeConfig.NonTranscodedVideoBucket,
      Key: publisher_id + '/' + videoName
    }
  });
  s3obj.upload({ Body: body })
    .on('httpUploadProgress', function (evt) {
      deferred.notify(evt);
      return;
    })
    .send(function (err, data) {
      if (!err) {
        deferred.resolve(data);
        return;
      } else {
        deferred.reject(err);
        return;
      }
    });
  //deferred.resolve({})
  return deferred.promise;
}
aws_api.s3.uploadVideo(file.path, fileName, publisher_id).progress(function (progress) {
  return models.videos.findOneAndUpdate({ _id: trackingId }, { uploadProgress: progress, file_type: fileExtension }, function (err, data) {
    if (err) {
      return next(err);
    } else {
      return data;
    }
  });
});

Node async.series trouble

While building a fairly complex scraper I stumbled upon a problem with the control flow of my code.
What's going on in the code below:
1) request a URL
2) scrape NEWURL from the results
3) pass it to the Readability API as the first async function
4) here comes the trouble: I never get to the next async function, which saves readabilityData to the DB
How do I solve this problem?
I'm new to JS, so please feel free to point out any issues with my code.
request(URL, function (error, response, html) {
  if (!error) {
    var $ = cheerio.load(html);
    NEWURL = data.find('a').attr('href');
    readabilityData = {}
    var articleUrl = 'https://readability.com/api/content/v1/parser?url=' + NEWURL + token;
    async.series([
      function () {
        request(articleUrl, function (error, response, html) {
          if (!error) {
            readabilityData = response.toJSON();
          }
        });
      },
      function (readabilityData) {
        Article.findOne({
          "link": url // here's the
        }, function (err, link) {
          if (link) {
            console.log(link)
          } else {
            var newArticle = new Article({
              // write stuff to DB
            });
            newArticle.save(function (err, data) {
              // save it
            });
          }
        });
      }
    ],
    function (err) {
      console.log('all good — data written')
    });
  }
});
You need to call the callback parameter that's passed into the functions of the async.series call when each function's work is complete. That's how async.series knows that it can proceed to the next function. And don't redefine readabilityData as a function parameter when you're trying to use it to share data across the functions.
So something like:
var readabilityData = {};

async.series([
  function (callback) {
    request(articleUrl, function (error, response, html) {
      if (!error) {
        readabilityData = response.toJSON();
      }
      callback(error);
    });
  },
  function (callback) {
    Article.findOne({
      "link": url // here's the
    }, function (err, link) {
      if (link) {
        console.log(link);
        callback();
      } else {
        var newArticle = new Article({
          // write stuff to DB
        });
        newArticle.save(function (err, data) {
          // save it
          callback(err);
        });
      }
    });
  }
],
function (err) {
  console.log('all good — data written')
});
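As an aside, if you would rather not share readabilityData through a closure at all, async.waterfall passes each task's result into the next task's arguments. A sketch under the same assumptions as the question (request, Article, url, and articleUrl already defined):

async.waterfall([
  function (callback) {
    request(articleUrl, function (error, response, html) {
      callback(error, error ? null : response.toJSON());
    });
  },
  function (readabilityData, callback) {
    Article.findOne({ "link": url }, function (err, link) {
      if (err) return callback(err);
      if (link) return callback(null, link);
      var newArticle = new Article({
        // write stuff to DB, e.g. fields pulled from readabilityData
      });
      newArticle.save(callback);
    });
  }
],
function (err, result) {
  if (err) return console.log(err);
  console.log('all good, data written');
});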

Async parallel in NodeJS

I'd like to know how to execute async functions. My goal is to upload files and compress them... but it doesn't work, because my files are not yet uploaded when I write the .zip...
So I get an empty .zip file...
var asyncTasks = [];
selectedPhotos.forEach(function (id) {
  asyncTasks.push(function (callback) {
    var newFileName = pathDir + '/' + id + '.jpg';
    api.media(id, function (err, media, remaining, limit) {
      gm()
        .in('-page', '+0+0')
        .in('./public/images/instabox.jpg')
        .in('-page', '+10+10')
        .in(media.images.thumbnail.url)
        .mosaic()
        .minify()
        .write(newFileName, function (err) {
          if (!err) console.log('done');
          if (err) console.log(err);
        });
    });
    callback();
  });
});

async.parallel(asyncTasks, function () {
  var admZip = new AdmZip();
  var pathDir = './public/uploads/' + reference;
  admZip.addLocalFolder(pathDir);
  var willSendthis = admZip.toBuffer();
  admZip.writeZip('./public/uploads/' + reference + '.zip');
});
You're calling the callback() too early. Move callback(); inside of your .write() callback like so:
.write(newFileName, function (err) {
  if (!err) console.log('done');
  if (err) console.log(err);
  callback(err);
});
When you execute the callback, that signifies that the task is finished. So without the above change, you're basically telling async that you're done immediately.
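As a side note, async.each can replace the hand-built asyncTasks array and manages the per-item callbacks for you. A sketch reusing the names from the question (selectedPhotos, api, gm, and pathDir are assumed to exist as shown there):

async.each(selectedPhotos, function (id, callback) {
  var newFileName = pathDir + '/' + id + '.jpg';
  api.media(id, function (err, media, remaining, limit) {
    if (err) return callback(err);
    gm()
      .in('-page', '+0+0')
      .in('./public/images/instabox.jpg')
      .in('-page', '+10+10')
      .in(media.images.thumbnail.url)
      .mosaic()
      .minify()
      .write(newFileName, callback); // callback fires only once the file is written
  });
}, function (err) {
  if (err) return console.log(err);
  // All images are on disk, so the zip can be built here.
});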

Move File in ExpressJS/NodeJS

I'm trying to move uploaded file from /tmp to home directory using NodeJS/ExpressJS:
fs.rename('/tmp/xxxxx', '/home/user/xxxxx', function (err) {
  if (err) res.json(err);
  console.log('done renaming');
});
But it doesn't work and no error is encountered. When the new path is also in /tmp, it works.
I'm using Ubuntu, and home is on a different partition. Any fix?
Thanks
Yes, fs.rename does not move files between two different disks/partitions. This is the correct behaviour: fs.rename provides functionality identical to rename(2) on Linux.
Read the related issue posted here.
To get what you want, you would have to do something like this:
var source = fs.createReadStream('/path/to/source');
var dest = fs.createWriteStream('/path/to/dest');
source.pipe(dest);
source.on('end', function() { /* copied */ });
source.on('error', function(err) { /* error */ });
Another way is to use fs.readFile and fs.writeFile; fs.unlink in the callback will then remove the temp file from the tmp directory.
var oldPath = req.files.file.path;
var newPath = ...;

fs.readFile(oldPath, function (err, data) {
  fs.writeFile(newPath, data, function (err) {
    fs.unlink(oldPath, function () {
      if (err) throw err;
      res.send("File uploaded to: " + newPath);
    });
  });
});
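On newer Node versions (8.5+), fs.copyFile followed by fs.unlink does the same job without buffering the whole file in memory. A sketch, assuming the same oldPath, newPath, and Express res as above:

fs.copyFile(oldPath, newPath, function (err) {
  if (err) throw err;
  fs.unlink(oldPath, function (unlinkErr) {
    if (unlinkErr) throw unlinkErr;
    res.send("File uploaded to: " + newPath);
  });
});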
Updated ES6 solution ready to use with promises and async/await:
function moveFile(from, to) {
  const source = fs.createReadStream(from);
  const dest = fs.createWriteStream(to);

  return new Promise((resolve, reject) => {
    source.on('end', resolve);
    source.on('error', reject);
    source.pipe(dest);
  });
}
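One caveat with resolving on the read stream's 'end' event: it can fire before the write stream has flushed everything to disk. If that matters, listening for the destination's 'finish' event, or using stream.pipeline (Node 10+), is safer. A sketch that also removes the source once the copy has fully flushed:

const { pipeline } = require('stream');
const fs = require('fs');

function moveFileSafer(from, to) {
  return new Promise((resolve, reject) => {
    pipeline(fs.createReadStream(from), fs.createWriteStream(to), (err) => {
      if (err) return reject(err);
      // The destination has emitted 'finish'; now drop the source file.
      fs.unlink(from, (unlinkErr) => (unlinkErr ? reject(unlinkErr) : resolve()));
    });
  });
}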
This example is taken from Node.js in Action:
A move() function that renames, if possible, or falls back to copying
var fs = require('fs');

module.exports = function move(oldPath, newPath, callback) {
  fs.rename(oldPath, newPath, function (err) {
    if (err) {
      if (err.code === 'EXDEV') {
        copy();
      } else {
        callback(err);
      }
      return;
    }
    callback();
  });

  function copy() {
    var readStream = fs.createReadStream(oldPath);
    var writeStream = fs.createWriteStream(newPath);

    readStream.on('error', callback);
    writeStream.on('error', callback);

    readStream.on('close', function () {
      fs.unlink(oldPath, callback);
    });

    readStream.pipe(writeStream);
  }
}
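A quick usage sketch, assuming the module above is saved as move.js (the paths here are hypothetical):

var move = require('./move');

// Hypothetical source and destination paths.
move('/tmp/upload_1234', '/home/user/uploads/photo.jpg', function (err) {
  if (err) return console.error('move failed:', err);
  console.log('done moving');
});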
