I am trying to write some items I pushed into an array into a JSON file in node.js but I can't figure out how to wait for the array to contain the items before writing the JSON file. As a result the file is always empty. Do i need to have a callback? If so, how? NB:I'm still new to node.js
This is the code below:
var getLinks = require('./news_archive/news_links.js');
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');
var saveNews = './news_archive/news.json';
var jsonObj = [];
var i;
var number_of_links = getLinks.links.length;
for(i=0; i<number_of_links; i++){
//GET ARTICLE LINK FROM link.js
var url = "http://www.times.co.sz/"+getLinks.links[i];
request(url, function(err, resp, body){
var $ = cheerio.load(body);
//GET ARTICLE HEADLINE
var storyHeadline = $('#article_holder h1');
var storyHeadlineText = storyHeadline.text();
//GET DATE POSTED
var datePosted = $('.metadata_time');
var datePostedText = datePosted.text();
//GET ARTICLE REPORTER'S NAME
var reporterName = $('.article_metadata a');
var reporterNameText = reporterName.text();
//GET ARTICLE SUMMARY
var fullStory = $('#article_body span');
var fullStoryText = fullStory.text();
//PUSH ITEMS TO jsonObj ARRAY
jsonObj.push({
id: i,
storyHeadline: storyHeadlineText,
datePosted: datePostedText,
reporterName: reporterNameText,
fullStory: fullStoryText
})
});
} //END for LOOP
//WRITE TO news.json file
fs.writeFile(saveNews, JSON.stringify(jsonObj, null, 4), function(err) {
if(err) {
console.log(err);
} else {
console.log("JSON saved to " + saveNews);
}
});
The issue is that request is asyncronous and you cannot use syncronous loop to iterate through. You can use async lib for that
var getLinks = require('./news_archive/news_links.js');
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');
var saveNews = './news_archive/news.json';
var number_of_links = getLinks.links.length;
var async = require('async');
async.times(number_of_links, function (i, next) {
var url = "http://www.times.co.sz/"+getLinks.links[i];
request(url, function(err, resp, body){
var $ = cheerio.load(body);
//GET ARTICLE HEADLINE
var storyHeadline = $('#article_holder h1');
var storyHeadlineText = storyHeadline.text();
//GET DATE POSTED
var datePosted = $('.metadata_time');
var datePostedText = datePosted.text();
//GET ARTICLE REPORTER'S NAME
var reporterName = $('.article_metadata a');
var reporterNameText = reporterName.text();
//GET ARTICLE SUMMARY
var fullStory = $('#article_body span');
var fullStoryText = fullStory.text();
//PUSH ITEMS TO jsonObj ARRAY
next(err, {
id: i,
storyHeadline: storyHeadlineText,
datePosted: datePostedText,
reporterName: reporterNameText,
fullStory: fullStoryText
});
});
}, function (err, res) {
// do not forget to handle error
fs.writeFile(saveNews, JSON.stringify(res, null, 4), function(err) {
if(err) {
console.log(err);
} else {
console.log("JSON saved to " + saveNews);
}
});
})
Related
I have an API at https://dev-qaboomapi.azurewebsites.net/randmovie. I have a txt file that lists all movie IDs from https://themoviedb.org and one is selected at random and the information is presented at the API address above. What I can't work out is how to get it to check if the result is an english language movie and then try again if it isn't.
I have an If Else statement that checks the result is an english language movie but I can't work out if it isn't how to start the process again.
var appRouter = function (app) {
app.get("/randMovie", function (req, res) {
const fetchURL = require('fetch').fetchUrl;
const fs = require('fs');
const data = fs.readFileSync('./db/MovieIDs.txt') + '';
const splitData = data.split('\n');
const randomNumber = Math.floor(Math.random() * splitData.length);
const line = splitData.splice(randomNumber, 1);
const lineDATA = line.toString()
const api = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxx'
var movieID = lineDATA
var URL = 'https://api.themoviedb.org/3/movie/' + movieID + '?api_key=' + api + "&append_to_response=videos"
fetchURL(URL, function(error, meta, body){
var movieString = body.toString()
var movieJSON = JSON.parse(movieString)
if(movieJSON.original_language == "en") {
res.status(200).send(movieJSON);
} else {
// Not sure what to put here
}
})
});
}
module.exports = appRouter;
I would like it to only return the details of the movie when it is:
if(movieJSON.original_language == "en")
and retry when it isn't.
Here is what you can try, but make sure that function does not run infinity if result is not in English.
var appRouter = function (app) {
app.get("/randMovie", function (req, res) {
retrieveData(req, res);
}
Definition of retrieveData
retrieveData(req, res){
const fetchURL = require('fetch').fetchUrl;
const fs = require('fs');
const data = fs.readFileSync('./db/MovieIDs.txt') + '';
const splitData = data.split('\n');
const randomNumber = Math.floor(Math.random() * splitData.length);
const line = splitData.splice(randomNumber, 1);
const lineDATA = line.toString()
const api = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxx'
var movieID = lineDATA
var URL = 'https://api.themoviedb.org/3/movie/' + movieID + '?api_key=' + api + "&append_to_response=videos"
fetchURL(URL, function(error, meta, body){
var movieString = body.toString()
var movieJSON = JSON.parse(movieString)
if(movieJSON.original_language == "en") {
res.status(200).send(movieJSON);
} else {
// Not sure what to put here
retrieveData(req, res);
}
})
}
I'm trying to make a webscraper, but I can't get my function to wait for the second request to fill the name key on my object. It always return undefined.
const request = require('request');
const cheerio = require('cheerio');
const base_url = 'https://www.supremenewyork.com';
const shop_url = 'https://www.supremenewyork.com/shop/';
function getItems(category) {
var items = [];
return new Promise(function(resolve, reject) {
request.get(shop_url + category, function(err, res, body) {
if(err) {
reject(err);
} else {
var $ = cheerio.load(body);
$('a', '.inner-article').each(function(i, el) {
var url = base_url + $(this).attr('href');
var isSoldout = false;
var name;
if($(this).find('div').attr('class', 'sold_out_tag').length === 1)
isSoldout = true;
request.get(url, function(err, res, html) {
var $ = cheerio.load(html);
name = $('h1', 'div').text();
})
items.push({name: name, url: url, isSoldout: isSoldout});
})
resolve(items);
}
})
})
}
I expect the name key to be fill but no, i get undefined
Use the request-promise package which wraps request in Promise. Then you can use async/await to wait for result like:
const rp = require('request-promise');
const cheerio = require('cheerio');
const base_url = 'https://www.supremenewyork.com';
const shop_url = 'https://www.supremenewyork.com/shop/';
// notice async keyword
async function getItems(category) {
var items = [];
try {
// using await to wait for promise to resolve
const body = await rp.get(shop_url + category)
var $ = cheerio.load(body);
$('a', '.inner-article').each(function(i, el) {
var url = base_url + $(this).attr('href');
var isSoldout = false;
var name;
if($(this).find('div').attr('class', 'sold_out_tag').length === 1)
isSoldout = true;
try {
const html = await rp.get(url)
var $ = cheerio.load(html);
name = $('h1', 'div').text();
items.push({name: name, url: url, isSoldout: isSoldout});
} catch (err) {
throw err;
}
})
} catch (e) {
throw e;
}
return items;
}
More about async/await at MDN
I am unable to get proper output when trying to read data from redis and then send it out to user in json format.
This is my code
//API to get slot for a particular date range
app.get('/listcapacity/:ticketid/:fromdate/:todate', function(req, res) {
var id = req.params.ticketid;
var fromdate = req.params.fromdate;
var todate = req.params.todate;
var result = {};
var data_output = [];
var asyncTasks = [];
var currentDate = new Date(fromdate);
var between = [];
var end = new Date(todate);
while (currentDate <= end) {
var tempdate = new Date(currentDate).toISOString();
var dump = tempdate.toString().split("T");
between.push(dump[0]);
currentDate.setDate(currentDate.getDate() + 1);
}
between.forEach(function(entry) {
asyncTasks.push(function(callback) {
client.exists(id+entry, function (err, reply) {
if (reply === 1) {
console.log("Found");
client.get(id+entry, function (err, reply) {
var output = JSON.parse(reply);
data_output = data_output.concat(output);
});
}
callback();
});
});
});
async.parallel(asyncTasks, function(){
// All tasks are done now
result['data'] = data_output;
result['response'] = 1;
result['message'] = 'vacancies list fetched successfully!';
res.json(result);
});
});
I am always getting the output {"data":[],"response":1,"message":"vacancies list fetched successfully!"}
Even though I have checked that the keys are there in redis and it is always being Found by the application too.
For some reason the redis part is getting executed after the data has been sent to user. Why is this happening? I know redis works in async hence used the async.parallel function to take care of this but even then I endup with the same issue that I would have if I wouldnt have used async. What is the reason for this?
The client.get is an async operation and you have to callback in there. Also, you need an else statement and you need a callback in there as well:
//API to get slot for a particular date range
app.get('/listcapacity/:ticketid/:fromdate/:todate', function (req, res) {
var id = req.params.ticketid;
var fromdate = req.params.fromdate;
var todate = req.params.todate;
var result = {};
var data_output = [];
var asyncTasks = [];
var currentDate = new Date(fromdate);
var between = [];
var end = new Date(todate);
while (currentDate <= end) {
var tempdate = new Date(currentDate).toISOString();
var dump = tempdate.toString().split("T");
between.push(dump[0]);
currentDate.setDate(currentDate.getDate() + 1);
}
between.forEach(function (entry) {
asyncTasks.push(function (callback) {
client.exists(id + entry, function (err, reply) {
if (reply === 1) {
console.log("Found");
client.get(id + entry, function (err, reply) {
var output = JSON.parse(reply);
data_output = data_output.concat(output);
return callback();
});
} else {
return callback();
}
});
});
});
async.parallel(asyncTasks, function () {
// All tasks are done now
result['data'] = data_output;
result['response'] = 1;
result['message'] = 'vacancies list fetched successfully!';
res.json(result);
});
});
I am new to Node.js world, kind of stuck in situation.
below code is for reference:
var http = require('http');
var step = require('step');
var request = require('request');
exports.readimage2 = function(req, res){
//res.send(200,'OK');
//var image_url = 'http://www.letsgodigital.org/images/artikelen/39/k20d-image.jpg'; //--- 10mb
//var image_url = 'http://upload.wikimedia.org/wikipedia/commons/2/2d/Snake_River_(5mb).jpg';
//var image_url = 'http://www.sandia.gov/images2005/f4_image1.jpg'; //--- 2mb
var image_url = 'http://www.fas.org/nuke/guide/pakistan/pakistan.gif'; // --- some KB
http.get(image_url,
function(responseData) {
var data = new Buffer(parseInt(responseData.headers['content-length'],10));
var pos = 0;
responseData.on('data', function(chunk) {
chunk.copy(data, pos);
pos += chunk.length;
});
responseData.on('end', function () {
res.send(200, data);
});
});
};
Above code fails working for large files if i use it with step module.
Anyone suggest how to do it properly with step.
Here how i did it using step..... although the request module did same for image buffer download thanks to a post on stackoverflow just need to set encoding to null in request to work for buffer response.
var canvas = new Canvas(3000, 3000),
ctx = canvas.getContext('2d'),
Image = Canvas.Image;
var image_url = "http://www.a2hosting.com/images/uploads/landing_images/node.js-hosting.png";
//var image_url = 'http://upload.wikimedia.org/wikipedia/commons/1/16/AsterNovi-belgii-flower-1mb.jpg';
step(
function() {
request.get({
url: image_url,
encoding: null
}, this);
},
function(err, response, body) {
var img = new Image;
img.src = body;
ctx.drawImage(img, 0, 0, img.width, img.height);
//res.send(200, data);
res.send(200, '<img src="' + canvas.toDataURL() + '" />');
}
);
Below is the code working for simple http module of node.
var http = require('http');
var step = require('step');
var request = require('request');
exports.imagedownload = function(req, res){
step(
function(){
console.log('*********** image download start ***********');
fndownload(this);
},
function(err, result){
if(err) {
}
console.log('*********** image download end ***********');
res.send(200, result);
}
);
};
function fndownload(callback) {
var image_url = 'http://upload.wikimedia.org/wikipedia/commons/2/2d/Snake_River_(5mb).jpg'; // --- some KB
http.get(image_url,
function(responseData) {
var data = new Buffer(parseInt(responseData.headers['content-length'],10));
var pos = 0;
responseData.on('data', function(chunk) {
chunk.copy(data, pos);
pos += chunk.length;
});
responseData.on('end', function () {
//res.send(200, data);
callback(null, data);
});
});
};
I have the code below and am trying to access the all_records array once the _.each function has completed. However as it is asynchronous I was wondering if was possible to force a callback onto the underscores each?
var request = require('request'),
cheerio = require('cheerio'),
_ = require('underscore');
var all_records = [];
_.each([0,100], function(start) {
var base_url = "http://www.example.com/search?limit=100&q=foobar&start=";
var url = base_url + start;
request(url, function(err, res, body) {
var $ = cheerio.load(body),
links = $('#results .row');
$(links).each(function(i, link) {
var $link = $(link);
var record = {
title: $link.children('.title').text().trim()
};
all_records.push(record);
});
});
});
// Need to run this once _.each has completed final iteration.
console.log(all_records);
Here is a simple solution using a simple synchronization method:
var count = 101;//there are 101 numbers between 0 and 100 including 0 and 100
_.each([0,100], function(start) {
var base_url = "http://www.example.com/search?limit=100&q=foobar&start=";
var url = base_url + start;
request(url, function(err, res, body) {
var $ = cheerio.load(body),
links = $('#results .row');
$(links).each(function(i, link) {
var $link = $(link);
var record = {
title: $link.children('.title').text().trim()
};
all_records.push(record);
count--;
if(count===0){//101 iterations done
console.log(all_records);
}
});
});
});
A more elegant solution can be accomplied by using async's .parallel method.
var requests = []; //an array for all the requests we will be making
for(var i=0;i<=100;i++){
requests.push((function(done){ //create all the requests
//here you put the code for a single request.
//After the push to all_records you make a single done() call
//to let async know the function completed
}).bind(null,i));//the bind is that so each function gets its own value of i
}
async.parallel(requests,function(){
console.log(all_records);
});
async.each ended up being the easiest to implement.
async.each([0,100], function(start) {
var base_url = "http://www.example.com/search?limit=100&q=foobar&start=";
var url = base_url + start;
request(url, function(err, res, body) {
var $ = cheerio.load(body),
links = $('#results .row');
$(links).each(function(i, link) {
var $link = $(link);
var record = {
title: $link.children('.title').text().trim()
};
all_records.push(record);
});
});
}, function(err){
console.log(all_records);
});