Node.js file write issue, incomplete writing - node.js

I am new to Node.js. I wrote the scraper below, but the result it produces is not right: not all entries are written, and incomplete, broken data ends up in the file, even though each individual piece of extracted data looks fine in the console log.
The original file is complex, so I have put together a sample from the relevant code parts to show my logic. Please tell me what is being done wrong.
var request = require('request');
var cheerio = require('cheerio');
var url = 'http://example.com/index.html';
// Fetch `url`, parse the HTML with cheerio, and for every `.listing` element
// build one comma-separated line and write it to data.txt.
request(url, function(err, resp, body) {
if (err)
throw err;
// NOTE: `$` is assigned without var/let/const, so it becomes an implicit global.
$ = cheerio.load(body);
var categoryname = $('#mcat span').html();
var subcategoryname = $('span.arrow').html();
$('.listing').each(function() {
var companyname = $(this).find('.company-name > span').html();
// Extracted but not included in `data` below.
var compwebsite = $(this).find('.company-link > a').html();
var phonelumber = "+91-" + $(this).find('span[itemprop="telephone"]').html();
var data = categoryname + ", " + subcategoryname + ", " + companyname + ", " + phonelumber;
var fs = require('fs');
// NOTE: this runs once per listing. fs.writeFile replaces the contents of
// data.txt on each call, and the calls are started without waiting for the
// previous one to finish, so the file does not accumulate every row.
fs.writeFile("data.txt", data, function(err) {
if(err) {
console.log("Error: "+err);
} else {
console.log("Success!");
}
});
});
});

`.each` is called synchronously, hence it is blocking. `fs.writeFile`, however, is called asynchronously, so it shuffles your data, but there is no way the data is going to be incomplete.
Solutions:
Use Callback
// Fetch the page, collect one comma-separated row per `.listing` element, and
// hand all rows to writeData() in a single call once the last listing has
// been processed.
request(url, function(err, resp, body) {
  if (err)
    throw err;
  // Declare `$` locally: the bare assignment created an implicit global,
  // which is a ReferenceError in strict mode / ES modules.
  var $ = cheerio.load(body);
  var categoryname = $('#mcat span').html();
  var subcategoryname = $('span.arrow').html();
  var count = 0;
  var len = $('.listing').length;
  var data = '';
  $('.listing').each(function() {
    count++;
    var companyname = $(this).find('.company-name > span').html();
    var phonelumber = "+91-" + $(this).find('span[itemprop="telephone"]').html();
    data += categoryname + ", " + subcategoryname + ", " + companyname + ", " + phonelumber + "\r\n";
    // Write exactly once, after the final listing has been appended.
    if (count === len)
      writeData(data);
  });
});
/**
 * Writes the collected rows to data.txt and reports the outcome on the console.
 *
 * @param {string} data - Text to store in data.txt.
 */
function writeData(data) {
  var fs = require('fs');
  // Logs the result of the write once fs.writeFile has finished.
  var reportResult = function(err) {
    if (!err) {
      console.log("Success!");
      return;
    }
    console.log("Error: " + err);
  };
  fs.writeFile("data.txt", data, reportResult);
}
Use async module. It has various usable functions to apply callback and get the necessary result.

I think you could also do it more simply: just call the writeData function after the each loop (cheerio's each() is synchronous, so there will be no problem).
// Fetch the page, collect one comma-separated row per `.listing` element, and
// write all rows with a single writeData() call after the loop has finished.
request(url, function(err, resp, body) {
  if (err)
    throw err;
  // Declare `$` locally: the bare assignment created an implicit global,
  // which is a ReferenceError in strict mode / ES modules.
  var $ = cheerio.load(body);
  var categoryname = $('#mcat span').html();
  var subcategoryname = $('span.arrow').html();
  var data = '';
  $('.listing').each(function() {
    var companyname = $(this).find('.company-name > span').html();
    var phonelumber = "+91-" + $(this).find('span[itemprop="telephone"]').html();
    data += categoryname + ", " + subcategoryname + ", " + companyname + ", " + phonelumber + "\r\n";
  });
  // each() has already visited every listing by the time this line runs.
  writeData(data);
});
/**
 * Stores `data` in data.txt and logs whether the write succeeded.
 *
 * @param {string} data - Text to store in data.txt.
 */
function writeData(data) {
  var fs = require('fs');
  fs.writeFile("data.txt", data, function(err) {
    // One log line either way: the error (if any) or a success marker.
    var message = err ? "Error: " + err : "Success!";
    console.log(message);
  });
}

Related

Create and download text files- Node.js & React.Js

As part of my project, I have to create text files which have to be downloaded as a ".txt".
I am using Node.js and React, and I have already tried Node.js's "fs.writeFile", but the browser doesn't recognize the download, the file is always named after the folder, and the file is always empty, although the variable is a non-empty string.
I'm calling from the client to this function:
app.post("/downloadnetworks", async (req, res) => {
let selectedApps=req.body.selectedApps;
let arr=await sqlFunctions.createByIds(selectedApps);
res.send();
module.exports.createByIds = (productsArray) => {
return new Promise(function(resolve, reject) {
var bulkedString = '';
var product;
for (let obj of productsArray) {
let query = "select * from...........";
con.query(query, function(err, result, fields) {
if (err) throw err;
let stringifiedJson = JSON.stringify(result)
let parsedJson = JSON.parse(stringifiedJson)
The DB data is being added into the variable 'stringifiedJson', and it continues from here:
let parsedJson = JSON.parse(stringifiedJson) //has all the data from the DB
for (let network of parsedJson) {
if (network.certification_Id) {
bulkedString += network.domain_Name + ", " + network.publisher_Id + ", " + network.relationship + ", " + network.certification_Id;
} else {
bulkedString += network.domain_Name + ", " + network.publisher_Id + ", " +
network.relationship;
}
bulkedString += "\n";
product = network.product;
}
})
fs.writeFile('C:\Work\App ads.txt\App-Ads Files\'' + product + '.txt', bulkedString, 'utf8', (err) => {
if (err) throw err;
console.log('The file has been saved!');
});
}
resolve(bulkedString)
})
}

nodejs loop curl, async or blocking

How can I loop over curl/request calls so that each request starts only when the previous one is done, i.e. blocking, like in PHP?
It currently uses a forEach loop (async-foreach) together with request.
my code:
// For every URL in array_lines, request the page and record its id, source
// URL and title in build_json.items[j].
// NOTE(review): `j` is not declared in this snippet; presumably it is defined
// elsewhere. Confirm, since `index` is the per-item counter supplied here.
forEach(array_lines, function(page_url, index, arr) {
  var onPage = function (err, resp, body) {
    if (err) {
      console.log("Error!: " + err + " using ("+j+")" + page_url);
      throw err;
    }
    var parseOptions = { decodeEntities: false, xmlMode: true };
    var $ = cheerio.load(body, parseOptions);
    console.log('page_url: ', page_url);

    // Start a fresh entry for this page and fill it in field by field.
    var entry = {};
    build_json.items[j] = entry;

    var itemId = $("#xx").val();
    console.log('id: ', itemId);
    entry.id = itemId;
    entry.source_url = page_url;

    var itemTitle = $("h1.title").text();
    console.log('title: ', itemTitle);
    entry.title = itemTitle;
  };
  request(page_url, onPage);
});

NodeJS API: having trouble passing 2 parameters to request response

I am having trouble passing 2 ids via request response function. I can pass 1 id/argument without any issue but having trouble passing 2 arguments below is a snippet of my code:
server:
var http = require("http");
var org = require("../controllers/org");
var school = require("../controllers/school");
var academicSession = require("../controllers/academicSession");
var term = require("../controllers/term");
var gradingperiod = require("../controllers/gradingperiod");
var course = require("../controllers/course");
var stu = require("../controllers/student");
var cls = require("../controllers/class");
var user = require("../controllers/user");
var demo = require("../controllers/demographic");
var enroll = require("../controllers/enrollment");
var teach = require("../controllers/teacher");
var crscls = require("../controllers/schoolcs");
var enr = require("../controllers/enrollqrys");
var settings = require("../settings");
var httpMsgs = require("../core/httpMsgs");
http.createServer(function (req, resp) {
switch (req.method) {
case "GET":
if (req.url === "/") {
httpMsgs.showHome(req, resp);
}
else if (req.url === "/orgs") {
org.getOrgs(req, resp);
}
else if (req.url.match("/orgs/[0-9]+$")) {
var idorg = "[0-9]+";
var patt = new RegExp("/orgs/" + idorg);
if (patt.test(req.url)) {
patt = new RegExp(idorg);
var id = patt.exec(req.url);
org.getOrg(req, resp, id);
}
else {
httpMsgs.show404(req, resp);
}
}
***else if (req.url.match("/schools/[0-9]+[A-Za-z0-9\-\_]+/classes/[0-9]+[A-Za-z0-9\-\_]+/enrollments$")) {
var idcl = "[0-9]+[A-Za-z0-9\-\_]+";
var idsc = "[0-9]+[A-Za-z0-9\-\_]+";
var str = idcl + "[0-9]+[A-Za-z0-9\-\_]+" + idsc;
var patt = new RegExp("/schools/" + idcl + "/classes/" + idsc + "/enrollments");
if (patt.test(req.url)) {
patt = new RegExp(str);
var id = patt.exec(req.url);
enr.getEnrollmentsForClassInSchools(req, resp, id, arg2);
}
else {
httpMsgs.show404(req, resp);
}
}***
enrollqrys.js:
var db = require("../core/db");
var httpMsgs = require("../core/httpMsgs");
exports.getEnrollmentsForClassInSchools = function (req, resp, id, arg2) {
db.executeSql("EXEC dbo.getEnrollmentsForClassInSchools #clid = '" + id + "'" + ", #scid = '" + arg2 + "'", function (data, err) {
if (err) {
httpMsgs.show500(req, resp, err);
}
else {
//resp.writeHead(200, { "Content-Type": "application/json" });
//resp.write(JSON.stringify(data));
//resp.end();
httpMsgs.sendJson(req, resp, data);
}
});
};
exports.getStudentsForClassInSchool = function (req, resp, id, id) {
db.executeSql("EXEC dbo.getEnrollmentsForClassInSchools #clid = '" + clid + "'" + ", #scid =" + scid, function (data, err) {
if (err) {
httpMsgs.show500(req, resp, err);
}
else {
//resp.writeHead(200, { "Content-Type": "application/json" });
//resp.write(JSON.stringify(data));
//resp.end();
httpMsgs.sendJson(req, resp, data);
}
});
};
I get a ReferenceError when testing this GET request. Any help would be greatly appreciated. I will add that I am new to both JavaScript and Node.js.
Error below:
Started listening at: 9000
C:\Users\THOMMA02\source\repos\NodejsConsoleApp1\NodejsConsoleApp1\core\server.js:281
enr.getEnrollmentsForClassInSchools(req, resp, id, arg2);
^
ReferenceError: arg2 is not defined
at Server. (C:\Users\THOMMA02\source\repos\NodejsConsoleApp1\NodejsConsoleApp1\core\server.js:281:72)
at emitTwo (events.js:126:13)
at Server.emit (events.js:214:7)
at parserOnIncoming (_http_server.js:619:12)
at HTTPParser.parserOnHeadersComplete (_http_common.js:112:17)
Waiting for the debugger to disconnect...

Node missing ) after argument list

It's probably a stupid mistake, but it's taking me too long to find the answer.
When I run this simple program (node index.js) I get an error:
SyntaxError: missing ) after argument list (line 55)
You can see sublimelinter found nothing either
I've tried to delete and install all packages again
I've triple checked everything
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');
var readLine = require('readline');
var URL_SEED= "https://g1.globo.com/";
var MAX_PAGES_TO_VISIT = 100;
var pagesVisited = []; //array, {objeto}
var numPagesVisited = 0;
var pagesToVisit = [];
var allAbsoluteLinks = [];
var url = new URL(URL_SEED); //pesq
var urlBase = url.protocol + "//" + url.hostname; //pesq
pagesToVisit.push(URL_SEED);
crawl();
//pegar href vs a
/**
 * Visits the next not-yet-visited page from `pagesToVisit`.
 *
 * Stops when the page budget (`MAX_PAGES_TO_VISIT`) is used up or when the
 * queue is empty. `crawl` itself is passed to `visitaPagina` as its callback.
 */
function crawl() {
  if (numPagesVisited >= MAX_PAGES_TO_VISIT) {
    console.log("Limites de páginas que posso visitar atingido (100)");
    return;
  }
  // Skip already-visited pages with a loop instead of recursing, and stop
  // when the queue runs dry: pop() on an empty array returns undefined, which
  // used to be passed on to visitaPagina as if it were a URL.
  while (pagesToVisit.length > 0) {
    var nextPage = pagesToVisit.pop();
    // Own-property check: `in` would also match inherited array members
    // such as "length" or "push".
    if (!Object.prototype.hasOwnProperty.call(pagesVisited, nextPage)) {
      // Page not visited yet.
      visitaPagina(nextPage, crawl);
      return;
    }
  }
}
// Marks `url` as visited, requests it and, on a 200 response, parses the body
// and passes the loaded document to coletaLinks; on any other status it calls
// `callback` instead.
function visitaPagina(url, callback) {
// Add page to our set
// NOTE: `pagesVisiteu` is not declared in this file; the array declared at the top is `pagesVisited`.
pagesVisiteu[url] = true;
numPagesVisited++;
console.log("Visitando a página " + url);
// Make the request
request(url, function(error, response, body) {
console.log("Status code: " + response.statusCode);
if(response.statusCode !== 200) {
callback();
return;
} else {
// Parse the body
var $ = cheerio.load(body);
coletaLinks($);
}
// NOTE: the `request(` call opened above is closed with `}` only; its closing `)` is missing.
}
}
/**
 * Collects the root-relative links (href starting with "/") of a loaded page,
 * queues their absolute form in `pagesToVisit`, logs them, and saves the
 * relative hrefs to relativos.txt, one per line.
 *
 * @param {Function} $ - The document loaded by cheerio.load() for the page.
 */
function coletaLinks($) {
  var linksRelativos = $("a[href^='/']");
  console.log("Achei " + linksRelativos.length + " links relativos nessa página");

  var hrefs = [];
  linksRelativos.each(function() {
    var href = $(this).attr('href');
    hrefs.push(href);
    // `urlBase` (protocol + hostname of URL_SEED) is the name defined at the
    // top of the file; `baseUrl` was never declared.
    pagesToVisit.push(urlBase + href);
  });
  console.log("\n\nPAGES TO VISIT = " + pagesToVisit + "\n\n");

  // Log from the collected hrefs rather than for...in over the cheerio
  // object, whose own keys include non-element entries such as `length`.
  hrefs.forEach(function(href, index) {
    console.log(index, href);
  });

  // Write plain text; the cheerio object itself is not writable file content.
  fs.writeFile('relativos.txt', hrefs.join("\n"), function(err) {
    if (err) {
      return console.log(err);
    }
  });
}
I'm not sure why your linter isn't complaining, but you are missing a closing bracket in the request call inside visitaPagina. Second line from the bottom:
function visitaPagina(url, callback) {
// Add page to our set
pagesVisited[url] = true;
numPagesVisited++;
console.log("Visitando a página " + url);
// Make the request
request(url, function(error, response, body) {
console.log("Status code: " + response.statusCode);
if(response.statusCode !== 200) {
callback();
return;
} else {
// Parse the body
var $ = cheerio.load(body);
coletaLinks($);
}
} // should be })
}
Also, something went wrong with copying the code on SO, you might want to check what's up with that for any future submissions. In this case the screenshots helped, but you should also know that images of code are generally frowned upon on SO.
Good luck!

Nodejs/Async: How does callback work in iteratee function for async.map as mentioned in code snippet

Being new to Node.js and async, I came across the following code.
app = express();
/*
other express use calls like - app.use(bodyParser.json());
*/
var async = require("async");
var server;
// POST /callType/call: runs someFunc for every node in the request body and
// replies with the de-duplicated results, or with the error if any call fails.
app.post('/callType/call', function(req, res) {
  var startTime = Date.now();
  server = req.body.server;

  // Iteratee: async.map invokes it once per node and supplies `callback`
  // itself; it is simply handed on to someFunc.
  var iteratee = function(node, callback) {
    someFunc(node.property1, node.property2, callback);
  };

  // Final handler: async.map calls it with an error, or with the collected
  // results once every iteratee has called its callback.
  var finish = function(err, results) {
    if (err) {
      var failure = { success: false, error: err };
      console.log("ERROR returned: " + JSON.stringify(failure));
      res.json(failure);
      return;
    }

    // Results is an array of arrays; flatten it one level.
    var flattened = [].concat.apply([], results);

    // Keep only the first item seen for each `tid`.
    var unique = [];
    for (var idx = 0; idx < flattened.length; idx += 1) {
      var candidate = flattened[idx];
      var alreadyKept = unique.some(function(kept) {
        return kept.tid === candidate.tid;
      });
      if (!alreadyKept) {
        unique.push(candidate);
      }
    }

    res.json({ success: true, results: unique });
  };

  async.map(req.body.nodes, iteratee, finish);
});
/**
 * Issues a GET request for one node and reports the outcome through `callback`.
 *
 * @param {*} property1 - First path segment taken from the node.
 * @param {*} property2 - Second path segment taken from the node.
 * @param {Function} callback - Node-style callback supplied by async.map:
 *   called as callback(err) on failure or callback(null, result) on success.
 */
function someFunc(property1, property2, callback) {
  var url = '/' + callTypes + '/' + call + '/' + property1 + '/' + property2;
  urClient
    .get(server + url)
    .header('Content-Type', 'application/json')
    .end(function(response) {
      if (response.code !== 200) {
        callback("Error " + ". Code: " + response.code + " Response: " + JSON.stringify(response));
        return;
      }
      // The first argument is the error slot. Passing the success message
      // there made async.map treat every successful call as a failure.
      callback(null, "Success " + ". Code: " + response.code + " Response: " + JSON.stringify(response));
    });
}
The iteratee function for async.map has a definition starting function(node, callback) { but the callback function is never assigned. How does the callback work over here.
Isn't it supposed to be assigned somewhere like callback = myCallbackFunction;
The async.map takes 3 arguments, the array/object, the function to map the data and the callback function, so your code should be:
// Map every node through someFunc; the last function receives either the
// error or the array of results.
async.map(req.body.nodes, someFunc, function(err, results) {
  if (err) {
    return console.log(err);
  }
  console.log(results);
});
And your someFunc should be:
/**
 * Example iteratee for async.map.
 *
 * @param {*} item - One item of the original array/object.
 * @param {Function} callback - Supplied by async.map; call it as
 *   callback(err) on failure or callback(null, result) on success.
 */
function someFunc(item, callback) {
  // do something with item
  // it's each item in the original array/object
  // The first argument is the error slot, so the result goes second.
  callback(null, 'The results');
}
This is a basic example: http://code.runnable.com/UyR-6c2DZZ4SmfSh/async-map-example-for-node-js

Resources