How to get promises from nested arrays? - node.js

Can somebody help me with this?
I am trying to scrape a website and store the collected data in a JSON file. I'm using cheerio and request-promise.
The JSON structure looks like this: companies > packages > cities
"companies": [
{
"id": 0,
"name": "companyName",
"url": "https://www.url-company.net/",
"packages": [
{
"id": 0,
"name": "general",
"url": "https://www.url-company.net/package",
"cities": [
{
"id": 0,
"name": "cityName",
"url": "https://www.url-company.net/package/city",
},
...]
}
...]
}
..]
I have extracted the array of companies from this site.
Each COMPANY has a specific url --> from every url I scraped the
packages for each company.
Each PACKAGE has a specific url --> from
every url I want to scrape the cities for each package but I am NOT
able to do it.
I am only able to populate companies and packagesByCompany, but I'm lost when trying to populate citiesByPackage:
const rp = require('request-promise');
const cheerio = require('cheerio');
const jsonfile = require('jsonfile');
const baseUrl = 'https://www.base-url-example.net';
scrapeAll();
/**
 * Runs the whole scrape: companies first, then the packages of every company,
 * then the cities of every package, and finally passes the result to
 * saveScrapedDateIntoJsonFile().
 *
 * NOTE(review): `package` (used below as a callback parameter) is a reserved
 * word in strict mode / ES modules, so this only parses as a sloppy-mode script.
 *
 * @returns {Promise} settles with the value produced by the city-scraping stage
 */
function scrapeAll() {
return scrapeCompanies().then(function (dataCompanys) {
//Map every endpoint so we can make a request with each URL
var promises = dataCompanys.map(function (company) {
return scrapePackagesByCompany(company) // Populate each company with all the array of packages from this company
});
return Promise.all(promises);
})
.then(function(promiseArray) { // Need help here!!!!
// Despite its name, promiseArray holds the resolved values of the previous
// Promise.all(), i.e. whatever scrapePackagesByCompany() resolved with.
var promise4all = Promise.all(
promiseArray.map(function(company) {
return Promise.all( // This is NOT working, I do not know how to get promises from nested arrays
company.packages.map(function(package) {
// scrapeCitiesByPackage() returns ONE promise, but Promise.all() expects an
// iterable of promises, so this innermost Promise.all() rejects with a
// TypeError. The enclosing Promise.all() only needs the promise itself.
return Promise.all(
scrapeCitiesByPackage(package) // Try to populate each package with all the array of cities from this package
);
})
);
})
);
return promise4all;
})
.then(function (data) {
// The write is started but its completion is not awaited here.
saveScrapedDateIntoJsonFile(data);
return data;
})
.catch(function (err) {
// Re-rejects with the same error, so this handler does not change the outcome.
return Promise.reject(err);
});
}
/**
 * Fetches the page at `baseUrl` and builds one entry per company tile that has
 * both a link and a lazy-loaded image.
 *
 * The company name is the raw href of the tile, as in the original code.
 *
 * @returns {Promise<Array<{id: number, name: string, url: string, img: string}>>}
 *   resolves with the companies in document order; `id` is the tile index, so
 *   ids may have gaps when a tile is skipped.
 * @throws rejects with the request/parsing error after logging it. Previously
 *   the error was swallowed and the promise resolved with `undefined`, which
 *   made the caller fail later with an unrelated TypeError.
 */
function scrapeCompanies() {
  const companySelector = '#content section .elementor-container > .elementor-row > .elementor-element.elementor-top-column .elementor-widget-wrap .elementor-widget-image >.elementor-widget-container > .elementor-image';

  return rp(baseUrl)
    .then(function (html) {
      const $ = cheerio.load(html);
      const companies = [];

      $(companySelector).each(function (index, element) {
        const urlCompany = $(element).find('a').attr('href');
        const imgCompany = $(element).find('img').data('lazy-src');

        // Tiles without a link or without an image are not companies.
        if (urlCompany && imgCompany) {
          companies.push({
            id: index,
            name: urlCompany,
            url: baseUrl + urlCompany,
            img: imgCompany,
          });
        }
      });

      return companies;
    })
    .catch(function (err) {
      console.error('errorrr2', err);
      // Propagate so the caller's own .catch() sees the real failure.
      throw err;
    });
}
/**
 * Fetches `company.url` and fills `company.packages` with one entry per
 * package tile that has both a link and a lazy-loaded image.
 *
 * Fixes over the original:
 *  - the name was read with `urlPackage.text()`, but `urlPackage` is the href
 *    string, which has no `.text()`; the name now comes from the anchor's
 *    text, falling back to the href when the anchor has no text;
 *  - the local variable `package` (a reserved word in strict mode) is gone;
 *  - failures are logged and rethrown instead of resolving with `undefined`.
 *
 * @param {{url: string}} company - company entry; mutated (gets `packages`).
 * @returns {Promise<object>} resolves with the same `company` object.
 * @throws rejects with the request/parsing error after logging it.
 */
function scrapePackagesByCompany(company) {
  const packageSelector = '#content section .elementor-container > .elementor-row > .elementor-element.elementor-top-column .elementor-widget-wrap .elementor-widget-image >.elementor-widget-container > .elementor-image';

  return rp(company.url)
    .then(function (html) {
      company.packages = [];
      const $ = cheerio.load(html);

      $(packageSelector).each(function (index, element) {
        const $link = $(element).find('a');
        const urlPackage = $link.attr('href');
        const imgPackage = $(element).find('img').data('lazy-src');

        if (urlPackage && imgPackage) {
          // Image-only links have no text of their own, hence the fallback.
          const namePackage = $link.text().trim() || urlPackage;
          company.packages.push({
            id: index,
            name: namePackage,
            url: urlPackage,
            img: imgPackage,
          });
        }
      });

      return company;
    })
    .catch(function (err) {
      console.error('errorrr2', err);
      throw err;
    });
}
/**
 * Fetches `insurancePackage.url` and stores the cities found on that page in
 * `insurancePackage.cities`.
 *
 * A city is recorded for every matched <article> whose first-level lookup of
 * `a` yields both an href and a non-empty text; `id` is the article index.
 *
 * @param {{url: string}} insurancePackage - package entry; mutated (gets `cities`).
 * @returns {Promise<object|undefined>} resolves with the same package object,
 *   or with `undefined` after logging when the request or parsing fails.
 */
function scrapeCitiesByPackage(insurancePackage) {
  const citySelector = '#content section .elementor-container > .elementor-row > .elementor-element .elementor-widget.elementor-widget-posts .elementor-posts-container article';

  // Parses the fetched page and attaches the city list to the package.
  const collectCities = function (html) {
    insurancePackage.cities = [];
    const $ = cheerio.load(html);

    $(citySelector).each(function (position, article) {
      const $anchor = $(article).find('a');
      const href = $anchor.attr('href');
      const label = $anchor.text();

      if (!href || !label) {
        return;
      }
      insurancePackage.cities.push({ id: position, name: label, url: href });
    });

    return insurancePackage;
  };

  // Best-effort: a failing package page is logged, not propagated.
  const reportFailure = function (err) {
    console.error('errorrr2', err);
  };

  return rp(insurancePackage.url).then(collectCities).catch(reportFailure);
}
/**
 * Writes the scraped data to ./data/company.json as `{ companies: data }`,
 * pretty-printed with two spaces.
 *
 * The write is fire-and-forget: nothing is returned and a failure is only
 * logged. The original logged 'errorrr' on every call, including successful
 * writes where `err` is null; logging now happens only on an actual error.
 *
 * @param {Array<object>} data - the companies to persist.
 */
function saveScrapedDateIntoJsonFile(data) {
  jsonfile.writeFile(
    './data/company.json',
    { companies: data },
    { spaces: 2 },
    function (err) {
      if (err) {
        console.error('errorrr', err);
      }
    }
  );
}
Thanks in advance :)

What you are trying could be made to work but it's arguably better for scrapePackagesByCompany() and scrapeCitiesByPackage() simply to deliver data, and to perform all the "assembly" work (ie bundling the delivered arrays into higher level objects) in scrapeAll().
You can write something like this:
scrapeAll()
  .catch(function (err) {
    console.log(err);
  });

/**
 * Scrapes companies, their packages and each package's cities, assembles the
 * nested structure here, and then saves it.
 *
 * This version expects scrapePackagesByCompany() to resolve with an array of
 * packages and scrapeCitiesByPackage() to resolve with an array of cities;
 * attaching them to their parent objects is done in this function only.
 *
 * The inner callback parameter was named `package`, which is a reserved word
 * in strict mode / ES modules; it is now `pkg`.
 *
 * @returns {Promise} settles with whatever saveScrapedDateIntoJsonFile() returns;
 *   rejects if any request in any stage rejects.
 */
function scrapeAll() {
  return scrapeCompanies()
    .then(function (companies) {
      return Promise.all(companies.map(function (company) {
        return scrapePackagesByCompany(company)
          .then(function (packages) {
            company.packages = packages; // assembly
            return Promise.all(packages.map(function (pkg) {
              return scrapeCitiesByPackage(pkg)
                .then(function (cities) {
                  pkg.cities = cities; // assembly
                });
            }));
          });
      }))
        .then(function () {
          // `companies` is still in scope, now fully populated.
          return saveScrapedDateIntoJsonFile(companies);
        });
    });
}
Then it's fairly trivial to simplify scrapePackagesByCompany() and scrapeCitiesByPackage() so that they deliver the packages array and the cities array respectively.

Related

Why is the data I receive from the database undefined?

I'm creating an API on my express.js server. When it receives a "get" request, a function placed in a module file requests data from a graph db:
module.js file:
/**
 * Queries the graph for City nodes reachable from the city named 'Milan'
 * over 1 to 3 LINKED relationships and collects their names into `cities`.
 *
 * NOTE(review): the promise chain started by session.run() is never returned
 * from this function, so getGraphData() itself returns undefined. The
 * `return cities` below only resolves the inner chain, whose result is
 * discarded.
 */
function getGraphData() {
const cityName = 'Milan';
const readQuery = `MATCH (City {name: $cityName})-[:LINKED*1..3]-(city:City) RETURN city`;
const cities = [];
// This chain is started but not returned to the caller.
session
.run(readQuery, { cityName })
.then(function (result) {
result.records.forEach(function (record) {
cities.push({
title: record._fields[0].properties.name,
});
});
// Value of the inner promise only; it does not become getGraphData()'s return value.
return cities;
})
.catch((err) => console.log(err));
}
module.exports.getGraphData = getGraphData;
After the data is received, it is stored in an array named cities, which looks like this:
cities: [ { title: 'City1' }, { title: 'City2' }, { title: 'City3' } ]
So the function returns this array, and then I import the function from the module and use it in my router file:
const { getGraphData } = require('./../db/neo4j');
// GET / : responds with the cities returned by getGraphData().
// Fixes over the original snippet: the catch block was never closed (syntax
// error), and on failure no response was sent, leaving the request hanging.
// NOTE(review): 206 is "Partial Content"; the commented-out line suggests 200
// was intended. Kept as in the original.
router.get('/', async (req, res) => {
  try {
    const p = await getGraphData();
    console.log('p:', p); //p is undefined
    //return res.status(200).send(p);
    return res.status(206).json(p); // empty response, not any data only status code
  } catch (e) {
    console.log(e);
    // Always answer the client, even when the query fails.
    return res.sendStatus(500);
  }
});
So, what am I doing wrong? Why is the API response empty?
I used a debugger. The data really does arrive in the function in the module, but it is not passed on to the "p" variable in the API response.
Your getGraphData function uses .then but never returns the promise. When it executes, it starts the session call and returns immediately, before the query has finished; that is why the caller receives undefined.
Although you await getGraphData, for that to work getGraphData itself must return a promise: define it as async and await the session call inside it.
/**
 * Queries the graph for City nodes reachable from the city named 'Milan'
 * over 1 to 3 LINKED relationships.
 *
 * Changes over the original answer:
 *  - on failure the error is logged and rethrown. Previously the Error object
 *    was *returned*, so the caller treated it as data and its own try/catch
 *    never ran;
 *  - the node is read through the public `record.get('city')` accessor (the
 *    query returns it under the key `city`) instead of the private `_fields`.
 *
 * @returns {Promise<Array<{title: string}>>} one entry per returned record.
 * @throws rejects with the driver error when the query fails.
 */
async function getGraphData() {
  const cityName = 'Milan';
  const readQuery = `MATCH (City {name: $cityName})-[:LINKED*1..3]-(city:City) RETURN city`;

  try {
    const result = await session.run(readQuery, { cityName });
    return result.records.map((record) => ({
      title: record.get('city').properties.name,
    }));
  } catch (err) {
    console.log(err);
    throw err;
  }
}

mongoose find multiple filters not working

Good day guys, I am trying to set up a store filtering API with Node and MongoDB in the format below.
// Request handler: builds a Mongoose query from the filters in req.query,
// applies sorting, field selection and pagination, and sends { data: products }.
// NOTE(review): `getProduct` is assigned without a declaration keyword in this
// snippet; presumably it is a class field or is declared elsewhere.
getProduct = async (req, res) => {
try {
const {
name,
category,
brand,
minPrice,
maxPrice,
minRating,
field,
sort,
} = req.query;
//queryObjects
const queryObjects = {};
// The name filter is only added when a name was supplied.
if (name) {
queryObjects.name = { $regex: name, $options: 'i' };
}
// if(category){
// queryObjects.category={$regex:category,$options:'i'}
// }
// if(brand){
// queryObjects.brand={$regex:brand,$options:'i'}
// }
// NOTE(review): unlike `name`, the price and rating filters are added on every
// request, even when minPrice / maxPrice / minRating are absent from the query
// string (their operands are then undefined). Presumably this is why combining
// filters yields an empty result; confirm and guard each bound like `name`.
queryObjects.price = { $gt: minPrice, $lt: maxPrice };
queryObjects.rating = { $gt: minRating };
// `result` is the query object; it is only executed by the `await` further down.
var result = Product.find(queryObjects);
//sorting result
if (sort) {
// "a,b" -> "a b"
const sortList = sort.split(',').join(' ');
result = result.sort(sortList);
} else {
result = result.sort('rating');
}
//show specific fields
if (field) {
const fieldList = field.split(',').join(' ');
result = result.select(fieldList);
}
console.log(queryObjects);
// Number(undefined) is NaN, so page, limit and skip are NaN when the
// corresponding query parameters are not supplied.
const page = Number(req.query.page);
const limit = Number(req.query.limit);
const skip = (page - 1) * limit;
result = result.skip(skip).limit(limit);
const products = await result;
res.send({ data: products });
} catch (error) {
// The error is sent with the default status code; no error status is set here.
res.send(error);
}
};
When I make a GET request, I get an empty array as my response [screenshot: Postman response].
When I log queryObjects to the console, I get [screenshot: queryObject].
Everything works when I add the filters one by one.
What am I doing wrong, and how can I pass multiple filters to the find method in mongoose?
Please help, thanks.

Why the nodejs heap out of memory for creating Excel file with big data?

I am creating an Excel file on the Node.js side and returning base64 data to ReactJS to download the file. On the Node.js side, I am using Promise.all to fetch data from a server in chunks and append the data to the Excel worksheet with
worksheet.addRows(data);
For around 20-30k rows it works fine, but for around 100k rows Node.js fails with a "heap out of memory" error.
I have also increased the memory allocated to Node.js, but I get the same error:
node --max_old_space_size=5000 app.js
What am I doing wrong? Any suggestions?
Nodejs
const axios = require('axios');
var excel = require("exceljs");
// The workbook and its single worksheet are created once, when this module is
// loaded; getData() further down appends rows to this same worksheet.
// NOTE(review): nothing in the code shown clears the worksheet, so rows
// presumably accumulate across requests; confirm.
const workbook = new excel.Workbook();
const worksheet = workbook.addWorksheet("My Sheet");
// Column definitions: `header` is the title shown in the sheet, `key` is the
// property name looked up on each row object passed to addRows().
worksheet.columns = [
{ header: "TicketId", key: "ticketId" },
{ header: "Email", key: 'user_email' },
{ header: "User", key : 'user_name' },
{ header: "Subject", key: "subject" },
...//many more headers
];
exports.getTicketData = async (req, res, next) => {
res.connection.setTimeout(0);
const { body } = req;
const token = body.token;
const organization_id = body.organization_id;
const server = body.server;
const sideFilter = body.sideFilter;
let baseurl = 'url for server end to fetch data';
if (baseurl) {
let data = new Array();
let limit = 3000;
const promises = [];
try {
let count = await getCount(token,limit, organization_id, baseurl, sideFilter);
for(var i = 1;i<=count;i++) {
promises.push(getData(i,limit,organization_id,token, baseurl, sideFilter));
}
await Promise.all(promises).then((results) => {
}).catch((e) => {
throw e;
});
var base64File = await writeExcelAndUpload(workbook);
return res.status(200).json({ file:base64File });
} catch (err) {
return res.status(400).json({ type:'error', msg:'File not generated please contact support staff' });
}
} else {
return res.status(400).json({ type:'error', msg:'please define server name' });
}
};
/**
 * Fetches one page of export data and appends its rows to the shared
 * `worksheet`.
 *
 * The original wrapped the axios promise in `new Promise(...)`; the promise
 * is now awaited directly. Behaviour is the same: resolves `true` after the
 * rows are added, rejects with the request/mapping error otherwise.
 *
 * @param {number} page - 1-based page number.
 * @param {number} limit - page size, sent as `per_page`.
 * @param {*} organization_id - forwarded in the request body.
 * @param {string} token - bearer token for the Authorization header.
 * @param {string} baseurl - API base URL.
 * @param {object} sideFilter - extra filter fields spread into the request
 *   body; they override the defaults listed before them.
 * @returns {Promise<boolean>} resolves with `true`.
 */
let getData = async (page, limit, organization_id, token, baseurl, sideFilter) => {
  const payload = {
    page: page,
    organization_id: organization_id,
    per_page: limit,
    filter: "",
    sorted: "",
    ...sideFilter,
  };
  const config = { headers: { "Authorization": `Bearer ${token}` } };

  const response = await axios.post(baseurl+`/v2/get-export`, payload, config);

  const dataTemp = response.data.data.data.map((t) => ({
    ...t,
    name: t.name,
    // ...many more columns (about 70) were elided here in the original snippet
  }));

  worksheet.addRows(dataTemp);
  return true;
};
// Placeholder: the body was omitted from the snippet.
// The caller awaits the result and uses it as the inclusive upper bound of a
// 1-based page loop, so it presumably resolves with the number of pages for
// the given `limit` (TODO confirm against the real implementation).
let getCount = (token,limit, organization_id, baseurl, sideFilter) => {
// run an api and return count against limit
}
/**
 * Serialises the workbook to an .xlsx buffer and returns it as a base64
 * data URI string.
 *
 * @param {object} workbook - object exposing `xlsx.writeBuffer()`.
 * @returns {Promise<string>} the data URI (xlsx MIME-type prefix + base64 payload).
 */
let writeExcelAndUpload = async (workbook) => {
  const dataUriPrefix = 'data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,';
  const rawBytes = await workbook.xlsx.writeBuffer();
  const encoded = Buffer.from(rawBytes).toString('base64');
  return dataUriPrefix + encoded;
};
Client side reactjs
// Requests the export from the server and triggers a browser download of the
// returned base64 data URI as "export.xlsx".
// Changes over the original: `postData` is declared locally (it was an
// implicit global, which throws in strict mode), and the `.catch` that only
// rethrew the error is dropped; rejections still reach the caller.
// NOTE(review): `exportLink` is assigned without a declaration keyword;
// presumably this is a class field of a React component.
exportLink = () => {
  const postData = {
    // ...request payload fields were elided in the original snippet
  };

  return axios.post(`${baseurl}/api/ticketing/get-ticket`, postData).then(function (response) {
    // A temporary anchor is the usual way to start a download from a data URI.
    const downloadLink = document.createElement("a");
    downloadLink.href = response.data.file;
    downloadLink.download = "export.xlsx";
    downloadLink.click();
  });
}
Well, it is somewhat expected that you may run out of heap memory when working with as many as 100k entries.
I suggest you use pagination: instead of fetching, e.g., 100k entries at once, fetch 1k entries, do what you need with them, then fetch the next 1k entries, and repeat until you have processed all of them.

How to have rows from related tables as nested json objects in a node.js API

I'm trying to build my first REST API using node.js, swagger and mysql.
I have two tables in a mysql database: Banco (0:1) --> Conta (1:N).
I want my API to return an array of JSON objects, one per Banco row, each containing a field "Conta" that holds all related Conta objects, like:
{
"bancos": [
{
"id": 1,
"nome": "Banco 1",
"Conta": [{"nome": "Conta 1"},{"nome": "Conta 2"}]
},
{
"id": 2,
"nome": "Banco 2",
"Conta": [{"nome": "Conta 3"},{"nome": "Conta 4"}]
}
]}
I'm using mysql pooling object.
// const sql = require('tedious');
const dbConfig = require('./dbConn').config;
const pool = new sql.createPool(dbConfig);
exports.execSql = function(sqlquery, params) {
// var params = [];
// params.push(id);
sqlquery = sql.format(sqlquery, params);
return new Promise(function(res, rej) {
pool.getConnection(function(err, connection) {
if (err) rej(err);
connection.query(sqlquery, function(err, rows) {
if (err) rej(err);
else res(rows);
connection.release();
});
});
});
}
This is my Banco controller, which uses the Banco and Conta models. Each one returns a promise with the result of its query (simple selects).
'use strict';
var Banco = require('../models/banco');
var Conta = require('../models/conta');
module.exports = {getBancos};
/**
 * Controller: loads all Banco rows and tries to attach the related Conta rows
 * to each one before responding with { bancos: [...] }.
 *
 * NOTE(review): the promises created inside result.map() are discarded, so
 * res.json() below runs without waiting for any Conta.getAll() call to finish.
 *
 * Responses: 204 when there are no rows; result.error.status (or 500) when the
 * model reports an error.
 */
function getBancos(req, res, next) {
let idUsuario = 1; //example
Banco.getAll().then(result => {
if (!result.error){
if (result.length){
// .map() is used only for its side effect; its return value (and the
// promises started in the callback) is thrown away.
result.map(banco => {
Conta.getAll(idUsuario, banco.id)
.then(resultado => {
banco.Conta = resultado
// res.json({bancos: resultado}); --> note1: if I use this, the result is quiet ok, but it raises an error of resending the headers.
})
});
res.json({bancos: result}); //--> note2 --> here results just the Bancos objects with empty .Contas [], because it does not wait the promise to be resolved
}
else
res.status(204).send();
}
else
res.status(result.error.status || 500).send(result);
});
}
Assuming my structure is OK, how can I manage the flow so that the Banco objects have their Conta attributes populated with the result of Conta.getAll()?
Any idea?
Thanks for helping...
Rafael
(let me know if more information are required)
I tried a few more things and worked out how to solve this: basically, put the .map() inside a Promise.all() and await the function that fetches the rows. It ends up like this:
/**
 * Controller: responds with every Banco row, each carrying its related Conta
 * rows in a `Conta` array.
 *
 * Changes over the original answer:
 *  - the rows are stored under `Conta` (the field name the API is specified
 *    to return) instead of `Contas`;
 *  - rejections from either model are forwarded to `next` instead of being
 *    left unhandled with the request hanging;
 *  - `await` is no longer mixed with `.then()` in one expression.
 *
 * Responses:
 *  - 200 `{ bancos: [...] }` once every Conta lookup has finished;
 *  - 204 when there are no Banco rows;
 *  - `result.error.status` (or 500) when the Banco model reports an error.
 */
function getBancos(req, res, next) {
  const idUsuario = 1;

  Banco.getAll()
    .then(result => {
      if (result.error) {
        return res.status(result.error.status || 500).send(result);
      }
      if (!result.length) {
        return res.status(204).send();
      }

      // Run the lookups in parallel; respond only after all of them are done.
      return Promise.all(result.map(async banco => {
        const resultado = await Conta.getAll(idUsuario, banco.id);
        // An error result or an empty result both become an empty list.
        banco.Conta = (!resultado.error && resultado.length > 0) ? resultado : [];
      })).then(() => {
        res.json({bancos: result});
      });
    })
    .catch(next);
}

passing json formatted with information from multiple json documents nodeJs

I have a function from which I would like to return an array of JSON objects with the necessary information, so that everything is sent to the front end ready to use.
// Handler (a method; the enclosing class/object and the method's closing
// brace are not part of this snippet). Loads the current player's office and
// that office's processes, then responds with `infosTable`.
// NOTE(review): the loop below only writes to `this.protocol`; `infosTable`
// is never updated, so the response always carries { protocol: '' }.
async listProcessByOffice(req, res, next) {
try {
const actualPlayer = await PlayerOffice.findById(req.userId);
const actualOffice = await Office.findById(actualPlayer.Office);
// All processes whose _id is listed in the office's `processes` field.
const listProcesses = await Processes.find({ '_id': { $in: actualOffice.processes } });
const infosTable = {
protocol: ''
};
for (let i in listProcesses) {
// Overwritten on every iteration; nothing is added to infosTable here.
this.protocol = listProcesses[i].prc_protocol;
console.log(this.protocol)
}
return res.status(200).json({ infosTable });
} catch (err) {
// Errors are forwarded to the next handler.
return next(err);
}
I'm not sure exactly what you are looking for, but I am assuming that you want to respond with an array of objects. If so, the simple answer is:
// Build one { protocol } entry per process; this array replaces the single
// `infosTable` object in the handler above.
// Changes over the original fragment: the array is walked with for...of
// instead of for...in (which iterates keys, not elements), and the stray
// write to `this.protocol` is dropped. It was not used for the response, and
// `this` is presumably undefined when the method is passed to a router
// unbound (TODO confirm).
const infosTable = [];
for (const item of listProcesses) {
  infosTable.push({ protocol: item.prc_protocol });
  console.log(item.prc_protocol);
}

Resources