How to get data after loop using promise - node.js

I am working on an async problem. I'm making a web scraper and after I scrape the web, I need to put the data in my MongoDB database after putting it in. I need to send it into the frontend, but since I have a loop the elements I can't put the res.json() inside, as it'll gave an error (you can only send once after res.json()).
I'm stuck here. I've used promises before, but this is confusing.
router.get('/scrape', (req, res) => {
request('http://www.nytimes.com', function test(error, response, html) {
const $ = cheerio.load(html);
// An empty array to save the data that we'll scrape
const results = [];
$('h2.story-heading, p.summary').each(function(i, element) {
const link = $(element)
.children()
.attr('href');
const title = $(element)
.children()
.text();
const summary = $(element)
.children()
.text();
const data = {
title: title,
link: link,
summary: summary,
};
articles
.create(data)
.then((resp) => results.push(resp))
// .then((resp) => Promise.resolve(results)) //
// .then((jsonDta ) => res.json(jsonData)) // error you can only give response once.
.catch((err) => reject(err));
});
console.log(results); // empty array
res.json(results)// empty
});
});
My plan is:
to scrape a site (loop the elements)
then save into MongoDB (push the data into an array)
then after the loop pass it to the frontend.
I need to put the query method create... inside the loop because I need each data to have an id.

Instead of trying to accumulate results directly, you can map the elements contained in $('h2.story-heading, p.summary') to an array of promises, then aggregate with Promise.all(). The results you want will be delivered by Promise.all(...).then(...).
router.get('/scrape', (req, res) => {
request('http://www.nytimes.com', function test(error, response, html) {
const $ = cheerio.load(html);
const promises = $('h2.story-heading, p.summary')
.get() // as in jQuery, .get() unwraps Cheerio and returns Array
.map(function(element) { // this is Array.prototype.map()
return articles.create({
'title': $(element).children().text(),
'link': $(element).children().attr('href'),
'summary': $(element).children().text()
})
.catch(err => { // catch so any one failure doesn't scupper the whole scrape.
return {}; // on failure of articles.create(), inject some kind of default object (or string or whatever).
});
});
// At this point, you have an array of promises, which need to be aggregated with Promise.all().
Promise.all(promises)
.then(results => { // Promise.all() should accept whatever promises are returned by articles.create().
console.log(results);
res.json(results);
});
});
});
If you want any single failure to scupper the whole scrape, then omit the catch() and add catch() to the Promise.all().then() chain.
Notes:
For .get() (and most other methods), the jQuery documentation is better than the Cheerio documentation (but be careful because Cheerio is a lean version of jQuery).
At no point do you need new Promise(). All the promises you need are returned by articles.create().

Something like this might work (code not tested)
router.get('/scrape', (req, res) => {
request('http://www.nytimes.com', function test(error, response, html) {
const $ = cheerio.load(html);
// An empty array to save the data that we'll scrape
const results = [];
$('h2.story-heading, p.summary').each(function(i, element) {
const link = $(element)
.children()
.attr('href');
const title = $(element)
.children()
.text();
const summary = $(element)
.children()
.text();
const data = {
title: title,
link: link,
summary: summary,
};
const articleCreate = articles.create(data);
results.push(articleCreate);
});
console.log(results); // this is array of promise functions.
Promise.all(results).then(allResults => {
res.json(allResults)
});
// or you could use array.reduce for sequantial resolve instead of Promise.all
});
});

Use .map function to return all promises to Promise.all and then return the results.
request('http://www.nytimes.com', function test(error, response, html) {
const $ = cheerio.load(html);
var summary = $('h2.story-heading, p.summary')
Promise.all(summary.map((i, element) =>{
const data = {
title: $(element).children().text(),
link: $(element).children().attr('href'),
summary: $(element).children().text(),
};
return articles
.create(data)
}).get())
.then((result)=>{
console.log(result);
res.json(result);
});
})

Related

Should I do database change inside each fetch request, or inside Promise.all?

I want to loop through the first 151 Pokemon using the PokeAPI, and adding each Pokemon to my mongo database.
I already have the schema for the pokemon, where I am just saving their string name, and an array of their moves.
I am looping through the axios calls, and storing them into an array of promises, and waiting for them to resolve
app.get('/', async (req, res) => {
const promises = []
for (let i = 1; i <= 151; i++) {
promises.push(axios.get(`https://pokeapi.co/api/v2/pokemon/${i}`))
}
await Promise.all(promises).then(async (p) => {
const newPokemon = new Pokemon({
name: p.name,
moves: p.moves,
})
await newPokemon.save()
})
})
Is this at all correct? Where should I be doing my database queries, inside the individual axios calls or inside the promise.all?
Promise.all returns an array of values. You should use the insertMany function to insert all Pokemon at once:
await Promise.all(promises).then(async (pokemons) => {
try {
await Pokemon.insertMany(pokemons);
} catch (err) {
console.error(err);
}
})

Why does Async firebase fetching is not working? (NODE JS)

Building a NodeJS REST API.
Trying to send load data from FireBase collection, then sending it to the user (as API response).
Looks like the problem is that it's not waits for the firebase fetch to resolve, but send back a response without the collection data. (tried to use ASYNC-AWAIT but its not working)
exports.getChatMessages = async (req, res, next) => {
const chatId = req.params.chatId
const getChatData = () => {
db
.collection('chats')
.doc(chatId)
.collection('messages')
.orderBy('timeStamp', 'asc')
.onSnapshot((snapshot) => {
snapshot.docs.forEach(msg => {
console.log(msg.data().messageContent)
return {
authorID: msg.data().authorID,
messageContent: msg.data().messageContent,
timeStamp: msg.data().timeStamp,
}
})
})
}
try {
const chatData = await getChatData()
console.log(chatData)
res.status(200).json({
message: 'Chat Has Found',
chatData: chatData
})
} catch (err) {
if (!err.statusCode) {
err.statusCode(500)
}
next(err)
}
}
As you can see, I've used 2 console.logs to realize what the problem, Terminal logs looks like:
[] (from console.logs(chatData))
All messages (from console.log(msg.data().messageContent))
Is there any way to block the code unti the firebase data realy fetched?
If I correctly understand, you want to send back an array of all the documents present in the messages subcollection. The following should do the trick.
exports.getChatMessages = async (req, res, next) => {
const chatId = req.params.chatId;
const collectionRef = db
.collection('chats')
.doc(chatId)
.collection('messages')
.orderBy('timeStamp', 'asc');
try {
const chatsQuerySnapshot = await collectionRef.get();
const chatData = [];
chatsQuerySnapshot.forEach((msg) => {
console.log(msg.data().messageContent);
chatData.push({
authorID: msg.data().authorID,
messageContent: msg.data().messageContent,
timeStamp: msg.data().timeStamp,
});
});
console.log(chatData);
res.status(200).json({
message: 'Chat Has Found',
chatData: chatData,
});
} catch (err) {
if (!err.statusCode) {
err.statusCode(500);
}
next(err);
}
};
The asynchronous get() method returns a QuerySnapshot on which you can call forEach() for enumerating all of the documents in the QuerySnapshot.
You can only await a Promise. Currently, getChatData() does not return a Promise, so awaiting it is pointless. You are trying to await a fixed value, so it resolves immediately and jumps to the next line. console.log(chatData) happens. Then, later, your (snapshot) => callback happens, but too late.
const getChatData = () => new Promise(resolve => { // Return a Promise, so it can be awaited
db.collection('chats')
.doc(chatId)
.collection('messages')
.orderBy('timeStamp', 'asc')
.onSnapshot(resolve) // Equivalent to .onSnapshot((snapshot) => resolve(snapshot))
})
const snapshot = await getChatData();
console.log(snapshot)
// Put your transform logic out of the function that calls the DB. A function should only do one thing if possible : call or transform, not both.
const chatData = snapshot.map(msg => ({
authorID: msg.data().authorID,
messageContent: msg.data().messageContent,
timeStamp: msg.data().timeStamp,
}));
res.status(200).json({
message: 'Chat Has Found',
chatData
})
Right now, getChatData is this (short version):
const getChatData = () => {
db
.collection('chats')
.doc(chatId)
.collection('messages')
.orderBy('timeStamp', 'asc')
.onSnapshot((snapshot) => {}) // some things inside
}
What that means is that the getChatData function calls some db query, and then returns void (nothing). I bet you'd want to return the db call (hopefully it's a Promise), so that your await does some work for you. Something along the lines of:
const getChatData = async () =>
db
.collection('chats')
// ...
Which is the same as const getChatData = async() => { return db... }
Update: Now that I've reviewed the docs once again, I see that you use onSnapshot, which is meant for updates and can fire multiple times. The first call actually makes a request, but then continues to listen on those updates. Since that seems like a regular request-response, and you want it to happen only once - use .get() docs instead of .onSnapshot(). Otherwise those listeners would stay there and cause troubles. .get() returns a Promise, so the sample fix that I've mentioned above would work perfectly and you don't need to change other pieces of the code.

Node.js - Http get will not display data from MongoDb query in Postman or browser

I'm working on a node.js backend project that uses a MongoDb database. After I query the database and received the data it will not display the data in my browser using res.send(). I also tried res.json(). However, the data does display on my console,but just will not display in postman or my browser. Is the query data from mongoDB not json or an array? I did a little reading and it states it's a cursor pointing to the data. Can this data not be converted to display in a broswer?
mycode as well as console and browser display
ProductRouter.js
router.get('/', (req, res) => {
const products = allProducts.plantProducts();
setTimeout(() => {
if (products === "400") {
res.status("400").send("Error querying database");
}else{
console.log(products);
res.status("200").send(products);
}
}, 1000);
ProductController.js
async function plantProducts(){
try {
const products = await getProducts();
return products;
} catch(err) {
let code = "400";
return code;
//res.status('400').send(err.message);
}
}
plantProducts();
//Search mongodb for all products.
function getProducts() {
return new Promise((resolve, reject) => {
const products = Product
.find()
resolve(products);
});
}
The problem is that products yields a promise which express' res.json will not automatically await and convert to a json-response. You need await the promise and then send the response. Something like:
router.get('/', async (req, res) => {
const products = await allProducts.plantProducts();
res.json(products);
});

How to push an object into an array in async function

i have been trying to insert an object into an array in async function ,but it
return an empty array as output in nodejs ,mongoose
var data = [];
app.get("/api/post", async (req, res) => {
const post = await UserPost.find();
post.forEach(async element => {
const email = await element.userid;
const user = await Account.find({ email });
const usern = await user[0].username;
var userobject = {
element,
usern
};
//Promise.all(userobject)
data.push(userobject);
});
console.log(data);
res.send({ data });
});
It seems you are struggling with promises. In order to achieve this specific scenario, you can use Promise.all and Array.map.
Here is a code I edited for you:
(*please note that this is just a dummy code for the sake of explanation)
app.get("/api/post", async (req, res) => {
try {
const posts = await dummyPromiseResolver(); // first promise
const promises = posts.map(async element => {
const user = await dummyEmailReturn(element.userid); // second promise
const usern = user[0].username;
return {
usern,
...element
};
});
const fresult = await Promise.all(promises);
res.send(fresult);
} catch (error) {
console.error("error in posts fetch:" + error);
}
});
If I describe this code, posts.map is creating an Array of promises since we need to iterate through every object in the array and needs to add values from separate promises.
Then Promise.all can execute your promise array and return final results array with your desired results.
Note: You can also use for … of as well but when we need to happen things parallelly we use Promise.all. You can find more information from this thread.
here is a link for code sandbox: https://codesandbox.io/embed/serverless-cookies-nu4h0
Please note that I have added dummyPromiseResolver and dummyEmailReturn which would be equal to UserPost.find() and Account.find() functions respectively. In addition to that, I removed a few unnecessary awaits in your code. I added a try catch block to catch any exceptions. You can change that try catch as you please.
hope this will help you. let me know if you need more clarifications.

How to use Promises correctly with multiple requests

I am using twitter's API to receive recent tweets by querying specific hash tags. The first GET request is to search/tweets which takes the hashtag and others queries in it's parameters. The response for this returns an array of tweets (objects). I push each of these id's into an external array. Now I want to loop through this array and make another call to statuses/show for each of these IDs so that I can get the location data of the user posting the tweet. The statuses/show end-point only takes a single tweet id but I have an array. How would I go about using the same getRequest function for an array of IDs?
I tried to implement it by reading online about Promises but it's not working.
Here's my code:
function getRequest(url, params) {
return new Promise(function (success, failure) {
twitterSearch.get(url, params, function (error, body, response) {
if (!error) {
success(body);
} else {
failure(error);
}
});
});
}
app.post('/twitter', (req, res) => {
console.log("body", req.body);
var tweetIDs = [];
var bounds = [];
getRequest('search/tweets', {q: req.body.tag, result_type:'recent', count:'100'})
.then((tweets) => {
tweets.statuses.map((status) => {
if(status.user.geo_enabled) {
console.log("ID: ", status.id);
tweetIDs.push(status.id);
}
});
return getRequest('statuses/show', tweetIDs); //I know tweetIDs is wrong, dont know what to do from here.
}).then((data) => {
console.log("User data for the ID")
console.log(data);
}).catch((error) => {
console.log(error)
});
res.send(bounds);
bounds.length = 0;
});
You nearly got it right! Map all tweets to Promises returned by your getRequest() function. Then, the Promise.all() method will return you a single Promise that resolves when all of the promises in your array resolves.
By the way, you can use Array.reduce instead of map to both filter and map your array at the same time, this factor your code a little better.
getRequest('search/tweets', {q: req.body.tag, result_type:'recent', count:'100'})
.then( tweets => {
let requests = tweets.statuses.reduce( (ret,status) => {
if(status.user.geo_enabled)
// The following is still wrong though, I guess. Format the params of getRequest accordingly.
ret.push( getRequest('statuses/show', status.id) );
return ret;
}, []);
return Promise.all(requests);
})
.then( results => {
results.forEach( res => {
console.log("User data for the ID");
console.log(res);
})
})
EDIT : Related jsfiddle
Replace the line
return getRequest('statuses/show', tweetIDs); //I know tweetIDs is wrong, dont know what to do from here.
with
return Promisel.all( tweetIDs.map( (tId) => getRequest('statuses/show', tId) );

Resources