i have an array of variable number of urls and i must merge the data get with axion
the problem is then every axios call is relative to the data of the previus
if i have a fixed number of ulrs i can nest axion calls and live with that
i think to use something like this
var urls = ["xx", "xx", "xx"];
mergeData(urls);
function mergeData(myarray, myid = 0, mydata = "none") {
var myurl = "";
if (Array.isArray(mydata)) {
myurl = myarray[myid];
// do my stuff with data and modify the url
} else {
myurl = myarray[myid];
}
axios.get(myurl)
.then(response => {
// do my stuff and get the data i need and put on an array
if (myarray.length < myid) {
mergeData(myarray, myid + 1, data);
} else {
// show result on ui
}
})
.catch(error => {
console.log(error);
});
}
but i dont like it
there is another solution?
(be kind, i'm still learning ^^)
just to be clear
i need to optain
http request to "first url", parse the the json, save some data(some needed for the output)
another http request to "second url" with one or more parameter from previous data, parse the the json, save some data(some needed for the output)
... and so on, for 5 to 10 times
If your goal is to make subsequent HTTP calls based on information you get from previous calls, I'd utilize async/await and for...of to accomplish this instead of relying on a recursive solution.
async function mergeData(urls) {
const data = [];
for (const url of urls) {
const result = await axios.get(url).then(res => res.data);
console.log(`[${result.id}] ${result.title}`);
// here, do whatever you want to do with
// `result` to make your next call...
// for now, I am just going to append each
// item to `data` and return it at the end
data.push(result);
}
return data;
}
const items = [
"https://jsonplaceholder.typicode.com/posts/1",
"https://jsonplaceholder.typicode.com/posts/2",
"https://jsonplaceholder.typicode.com/posts/3"
];
console.log("fetching...")
mergeData(items)
.then(function(result) {
console.log("done!")
console.log("final result", result);
})
.catch(function(error) {
console.error(error);
});
<script src="https://cdnjs.cloudflare.com/ajax/libs/axios/0.19.2/axios.min.js"></script>
Using async/await allows you to utilize for...of which will wait for each call to resolve or reject before moving onto the next one.
To learn more about async/await and for...of, have a look here:
developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/async_function
developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for...of
Hope this helps.
Related
I need some advice on how to structure this function as at the moment it is not happening in the correct order due to node being asynchronous.
This is the flow I want to achieve; I don't need help with the code itself but with the order to achieve the end results and any suggestions on how to make it efficient
Node routes a GET request to my controller.
Controller reads a .csv file on local system and opens a read stream using fs module
Then use csv-parse module to convert that to an array line by line (many 100,000's of lines)
Start a try/catch block
With the current row from the csv, take a value and try to find it in a MongoDB
If found, take the ID and store the line from the CSV and this id as a foreign ID in a separate database
If not found, create an entry into the DB and take the new ID and then do 6.
Print out to terminal the row number being worked on (ideally at some point I would like to be able to send this value to the page and have it update like a progress bar as the rows are completed)
Here is a small part of the code structure that I am currently using;
const fs = require('fs');
const parse = require('csv-parse');
function addDataOne(req, id) {
const modelOneInstance = new InstanceOne({ ...code });
const resultOne = modelOneInstance.save();
return resultOne;
}
function addDataTwo(req, id) {
const modelTwoInstance = new InstanceTwo({ ...code });
const resultTwo = modelTwoInstance.save();
return resultTwo;
}
exports.add_data = (req, res) => {
const fileSys = 'public/data/';
const parsedData = [];
let i = 0;
fs.createReadStream(`${fileSys}${req.query.file}`)
.pipe(parse({}))
.on('data', (dataRow) => {
let RowObj = {
one: dataRow[0],
two: dataRow[1],
three: dataRow[2],
etc,
etc
};
try {
ModelOne.find(
{ propertyone: RowObj.one, propertytwo: RowObj.two },
'_id, foreign_id'
).exec((err, searchProp) => {
if (err) {
console.log(err);
} else {
if (searchProp.length > 1) {
console.log('too many returned from find function');
}
if (searchProp.length === 1) {
addDataOne(RowObj, searchProp[0]).then((result) => {
searchProp[0].foreign_id.push(result._id);
searchProp[0].save();
});
}
if (searchProp.length === 0) {
let resultAddProp = null;
addDataTwo(RowObj).then((result) => {
resultAddProp = result;
addDataOne(req, resultAddProp._id).then((result) => {
resultAddProp.foreign_id.push(result._id);
resultAddProp.save();
});
});
}
}
});
} catch (error) {
console.log(error);
}
i++;
let iString = i.toString();
process.stdout.clearLine();
process.stdout.cursorTo(0);
process.stdout.write(iString);
})
.on('end', () => {
res.send('added');
});
};
I have tried to make the functions use async/await but it seems to conflict with the fs.openReadStream or csv parse functionality, probably due to my inexperience and lack of correct use of code...
I appreciate that this is a long question about the fundamentals of the code but just some tips/advice/pointers on how to get this going would be appreciated. I had it working when the data was sent one at a time via a post request from postman but can't implement the next stage which is to read from the csv file which contains many records
First of all you can make the following checks into one query:
if (searchProp.length === 1) {
if (searchProp.length === 0) {
Use upsert option in mongodb findOneAndUpdate query to update or upsert.
Secondly don't do this in main thread. Use a queue mechanism it will be much more efficient.
Queue which I personally use is Bull Queue.
https://github.com/OptimalBits/bull#basic-usage
This also provides the functionality you need of showing progress.
Also regarding using Async Await with ReadStream, a lot of example can be found on net such as : https://humanwhocodes.com/snippets/2019/05/nodejs-read-stream-promise/
I'm working on a proxy that caches files and I'm trying to add some logic that prevents multiple clients from downloading the same files before the proxy has a chance to cache them.
Basically, the logic I'm trying to implement is the following:
Client 1 requests a file. The proxy checks if the file is cached. If it's not, it requests it from the server, caches it, then sends it to the client.
Client 2 requests the same file after client 1 requested it, but before the proxy has a chance to cache it. So the proxy will tell client 2 to wait a few seconds because there is already a download in progress.
A better approach would probably be to give client 2 a "try again later" message, but let's just say that's currently not an option.
I'm using Nodejs with the anyproxy library. According to the documentation, delayed responses are possible by using promises.
However, I don't really see a way to achieve what I want using Promises. From what I can tell, I could do something like this:
module.exports = {
*beforeSendRequest(requestDetail) {
if(thereIsADownloadInProgressFor(requestDetail.url)) {
return new Promise((resolve, reject) => {
setTimeout(() => { // delay
resolve({ response: responseDetail.response });
}, 10000);
});
}
}
};
But that would mean simply waiting for a maximum amount of time and hoping the download finishes by then.
And I don't want that.
I would prefer to be able to do something like this (but with Promises, somehow):
module.exports = {
*beforeSendRequest(requestDetail) {
if(thereIsADownloadInProgressFor(requestDetail.url)) {
var i = 0;
for(i = 0 ; i < 10 ; i++) {
JustSleep(1000);
if(!thereIsADownloadInProgressFor(requestDetail.url))
return { response: responseDetail.response };
}
}
}
};
Is there any way I can achieve this with Promises in Nodejs?
Thanks!
You can use a Map to cache your file downloads.
The mapping in Map would be url -> Promise { file }
// Map { url => Promise { file } }
const cache = new Map()
const thereIsADownloadInProgressFor = url => cache.has(url)
const getCachedFilePromise = url => cache.get(url)
const downloadFile = async url => {/* download file code here */}
const setAndReturnCachedFilePromise = url => {
const filePromise = downloadFile(url)
cache.set(url, filePromise)
return filePromise
}
module.exports = {
beforeSendRequest(requestDetail) {
if(thereIsADownloadInProgressFor(requestDetail.url)) {
return getCachedFilePromise(requestDetail.url).then(file => ({ response: file }))
} else {
return setAndReturnCachedFilePromise(requestDetail.url).then(file => ({ response: file }))
}
}
};
You don't need to send a try again response, simply serve the same data to both requests. All you need to do is store the requests somewhere in the caching system and trigger all of them when the fetching is done.
Here's a cache implementation that does only a single fetch for multiple requests. No delays and no try-laters:
export class class Cache {
constructor() {
this.resultCache = {}; // this object is the cache storage
}
async get(key, cachedFunction) {
let cached = this.resultCache[key];
if (cached === undefined) { // No cache so fetch data
this.resultCache[key] = {
pending: [] // This is the magic, store further
// requests in this pending array.
// This way pending requests are directly
// linked to this cache data
}
try {
let result = await cachedFunction(); // Wait for result
// Once we get result we need to resolve all pending
// promises. Loop through the pending array and
// resolve them. See code below for how we store pending
// requests.. it will make sense:
this.resultCache[key].pending
.forEach(waiter => waiter.resolve(result));
// Store the result of the cache so later we don't
// have to fetch it again:
this.resultCache[key] = {
data: result
}
// Return result to original promise:
return result;
// Note: yes, this means pending promises will get triggered
// before the original promise is resolved but normally
// this does not matter. You will need to modify the
// logic if you want promises to resolve in original order
}
catch (err) { // Error when fetching result
// We still need to trigger all pending promises to tell
// them about the error. Only we reject them instead of
// resolving them:
if (this.resultCache[key]) {
this.resultCache[key].pending
.forEach((waiter: any) => waiter.reject(err));
}
throw err;
}
}
else if (cached.data === undefined && cached.pending !== undefined) {
// Here's the condition where there was a previous request for
// the same data. Instead of fetching the data again we store
// this request in the existing pending array.
let wait = new Promise((resolve, reject) => {
// This is the "waiter" object above. It is basically
// It is basically the resolve and reject functions
// of this promise:
cached.pending.push({
resolve: resolve,
reject: reject
});
});
return await wait; // await response form original request.
// The code above will cause this to return.
}
else {
// Return cached data as normal
return cached.data;
}
}
}
The code may look a bit complicated but it is actually quite simple. First we need a way to store the cached data. Normally I'd just use a regular object for this:
{ key : result }
Where the cached data is stored in the result. But we also need to store additional metadata such as pending requests for the same result. So we need to modify our cache storage:
{ key : {
data: result,
pending: [ array of requests ]
}
}
All this is invisible and transparent to code using this Cache class.
Usage:
const cache = new Cache();
// Illustrated with w3c fetch API but you may use anything:
cache.get( URL , () => fetch(URL) )
Note that wrapping the fetch in an anonymous function is important because we want the Cache.get() function to conditionally call the fetch to avoid multiple fetch being called. It also gives the Cache class flexibility to handle any kind of asynchronous operation.
Here's another example for caching a setTimeout. It's not very useful but it illustrates the flexibility of the API:
cache.get( 'example' , () => {
return new Promise((resolve, reject) => {
setTimeout(resolve, 1000);
});
});
Note that the Cache class above does not have any invalidations or expiry logic for the sake of clarity but it's fairly easy to add them. For example if you want the cache to expire after some time you can just store the timestamp along with the other cache data:
{ key : {
data: result,
timestamp: timestamp,
pending: [ array of requests ]
}
}
Then in the "no-cache" logic simply detect the expiry time:
if (cached === undefined || (cached.timestamp + timeout) < now) ...
I am trying to:
Poll a public API every 5 seconds
Store the resulting JSON in a variable
Store the next query to this same API in a second variable
Compare the first variable to the second
Print the second variable if it is different from the first
Else: Print the phrase: 'The objects are the same' if they haven't changed
Unfortunately, the comparison part appears to fail. I am realizing that this implementation is probably lacking the appropriate variable scoping but I can't put my finger on it. Any advice would be highly appreciated.
data: {
chatters: {
viewers: {
},
},
},
};
//prints out pretty JSON
function prettyJSON(obj) {
console.log(JSON.stringify(obj, null, 2));
}
// Gets Users from Twitch API endpoint via axios request
const getUsers = async () => {
try {
return await axios.get("http://tmi.twitch.tv/group/user/sixteenbitninja/chatters");
} catch (error) {
console.error(error);
}
};
//Intended to display
const displayViewers = async (previousResponse) => {
const usersInChannel = await getUsers();
if (usersInChannel.data.chatters.viewers === previousResponse){
console.log("The objects are the same");
} else {
if (usersInChannel.data.chatters) {
prettyJSON(usersInChannel.data.chatters.viewers);
const previousResponse = usersInChannel.data.chatters.viewers;
console.log(previousResponse);
intervalFunction(previousResponse);
}
}
};
// polls display function every 5 seconds
const interval = setInterval(function () {
// Calls Display Function
displayViewers()
}, 5000);```
The issue is that you are using equality operator === on objects. two objects are equal if they have the same reference. While you want to know if they are identical. Check this:
console.log({} === {})
For your usecase you might want to store stringified version of the previousResponse and compare it with stringified version of the new object (usersInChannel.data.chatters.viewers) like:
console.log(JSON.stringify({}) === JSON.stringify({}))
Note: There can be issues with this approach too, if the order of property changes in the response. In which case, you'd have to check individual properties within the response objects.
May be you can use npm packages like following
https://www.npmjs.com/package/#radarlabs/api-diff
Im trying to extract the contents of variable topPost and place it into const options under url. I cant seem to get it to work. Im using the snoowrap/Reddit API and image-downloader.
var subReddit = r.getSubreddit('dankmemes');
var topPost = subReddit.getTop({time: 'hour' , limit: 1,}).map(post => post.url).then(console.log);
var postTitle = subReddit.getTop({time: 'hour' , limit: 1 }).map(post => post.title).then(console.log);
const options = {
url: topPost,
dest: './dank_memes/photo.jpg'
}
async function downloadIMG() {
try {
const { filename, image } = await download.image(options)
console.log(filename) // => /path/to/dest/image.jpg
} catch (e) {
console.error(e)
}
}
the recommended formatting for the image downloader is as follows:
const options = {
url: 'http://someurl.com/image.jpg',
dest: '/path/to/dest'
}
async function downloadIMG() {
try {
const { filename, image } = await download.image(options)
console.log(filename) // => /path/to/dest/image.jpg
} catch (e) {
console.error(e)
}
}
downloadIMG()
so it looks like i have to have my url formatted in between ' ' but i have no idea how to get the url from var topPost and place it in between those quotes.
any ideas would be greatly appreciated.
Thanks!
topPost is a Promise, not the final value.
Promises existence is to work with asynchronous data easily. Asynchronous data is data that returns at a point in the future, not instantly, and that's why they have a then method. When a Promise resolves to a value, the then callback is called.
In this case, the library will connect to Reddit and download data from it, which is not something that can done instantly, so the code will continue running and later will call the then callback, when the data has finished downloading. So:
var subReddit = r.getSubreddit('dankmemes');
// First we get the top posts, and register a "then" callback to receive all these posts
subReddit.getTop({time: 'hour' , limit: 1,}).map(post => post.url).then((topPost) => {
// When we got the top posts, we connect again to Reddit to get the top posts title.
subReddit.getTop({time: 'hour' , limit: 1 }).map(post => post.title).then((postTitle) => {
// Here you have both topPost and postTitle (which will be both arrays. You must access the first element)
console.log("This console.log will be called last");
});
});
// The script will continue running at this point, but the script is still connecting to Reddit and downloading the data
console.log("This console.log will be called first");
With this code you have a problem. You first connect to Reddit to get the top post URL, and then you connect to Reddit again to get the post Title. Is like pressing F5 in between. Simply think that if a new post is added between those queries, you will get the wrong title (and also you are consuming double bandwidth consumption, which is not optimal too). The correct way of doing this is to get both the title and the url on the same query. How to do so?, like this:
var subReddit = r.getSubreddit('dankmemes');
// We get the top posts, and map BOTH the url and title
subReddit.getTop({time: 'hour' , limit: 1,}).map(post => {
return {
url: post.url,
title: post.title
};
}).then((topPostUrlAndTitle) => {
// Here you have topPostUrlAndTitle[0].url and topPostUrlAndTitle[0].title
// Note how topPostUrlAndTitle is an array, as you are actually asking for "all top posts" although you are limiting to only one.
});
BUT this is also weird to do. Why don't you just get the post data directly? Like so:
var subReddit = r.getSubreddit('dankmemes');
// We get the top posts
subReddit.getTop({time: 'hour' , limit: 1,}).then((posts) => {
// Here you have posts[0].url and posts[0].title
});
There's a way to get rid of JavaScript callback hell with async/await, but I'm not going to enter into matter because for a newbie is a bit difficult to explain why is not synchronous code although it seems to look like so.
I am a total scrub with the node http module and having some trouble.
The ultimate goal here is to take a huge list of urls, figure out which are valid and then scrape those pages for certain data. So step one is figuring out if a URL is valid and this simple exercise is baffling me.
say we have an array allURLs:
["www.yahoo.com", "www.stackoverflow.com", "www.sdfhksdjfksjdhg.net"]
The goal is to iterate this array, make a get request to each and if a response comes in, add the link to a list of workingURLs (for now just another array), else it goes to a list brokenURLs.
var workingURLs = [];
var brokenURLs = [];
for (var i = 0; i < allURLs.length; i++) {
var url = allURLs[i];
var req = http.get(url, function (res) {
if (res) {
workingURLs.push(?????); // How to derive URL from response?
}
});
req.on('error', function (e) {
brokenURLs.push(e.host);
});
}
what I don't know is how to properly obtain the url from the request/ response object itself, or really how to structure this kind of async code - because again, I am a nodejs scrub :(
For most websites using res.headers.location works, but there are times when the headers do not have this property and that will cause problems for me later on. Also I've tried console logging the response object itself and that was a messy and fruitless endeavor
I have tried pushing the url variable to workingURLs, but by the time any response comes back that would trigger the push, the for loop is already over and url is forever pointing to the final element of the allURLs array.
Thanks to anyone who can help
You need to closure url value to have access to it and protect it from changes on next loop iteration.
For example:
(function(url){
// use url here
})(allUrls[i]);
Most simple solution for this is use forEach instead of for.
allURLs.forEach(function(url){
//....
});
Promisified solution allows you to get a moment when work is done:
var http = require('http');
var allURLs = [
"http://www.yahoo.com/",
"http://www.stackoverflow.com/",
"http://www.sdfhksdjfksjdhg.net/"
];
var workingURLs = [];
var brokenURLs = [];
var promises = allURLs.map(url => validateUrl(url)
.then(res => (res?workingURLs:brokenURLs).push(url)));
Promise.all(promises).then(() => {
console.log(workingURLs, brokenURLs);
});
// ----
function validateUrl(url) {
return new Promise((ok, fail) => {
http.get(url, res => return ok(res.statusCode == 200))
.on('error', e => ok(false));
});
}
// Prevent nodejs from exit, don't need if any server listen.
var t = setTimeout(() => { console.log('Time is over'); }, 1000).ref();
You can use something like this (Not tested):
const arr = ["", "/a", "", ""];
Promise.all(arr.map(fetch)
.then(responses=>responses.filter(res=> res.ok).map(res=>res.url))
.then(workingUrls=>{
console.log(workingUrls);
console.log(arr.filter(url=> workingUrls.indexOf(url) == -1 ))
});
EDITED
Working fiddle (Note that you can't do request to another site in the browser because of Cross domain).
UPDATED with #vp_arth suggestions
const arr = ["/", "/a", "/", "/"];
let working=[], notWorking=[],
find = url=> fetch(url)
.then(res=> res.ok ?
working.push(res.url) && res : notWorking.push(res.url) && res);
Promise.all(arr.map(find))
.then(responses=>{
console.log('woking', working, 'notWorking', notWorking);
/* Do whatever with the responses if needed */
});
Fiddle