Promise is undefined in procedural code in nodejs - node.js

I'm very new to async languages like nodejs, I'm trying to write a web scraper that will visit a link, download a code, extract with regex, THEN visit another link using that code. I'm aware I could use callbacks, but I expect to have to go 8-9 levels deep, I think promises is the way to go (is there a better way?)
var promise = require("promise");
var request = require("request");
login();
function get_login_code()
{
request.get("someurl.com", function (error, response, body)
{
// just for example
body = 'TOKEN" value="hello world"';
var login_code = body.match(/.TOKEN" value="([^"]+)../i);
return login_code
});
}
function login()
{
var login_promise = promise.resolve(get_login_code());
console.log(login_promise);
}
I've tried a bunch of combinations of messing around with promises, but I either always get undefined or a promise which doesn't have a value. I don't want to nest promise functions inside promises because that's exactly the same thing as callback hell. Can someone tell me what I'm doing wrong, I really want this code to be procedural and not 8 callbacks. In the ideal world promise.resolve just waits until get_login_code() returns the actual code, not undefined.
Output:
Promise { _45: 0, _81: 1, _65: undefined, _54: null }
Desired Output:
hello world

What your code does:
calls get_login_code that returns nothing (i.e. undefined)
inside of login function you create a new promise that is immediately resolved to the result of get_login_code, i.e. undefined.
Thus, you do not use login_code at all.
To make it work, get_login_code should return a promise that resolves to login_code. Assuming you use the promise npm module, the code may look like:
// uppercased, it's a constructor
var Promise = require("promise");
var request = require("request");
login();
/**
 * Requests the login page and extracts the login token from its body.
 *
 * @returns {Promise<string>} Resolves with the captured token value.
 *   Rejects with the request error, or with an Error when the body
 *   contains no token.
 */
function get_login_code()
{
    return new Promise(function (resolve, reject) {
        request.get("someurl.com", function (error, response, body) {
            // The check must use the callback's own parameter name (`error`).
            if (error) {
                reject(error);
                return;
            }
            // just for example
            body = 'TOKEN" value="hello world"';
            // Anchor on the closing quote so the whole value is captured;
            // wildcard padding around the pattern would require characters
            // that the body is not guaranteed to contain.
            var match = body.match(/TOKEN" value="([^"]+)"/i);
            if (!match) {
                reject(new Error("login token not found in response body"));
                return;
            }
            // Hand callers the captured token, not the raw match array.
            resolve(match[1]);
        });
    });
}
function login()
{
// return a new promise to use in subsequent operations
return get_login_code()
.then(function(login_code) {
console.log(login_code);
});
}

You should create a new promise inside the function and call resolve or reject from within the request callback, rather than passing the function's return value to promise.resolve. Then use then to get the value out of the promise. I guess this should work.
var promise = require("promise");
var request = require("request");
/**
 * Requests the login page and extracts the login token from its body.
 *
 * @returns {Promise<string>} Resolves with the captured token value.
 *   Rejects with the request error, or with an Error when the body
 *   contains no token.
 */
function get_login_code()
{
    // The promise has to be returned, otherwise callers receive undefined
    // and cannot chain .then() on the result.
    return new Promise(function (resolve, reject) {
        request.get("someurl.com", function (error, response, body)
        {
            if (error) {
                reject(error);
                return;
            }
            // just for example
            body = 'TOKEN" value="hello world"';
            // Anchor on the closing quote so the whole value is captured.
            var match = body.match(/TOKEN" value="([^"]+)"/i);
            if (!match) {
                reject(new Error("login token not found in response body"));
                return;
            }
            // Hand callers the captured token, not the raw match array.
            resolve(match[1]);
        });
    });
}
get_login_code()
.then(function (code) {
console.log(code);
});

Related

NodeJS - Nested async functions

in an express router .post which is async, I have this line:
var recaptcha = await tokenValidate(req);
tokenValidate is below:
async function tokenValidate(req) {
// some code to generate a URL with a private key, public key, etc.
return await tokenrequest(url);
}
Then tokenrequest is below: (note request is the npm request library)
async function tokenrequest(url) {
request(url, function(err, response, body){
//the body is the data that contains success message
body = JSON.parse(body);
//check if the validation failed
if(body.success !== undefined && !body.success){
return false;
}
//if passed response success message to client
return true;
})
}
Issue is the nested async functions. The initial recaptcha variable returns undefined since 'request' doesn't resolve, even though I'm using await. How can I get recaptcha to wait for tokenValidate which in turn has to wait for tokenrequest?
Thank you.
A couple issues:
Your tokenrequest function is not returning anything (the return value in the request callback function won't be returned)
await tokenrequest(url); won't work as you expect because the request library is using a callback (not async/await)
This can be fixed by returning a Promise:
/**
 * Calls the verification endpoint and reports whether validation passed.
 *
 * @param {string} url - Verification URL to request.
 * @returns {Promise<boolean>} Resolves with `false` only when the JSON reply
 *   has a `success` field that is falsy, and with `true` otherwise. Rejects
 *   when the request fails or the body cannot be read as a JSON object.
 */
function tokenrequest(url) {
  // No `async` keyword needed: the function already returns a promise.
  return new Promise(function (resolve, reject) {
    request(url, function (err, response, body) {
      // Without this check a failed request would reach JSON.parse with an
      // undefined body and throw inside the callback, leaving the promise
      // pending forever.
      if (err) {
        reject(err);
        return;
      }
      try {
        //the body is the data that contains success message
        const data = JSON.parse(body);
        //check if the validation failed
        const failed = data.success !== undefined && !data.success;
        resolve(!failed);
      } catch (parseError) {
        reject(parseError);
      }
    });
  });
}
Your tokenRequest() function is returning a promise (because it's async), but that promise resolves immediately with no value that is attached to your call to request(). And, the return values you do have are just returning to the request() callback where they are ignored, not from your actual top level function.
What I would suggest is that you ditch the request() module because it's deprecated and does not support promises and pick a new more modern module that does support promises. I use the got() module which would make this whole thing a LOT easier and a lot fewer lines of code.
const got = require('got');
async function tokenrequest(url) {
let result = await got(url).json();
return !!result.success;
}

Promise is returning <pending>

I've written a little code snippet for an HTTP request. After I realized request is async, I rewrote my code with a promise. But it's telling me that the promise is pending. I have absolutely no idea why it is wrong. Here is my code:
function verifyUser(uname,pword){
var options = {
url: 'CENSORED',
method: 'POST',
headers: headers,
form: {'Username':uname, 'Password':pword, 'Key':key},
json:true
}
return new Promise((r,j) => request(options,(error,response,body)=>{
if(error){
console.log("[ERROR] Promise returned error");
throw j(error);
}
r(body);
}))
}
async function receiveWBBData(uspass,passwd){
const data = await verifyUser(uspass,passwd);
return data;
}
var test1 = receiveWBBData("r0b","CENSORED");
console.log(test1);`
Thanks in advance!
receiveWBBData is async. Therefore, test1 is a promise. If you want to log the result, do test1.then(console.log).catch(console.error), or use var test1 = await receiveWBBData(/*...*/) if you want the result in your variable. Note that await can only be used in async functions.
Also, as @somethinghere mentioned, you should not throw your promise rejection; you should return it.
An async function always returns a promise. In order to "unwrap" a promise, you need to await on it, so you need var test1 = await receiveWBBData("r0b","CENSORED");.
Top-level await is not part of the language yet, so I'd recommend you add a function called main() or run() and just call that when your script starts.
async function receiveWBBData(uspass,passwd){
const data = await verifyUser(uspass,passwd);
return data;
}
/**
 * Script entry point: fetches the data via receiveWBBData and logs it.
 *
 * @returns {Promise<void>} Settles once the data has been logged; rejects
 *   with whatever receiveWBBData rejects with.
 */
async function main() {
  // receiveWBBData is async and therefore returns a promise; it has to be
  // awaited so the resolved data is logged instead of a pending promise.
  const test1 = await receiveWBBData("r0b","CENSORED");
  console.log(test1);
}
main().catch(error => console.error(error.stack));

How come async/await doesn't work in my code?

How come this async/await doesn't work?
I've spent all day trying different combinations, watching videos and reading about async/await to find why this doesn't work before posting this here.
I'm trying to make a second nodejs app that will run on a different port; my main app will call it so that it scrapes some data and saves it to the db as a cache.
What it's supposed to do:
Take a keyword and send it to a method called scrapSearch; this method creates a complete URI and sends it to the method that actually gets the web page and returns it up to the first caller.
What is happening:
The console.log below the initial call is triggered before the results are returned.
Console output
Requesting : https://www.google.ca/?q=mykeyword
TypeError: Cannot read property 'substr' of undefined
at /DarkHawk/srv/NodesProjects/_scraper/node_scrapper.js:34:18
at <anonymous>
app.js:
'use strict';
var koa = require('koa');
var fs = require('fs');
var app = new koa();
var Router = require('koa-router');
var router = new Router();
app
.use(router.routes())
.use(router.allowedMethods());
app.listen(3002, 'localhost');
router.get('/scraptest', async function(ctx, next) {
var sfn = require('./scrap-functions.js');
var scrapFunctions = new sfn();
var html = await scrapFunctions.scrapSearch("mykeyword");
console.log(html.substr(0, 20));
//Normally here I'll be calling my other method to extract content
let json_extracted = scrapFunctions.exGg('mykeywords', html);
//Save to db
});
scrap-functions.js:
'use strict';
var request = require('request');
var cheerio = require('cheerio');
/**
 * Scraper constructor: initialises the per-instance state.
 */
function Scraper() {
  // I tried saving html in here but the main script seems to have issues
  // retrieving that
  this.html = '';
  this.kw = {};
  this.tr = {};
}
// Search G0000000gle
/**
 * Builds the search URL for `keyword` and passes it to urlRequest.
 *
 * @param {string} keyword - Search term appended to the query string.
 * @returns {Promise<*>} Resolves with whatever this.urlRequest resolves with.
 */
Scraper.prototype.scrapSearch = async function(keyword) {
  let url = "https://www.google.ca/?q="+keyword;
  let html = await this.urlRequest(url);
  return html;
};
// Get a url'S content
Scraper.prototype.urlRequest = async function(url) {
console.log("Requesting : "+url);
await request(url, await function(error, response, html) {
if(error) console.error(error);
return response;
});
};
module.exports = Scraper;
I tried a lot of things but I finally gave up - I tried putting await/async before each method - that didn't work either.
Why isn't that working?
Edit: wrong function name based on the fact that I created 2 different projects for testing and I mixed the file while copy/pasting.
You are not returning anything from urlRequest. Because it is an async function, it will still create a promise, but it will resolve with undefined. Therefore your html is undefined as seen in the error.
The problematic part is the request function which is a callback style function, but you're treating it as a promise. Using await on any value that is not a promise, won't do anything (technically it creates a promise that resolves directly with the value, but the resulting value remains the same). Both awaits within the urlRequest are unnecessary.
request(url, function(error, response, html) {
if(error) console.error(error);
// This return is for the callback function, not the outer function
return response;
});
You cannot return a value from within the callback. As it's asynchronous, your function will already have finished by the time the callback is called. With the callback style you would do the work inside the callback.
But you can turn it into a promise. You have to create a new promise and return it from urlRequest. Inside the promise you do the asynchronous work (request) and either resolve with the value (the response) or reject with the error.
Scraper.prototype.urlRequest = function(url) {
console.log("Requesting : "+url);
return new Promise((resolve, reject) => {
request(url, (err, response) => {
if (err) {
return reject(err);
}
resolve(response);
});
});
};
When an error occurs you want to return from the callback, so the rest (the successful part) is not executed. I also removed the async keyword, because the function now creates the promise manually.
If you're using Node 8, you can promisify the request function with the built-in util.promisify.
const util = require('util');
const request = require('request');
const requestPromise = util.promisify(request);
Scraper.prototype.urlRequest = function(url) {
console.log("Requesting : " + url);
return requestPromise(url);
};
Both versions will resolve with the response and to get the HTML you need to use response.body.
Scraper.prototype.scrapSearch = async function(keyword) {
let url = "https://www.google.ca/?q=" + keyword;
let response = await this.urlRequest(url);
return response.body;
};
You still need to handle errors from the promise, either with .catch() on the promise, or using try/catch when you await it.
It is absolutely essential to understand promises when using async/await, because it's syntactic sugar on top of promises, to make it look more like synchronous code.
See also:
Understand promises before you start using async/await
Async functions - making promises friendly
Exploring ES6 - Promises for asynchronous programming

Weird behaviour of request-json with bluebird promise

I'm trying to wrap my head around promises, but so far I can't seem to get a simple example working. Here is the code to request JSON from the server:
module.exports = function (app, options) {
var promise = require('bluebird');
var request = require('request-json');
var module = {
url: options.url,
httpClient: promise.promisifyAll(request.createClient(options.url))
};
module.getSample = function() {
return this.httpClient.getAsync('sample/')
.then(function(error, response, body) {
console.log(body);
})
.catch(function(e) {
console.log('error');
console.log(e);
});
};
return module;
};
but when I call it like this:
var backendClient = require('./utils/backendClient.js')(app, {
url: 'http://localhost:8080/'
});
backendClient.getSample()
at runtime I get an error saying '[SyntaxError: Unexpected token o]'. Version without promises works fine. What did I miss?
module.getSample = function() {
return this.httpClient.getAsync('sample/')
.then(function(error, response, body) {
// not sure what Promise library you are using, but in the Promise/A+ spec, the function in then only receives a single argument, the resolved value of the Promise
console.log(body);
// this returns equivalent to Promise.resolve(undefined);
// you really want to return something meaningful here
})
.catch(function(e) {
console.log('error');
console.log(e);
// this also returns equivalent to Promise.resolve(undefined);
// to propagate the "error" condition, you want to either throw e, or return Promise.reject(something here);
});
};
This will always return a fulfilled promise with undefined as the value, never a rejected one. The other errors are commented above.

Use promises for multiple node requests

With the request library, is there a way to use promises to simplify this callback?
var context = {};
request.get({
url: someURL,
}, function(err, response, body) {
context.one = JSON.parse(body);
request.get({
url: anotherURL,
}, function(err, response, body) {
context.two = JSON.parse(body);
// render page
res.render('pages/myPage');
});
});
Here's a solution using the Bluebird promises library. This serializes the two requests and accumulates the results in the context object and rolls up error handling all to one place:
var Promise = require("bluebird");
var request = Promise.promisifyAll(require("request"), {multiArgs: true});
var context = {};
// Run the two requests one after the other, collecting each parsed body on
// `context`; a failure in either request or parse lands in the single catch.
request.getAsync(someURL).spread(function(response, body) {
    context.one = JSON.parse(body);
    return request.getAsync(anotherURL);
}).spread(function(response, body) {
    context.two = JSON.parse(body);
    // render page
    res.render('pages/myPage');
}).catch(function(err) {
    // error here
});
And, if you have multiple URLs, you can use some of Bluebirds other features like Promise.map() to iterate an array of URLs:
var Promise = require("bluebird");
var request = Promise.promisifyAll(require("request"), {multiArgs: true});
var urlList = ["url1", "url2", "url3"];
// Request every URL in urlList and pair each parsed JSON body with its URL.
Promise.map(urlList, function(url) {
return request.getAsync(url).spread(function(response,body) {
return [JSON.parse(body),url];
});
}).then(function(results) {
// results is an array of [parsedBody, url] pairs, one per entry of urlList, in order
}).catch(function(err) {
// handle error here
});
Or, you could create a helper function to do this for you:
// pass an array of URLs
/**
 * Requests every URL in `array` and parses each response body as JSON.
 *
 * @param {string[]} array - URLs to request.
 * @returns {Promise<Array>} Resolves with the parsed bodies, one per URL;
 *   rejects if any request or JSON parse fails.
 */
function getBodies(array) {
    // Iterate the caller's array, not an outer variable.
    return Promise.map(array, function(url) {
        return request.getAsync(url).spread(function(response, body) {
            return JSON.parse(body);
        });
    });
}
// sample usage of helper function
getBodies(["url1", "url2", "url3"]).then(function(results) {
// process results array here
}).catch(function(err) {
// process error here
});
Here is how I would implement chained Promises.
var request = require("request");
var someURL = 'http://ip.jsontest.com/';
var anotherURL = 'http://ip.jsontest.com/';
/**
 * Requests someURL and anotherURL in parallel and stores each parsed JSON
 * body on `context` under the keys 1 and 2 respectively.
 *
 * @param {Object} context - Object that receives the parsed bodies.
 * @returns {Promise<number[]>} Resolves (with a 1 per request) once both
 *   bodies are stored; rejects on the first request or parse failure.
 */
function combinePromises(context){
    return Promise.all(
        [someURL, anotherURL].map((url, i)=> {
            return new Promise(function(resolve, reject){
                try{
                    request.get({
                        url: url,
                    }, function(err, response, body) {
                        if(err){
                            reject(err);
                            return;
                        }
                        // The callback runs after the surrounding try block
                        // has exited, so a parse failure must be caught here
                        // to reject the promise instead of crashing.
                        try{
                            context[i+1] = JSON.parse(body);
                        }catch(parseError){
                            reject(parseError);
                            return;
                        }
                        resolve(1); //you can send back anything you want here
                    });
                }catch(error){
                    // Covers synchronous throws from request.get itself.
                    reject(error);
                }
            });
        })
    );
}
var context = {"1": "", "2": ""};
combinePromises(context)
.then(function(response){
console.log(context);
//render page
res.render('pages/myPage');
}, function(error){
//do something with error here
});
Doing this with native Promises. It's good to understand the guts.
This is known as the "Promise Constructor Antipattern", as pointed out by @Bergi in the comments. Don't do this. Check out the better method below.
// Promise for the parsed JSON body of the first request. It is created once,
// so every later .then() reuses the same settled result.
var contextA = new Promise(function(resolve, reject) {
    request('http://someurl.com', function(err, response, body) {
        if(err) {
            reject(err);
            return;
        }
        // A string body has no toJSON(); parse it explicitly and turn a
        // malformed body into a rejection.
        try {
            resolve(JSON.parse(body));
        } catch (parseError) {
            reject(parseError);
        }
    });
});
// Second request: once its body has arrived, waits for contextA's value and
// renders the page with both.
var contextB = new Promise(function(resolve, reject) {
// The callback's third parameter is also named contextB; inside the callback
// it shadows the outer promise variable and holds the response body.
request('http://contextB.com', function(err, response, contextB) {
if(err) reject(err);
else {
// The handler's contextA parameter shadows the outer promise and holds its
// resolved value.
contextA.then(function(contextA) {
res.render('page', contextA, contextB);
});
// NOTE(review): resolve() is never called on this path, so the outer promise
// stays pending after a successful request, and a rejection of contextA has
// no handler here.
}
});
});
The nifty trick here, and I think by using raw promises you come to appreciate this, is that contextA resolves once and then we have access to its resolved result. That is, we never repeat the above request to someurl.com, but still have access to contextA's JSON.
So I can conceivably create a contextC and reuse the JSON without having to make another request. Promises always resolve only once. You would have to take that anonymous executor function out and put it in a new Promise to refresh that data.
Bonus note:
This executes contextA and contextB in parallel, but will do the final computation that needs both contexts when both A & B are resolved.
Here's my new stab at this.
The main problem with the above solution is none of the promises are reusable and they are not chained which is a key feature of Promises.
However, I still recommend promisifying your request library yourself and abstaining from adding another dependency to your project. Another benefit of promisifying yourself is you can write your own rejection logic. This is important if you're working with a particular API that sends error messages in the body. Let's take a look:
//Function that returns a new Promise. Beats out constructor anti-pattern.
/**
 * Promise wrapper around `request` with application-level error detection.
 *
 * @param {*} options - Passed through unchanged as request's first argument.
 * @returns {Promise<Object>} Resolves with the response object. Rejects with
 *   the transport error, or with toError(body) when hasError(body) is truthy.
 */
const asyncReq = function(options) {
    return new Promise(function (resolve, reject) {
        request(options, function(err, response, body) {
            //Rejected promises can be dealt with in a `catch` block.
            if(err) {
                return reject(err);
            }
            //custom error handling logic for your application.
            if (hasError(body)) {
                return reject(toError(body));
            }
            // typically I just `resolve` the response since it contains `body`.
            return resolve(response);
        });
    });
};
// Fetch A, then B, render with both responses, and report any failure from
// the whole chain in one place.
asyncReq(urlA)
.then(function(resA) {
    //Promise.all is the preferred method for managing nested context.
    return Promise.all([resA, asyncReq(urlB)]);
})
.then(function(resAB) {
    return render('page', resAB[0], resAB[1]);
})
.catch(function(e) {
    // console has no `err` method; `error` is the logging call.
    console.error(e);
});
You can use the request-promise library to do this. In your case, you could have something like this, where you chain your requests.
request
.get({ url: someURL })
.then(body => {
context.one = JSON.parse(body);
// Resolves the promise
return request.get({ url: anotherURL });
})
.then(body => {
context.two = JSON.parse(body);
res.render('pages/myPage');
})
.catch(e => {
//Catch errors
console.log('Error:', e);
});
By far the easiest is to use the request-promise library. You can also use a promise library like bluebird and use its promisify functions to convert the request callback API to a promise API, though you may need to write your own promisify function as request does not use the standard callback semantics. Lastly, you can just make your own promise wrapper, using either native promises or bluebird.
If you're starting fresh, just use request-promise. If you're refactoring existing code, I would just write a simple wrapper for request using bluebird's spread function.

Resources