How to call a synchronous sub function from a Cloud Function - node.js

I'd like to run all of my functions synchronously, but if I execute something like the code below, the response is sent back before the return value is assigned.
index.js
exports.cfTest = (req, res) => {
  try {
    result = columnCount(
      'project', 'dataset', 'table'
    );
    console.log('sending response');
    res.send('<OUTPUT>' + result + '</OUTPUT>');
  } catch (err) {
    res.status(500).send('<ERROR>' + err + '</ERROR>');
  }
};
function columnCount(projectId, bqDataset, bqTable) {
  const BigQuery = require('@google-cloud/bigquery');
  const bigquery = new BigQuery({projectId: projectId});
  var result;
  bigquery
    .dataset(bqDataset)
    .table(bqTable)
    .get()
    .then(results => {
      result = results[1].schema.fields.length;
    })
    .catch(err => {
      result = null;
    });
  console.log('returning ' + result);
  return result;
}
I.e. the output would be something like:
<OUTPUT>undefined</OUTPUT>
and the Stackdriver logs would show an execution order like this:
2018-07-25 10:00:00 - Function execution started
2018-07-25 10:00:01 - sending response
2018-07-25 10:00:02 - Function execution took 2000 ms, finished with status code: 200
2018-07-25 10:00:03 - returning 5
Is there an elegant way to lock execution down to synchronous-only behaviour?
Solution
The async/await approach suggested below by Alexandru works! Thanks Alexandru.
NB: but not with the standard/GA Cloud Functions runtime; you'll have to use the Google Cloud Functions Node.js v8 Beta.

Yep, there is; you can look at async/await.
Your code will look something like this one in the end:
async function columnCount(projectId, bqDataset, bqTable) {
  const BigQuery = require('@google-cloud/bigquery');
  const bigquery = new BigQuery({ projectId: projectId });
  try {
    let results = await bigquery
      .dataset(bqDataset)
      .table(bqTable)
      .get();
    return results[1].schema.fields.length;
  } catch (err) {
    return null;
  }
}
And the calling function becomes:
exports.cfTest = async (req, res) => {
  try {
    const result = await columnCount(
      'project', 'dataset', 'table'
    );
    console.log('sending response');
    res.send('<OUTPUT>' + result + '</OUTPUT>');
  } catch (err) {
    res.status(500).send('<ERROR>' + err + '</ERROR>');
  }
};
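If you're stuck on the standard/GA (Node 6) runtime, a rough equivalent without async/await is to return the promise chain itself and send the response inside .then(). A minimal sketch built from the same code:
function columnCount(projectId, bqDataset, bqTable) {
  const BigQuery = require('@google-cloud/bigquery');
  const bigquery = new BigQuery({ projectId: projectId });
  // return the promise so the caller can chain on it
  return bigquery
    .dataset(bqDataset)
    .table(bqTable)
    .get()
    .then(results => results[1].schema.fields.length);
}

exports.cfTest = (req, res) => {
  columnCount('project', 'dataset', 'table')
    .then(result => res.send('<OUTPUT>' + result + '</OUTPUT>'))
    .catch(err => res.status(500).send('<ERROR>' + err + '</ERROR>'));
};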

Related

Node JS - createWriteStream

I am going crazy trying to fix this bug so please help :-)
I am using https://pdfkit.org/
This creates a stream that when finished is piped to fs.createWriteStream
My issue is that the first time the code runs, this works and the PDF is generated.
The next time the code runs, a file with zero bytes is created.
I am calling the function from an API running on Express.
The issue appears to be the async nature of fs.createWriteStream.
The stream finishes after the API has returned. I cannot seem to find a way to block while confirming the file has been created.
What is odd is that the code works the first time, but run again it fails.
Here is the pipe function:
async function _writeFile(fileObj) {
  let fileStream = fs.createWriteStream(fileObj.fileName)
  pipeline(
    doc,
    fileStream,
    async (err) => {
      if (err) {
        console.error('PDF failed', err);
        return ('Pipeline failed', err)
      } else {
        console.log('PDF succeeded');
      }
    }
  )
}
This is called from:
exports.drawReport = async (payload) => {
  var date = new Date();
  const timeStamp = date.toJSON();
  let path = './controllers/tmp/'
  var fileName = path + timeStamp + '.' + payload.type + '.pdf'
  try {
    // Start Report
    await _startReport(payload)
    // Check Starting position on page & add status box header
    if (device_card_reference == 260) {
      await _deviceTitle(payload);
    }
    // Add Devices
    await _reportDevice(payload);
    // Call Footer for final page
    await _reportFooter()
    console.log("PDF Done - Writing File")
    // File Meta Data
    let fileObj = {
      type: payload.type,
      siteId: payload.siteId,
      fileName: fileName,
      timeStamp: timeStamp
    }
    // Create file to store PDF
    await _writeFile(fileObj)
    doc.end()
    console.log("PDF MADE?")
    return (fileObj)
  } catch (err) {
    console.error('MakePDF ERROR: ' + err.message);
    return (err.message)
  }
}
pipeline runs asynchronously and is never awaited, which is why doc.end() runs before the file is done.
try wrapping pipeline in a promise, and resolving it when the stream is done:
// function that returns a promise
function _writeFile(fileObj) {
  return new Promise((resolve, reject) => {
    const fileStream = fs.createWriteStream(fileObj.fileName);
    pipeline(
      doc,
      fileStream,
      (err) => {
        if (err) {
          console.error('PDF failed', err);
          // err: reject, handle in `.catch`
          reject({ res: 'Pipeline failed', err });
        } else {
          console.log('PDF succeeded');
          // done: resolve, to move on to doc.end
          resolve('PDF succeeded');
        }
      }
    )
  });
}
add .catch() to handle the error:
// Create file to store PDF
await _writeFile(fileObj).catch(err => console.log(err));
or even better, use the stream promises API:
const fs = require('fs');
const { pipeline } = require('stream/promises');

async function _writeFile(fileObj) {
  const fileStream = fs.createWriteStream(fileObj.fileName);
  await pipeline(doc, fileStream);
  console.log('PDF succeeded');
}
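One caveat with either promise-based version (assuming doc is the PDFKit document from the question): the pipeline only settles once the document stream ends, so doc.end() has to be called before the await can resolve. A sketch of the call site in drawReport:
// start the pipeline, end the document, then wait for the write to finish
const written = _writeFile(fileObj);
doc.end();
await written.catch(err => console.log(err));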

async processing with nodejs and rest api calls & express - sequence error

I am using a REST API to access a running Node program, and I found that async and await are not retaining the sequence. The problem shows up when the function makes a second API call; I think the await and async are getting out of sequence with that second call, and that causes the issue.
I tested using a function without a second API call and the sequence is fine, so the problem is related to the CCAPI promise that is generated. I think the "then" is not correct. The code in the function accessing the charity API works in isolation, so the call itself does work and also runs in the whole program, just out of sequence.
Any help appreciated.
async function checkCharityAPI(searchTerm) {
  const args = { APIKey: myAPIKey, strSearch: searchTerm };
  ccAPI.GetCharitiesByName(args).then(function(result) {
    var charityResult = JSON.stringify(result);
    var charityResultJson = JSON.parse(charityResult);
    console.log(charityResultJson["GetCharitiesByNameResult"]);
    return charityResultJson;
  }).catch(function(err) {
    console.log(`Call to ${err.operationName} failed with error: ${err.err}`);
    return null;
  });
}
app.post("/api/checkcharitytest", asyncHandler(async (req, res, next) => {
  var charitySearch = req.body.charitysearch;
  console.log("start api call")
  var charityResult = await checkCharityAPI(charitySearch);
  console.log("end api call")
  res.json({ "charityResult": charityResult })
}));
Addition: I also tried this:
var charitySearch = req.body.charitysearch;
console.log("start api call")
// var charityResult = await checkCharityAPI(charitySearch);
checkCharityAPI(charitySearch).then(function(result) {
  console.log("result")
  console.log(result);
  res.json({ "charityResult": result })
}).catch(function(err) {
  console.log(`Call to ${err.operationName} failed with error: ${err.err}`);
});
console.log("end api call")
I also added this; the sequence is then corrected, but the return value is null despite it being populated in the function:
async function checkCharityAPI(searchTerm) {
  const args = { APIKey: myAPIKey, strSearch: searchTerm };
  await ccAPI.GetCharitiesByName(args).then(function(result) {
    var charityResult = JSON.stringify(result);
    var charityResultJson = JSON.parse(charityResult);
    console.log(charityResultJson["GetCharitiesByNameResult"]);
    return charityResultJson["GetCharitiesByNameResult"];
  }).catch(function(err) {
    console.log(`Call to ${err.operationName} failed with error: ${err.err}`);
    return null;
  });
}
ccAPI.GetCharitiesByName(args) appears to return a promise, but checkCharityAPI doesn't await or return it; you only access it with then/catch.
Consequently, the promise returned because of the async keyword is entirely independent of the one returned by ccAPI.GetCharitiesByName(args), and it resolves to undefined.
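A minimal fix along those lines (a sketch, reusing the names from the question) is to await the call inside checkCharityAPI and return the parsed value, so the promise the route handler awaits actually carries the result:
async function checkCharityAPI(searchTerm) {
  const args = { APIKey: myAPIKey, strSearch: searchTerm };
  try {
    const result = await ccAPI.GetCharitiesByName(args);
    const charityResultJson = JSON.parse(JSON.stringify(result));
    // return the value so the awaited promise resolves to it
    return charityResultJson["GetCharitiesByNameResult"];
  } catch (err) {
    console.log(`Call to ${err.operationName} failed with error: ${err.err}`);
    return null;
  }
}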

How to await a streaming sql query in node.js

I need to call out to a function that runs a sql query, with row level functionality, and await the entire process before continuing.
Function code:
const sql = require('mssql')

exports.doit = async () => {
  const pool1 = new sql.ConnectionPool(dbConfig);
  const pool1Connect = pool1.connect();
  pool1.on('error', err => {
    console.error('error occurred on pool')
  })
  await pool1Connect
  try {
    const request = pool1.request();
    request.stream = true;
    request.query('select * from dbo.user');
    request.on('row', async orow => {
      console.log('outer row');
      const innerPool = new sql.ConnectionPool(dbConfig);
      const innerConnection = innerPool.connect();
      innerPool.on('error', err => {
        console.error('error occurred on pool')
      });
      const iConnection = await innerConnection;
      connections.push(iConnection);
      const innerRequest = innerPool.request();
      innerRequest.stream = true;
      var iquery = 'select * from dbo.order where userId=' + orow.userId
      innerRequest.query(iquery);
      innerRequest.on('row', async irow => {
        console.log(`User: ${orow.userId} Order: ${irow.orderId}`);
      });
      innerRequest.on('done', async () => {
        console.log('inner done');
        iConnection.close();
      });
    });
    request.on('done', async () => {
      console.log('outer done');
    })
  } catch (err) {
    console.error('SQL error', err);
  }
  sql.on('error', err => {
    // ... error handler
  })
}
Then call the above function like this:
var doit = require('./testmeHandler.js').doit;
doit()
  .then(() => {
    console.log("I AM DONE");
  });
OR
await doit();
console.log('I AM DONE');
You get the idea...
But what is really happening is, the function gets called, then 'I AM DONE' and then the results of all the sql calls.
Can someone help me get 'I AM DONE' at the bottom? Still getting used to the async/await and promises.
Thanks
After quite a bit of time trying to get this to work synchronously from the caller, I gave up and re-wrote the method to use a regular query (not streaming) and implemented my own paging/throttling to control memory usage. It works great now!
I am using a connection pool to allow for sub queries and other processes to occur async within a batch of results.
I will post the updated code.
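A paged query of roughly this shape (a sketch, assuming SQL Server 2012+ OFFSET/FETCH syntax and the mssql result.recordset API, with names taken from the question) is one way to implement that kind of paging:
const pageSize = 500; // tune this to control memory usage
let offset = 0;
while (true) {
  const result = await pool1.request().query(
    `select * from dbo.[user] order by userId
     offset ${offset} rows fetch next ${pageSize} rows only`
  );
  const rows = result.recordset;
  if (rows.length === 0) break;
  for (const row of rows) {
    // process each user row (e.g. look up its orders) here
  }
  offset += pageSize;
}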
Somehow I believe you have jumbled it all up a bit.
Use this
exports.doit = async () => {
  const request = new sql.Request(conn)
  let records = await request.query('select * from dbo.user')
  records.recordset.forEach(async r => {
    try {
      // do something with the outer row
      const inner = new sql.Request(conn)
      let recordInner = await inner.query(innerQuery)
      recordInner.recordset.forEach(r => {
        // do some stuff with each inner row
      })
    } catch (err) {
      // do something with the error
    }
  })
}
The execution:
async function execute() {
  const result = await doit()
  return result
}
execute()
Though I have no idea why you are using two connections at all. Just try writing a more refined query using a JOIN or a WHERE subquery. You can achieve all this in a single query instead of using a nested connection. SQL, though a bit old, is really quite powerful.
select * from dbo.order WHERE userId IN (SELECT userId FROM dbo.user)
Makes more sense to me. But, whatever floats your boat.
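For example (a sketch using the table and column names from the question), the per-row inner lookup collapses into one joined query:
// one round trip instead of a query per user row
const result = await pool1.request().query(
  'select u.userId, o.orderId from dbo.[user] u join dbo.[order] o on o.userId = u.userId'
);
result.recordset.forEach(row => {
  console.log(`User: ${row.userId} Order: ${row.orderId}`);
});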
More on sub-queries: https://www.dofactory.com/sql/subquery
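And if you do want to keep request.stream = true, the usual way to make it awaitable (a sketch, relying only on the 'row'/'error'/'done' events already used in the question) is to wrap the request in a promise that settles on 'done':
function streamQuery(pool, queryText, onRow) {
  return new Promise((resolve, reject) => {
    const request = pool.request();
    request.stream = true;
    request.on('row', onRow);
    request.on('error', reject);
    request.on('done', resolve);
    request.query(queryText);
  });
}

// inside doit():
// await streamQuery(pool1, 'select * from dbo.user', orow => { /* per-row work */ });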

Axios.all, how to configure axios wait time to mitigate hung up?

My application uses an internal web service for fetching data. I have a job which creates approximately 500 requests that get fired asynchronously to complete the fetch operation.
I make use of Axios by creating an array of axios promises and then resolving them using Axios.all().
It works fine up to about 200 requests, but beyond that I get a socket hang up, even though on the server side I can see the requests are being processed.
How do I configure axios to set a custom timeout, or is it a better idea to split my promises array and run the requests in multiple batches?
Source code
let getAxiosPromiseArray = (urlList) => {
  var axiosArrayofPromise = [];
  return new Promise((resolve, reject) => {
    try {
      urlList.forEach((URL) => {
        axiosArrayofPromise.push(axios.get(URL));
      });
      resolve(axiosArrayofPromise);
    } catch (err) {
      reject("There is a problem getting Axios array of promises " + err);
    }
  })
}
async function processAxiosPromises(PromiseArray) {
  try {
    var results = []
    results = await axios.all(PromiseArray);
    return results;
  } catch (err) {
    throw ("There was a problem resolving promises array (Axios) " + err);
  }
}
getallID().then((urlList) => {
  return getAxiosPromiseArray(urlList);
}).then((AxiosPromises) => {
  return processAxiosPromises(AxiosPromises);
}).then((resultData) => {
  console.log(resultData);
});
Error
There was a problem resolving promises array (Axios) Error: socket hang up
First, that pair of functions getAxiosPromiseArray() and processAxiosPromises() needs fixing.
Your new Promise() construction is unnecessary. You can simply return Promise.all(arrayofPromise) (or axios.all(...) if you must) and do away with the other function.
Renaming the remaining function to something meaningful, you would end up with, e.g.:
let getData = (urlList) => {
  return Promise.all(urlList.map(URL => axios.get(URL)))
    .catch(error => {
      error.message = "There is a problem getting Axios array of promises " + error.message; // augment the error message ...
      throw error; // ... and re-throw the error.
    });
};
And call as follows:
getallID().then(getData)
  .then(resultData => {
    console.log(resultData);
  }).catch(error => {
    console.error(error);
  });
That will put you on solid ground but, on its own, is unlikely to fix a concurrency problem (if that's what it is), for which the simplest approach is to use Bluebird's Promise.map with the concurrency option.
The caller code can remain the same, just change getData(), as follows:
// where `Promise` is Bluebird
let getData = (urlList) => {
  let concurrency = 10; // play with this value to find a reliable concurrency limit
  return Promise.map(urlList, URL => axios.get(URL), { 'concurrency': concurrency })
    .catch(error => {
      error.message = "There is a problem getting Axios array of promises " + error.message;
      throw error;
    });
};
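As for the time-out part of the question: axios also accepts a per-request timeout option in milliseconds, so each call can be guarded individually, e.g. (the 5000 ms value is just an example):
// reject the request if the server does not respond within 5 seconds
axios.get(URL, { timeout: 5000 })
Alternatively, another answer suggests throttling the requests with the axios-throttle package: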
const axios = require('axios');
const axiosThrottle = require('axios-throttle');
//pass axios object and value of the delay between requests in ms
axiosThrottle.init(axios,200)
const options = {
  method: 'GET',
};
const urlList = [
'https://jsonplaceholder.typicode.com/todos/1',
'https://jsonplaceholder.typicode.com/todos/2',
'https://jsonplaceholder.typicode.com/todos/3',
'https://jsonplaceholder.typicode.com/todos/4',
'https://jsonplaceholder.typicode.com/todos/5',
'https://jsonplaceholder.typicode.com/todos/6',
'https://jsonplaceholder.typicode.com/todos/7',
'https://jsonplaceholder.typicode.com/todos/8',
'https://jsonplaceholder.typicode.com/todos/9',
'https://jsonplaceholder.typicode.com/todos/10'
];
const promises = [];
const responseInterceptor = response => {
  console.log(response.data);
  return response;
};
// add interceptor to work with each response separately when it is resolved
axios.interceptors.response.use(responseInterceptor, error => {
  return Promise.reject(error);
});
for (let index = 0; index < urlList.length; index++) {
  options.url = urlList[index];
  promises.push(axiosThrottle.getRequestPromise(options, index));
}
// run when all promises are resolved
axios.all(promises).then(responses => {
  console.log(responses.length);
});
https://github.com/arekgotfryd/axios-throttle

AWS Lambda function that executes 5000+ promises to AWS SQS is extremely unreliable

I'm writing a Node AWS Lambda function that queries around 5,000 items from my DB and sends them via messages into an AWS SQS queue.
My local environment involves me running my lambda with AWS SAM local, and emulating AWS SQS with GoAWS.
An example skeleton of my Lambda is:
async run() {
  try {
    const accounts = await this.getAccountsFromDB();
    const results = await this.writeAccountsIntoQueue(accounts);
    return 'I\'ve written: ' + results + ' messages into SQS';
  } catch (e) {
    console.log('Caught error running job: ');
    console.log(e);
    return e;
  }
}
There are no performance issues with my getAccountsFromDB() function and it runs almost instantly, returning me an array of 5,000 accounts.
My writeAccountsIntoQueue function looks like:
async writeAccountsIntoQueue(accounts) {
  // Extract the sqsClient and queueUrl from the class
  const { sqsClient, queueUrl } = this;
  try {
    // Create array of functions to concurrently call later
    let promises = accounts.map(acc => async () => await sqsClient.sendMessage({
      QueueUrl: queueUrl,
      MessageBody: JSON.stringify(acc),
      DelaySeconds: 10,
    }));
    // Invoke the functions concurrently, using helper function `eachLimit`
    let writtenMessages = await eachLimit(promises, 3);
    return writtenMessages;
  } catch (e) {
    console.log('Error writing accounts into queue');
    console.log(e);
    return e;
  }
}
My helper, eachLimit looks like:
async function eachLimit(funcs, limit) {
  let rest = funcs.slice(limit);
  await Promise.all(
    funcs.slice(0, limit).map(
      async (func) => {
        await func();
        while (rest.length) {
          await rest.shift()();
        }
      }
    )
  );
}
To the best of my understanding, it should be limiting concurrent executions to limit.
Additionally, I've wrapped the AWS SDK SQS client to return an object with a sendMessage function that looks like:
sendMessage(params) {
  const { client } = this;
  return new Promise((resolve, reject) => {
    client.sendMessage(params, (err, data) => {
      if (err) {
        console.log('Error sending message');
        console.log(err);
        return reject(err);
      }
      return resolve(data);
    });
  });
}
So nothing fancy there, just Promisifying a callback.
I've got my lambda set up to time out after 300 seconds, and the lambda always times out; if it doesn't, it ends abruptly and misses some final logging that should happen, which makes me think it may even be erroring somewhere silently. When I check the SQS queue I'm missing around 1,000 entries.
I can see a couple of issues in your code.
First:
let promises = accounts.map(acc => async () => await sqsClient.sendMessage({
  QueueUrl: queueUrl,
  MessageBody: JSON.stringify(acc),
  DelaySeconds: 10,
}));
You're abusing async/await here. Bear in mind that await will wait until your promise is resolved before continuing with the next one; in this case, whenever you map the promises array and call each function item, it waits for the promise wrapped by that function before continuing, which is bad. Since you're only interested in getting the promises back, you could simply do this instead:
const promises = accounts.map(acc => () => sqsClient.sendMessage({
  QueueUrl: queueUrl,
  MessageBody: JSON.stringify(acc),
  DelaySeconds: 10,
}));
Now, for the second part, your eachLimit implementation looks wrong and very verbose, I've refactored it with help of es6-promise-pool to handle the concurrency limit for you:
const PromisePool = require('es6-promise-pool')

function eachLimit(promiseFuncs, limit) {
  const promiseProducer = function () {
    while (promiseFuncs.length) {
      const promiseFunc = promiseFuncs.shift();
      return promiseFunc();
    }
    return null;
  }
  const pool = new PromisePool(promiseProducer, limit)
  const poolPromise = pool.start();
  return poolPromise;
}
Lastly, but very importantly, have a look at the SQS limits: SQS FIFO supports up to 300 sends/sec. Since you are processing 5k items, you could probably up your concurrency limit to 5k / (300 + 50), approximately 15. The 50 could be any positive number, just to stay a bit away from the limit.
Also, consider using SendMessageBatch, with which you could get much more throughput and reach 3k sends/sec.
EDIT
As I suggested above, the throughput is much better with sendMessageBatch, so I've refactored the code that maps your promises to support sendMessageBatch:
function chunkArray(myArray, chunk_size) {
  var index = 0;
  var arrayLength = myArray.length;
  var tempArray = [];
  for (index = 0; index < arrayLength; index += chunk_size) {
    var myChunk = myArray.slice(index, index + chunk_size);
    tempArray.push(myChunk);
  }
  return tempArray;
}
const groupedAccounts = chunkArray(accounts, 10);
const promiseFuncs = groupedAccounts.map(accountsGroup => {
  const messages = accountsGroup.map((acc, i) => {
    return {
      Id: `pos_${i}`,
      MessageBody: JSON.stringify(acc),
      DelaySeconds: 10
    }
  });
  return () => sqsClient.sendMessageBatch({
    Entries: messages,
    QueueUrl: queueUrl
  })
});
Then you can call eachLimit as usual:
const result = await eachLimit(promiseFuncs, 3);
The difference now is every promise processed will send a batch of messages of size n (10 in the example above).
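One more design note (an assumption, since it depends on your SQS client wrapper promisifying sendMessageBatch the same way as sendMessage): a single SendMessageBatch call accepts at most 10 entries, which is why the accounts are chunked into groups of 10, and it reports per-entry failures in a Failed array rather than rejecting the whole call. It's worth checking that array, e.g. by changing the function returned in the map above to:
return () => sqsClient.sendMessageBatch({
  Entries: messages,
  QueueUrl: queueUrl
}).then(result => {
  // surface partial failures; SendMessageBatch does not reject for these
  if (result.Failed && result.Failed.length) {
    console.log('Failed batch entries:', result.Failed);
  }
  return result;
});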
