Parsing a nested API response in Node - node.js

An API request like this: const response = await this.publicGetMarkets (params); is giving me a response that contains a list of markets in the following format:
{
    "markets": {
        "LINK-USD": {
            "market": "LINK-USD",
            "status": "ONLINE"
        },
        ...
    }
}
As in the example here, my problem is that the LINK-USD key changes for every market.
How do I fix my code so that I can access variables such as market and status in my code?
I have written the following code snippet:
const market = this.safeValue (response, 'markets');
const result = [];
for (let i = 0; i < markets.length; i++) {
    const markets = this.safeString (markets, {}, {});
    const market = this.safeString (markets, 'market');
    const status = this.safeString (markets, 'status');
    result.push({
        'market': market,
        'status': status,
    });
}
return result;

You can get an array of all the inner objects using Object.values(data.markets).
If you need to filter out unwanted properties, that is a fairly simple mapping addition on top of this, as shown after the snippet below.
const data = {
    "markets": {
        "LINK-USD": {
            "market": "LINK-USD",
            "status": "ONLINE"
        },
        "LINK-EURO": {
            "market": "LINK-EURO",
            "status": "TBD"
        }
    }
}
const res = Object.values(data.markets)
console.log(res)
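For example, keeping only the market and status fields is a one-line map over those values (a small sketch using the data object above):
// keep only the fields we care about from each inner market object
const filtered = Object.values(data.markets).map(({ market, status }) => ({ market, status }));
console.log(filtered); // [ { market: 'LINK-USD', status: 'ONLINE' }, { market: 'LINK-EURO', status: 'TBD' } ]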

const markets = this.safeValue (response, 'markets', {});
const result = [];
// iterate over the inner objects rather than hard-coding a key such as "LINK-USD"
for (const entry of Object.values (markets)) {
    const market = this.safeString (entry, 'market');
    const status = this.safeString (entry, 'status');
    result.push ({ market, status });
}
return result;
I hope this is what you asked for.

Related

How to speed up Fetching Google Place and Photos

I currently have the following code to fetch matching Google Places according to a received query as shown below:
async function searchGoogleBusiness(req, res) {
    let { name } = req.query;
    const apiKey = process.env.API_KEY;
    const searchUrl = `https://maps.googleapis.com/maps/api/place/textsearch/json?query=`;
    try {
        let { data } = await axios.get(`${searchUrl}${name}&key=${apiKey}`)
        let { status, error_message, results } = data;
        if (status === 'OK') {
            let businessResults = [];
            if ((results ?? []).length > 0) {
                for (let business of results) {
                    let businessDetails = {
                        ....
                    }
                    if ((business.photos ?? []).length > 0) {
                        let { width = 1200, height = 1200, photo_reference } = business.photos[0];
                        let photoUrl = `https://maps.googleapis.com/maps/api/place/photo?photoreference=${photo_reference}&sensor=false&maxheight=${height}&maxwidth=${width}&key=${apiKey}`
                        try {
                            let businessPhotoResponse = await axios.get(photoUrl, { responseType: 'arraybuffer' });
                            let imageBuffer = businessPhotoResponse.data;
                            let base64Image = Buffer.from(imageBuffer, 'binary').toString('base64');
                            businessDetails.photo = `data:${businessPhotoResponse.headers['content-type']};base64,${base64Image}`;
                        } catch (e) {
                            businessDetails.photo = business.icon;
                        }
                    } else {
                        businessDetails.photo = business.icon;
                    }
                    businessResults.push(businessDetails);
                }
            }
            ...//Omitted
        }
        ...//Omitted
    } catch (e) {
        ...//Omitted
    }
}
As you can immediately notice, the function takes forever to return when there are more than 5 results, because I'm looping through each business and making another API call to fetch each photo.
I don't like this approach at all.
Making an extra network call per photo_reference is really hurting my site speed and basically just makes my users angry.
Is there a way to fetch the photo URLs along with the first request?
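The Text Search response itself only returns photo_reference values, so the per-photo requests are hard to avoid entirely, but they don't have to run one after another. A minimal sketch (assuming the same axios setup as above; toBusinessDetails stands in for the omitted businessDetails mapping and is hypothetical) that issues them concurrently with Promise.all:
// run the per-business photo fetches in parallel instead of awaiting them one by one
const businessResults = await Promise.all(results.map(async (business) => {
    const businessDetails = toBusinessDetails(business); // hypothetical helper for the omitted fields
    if ((business.photos ?? []).length > 0) {
        const { width = 1200, height = 1200, photo_reference } = business.photos[0];
        const photoUrl = `https://maps.googleapis.com/maps/api/place/photo?photoreference=${photo_reference}&sensor=false&maxheight=${height}&maxwidth=${width}&key=${apiKey}`;
        try {
            const photoResponse = await axios.get(photoUrl, { responseType: 'arraybuffer' });
            const base64Image = Buffer.from(photoResponse.data, 'binary').toString('base64');
            businessDetails.photo = `data:${photoResponse.headers['content-type']};base64,${base64Image}`;
        } catch (e) {
            businessDetails.photo = business.icon;
        }
    } else {
        businessDetails.photo = business.icon;
    }
    return businessDetails;
}));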

No results when scanning DynamoDB with AWS Lambda(node.js)

I've been attempting to scan the table so I can have more functionality with the data. I'm able to .get from the table successfully, but I can't seem to get the scanning function right.
Sample Table:
controlID (N)    controlFunction (S)
1                Protect
2                Assess
3                Protect
Code:
const AWS = require("aws-sdk");
const dynamo = new AWS.DynamoDB.DocumentClient();

exports.handler = async (event, context) => {
    let controlInfo;
    let body;
    let statusCode = 200;
    const headers = {
        "Content-Type": "application/json"
    };
    try {
        controlInfo = await dynamo
            .scan({
                FilterExpression: "controlFunction = :cF",
                ExpressionAttributeValues: {
                    ":cF": { N: "5" }
                },
                ProjectionExpression: "controlID",
                TableName: "testControls",
            })
            .promise();
    } catch (err) {
        statusCode = 400;
        controlInfo = err.message;
    } finally {
        //controlInfo = JSON.stringify(controlInfo);
    }
    body = {
        "Control Info" : controlInfo,
        "Threat Info" : "placeHolder"
    };
    body = JSON.stringify(body);
    return {
        statusCode,
        body,
        headers
    };
};
I was expecting the output to be the items of the table with the specified "controlFunction".
Here are the results I get from running the current script:
{
    "Control Info": {
        "Items": [],
        "Count": 0,
        "ScannedCount": 115
    },
    "Threat Info": "placeHolder"
}
You're using the DocumentClient, which auto-marshalls attribute values on both requests to the SDK and responses from the SDK. That means that you don't need to tell the SDK what type each attribute value is. The SDK will automatically map types between DynamoDB native types and their JavaScript equivalents.
So, instead of:
":attrname": { type: value }
You should use:
":attrname": value
For example:
":cF": 5
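Applied to the scan in the question, and assuming you actually want to match one of the string values shown in the sample table (e.g. "Protect"), the call might look like this:
controlInfo = await dynamo
    .scan({
        TableName: "testControls",
        FilterExpression: "controlFunction = :cF",
        // plain JavaScript value; no { S: ... } / { N: ... } wrapper with the DocumentClient
        ExpressionAttributeValues: { ":cF": "Protect" },
        ProjectionExpression: "controlID",
    })
    .promise();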

AWS SDK response not showing in Lambda function

I am working on a Lambda function and creating a method for an AWS SDK historical metrics report using Node.js. The method runs successfully, but the response shows nothing. Have a look at the response.
Here is my code
function getKeyByValue(object, value) {
    return Object.keys(object).find(key =>
        object[key] === value);
}

exports.handler = async (event) => {
    const AWS = require('aws-sdk');
    var connect = new AWS.Connect({ apiVersion: '2017-08-08' });
    let queueARN = event.queueARN || null;
    const connectInstanceId = process.env.instanceID;
    let flag = 0, nextToken = null;
    let queueARNsObject = {}, queueARNsArray = [], queueTypeObject = {}, listQueuesResult;
    console.log('At line 12 entring do while loop....')
    do {
        console.log('How many times do I stay here???')
        let listQueuesParams = {
            InstanceId: connectInstanceId, /* required */
            QueueTypes: [
                "STANDARD",
            ],
            NextToken: nextToken,
        };
        let listQueuesPromise = connect.listQueues(listQueuesParams).promise();
        listQueuesResult = await listQueuesPromise;
        // console.log(listQueuesResult);
        listQueuesResult.QueueSummaryList.forEach(queue => {
            if (queueARN != null) {
                if (queue.Arn == queueARN) {
                    queueARNsArray = [queue.Arn];
                    queueARNsObject[queue.Name] = queue.Arn;
                    queueTypeObject[queue.QueueType] = queue.Arn;
                    flag = 1;
                    return;
                }
            } else {
                queueARNsObject[queue.Name] = queue.Arn;
                queueTypeObject[queue.QueueType] = queue.Arn;
                queueARNsArray.push(queue.Arn);
                nextToken = listQueuesResult.NextToken;
            }
        });
    } while (flag=0 && nextToken != null);
    const HistoricalMetrics = [
        {
            Name : "CONTACTS_HANDLED",
            Unit : "COUNT",
            Statistic : "SUM"
        },
        {
            Name : "CONTACTS_ABANDONED",
            Unit : "COUNT",
            Statistic : "SUM"
        },
    ];
    // Metrics params
    var getHistoricalMetricsParams = {
        InstanceId: connectInstanceId,
        StartTime: 1593099900,
        EndTime: 1593129300,
        Filters: {
            Channels: ["VOICE"],
            Queues: queueARNsArray
        },
        HistoricalMetrics: HistoricalMetrics,
        Groupings: ["QUEUE"]
    };
    // console.log(getHistoricalMetricsParams);
    // get current metrics by queues
    var getHistoricalMetricsPromise = connect
        .getMetricData(getHistoricalMetricsParams)
        .promise();
    var getHistoricalMetricsResult = await getHistoricalMetricsPromise;
    console.log("historical metrics", getHistoricalMetricsResult);
    // console.log("current |||||||| 1 metrics:", JSON.stringify(getCurrentMetricsResult));
    let queueMetricsArray = [];
    if (getHistoricalMetricsResult.MetricResults.length) {
        getHistoricalMetricsResult.MetricResults.forEach(queue => {
            let queueMetrics = {
                "Queue_Name" : getKeyByValue(queueARNsObject, queue.Dimensions.Queue.Arn),
                "CallsHandled": queue.Collections[0].Value,
                "CallsAbanoded": queue.Collections[1].Value,
            }
            queueMetricsArray.push(queueMetrics);
            console.log("TYPE||||", getKeyByValue(queueTypeObject, queue.Dimensions.Queue.Arn))
        });
    }
    const response = {
        responseCode: 200,
        metricResults: queueMetricsArray
    };
    return response;
};
I don't have any idea why it is not showing anything. If anyone knows, please help me fix it. Thanks. I don't know what is missing; I've checked almost everything but didn't find anything.
There are a few general areas you can look at:
Specify the region:
AWS.Connect({ apiVersion: '2017-08-08', region: 'xxxxx' });
Use await directly with the listQueues method:
let listQueuesPromise = await connect.listQueues(listQueuesParams).promise();
Check permissions - make sure the function has sufficient authority.
Lambda configuration - increase the timeout and memory size.
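Putting the first two suggestions together (a sketch; the region value is a placeholder for your instance's region):
const AWS = require('aws-sdk');
// specify the region explicitly when constructing the client
const connect = new AWS.Connect({ apiVersion: '2017-08-08', region: 'xxxxx' });
// inside the async handler: await listQueues directly and log what it actually returns
const listQueuesResult = await connect.listQueues(listQueuesParams).promise();
console.log(JSON.stringify(listQueuesResult));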
PS: What did console.log(listQueuesPromise) return?

Using cheerio to scrape data from links extracted using cheerio

I am using cheerio and Node.js to get data from the Allegro website in order to create API endpoints that return CSV data; this data will be studied later on as part of a data science project:
https://allegro.pl/kategoria/samochody-osobowe-4029?bmatch=baseline-al-product-cl-eyesa2-engag-dict45-aut-1-3-0605
To get the car information, I managed to scrape the links from the first page; each link sends you to a car (an item of cars) where its full information is shown. I need to scrape more data from each link. How do I do that?
And how do I make the JSON data show up as CSV instead?
Here is the code used:
const url =
    "https://allegro.pl/kategoria/samochody-osobowe-4029?bmatch=baseline-al-product-cl-eyesa2-engag-dict45-aut-1-3-0605";
//const writeStream = fs.createWriteStream("allegro.csv");
// Write Headers
//writeStream.write(`Price,Link \n`);

function getCars() {
    return fetch(`${url}`)
        .then((response) => response.text())
        .then((body) => {
            const cars = [];
            const $ = cheerio.load(body);
            $("._9c44d_2H7Kt").each(function (i, el) {
                const $price = $(el).find("._9c44d_1zemI");
                const $link = $(el).find("a");
                const $year = $(el).find("dd");
                const $make = $(el).find("h2");
                const car = {
                    price: $price.text().replace(/\s\s+/g, ""),
                    link: $link.attr("href"),
                    year: $year.first().next().next().text(),
                    make: $make.text(),
                };
                cars.push(car);
            });
            // Write Row to CSV
            // writeStream.write(`${price},${link} \n`);
            return cars;
        });
}
The code used for the Node.js endpoint:
app.get("/scraping/:allegro", (req, res) => {
    scraper.getCars(req.param.allegro).then((cars) => {
        //console.log(cars);
        res.json(cars);
    });
});
The data to get from each link is the following: date added, model, phone number, city, VIN.
There is a convenient thing about these pages: you can get the data back as JSON instead of HTML just by requesting the application/json media type, e.g. by setting the Accept header.
For instance, to get the list:
curl "https://allegro.pl/kategoria/samochody-osobowe-4029?bmatch=baseline-al-product-cl-eyesa2-engag-dict45-aut-1-3-0605&order=dd" \
-H "Accept: application/json"
To get a specific item :
curl "https://allegro.pl/ogloszenie/mercedes-ml320-9341716141" -H "Accept: application/json"
So you don't need web-scraping tools, just JSON parsing. Pagination is done by adding a query param &p=PAGE_NUM, which is convenient too.
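For instance, the second page of the same listing should be reachable with:
curl "https://allegro.pl/kategoria/samochody-osobowe-4029?bmatch=baseline-al-product-cl-eyesa2-engag-dict45-aut-1-3-0605&order=dd&p=2" \
-H "Accept: application/json"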
I've made a small example in Python that can be easily ported to JS. It requests the list of cars, then requests the first element:
import requests
import json
import pandas as pd

r = requests.get("https://allegro.pl/kategoria/samochody-osobowe-4029",
    headers = {
        "Accept": "application/json"
    },
    params = {
        "bmatch": "baseline-al-product-cl-eyesa2-engag-dict45-aut-1-3-0605",
        "order": "dd"
    })
data = [{
        "name": t["name"],
        "url": t["url"],
        "price": t["sellingMode"]["advertisement"]["price"]["amount"],
        **dict([(j["name"], j["values"][0]) for j in t["parameters"]]),
    }
    for t in r.json()["pagination bottom"]["collection"]["items"]["promoted"]
]
df = pd.DataFrame(data)
print(df)

print("get data for first element")
r = requests.get(data[0]["url"],
    headers = {
        "Accept": "application/json"
    })
item = r.json()
item_data = {
    "phone": item["summary"]["offer"]["contact"]["phones"][0]["number"],
    "delivery": item["summary"]["offer"]["delivery"]["summary"][0]["value"]["text"],
    "startingAt": item["summary"]["offer"]["publication"]["startingAt"],
    "endingAt": item["summary"]["offer"]["publication"]["endingAt"],
    **dict([(j["name"], j["values"][0]["valueLabel"]) for j in item["summary"]["offer"]["parametersGroups"]["groups"][0]["parameters"]])
}
print(item_data)
An implementation in Node.js using axios:
const axios = require("axios");

async function process() {
    let response = await axios.get('https://allegro.pl/kategoria/samochody-osobowe-4029', {
        // axios sends query-string values via `params`
        params: {
            "bmatch": "baseline-al-product-cl-eyesa2-engag-dict45-aut-1-3-0605",
            "order": "dd"
        },
        responseType: "json"
    });
    let promoted = response.data["pagination bottom"].collection.items.promoted;
    let list = [];
    for (var i = 0; i < promoted.length; i++) {
        let item = {
            name: promoted[i].name,
            url: promoted[i].url,
            price: promoted[i].sellingMode.advertisement.price.amount,
        };
        let params = promoted[i].parameters;
        for (var j = 0; j < params.length; j++) {
            item[params[j].name] = params[j].values[0];
        }
        list.push(item);
    }
    console.log(list);
    console.log("fetching : " + list[0].url);
    response = await axios.get(list[0].url, {
        responseType: "json"
    });
    let entryData = response.data;
    let entry = {
        phone: entryData.summary.offer.contact.phones[0].number,
        delivery: entryData.summary.offer.delivery.summary[0].value.text,
        startingAt: entryData.summary.offer.publication.startingAt,
        endingAt: entryData.summary.offer.publication.endingAt
    };
    let parameters = entryData.summary.offer.parametersGroups.groups[0].parameters;
    for (var i = 0; i < parameters.length; i++) {
        entry[parameters[i].name] = parameters[i].values[0].valueLabel;
    }
    console.log(entry);
}

process();
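The question also asked for CSV instead of JSON. A minimal sketch based on the commented-out writeStream lines in the question, assuming the list array built above:
const fs = require("fs");
// write the scraped items to a CSV file, one row per car
const writeStream = fs.createWriteStream("allegro.csv");
writeStream.write(`Name,Price,Link\n`);
for (const car of list) {
    // naive escaping: wrap fields in quotes so commas inside names don't break columns
    writeStream.write(`"${car.name}","${car.price}","${car.url}"\n`);
}
writeStream.end();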

Pagination in DynamoDB using Node.js?

I've had a read through AWS's docs around pagination:
As their docs specify:
In a response, DynamoDB returns all the matching results within the scope of the Limit value. For example, if you issue a Query or a Scan request with a Limit value of 6 and without a filter expression, DynamoDB returns the first six items in the table that match the specified key conditions in the request (or just the first six items in the case of a Scan with no filter)
Which means that, given I have a table called Questions with an attribute called difficulty (that can take any numeric value ranging from 0 to 2), I might end up with the following conundrum:
A client makes a request, think GET /questions?difficulty=0&limit=3
I forward that 3 to the DynamoDB query, which might return 0 items, as the first 3 in the collection might not be of difficulty == 0
I then have to perform a new query to fetch more questions that match that criteria, without knowing whether I might return duplicates
How can I then paginate based on a query correctly? Something where I'll get as many results as I asked for whilst having the correct offset.
Using async/await.
const getAllData = async (params) => {
    console.log("Querying Table");
    let data = await docClient.query(params).promise();
    if (data['Items'].length > 0) {
        allData = [...allData, ...data['Items']];
    }
    if (data.LastEvaluatedKey) {
        params.ExclusiveStartKey = data.LastEvaluatedKey;
        return await getAllData(params);
    } else {
        return data;
    }
}
I am using a global variable allData to collect all the data.
The call to this function is enclosed in a try/catch:
try {
    await getAllData(params);
    console.log("Processing Completed");
    // console.log(allData);
} catch (error) {
    console.log(error);
}
I am using this from within a Lambda and it works fine.
The article here really helped and guided me. Thanks.
Here is an example of how to iterate over a paginated result set from
a DynamoDB scan (can be easily adapted for query as well) in Node.js.
You could save the LastEvaluatedKey state server-side and pass an identifier back to your client, which it would send with its next request, and your server would pass that value as ExclusiveStartKey in the next request to DynamoDB (see the sketch after the scan example below).
const AWS = require('aws-sdk');
AWS.config.logger = console;
const dynamodb = new AWS.DynamoDB({ apiVersion: '2012-08-10' });

let val = 'some value';
let params = {
    TableName: "MyTable",
    ExpressionAttributeValues: {
        ':val': {
            S: val,
        },
    },
    Limit: 1000,
    FilterExpression: 'MyAttribute = :val',
    // ExclusiveStartKey: thisUsersScans[someRequestParamScanID]
};

dynamodb.scan(params, function scanUntilDone(err, data) {
    if (err) {
        console.log(err, err.stack);
    } else {
        // do something with data
        if (data.LastEvaluatedKey) {
            params.ExclusiveStartKey = data.LastEvaluatedKey;
            dynamodb.scan(params, scanUntilDone);
        } else {
            // all results scanned. done!
            someCallback();
        }
    }
});
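A minimal sketch of the token round-trip mentioned above (an Express-style handler; docClient, the table name, and the base64 token encoding are illustrative assumptions, not part of the original answer):
// client sends ?nextToken=... (from the previous page, if any);
// the server passes it as ExclusiveStartKey and returns the new LastEvaluatedKey as a token
app.get('/items', async (req, res) => {
    const params = {
        TableName: 'MyTable',
        Limit: 20,
    };
    if (req.query.nextToken) {
        params.ExclusiveStartKey = JSON.parse(Buffer.from(req.query.nextToken, 'base64').toString());
    }
    const data = await docClient.scan(params).promise();
    res.json({
        items: data.Items,
        nextToken: data.LastEvaluatedKey
            ? Buffer.from(JSON.stringify(data.LastEvaluatedKey)).toString('base64')
            : null,
    });
});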
Avoid using recursion to prevent call stack overflow. An iterative solution extending @Roshan Khandelwal's approach:
const getAllData = async (params) => {
    const _getAllData = async (params, startKey) => {
        if (startKey) {
            params.ExclusiveStartKey = startKey
        }
        return this.documentClient.query(params).promise()
    }
    let lastEvaluatedKey = null
    let rows = []
    do {
        const result = await _getAllData(params, lastEvaluatedKey)
        rows = rows.concat(result.Items)
        lastEvaluatedKey = result.LastEvaluatedKey
    } while (lastEvaluatedKey)
    return rows
}
I hope you figured it out. Just in case others might find it useful: AWS has QueryPaginator/ScanPaginator, as simple as below:
const paginator = new QueryPaginator(dynamoDb, queryInput);

for await (const page of paginator) {
    // do something with the first page of results
    break
}
See more details at https://github.com/awslabs/dynamodb-data-mapper-js/tree/master/packages/dynamodb-query-iterator
2022-05-19:
For AWS SDK v3 see how to use paginateXXXX at this blog post https://aws.amazon.com/blogs/developer/pagination-using-async-iterators-in-modular-aws-sdk-for-javascript/
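For example, with the modular v3 SDK the paginator might be used like this (a sketch; the table, index, and key names are placeholders taken from the question):
const { DynamoDBClient } = require('@aws-sdk/client-dynamodb');
const { DynamoDBDocumentClient, paginateQuery } = require('@aws-sdk/lib-dynamodb');

const client = DynamoDBDocumentClient.from(new DynamoDBClient({}));

// paginateQuery returns an async iterable of pages and follows LastEvaluatedKey for you
const paginator = paginateQuery({ client }, {
    TableName: 'Questions',
    IndexName: 'difficulty-index',
    KeyConditionExpression: 'difficulty = :difficulty',
    ExpressionAttributeValues: { ':difficulty': 0 },
});

// inside an async function
let items = [];
for await (const page of paginator) {
    items = items.concat(page.Items ?? []);
}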
Query and Scan operations return LastEvaluatedKey in their responses. Absent concurrent insertions, you will not miss items nor will you encounter items multiple times, as long as you iterate calls to Query/Scan and set ExclusiveStartKey to the LastEvaluatedKey of the previous call.
To create pagination in a DynamoDB scan, use something like:
var params = {
    "TableName" : "abcd",
    "FilterExpression" : "#someexperssion=:someexperssion",
    "ExpressionAttributeNames" : {"#someexperssion":"someexperssion"},
    "ExpressionAttributeValues" : {":someexperssion" : "value"},
    "Limit" : 20,
    "ExclusiveStartKey" : {"id": "9ee10f6e-ce6d-4820-9fcd-cabb0d93e8da"}
};
DB.scan(params).promise();
where ExclusiveStartKey is the LastEvaluatedKey returned by the previous execution of this query.
Using async/await and returning the accumulated data from the awaited call. An elaboration on @Roshan Khandelwal's answer:
const getAllData = async (params, allData = []) => {
    const data = await dynamodbDocClient.scan(params).promise()
    if (data['Items'].length > 0) {
        allData = [...allData, ...data['Items']]
    }
    if (data.LastEvaluatedKey) {
        params.ExclusiveStartKey = data.LastEvaluatedKey
        return await getAllData(params, allData)
    } else {
        return allData
    }
}
Call inside a try/catch:
try {
    const data = await getAllData(params);
    console.log("my data: ", data);
} catch (error) {
    console.log(error);
}
You can create a secondary index on difficulty and, in the query, set a KeyConditionExpression where difficulty = 0, like this:
var params = {
    TableName: questions,
    IndexName: 'difficulty-index',
    KeyConditionExpression: 'difficulty = :difficulty',
    ExpressionAttributeValues: {':difficulty': 0}
}
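If the table doesn't have that index yet, it can be added as a global secondary index, for example with the low-level v2 API (a sketch; the throughput values are placeholders):
const AWS = require('aws-sdk');
const dynamodb = new AWS.DynamoDB();

// add a GSI keyed on difficulty so it can be queried directly
await dynamodb.updateTable({
    TableName: 'Questions',
    AttributeDefinitions: [{ AttributeName: 'difficulty', AttributeType: 'N' }],
    GlobalSecondaryIndexUpdates: [{
        Create: {
            IndexName: 'difficulty-index',
            KeySchema: [{ AttributeName: 'difficulty', KeyType: 'HASH' }],
            Projection: { ProjectionType: 'ALL' },
            ProvisionedThroughput: { ReadCapacityUnits: 5, WriteCapacityUnits: 5 },
        },
    }],
}).promise();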
You can also achieve this using recursion instead of a global variable, like:
const getAllData = async (params, allData = []) => {
    let data = await db.scan(params).promise();
    return (data.LastEvaluatedKey) ?
        getAllData({...params, ExclusiveStartKey: data.LastEvaluatedKey}, [...allData, ...data['Items']]) :
        [...allData, ...data['Items']];
};
Then you can simply call it like:
let test = await getAllData({ "TableName": "test-table"}); // feel free to add try/catch
Using DynamoDB pagination with async generators:
let items = []
let params = {
    TableName: 'mytable',
    Limit: 1000,
    KeyConditionExpression: 'mykey = :key',
    ExpressionAttributeValues: {
        ':key': { S: 'myvalue' },
    },
}

// async generator that yields one page of Items at a time
async function* fetchData(params) {
    let data
    do {
        data = await dynamodb.query(params).promise()
        yield data.Items
        params.ExclusiveStartKey = data.LastEvaluatedKey
    } while (typeof data.LastEvaluatedKey != 'undefined')
}

for await (const data of fetchData(params)) {
    items = [...items, ...data]
}
