How to speed up query response time? - node.js

There is a collection in the database with about 4.6 million documents.
(The data structure can be seen in the screenshot.)
When trying to pull even a single value from this collection, the response time reaches more than 3 seconds:
(And this is on my local, fairly powerful machine.)
The code handling the request and response looks like this:
// server.js
const http = require('http');
const { getProducts } = require('./controllers/currencyController');

const server = http.createServer((req, res) => {
  if (req.url.match(/\/api\/currency.+/g) && req.method === 'GET') {
    getProducts(req, res);
  } else {
    res.writeHead(404, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({ message: 'Route Not Found' }));
  }
});

const PORT = process.env.PORT || 5000;
server.listen(PORT, () => console.log(`Server running on port ${PORT}`));

module.exports = server;
// currencyModel.js
const { MongoClient } = require('mongodb');
const client = new MongoClient('mongodb://127.0.0.1:27017/');

// Note: despite its name, this returns a collection handle, not documents.
const findAll = async (currencyPair) => {
  try {
    await client.connect();
    const testingData = client.db('Trading').collection(currencyPair);
    return testingData;
  } catch (e) {
    console.log(e);
  }
};

module.exports = {
  findAll,
};
// currencyController.js
const currencyData = require('../models/currencyModel');

async function getProducts(req, res) {
  try {
    const currency_data = await currencyData.findAll('EUR/USD');
    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(await currency_data.find({ id: 1 }).toArray()));
  } catch (error) {
    console.log(error);
  }
}

module.exports = {
  getProducts,
};
I suspect that the long response time may be connected with a lack of indexes.
(The indexes that are present were custom created.)
Question:
How can I speed up the server's response time to this request?
(Preferably without using an ORM or frameworks.)
And what should this time ideally be?

I'm editing my answer as per the insight gained through the comments.
The findAll method there is a bit of a misnomer, as it just selects a collection via the MongoDB driver. The problem is that you are triggering a collection scan (MongoDB's equivalent of a table scan): MongoDB has to go through every document in the collection to see whether it matches your filter id: 1.
You can cut down the execution time by using findOne instead of find. Once a single record satisfying the filter is found, it is returned as the result. This is usually not enough, though: if the matching record happens to be the last one in the collection, the performance will be the same as for find.
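As a small illustration of that swap inside the getProducts controller from the question (a sketch; only the response lines change):

// before: res.end(JSON.stringify(await currency_data.find({ id: 1 }).toArray()));
const doc = await currency_data.findOne({ id: 1 });
res.end(JSON.stringify(doc));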
You should instead create an index:
db.collection.createIndex({"id": 1})
Run this in the MongoDB console, or use MongoDB Compass to create an index. The example creates an index for queries using the id field in ascending order.
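You can also create the index and confirm it is being used from the Node.js driver itself. A minimal sketch, assuming the Trading database and EUR/USD collection from the question (an "IXSCAN" stage in the winning plan means the index is used; "COLLSCAN" means a full collection scan):

const { MongoClient } = require('mongodb');

async function ensureIndex() {
  const client = new MongoClient('mongodb://127.0.0.1:27017/');
  await client.connect();
  const collection = client.db('Trading').collection('EUR/USD');

  // Create the ascending index on "id" (a no-op if it already exists).
  await collection.createIndex({ id: 1 });

  // Inspect the query plan to verify the index is picked up.
  const plan = await collection.find({ id: 1 }).explain('executionStats');
  console.log(JSON.stringify(plan.queryPlanner.winningPlan, null, 2));

  await client.close();
}

ensureIndex().catch(console.error);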
If you wanted to query for two fields, say vol and close, vol ascending and close descending, you would create an index via:
db.collection.createIndex({"vol": 1, "close":-1})

Related

How to update an sqlite3 database every 30 seconds from an express server using an external api

I'm making a website which tracks kill statistics in a game, but I can only access 50 key-value pairs per request, so to make sure my data is accurate I want to make a request about every 30 seconds.
I feel like I may have gone wrong at some stage in the implementation. Maybe there is a way to make requests that doesn't involve the
express.get('/route/', (req, res) => { //code })
syntax and I just don't know about it. In short, what I want is for the database to be updated every 30 seconds without having to refresh the browser. I've tried wrapping my GET request in a function and putting it in setInterval, but it still doesn't run.
const express = require('express');
const zkillRouter = express.Router();
const axios = require('axios');
const zkillDbInit = require('../utils/zkillTableInit');
const sqlite3 = require('sqlite3').verbose();

zkillDbInit();
let db = new sqlite3.Database('zkill.db');

setInterval(() => {
  zkillRouter.get('/', (req, res) => {
    axios.get('https://zkillboard.com/api/kills/w-space/', {
      headers: {
        'accept-encoding': 'gzip',
        'user-agent': 'me',
        'connection': 'close'
      }
    })
    .then(response => {
      // handle success
      const wormholeData = response.data;
      res.status(200);
      // probably not the best way to do this but it's fine for now.
      for (let i = 0; i < Object.keys(wormholeData).length; i++) {
        const currentZKillId = Object.keys(wormholeData)[i];
        const currentHash = Object.values(wormholeData)[i];
        let values = {
          $zkill_id: currentZKillId,
          $hash: currentHash
        };
        // puts it in the database
        db.run(`INSERT INTO zkill (zkill_id, hash) VALUES ($zkill_id, $hash)`,
          values,
          function (err) {
            if (err) {
              throw (err);
            }
          });
      }
    });
  });
}, 1000);

module.exports = zkillRouter;
One thing I have considered is that I don't necessarily need this functionality to be part of the same program. All I need is the database, so if I have to I could run this code separately as a little standalone Node program that makes requests to the API and updates the database. I don't think that would be ideal, but if it's the only way to do what I want then I'll consider it.
Clarification: the .get() method is called on zkillRouter, which is an instance of express.Router() declared on line two. This in turn links back to my app.js file through an apiRouter, so the full route is localhost:5000/api/zkill/. That was a big part of the problem: I didn't know you could call axios.get without being inside a route handler, so I was stuck on this for a while.
I fixed it myself.
(edit 4)
I was using setInterval wrong.
I just got rid of the router statement, as it wasn't needed.
I definitely need to tweak the interval so that I don't get so many SQL errors for violating the unique constraint. Every 5 minutes should be enough, I think.
Don't throw the error inside the db.run callback (that crashes the program); log it instead.
function myfunction() {
  axios.get('https://zkillboard.com/api/kills/w-space/', {
    headers: {
      'accept-encoding': 'gzip',
      'user-agent': 'me lol',
      'connection': 'close'
    }
  })
  .then(response => {
    // handle success
    const wormholeData = response.data;
    for (let i = 0; i < Object.keys(wormholeData).length; i++) {
      const currentZKillId = Object.keys(wormholeData)[i];
      const currentHash = Object.values(wormholeData)[i];
      let values = {
        $zkill_id: currentZKillId,
        $hash: currentHash
      };
      db.run(`INSERT INTO zkill (zkill_id, hash) VALUES ($zkill_id, $hash)`,
        values,
        function (err) {
          if (err) {
            return console.log(i);
          }
        });
    }
  });
}

setInterval(myfunction, 1000 * 30);
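To avoid the unique-constraint errors entirely rather than just logging them, SQLite's INSERT OR IGNORE can skip rows that already exist. A sketch, assuming zkill_id carries the UNIQUE constraint:

db.run(`INSERT OR IGNORE INTO zkill (zkill_id, hash) VALUES ($zkill_id, $hash)`,
  values,
  function (err) {
    if (err) {
      // Only genuine errors arrive here; duplicate rows are silently skipped.
      console.log(err.message);
    }
  });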

MongoError: pool destroyed when fetching all data without conditions

I am new to MongoDB. I am trying to query different collections, and when I fetch data from the category collection (the equivalent of running SELECT * on a table) it throws the error MongoError: pool destroyed.
As per my understanding, find({}) uses a connection pool, and that pool is being destroyed before the query completes.
The code I am using inside the model is below:
const MongoClient = require('mongodb').MongoClient;
const dbConfig = require('../configurations/database.config.js');

export const getAllCategoriesApi = (req, res, next) => {
  return new Promise((resolve, reject) => {
    let finalCategory = [];
    const client = new MongoClient(dbConfig.url, { useNewUrlParser: true });
    client.connect(err => {
      const collection = client.db(dbConfig.db).collection("categories");
      debugger
      if (err) throw err;
      let query = { CAT_PARENT: { $eq: '0' } };
      collection.find(query).toArray(function (err, data) {
        if (err) return next(err);
        finalCategory.push(data);
        resolve(finalCategory);
        // db.close();
      });
      client.close();
    });
  });
}
My finding here is that when I am using
let query = { CAT_PARENT: { $eq: '0' } };
collection.find(query).toArray(function(err, data) {})
with find(query) it returns data, but with {} or $gte/$gt it throws the pool error.
The code I have written in the controller is below:
import { getAllCategoriesApi } from '../models/fetchAllCategory';
const redis = require("redis");
const client = redis.createClient(process.env.REDIS_PORT);

export const getAllCategoriesListData = (req, res, next, query) => {
  // Try fetching the result from Redis first in case we have it cached
  return client.get(`allstorescategory:${query}`, (err, result) => {
    // If that key exists in the Redis store
    if (result) {
      res.send(result);
    } else {
      // Key does not exist in the Redis store
      getAllCategoriesApi(req, res, next).then(function (data) {
        const responseJSON = data;
        // Save the API response in the Redis store
        client.setex(`allstorescategory:${query}`, 3600, JSON.stringify({ source: 'Redis Cache', responseJSON }));
        res.send(responseJSON);
      }).catch(function (err) {
        console.log(err);
      });
    }
  });
}
Can anyone tell me what mistake I am making here, and how I can fix the pool issue?
Thanking you in advance.
I assume that toArray is asynchronous, i.e. it invokes the callback passed in once the results become available (are read from the network).
If this is true, the client.close() call is executed before the results have been read, which likely yields your error.
The close call needs to happen after you have finished iterating the results.
Separately from this, you should probably not be creating the client instance in the request handler like this. Client instances are expensive to create (they must talk to all of the servers in the deployment before they can actually perform queries) and generally should be created per running process rather than per request.
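A minimal sketch of that restructuring, assuming the same dbConfig module from the question (the client is created once per process, and nothing is closed inside the request path; a long-running server would normally never close it):

const { MongoClient } = require('mongodb');
const dbConfig = require('../configurations/database.config.js');

// One client per process, connected once and reused across requests.
const client = new MongoClient(dbConfig.url, { useNewUrlParser: true });
const clientReady = client.connect();

export const getAllCategoriesApi = async () => {
  await clientReady;
  const collection = client.db(dbConfig.db).collection('categories');
  // toArray resolves only after all matching documents have been read.
  return collection.find({ CAT_PARENT: { $eq: '0' } }).toArray();
};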

many queries postgres (node), no parallel queries?

I am running a Node server with the node-postgres (pg) package.
I wrote a program which sends n queries (for instance 20,000) at once to my Postgres database.
When I do this with several clients who each want to run 20,000 queries at once, there is no parallelism: the requests of the second client are queued until the first client has finished all of its queries.
Is this normal behavior for Postgres? If yes, how can I prevent one user from getting all the resources (while the others have to wait) if there is no parallelism?
This is my code:
const express = require('express');
const app = express();
const { Pool } = require("pg");
const pool = new Pool();

function benchmark() {
  pool.connect((err, client, done) => {
    if (err) throw err;
    client.query("SELECT * from member where m_id = $1", [1], (err, res) => {
      done();
      if (err) {
        console.log(err.stack);
      } else {
        console.log(res.rows[0]);
      }
    });
  });
}

app.get('/', function (req, res) {
  for (let i = 0; i < 20000; i++) {
    benchmark();
  }
});
First you need to create a connection pool. Here's an example with node's pg in a separate module (node-pg-sql.js) for convenience:
node-pg-sql.js:
const { Pool } = require('pg');
const pool = new Pool(fileNameConfigPGSQL);

module.exports = {
  query: (text, params, callback) => {
    const start = Date.now();
    return pool.query(text, params, (err, res) => {
      const duration = Date.now() - start;
      // console.log('executed query', { text, duration, rows: res.rowCount });
      callback(err, res);
    });
  },
  getClient: (callback) => {
    pool.connect((err, client, done) => {
      // monkey patch: remember the last query run on this client
      const query = client.query.bind(client);
      client.query = function () {
        client.lastQuery = arguments;
        return query.apply(client, arguments);
      };
      // warn if the client is held for more than 5 seconds
      const timeout = setTimeout(() => {
        // console.error('A client has been checked out for more than 5 seconds!');
        // console.error(`The last executed query on this client was: ${client.lastQuery}`);
      }, 5000);
      const release = (err) => {
        // done() returns the client to the pool
        done(err);
        // clear the timeout
        clearTimeout(timeout);
        // restore the original query method before the monkey patch
        client.query = query;
      };
      callback(err, client, release);
    });
  }
}
In your postgresql.conf (on Linux normally under /var/lib/pgsql/data/postgresql.conf), set max_connections to the desired value:
max_connections = 300
Keep in mind:
Each PostgreSQL connection consumes RAM for managing the connection or the client using it. The more connections you have, the more RAM you are using that could instead be used to run the database.
While increasing max_connections, you may need to increase shared_buffers and kernel.shmmax as well in order for the client-connection increase to be effective.
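For example, the pair of settings together in postgresql.conf might look like this (the values are purely illustrative and must be sized against the host's RAM; shared_buffers is commonly set to roughly a quarter of system RAM):

max_connections = 300
shared_buffers = 2GB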
Whenever you want to run a query in one of your routes/endpoints, just require the separate client-pool file like so:
const db = require('../../../node-pg-sql');

module.exports = (router) => {
  router.get('/someRoute', (req, res) => {
    console.log(`*****************************************`);
    console.log(`Testing pg..`);
    let sqlSelect = `SELECT EXISTS (
      SELECT 1
      FROM pg_tables
      WHERE schemaname = 'someschema'
    )`;
    // pass an empty params array so the wrapper's (text, params, callback)
    // signature lines up
    db.query(sqlSelect, [], (errSelect, responseSelect) => {
      if (errSelect) {
        /* INFO: Error while querying table */
        console.log(`*****************************************`);
        console.log(`ERROR WHILE CHECKING CONNECTION: ${errSelect}`);
      } else {
        // INFO: No error from database
        console.log(`*****************************************`);
        console.log(`CONNECTION TO PGSQL WAS SUCCESSFUL..`);
        res.json({ success: true, message: responseSelect, data: responseSelect.rows[0].exists });
      }
    });
  });
}
EDIT:
"there is no parallelism..."
Node is asynchronous; you can either work with promises or spawn more clients/pools and tune your max_connections (as explained in my answer, but keep the performance of your host machine in mind). With multiple clients each running around 20,000 queries, they won't all resolve instantly or fully in parallel. What is the exact goal you are trying to achieve?
"Is this normal behavior for Postgres?"
This is due to Node's event loop as well as to certain performance limitations of the host machine running Postgres.
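As an illustration of letting the pool interleave work instead of firing 20,000 independent connect/query calls in one tight loop, here is a sketch of the benchmark rewritten around pool.query with a bounded batch size (the batch size of 100 is an arbitrary value to tune; pool.query checks out a client and returns it to the pool automatically):

const { Pool } = require('pg');
const pool = new Pool({ max: 20 }); // the pool size bounds concurrency

async function benchmark(totalQueries, batchSize) {
  for (let offset = 0; offset < totalQueries; offset += batchSize) {
    const batch = [];
    for (let i = 0; i < batchSize && offset + i < totalQueries; i++) {
      batch.push(pool.query('SELECT * FROM member WHERE m_id = $1', [1]));
    }
    // Awaiting between batches yields to the event loop, so queries
    // from other clients can be scheduled in between.
    await Promise.all(batch);
  }
}

benchmark(20000, 100).catch(console.error);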

How to efficiently forward request to multiple endpoints using nodejs?

I built a Node.js server to act as an adapter: upon receiving a POST request containing some data, it extracts the data from the request body and forwards it to a few external servers. Finally, my server sends a response consisting of the responses from each of the external servers (success/fail).
If there's only one endpoint to forward to, it seems fairly straightforward. However, when I have to forward to more than one server, I have to rely on things like Promise.all(), which has fail-fast behaviour. That means if one promise is rejected (an external server is down), the combined Promise.all() result is rejected immediately and I lose track of the responses from the rest of the servers.
This may not be the exact solution, but what I am posting could be a workaround for your problem.
A few days back I had the same problem, as I wanted to implement API versioning. Here is the solution I implemented; please have a look.
(Architecture diagram)
Let me explain this diagram.
The diagram shows the initial server configuration. All API requests arriving here are passed to the index.js file inside the release directory.
index.js (in release directory)
const express = require('express');
const fid = require('./core/file.helper');
const router = express.Router();

fid.getFiles(__dirname, './release').then(releases => {
  releases.forEach(release => {
    // release = release.replace(/.js/g, '');
    router.use(`/${release}`, require(`./release/${release}/index`));
  });
});

module.exports = router;
Code snippet for file.helper.js:
// requiring path and fs modules
const path = require('path');
const fs = require('fs');

module.exports = {
  getFiles: (presentDirectory, directoryName) => {
    return new Promise((resolve, reject) => {
      // joining path of directory
      const directoryPath = path.join(presentDirectory, directoryName);
      // passing directoryPath and callback function
      fs.readdir(directoryPath, function (err, files) {
        // handle the error and stop; without the return,
        // resolve would still be called after reject
        if (err) {
          console.log('Unable to scan directory: ' + err);
          return reject(err);
        }
        resolve(files);
      });
    });
  }
}
Now, from this index file, all the index.js files inside each version folder are mapped.
Here is the code for the index.js inside v1, v2, etc.:
const express = require('express');
const mongoose = require('mongoose');
const fid = require('../../core/file.helper');
const dbconf = require('./config/datastore');
const router = express.Router();

// const connection_string = `mongodb+srv://${dbconf.atlas.username}:${dbconf.atlas.password}@${dbconf.atlas.host}/${dbconf.atlas.database}`;
const connection_string = `mongodb://${dbconf.default.username}:${dbconf.default.password}@${dbconf.default.host}:${dbconf.default.port}/${dbconf.default.database}`;

mongoose.connect(connection_string, {
  useCreateIndex: true,
  useNewUrlParser: true
}).then(status => {
  console.log(`Database connected to mongodb://${dbconf.default.username}@${dbconf.default.host}/${dbconf.default.database}`);
  fid.getFiles(__dirname, './endpoints').then(files => {
    files.forEach(file => {
      file = file.replace(/.js/g, '');
      router.use(`/${file}`, require(`./endpoints/${file}`));
    });
  });
}).catch(err => {
  console.log(`Error connecting database ${err}`);
});

module.exports = router;
Each of these index.js files inside a version folder maps to the endpoints inside that version's endpoints folder.
The code for one of the endpoints is given below:
const express = require('express');
const router = express.Router();
const userCtrl = require('../controllers/users');
router.post('/signup', userCtrl.signup);
router.post('/login', userCtrl.login);
module.exports = router;
In this file we connect the endpoints to their controllers.
Another approach is to relay the incoming request to each target yourself, collecting the responses as you go:

var config = {
  'targets': [
    'https://abc.api.xxx',
    'https://xyz.abc',
    'https://stackoverflow.net'
  ]
};

relay(req, resp, config);

function relay(req, resp, config) {
  doRelay(req, resp, config['targets'], relayOne);
}

function doRelay(req, resp, servers, relayOne) {
  var finalresponses = [];
  if (servers.length > 0) {
    var loop = function (servers, index, relayOne, done) {
      relayOne(req, servers[index], function (response) {
        finalresponses.push(response);
        if (++index < servers.length) {
          setTimeout(function () {
            loop(servers, index, relayOne, done);
          }, 0);
        } else {
          done(resp, finalresponses);
        }
      });
    };
    loop(servers, 0, relayOne, done);
  } else {
    done(resp, finalresponses);
  }
}

function relayOne(req, targetserver, relaydone) {
  // call the targetserver and return the response data
  /* return relaydone(response data); */
}

function done(resp, finalresponses) {
  console.log('ended');
  resp.writeHead(200, 'OK', {
    'Content-Type': 'text/plain'
  });
  // join the collected responses into a single plain-text body
  resp.end(finalresponses.join('\n'));
  return;
}
It sounds like you are trying to design a reverse proxy. If you are struggling to get custom code to work, there is a free npm library which is very robust.
I would recommend node-http-proxy.
I have posted the link below; it leads directly to the "modify response" section, since you mentioned modifying the API format in your question. Be sure to read the entire page, though.
https://github.com/http-party/node-http-proxy#modify-a-response-from-a-proxied-server
Note: this library is also very good because it supports SSL, and it proxies to both localhost (servers on the same machine) and servers on other machines (remote).
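For a feel of the API, a minimal sketch of forwarding every incoming request to a single target (the target URL is a placeholder; fanning out to several targets would still be your own code around this):

const http = require('http');
const httpProxy = require('http-proxy');

const proxy = httpProxy.createProxyServer({});

http.createServer((req, res) => {
  // Forward the incoming request to the target server.
  proxy.web(req, res, { target: 'http://localhost:9000' });
}).listen(8000);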
Promise.all() from MDN
It rejects with the reason of the first promise that rejects.
To overcome the problem, you'll need to catch() each request you've made.
e.g.
Promise.all([
  request('<url 1>').catch(err => ({ err })),
  request('<url 2>').catch(err => ({ err })),
  request('<url 3>').catch(err => ({ err }))
])
.then(([result1, result2, result3]) => {
  // Each catch above converts a rejection into a { err } marker,
  // so Promise.all never rejects and every result can be inspected.
  if (result1.err) { /* <url 1> failed */ }
  if (result2.err) { /* <url 2> failed */ }
  if (result3.err) { /* <url 3> failed */ }
})
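On Node 12.9+ the same pattern is built in as Promise.allSettled(), which never rejects and reports each outcome; a sketch with the same hypothetical request helper:

Promise.allSettled([
  request('<url 1>'),
  request('<url 2>'),
  request('<url 3>')
]).then(results => {
  results.forEach((result, i) => {
    if (result.status === 'fulfilled') {
      // result.value holds the response from server i
    } else {
      // result.reason holds the rejection error from server i
    }
  });
});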

Yelp API - Too Many Request Per Second in Node.js

Experts,
It seems that Yelp recently changed their REST API to limit the number of requests you can make per second. I've tried using setTimeout and various sleep functions with no success; I believe the problem has to do with how I'm using setTimeout. I only get a few responses back and a slew of TOO_MANY_REQUESTS_PER_SECOND errors. Also, I'm using the Node.js Fusion API client. Any help would be appreciated. Thanks in advance.
Here is the code below. I'm getting the Yelp URL from my Parse Server, and I want to get the Yelp business name in response:
'use strict';
var Parse = require('parse/node');
Parse.initialize("ServerName");
Parse.serverURL = 'ParseServerURL';

const yelp = require('yelp-fusion');
const client = yelp.client('Key');

var object;
var Business = Parse.Object.extend("Business");
var query = new Parse.Query(Business);
query.notEqualTo("YelpURL", "Bus");
query.find({
  success: function (results) {
    for (var i = 0; i < results.length; i++) {
      object = results[i];
      // I believe a setTimeout block needs to come somewhere in here.
      // Tried many places but with no success.
      client.business(object.get('YelpURL')).then(response => {
        console.log(response.jsonBody.name);
      }).catch(e => {
        console.log(e);
      });
    }
  },
  error: function (error) {
    alert("Error " + error.code + " " + error.message);
  }
});
Use query.each, which iterates over each object and performs the requests in sequence rather than all more or less at once:
query.each(
  function (object) {
    return client.business(object.get('YelpURL')).then(response => {
      console.log(response.jsonBody.name);
    });
  }
).catch(e => {
  console.log(e);
});
One cool thing about this is that it automatically propagates any error from the client.business() call to the catch block at the bottom. It iterates over the objects one at a time, and since we return the result of the client.business() call, it won't move on to the next object until the response has arrived. query.each() also iterates over every object in the collection that meets your query criteria, so you don't have to worry about limits.
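If sequential requests still trip the per-second limit, a short pause between iterations helps. A sketch along the same lines (the 500 ms value is an arbitrary figure to tune against Yelp's limit):

const delay = ms => new Promise(resolve => setTimeout(resolve, ms));

query.each(function (object) {
  return client.business(object.get('YelpURL'))
    .then(response => {
      console.log(response.jsonBody.name);
    })
    .then(() => delay(500)); // wait before query.each starts the next object
}).catch(e => {
  console.log(e);
});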
I'm not quite sure if this is what you're looking for, but you can retrieve up to 50 records per request. The example below returns 20 business names within a given zip code; you can tweak it a little to return all the data for those businesses instead. Does this help?
app.get('/:id', (req, res) => {
  let zipcode = req.params.id;
  let names = [];
  let searchRequest = {
    term: 'Business', // or, for example, food
    limit: 20,        // set the number of responses you want, up to 50
    radius: 20000,    // 20,000 meters (about 12 miles); the Yelp API radius is in meters
    location: zipcode
  };
  client.search(searchRequest)
    .then(response => {
      response.jsonBody.businesses.map(elem => {
        names.push(elem.name);
      });
      res.json(names); // business names only
      // or
      // res.json(response.jsonBody.businesses) // all details, including the business name
    }).catch(e => {
      res.json('error');
    });
});