convert mongoose stream to array - node.js

I have worked with MongoDB but am quite new to the Mongoose ORM. I was trying to fetch data from a collection, and the explain() output showed 50ms, yet the overall time taken to fetch the data via Mongoose was 9 seconds. Here is the query:
// Fetch every Node document whose `dataset` field equals datasetRef.
// The error (if any) and the matching documents are delivered to the callback.
Node.find({'dataset': datasetRef}, function (err, nodes){
// handle error and data here
});
Then I added an index on the field I was querying on. The explain() output now showed 4ms, but the total time to retrieve data via Mongoose did not change. Then I searched a bit and found that using lean() can help bring the performance of read queries in Mongoose quite close to native MongoDB.
So I changed my query to:
// Same query, but with lean() applied and the results streamed: each document
// is serialized with JSON.stringify and piped straight into the response.
// The serialized documents are written back-to-back with no separator.
Node.find({'dataset': datasetRef})
.lean()
.stream({transform: JSON.stringify})
.pipe(res)
This solved the performance issues completely. But the end result is a stream of JSON docs like this:
{var11: val11, var12: val12}{var21: val21, var22: val22} ...
How do I parse this to form an array of docs? Or should I not be using a stream at all? In my opinion, there is no point in using a stream if I am planning to form the array at the backend, since I will then have to wait for all the docs to be read into memory. But I also think that parsing and creating the whole array at the front end might be costly.
How can I achieve best performance in this case without clogging the network as well ?
UPDATE
I am trying to solve this problem using a through stream. However, I am not able to insert commas in between the JSON objects yet. See the code below:
// Open the JSON array before any documents are streamed.
res.write("[");
var through = require('through');
// Pass-through stream that tries to insert commas between adjacent objects.
// Each chunk it receives is one stringified document, so the "}{" pattern
// never occurs inside a single chunk and the replace has no effect.
var tr = through(
function write(data){
this.queue(data.replace(/\}\{/g,"},{"));
}
);
var dbStream = db.node.find({'dataset': dataSetRef})
.lean()
.stream({'transform': JSON.stringify});
// Close the JSON array once the database stream has ended.
dbStream.on("end", function(){
res.write("]");
});
dbStream
.pipe(tr)
.pipe(res);
With this, I am able to get the "[" at the beginning and the "]" at the end. However, I am still not able to get the pattern "}{" replaced with "},{". I am not sure what I am doing wrong.
UPDATE 2
I have now figured out why the replace is not working. Since I specified JSON.stringify as the transform function, the stream handles one JSON object at a time and therefore never encounters the pattern }{, because a single chunk never contains more than one JSON element.
Now I have modified my code, and written a custom transform function which does JSON.stringify and then appends a comma at the end. The only problem I am facing here is that I don't know when it is the last JSON object in the stream. Because I don't wanna append the comma in that case. At the moment, I append an empty JSON object once the end is encountered. But somehow this does not look like a convincing idea. Here is the code:
// Open the JSON array before any documents are streamed.
res.write("[");
// Serialize one document and append a trailing comma to it.
function transform(data){
return JSON.stringify(data) + ",";
}
var dbStream = db.node.find({'dataset': dataSetRef})
.lean()
.stream({'transform': transform});
// Every document, including the last, is followed by a comma, so an empty
// object is written before the closing bracket to absorb that final comma.
dbStream.on("end", function(){
res.write("{}]");
});
dbStream
.pipe(res);

The only problem I am facing here is that I don't know when it is the last JSON object in the stream.
But you do know which one is first. Knowing that, instead of appending the comma, you can prepend it to every object except the first one. In order to do that, set up your transform function inside a closure:
/**
 * Creates a serializer for streaming documents as the elements of a JSON array.
 *
 * The returned function stringifies its argument; every call after the first
 * prefixes the result with a comma, so concatenating the outputs yields a
 * valid comma-separated element list.
 *
 * @returns {function(*): string} stateful per-document transform
 */
function transformFn() {
  // True until the first document has been handed to the serializer.
  let atStart = true;
  return (doc) => {
    const isFirst = atStart;
    atStart = false;
    const json = JSON.stringify(doc);
    return isFirst ? json : "," + json;
  };
}
Now you can just call that function and set it as your actual transform.
// Build a fresh stateful serializer for this response.
var transform = transformFn();

// Open the JSON array, stream the comma-prefixed documents, then close it.
res.write("[");
var dbStream = db.node
  .find({ 'dataset': dataSetRef })
  .lean()
  .stream({ 'transform': transform });

// Registered before pipe(), so the bracket is written before pipe ends `res`.
dbStream.on("end", function closeArray() {
  res.write("]");
});

dbStream.pipe(res);

@cbajorin and @rckd both gave correct answers.
However, repeating this code all the time seems like a pain.
Hence my solution uses an extra Transform stream to achieve the same thing.
import { Transform } from 'stream'
/**
 * Transform stream that frames the chunks written to it as a JSON array.
 *
 * The first chunk is preceded by "[", every later chunk by ",", and "]" is
 * emitted when the stream is flushed. If no chunk was ever written, the
 * flush emits "[]" instead. Chunks are forwarded unchanged, so they are
 * expected to already be serialized JSON values.
 */
class ArrayTransform extends Transform {
  constructor(options) {
    super(options);
    // Count of chunks seen so far; _flush bumps it once more.
    this._index = 0;
  }

  // Emit the opening bracket or a separator, then the chunk itself.
  _transform(data, encoding, done) {
    const isFirstChunk = this._index === 0;
    this._index += 1;
    this.push(isFirstChunk ? '[' : ',');
    this.push(data);
    done();
  }

  // Close the array; an untouched stream produces a complete empty array.
  _flush(done) {
    const sawNothing = this._index === 0;
    this._index += 1;
    this.push(sawNothing ? '[]' : ']');
    done();
  }
}
Which in turn can be used as:
// Frame the stringified documents as one JSON array on their way to the response.
const toArray = new ArrayTransform();
const jsonDocs = Model.find(query).lean().stream({ transform: JSON.stringify });
jsonDocs.pipe(toArray).pipe(res);
EDIT: added check for empty

I love @cdbajorin's solution, so I created a more readable version of it (ES6):
// Number of documents emitted so far. Keep this in the per-request scope
// (e.g. inside the route handler) so each response starts counting at zero.
let index = 0;

Products
  .find({})
  .lean()
  .stream({
    // The first document opens the array; every later one is comma-prefixed.
    transform: (data) => (index++ === 0 ? '[' : ',') + JSON.stringify(data),
  })
  .on('end', () => {
    // With no documents the opening bracket was never written, so emit a
    // complete empty array instead of a dangling "]".
    res.write(index === 0 ? '[]' : ']');
  })
  .pipe(res);

var mongoose = require('mongoose');
mongoose.connect('mongodb://localhost/shoppingdb');
var Sports = mongoose.model('sports', {});

// Collected plain-object copies of every matching document.
var result = [];
var prefix_out = "your info";

Sports.find({"goods_category": "parts"}).
  cursor().
  on("data", function(doc){
    // Round-trip through JSON to get a plain object from the streamed document.
    const json = JSON.parse(JSON.stringify(doc));
    // Attach the derived property.
    json.handleYourProperty = prefix_out + json.imageURL;
    // Store the converted document (not the accumulator array itself).
    result.push(json);
  }).
  on('error', function(err){
    console.log(err);
  }).
  on('close', function(){
    // The cursor is exhausted: `result` now holds every converted document.
    console.log(result);
  });

Related

i need access all axios data after for loop

I'm making a simple word combination website.
and as a final step, I need all possible word in one string
so I write code like this
const fs=require('fs');
const axios=require('axios')
// Request the dictionary entry for `want` and return a promise for the
// response body (`res.data`).
function test(want){
const res=axios.get("http://api.dictionaryapi.dev/api/v2/entries/en/"+want);
const datapromise=res.then((res)=>res.data);
return datapromise
}
// Read the word list (one word per line) and look each word up.
fs.readFile('./input.txt','utf-8',function(error,data){
//console.log("console log")
var array=data.toString().split("\n");
// Write an empty string to log.txt; the callback ignores any error.
fs.writeFile("./log.txt","",(err)=>{});
var res=""
for(i in array){
// NOTE: the then callback returns a second arrow function instead of
// appending to `res`, and catch() is called without a handler.
test(array[i]).then((data)=>(data)=>res+=data[0].word+"<br>").catch(/*(data)=>console.log(data.code)*/);
}
// Runs right after the loop has started the requests, before any of the
// promises has resolved, so `res` is still empty here.
console.log(res);
})
But this code doesn't work: console.log(res); is executed first, and the callbacks from the for loop run only afterwards.
How can I fix it?
Without knowing much about Axios, I can tell that axios.get, and therefore the test function, is asynchronous. This means console.log will always run first here: test returns a promise that only resolves at a later time.
I'd do something like this (assuming you don't have async/await available):
// Accumulates the looked-up words; appended to as each request completes.
var res = "";

// One promise per word. Array#map avoids for...in on an array and the
// implicit global loop variable that came with it.
var promises = array.map(function (word) {
  return test(word).then(function (data) {
    res += data[0].word + "<br>";
  });
});

Promise.all(promises)
  .catch(function (err) {
    // Report a failed lookup instead of leaving the rejection unhandled.
    console.error(err);
  })
  .finally(function () {
    // Runs once all lookups have succeeded, or as soon as one has failed.
    console.log(res);
  });
Other notes:
catch() is called here without a handler being passed in, so rejections are not actually handled - this may result in an error.
The then callback, (data) => (data) => ..., returns a second, nested function instead of running the update, so I don't think the inner function ever gets called.

Correct way to organise this process in Node

I need some advice on how to structure this function as at the moment it is not happening in the correct order due to node being asynchronous.
This is the flow I want to achieve; I don't need help with the code itself but with the order to achieve the end results and any suggestions on how to make it efficient
Node routes a GET request to my controller.
Controller reads a .csv file on local system and opens a read stream using fs module
Then use csv-parse module to convert that to an array line by line (many 100,000's of lines)
Start a try/catch block
With the current row from the csv, take a value and try to find it in a MongoDB
If found, take the ID and store the line from the CSV and this id as a foreign ID in a separate database
If not found, create an entry into the DB and take the new ID and then do 6.
Print out to terminal the row number being worked on (ideally at some point I would like to be able to send this value to the page and have it update like a progress bar as the rows are completed)
Here is a small part of the code structure that I am currently using;
const fs = require('fs');
const parse = require('csv-parse');
// Build an InstanceOne model from the (elided) field list and save it.
// Returns whatever save() returns; callers chain .then() on the result.
function addDataOne(req, id) {
const modelOneInstance = new InstanceOne({ ...code });
const resultOne = modelOneInstance.save();
return resultOne;
}
// Same as addDataOne, but for the InstanceTwo model.
function addDataTwo(req, id) {
const modelTwoInstance = new InstanceTwo({ ...code });
const resultTwo = modelTwoInstance.save();
return resultTwo;
}
// GET handler: streams the CSV named by req.query.file through csv-parse and,
// for each row, looks up a matching ModelOne document, creating the related
// records as needed. Responds with 'added' when the parser emits 'end'.
exports.add_data = (req, res) => {
const fileSys = 'public/data/';
const parsedData = [];
// Count of rows seen so far, printed to the terminal as progress.
let i = 0;
// NOTE(review): the file name comes straight from the query string - confirm
// it is validated before being joined onto the data directory.
fs.createReadStream(`${fileSys}${req.query.file}`)
.pipe(parse({}))
.on('data', (dataRow) => {
// Map the positional CSV columns onto named fields (remaining fields elided).
let RowObj = {
one: dataRow[0],
two: dataRow[1],
three: dataRow[2],
etc,
etc
};
// NOTE(review): this try/catch only sees synchronous throws; failures inside
// the exec callback or the .then chains below happen later and bypass it.
try {
ModelOne.find(
{ propertyone: RowObj.one, propertytwo: RowObj.two },
'_id, foreign_id'
).exec((err, searchProp) => {
if (err) {
console.log(err);
} else {
if (searchProp.length > 1) {
console.log('too many returned from find function');
}
// Exactly one match: store the row and link its id onto the match.
if (searchProp.length === 1) {
addDataOne(RowObj, searchProp[0]).then((result) => {
searchProp[0].foreign_id.push(result._id);
searchProp[0].save();
});
}
// No match: create the parent first, then store the row and link it.
if (searchProp.length === 0) {
let resultAddProp = null;
addDataTwo(RowObj).then((result) => {
resultAddProp = result;
// NOTE(review): passes `req` here but `RowObj` in the single-match branch
// above - confirm which argument is intended.
addDataOne(req, resultAddProp._id).then((result) => {
resultAddProp.foreign_id.push(result._id);
resultAddProp.save();
});
});
}
}
});
} catch (error) {
console.log(error);
}
// Progress output: overwrite the current terminal line with the row number.
// This runs as soon as the query is issued, not when the row's work is done.
i++;
let iString = i.toString();
process.stdout.clearLine();
process.stdout.cursorTo(0);
process.stdout.write(iString);
})
// NOTE(review): nothing waits for the per-row queries and saves, so this
// response can be sent while database work is still in flight.
.on('end', () => {
res.send('added');
});
};
I have tried to make the functions use async/await, but it seems to conflict with the fs.createReadStream or csv-parse functionality, probably due to my inexperience and incorrect use of the code...
I appreciate that this is a long question about the fundamentals of the code but just some tips/advice/pointers on how to get this going would be appreciated. I had it working when the data was sent one at a time via a post request from postman but can't implement the next stage which is to read from the csv file which contains many records
First of all you can make the following checks into one query:
if (searchProp.length === 1) {
if (searchProp.length === 0) {
Use upsert option in mongodb findOneAndUpdate query to update or upsert.
Secondly don't do this in main thread. Use a queue mechanism it will be much more efficient.
Queue which I personally use is Bull Queue.
https://github.com/OptimalBits/bull#basic-usage
This also provides the functionality you need of showing progress.
Also regarding using Async Await with ReadStream, a lot of example can be found on net such as : https://humanwhocodes.com/snippets/2019/05/nodejs-read-stream-promise/

How to convert snapshot.val() to object in node js?

I am trying to write a Firebase Cloud Function. I am trying to parse the data (here is the data structure) into an object (model/POJO) or a HashMap, as I believe everything in JS is a map, so I have written this code:
// HTTPS function: reads everything under /bookings once and tries to walk it
// as a HashMap before sending it back.
exports.checkAllBookings = functions.https.onRequest((request,response)=>{
admin.database().ref(`/bookings`).once('value').then((snapshot)=>{
return snapshot.val()
}).then((bookingDetails)=>{
// NOTE: the new HashMap is replaced on the next line by the value returned
// from snapshot.val(), so `size` and `forEach` below are looked up on that
// value rather than on a HashMap.
var map = new HashMap()
map = bookingDetails
console.log(map.size)
map.forEach(function(value,key){
console.log(value)
})
// console.log(bookingDetails.keys)
// bookingDetails.forEach(function(childSnap){
// var item = childSnap.val()
// console.log("item ::::: "+ item)
// })
response.send(map)
})
})
And this is how it looks in full: complete pic. I have installed the HashMap dependency from here: Hashmap npm install. But after all this I keep getting the error shown here. So how can I parse that data into a bean, POJO, or HashMap in Node.js so that I can move on and complete my task? Thanks for any help.
It's hard to determine what you're trying to do. But if you're simply trying to return the data to the caller, this is the simplest approach:
exports.checkAllBookings = functions.https.onRequest((request,response)=>{
admin.database().ref(`/bookings`).once('value').then((snapshot)=>{
response.send(snapshot.val());
})
})
If you're for example trying to filter the data before sending it back to the user, you can use the Snapshot.forEach method:
exports.checkAllBookings = functions.https.onRequest((request,response)=>{
admin.database().ref(`/bookings`).once('value').then((snapshot)=>{
result = [];
snapshot.forEach(function(bookingSnapshot) {
var booking = bookingSnapshot.val();
if (booking.someProperty === true) {
result.push(booking);
}
});
response.send(map)
})
})
The if (booking.someProperty === true) condition here is whatever you want it to be. You could do a similar operation to add more information for each booking.

save many records to couchdb in nodejs

I have a very large dataset that I want to save in couchdb for searchability.
I want the records to look like this:
{
"type": "first",
"name": "ryan",
"count": 447980
}
Since the text-files are larger than I should hold in memory, I am setting up a streaming readline reader, like so:
var db = require('./db'),
readline = require('readline'),
path = require('path'),
fs = require('fs');
// simple callback after cradle save: logs the error, or the saved doc
function saveHandler(er, doc){
if (er) return console.log('Error: ', er);
console.log(doc);
}
// save record of type, based on line with count & name
// Returns a per-line handler bound to `type`. The outer `line` parameter is
// shadowed by the inner function's parameter and is never used.
function handleCountedLine(type, line){
return function(line){
var record = {type:type};
// Split on single spaces: element 0 is read as the count, element 1 as the name.
var i = line.trim().split(' ');
record.name = i[1].trim();
record.count = Number(i[0]);
// A save is issued for every line as it arrives; nothing here pauses the
// reader while a save is outstanding.
db.save(record, saveHandler);
}
}
var handleFirst = handleCountedLine('first');
// Stream the file line by line and hand each line to the handler.
readline.createInterface({
input: fs.createReadStream('data/facebook-firstnames-withcount.txt'),
terminal: false
})
.on('line', handleFirst);
db is a cradle db.
After 40 records or so, it slows to a total crawl, then eventually runs out of memory. I tried poolr and node-rate-limiter, using "only run this many at a time" & "only allow this many to run in a minute" strategies. Both work a little better, but it still runs out of memory. Is there a good way to accomplish this goal, or am I stuck writing it in python?
With awesome help from Paulo Machado in google hangouts, I made an answer using line-by-line, a simple wrapper that uses stream.pause() & stream.resume() to only allow a single line to be processed at a time. I'd like to give him the credit, but he hasn't come over here to make an answer, so I will just put this here. It has parsed 34039 records, so far. I will update the answer if it crashes.
var LineByLineReader = require('line-by-line'),
path = require('path'),
db = require('./db')
// Maps each record type to the base name of its data file under data/.
const types = {
  'first': 'facebook-firstnames-withcount',
  'last': 'facebook-lastnames-withcount',
  'full': 'facebook-names-unique'
};

/**
 * Reads the data file for `type` line by line and saves one couch record per
 * line. The reader is paused while a save is outstanding, so only a single
 * line is in flight at any time.
 *
 * @param {string} type - key of `types`: 'first', 'last' or 'full'
 */
function processFile(type){
  var fname = path.join('data', types[type] + '.txt');
  var lr = new LineByLineReader(fname, {skipEmptyLines: true});

  lr.on('error', function (err) {
    console.log('Error:');
    console.log(err);
  });

  // Custom event emitted below after each successful save.
  lr.on('record', function (record) {
    console.log('Saved:');
    console.log(record);
  });

  lr.on('line', function (line) {
    // Hold further lines back until this one has been saved.
    lr.pause();
    var record = { type: type };
    if (type === 'full'){
      // Full names carry no count; the name is stored as its space-separated parts.
      record.name = line.trim().split(' ');
    }else{
      // Counted files: element 0 is the count, element 1 the name.
      var i = line.trim().split(' ');
      record.name = i[1].trim();
      record.count = Number(i[0]);
    }
    db.save(record, function(er, res){
      if (er) lr.emit('error', er, record);
      if (res) lr.emit('record', record);
      lr.resume();
    })
  });
}

// Start one reader per configured file; the loop variable is block-scoped
// rather than leaking into the global scope.
for (const type of Object.keys(types)){
  processFile(type);
}

// views for looking things up
db.save('_design/views', require('./views'));
I guess couchdb is the bottleneck here. Have a look at couchdb's bulk doc api that allows you to insert documents en masse. (You should probably not try to commit all your data at once, but accumulate a bunch of docs in an array and push that to the database -- use stream.pause() and stream.resume() to throttle the text stream). You will be rewarded with efficiency gains by couchdb if you use the bulk api.

Node-orm saving nested objects

My model is essentially
Idea has many Position
Idea has one User
Position has one Direction
Position has one Security
Position has one Idea (inverse of Idea has many Position)
I have added 'Q' promises to many of the node-orm functions to program in that manner. So find is now qFind etc...
I am struggling to find the best way to persist this to the DB (MySql):
// Load the user, create an Idea for them, then build one Position per entry
// in `positions`, looking up its Security and Direction first.
User.qGet(1004).then(function(user) {
var newIdea = new Idea({
'openedOn': new Date()
})
newIdea.setUser(user, console.log)
Idea.qCreate(newIdea).then(function(idea) {
_.each(positions, function(position) {
Security.qFind({ticker: position.ticker}).then(function(securities){
var security = securities[0]
Direction.qFind({direction: position.direction}).then(function(directions){
var direction = directions[0]
// The position is created with only a weight; the three associations are
// then attached one setter call at a time.
var newPosition = Position({
'weight': 1
})
newPosition.setIdea(idea, console.log)
newPosition.setDirection(direction, console.log)
newPosition.setSecurity(security, console.log)
console.log(JSON.stringify(newPosition))
}) // Direction.qFind
}) // Security.qFind
}) // _.each
// These two lines run right after the lookups above are started; nothing
// waits for the qFind promises before the response is sent.
console.log(JSON.stringify(idea))
res.send(idea)
}) // Idea.qCreate
}) // User.find
Here are my problems
It is not working. When I set the idea, i get the error:
[Error: ER_BAD_NULL_ERROR: Column 'directionId' cannot be null]
The issue is I need to set three foreign keys in this object...
2. Is this the right approach to saving nested objects?
One solution is setting the id
// Build the position with all three foreign keys supplied up front, then
// persist it with a single create call.
var newPosition = Position({
  'weight': 1,
  'ideaId': idea.id,
  'directionId': direction.id,
  'securityId': security.id
});

Position.create(newPosition, function (e, i) {
});
The problem is that /Associations/One.js calls save when setting an association.
I have decided to embrace the use of promises, and the Q library
Step 1 adding q nbind to interesting methods in my orm model. for instance:
// Attach a promise-returning `qFind` that wraps the node-style `find`,
// bound to the model so `this` is preserved.
model = db.models.direction;
model.qFind = Q.nbind(model.find, model);
Step 2 was adding instance methods to my Models to:
setDirectionByName: function(direction) {
var _this = this;
var internalSuccess = function(directions) {
_this.direction = directions[0]
_this.directionId = directions[0].id
return directions
}
return {
then: function(externalSuccess, externalFail) {
var success = _.compose(externalSuccess, internalSuccess)
Direction.qFind({direction: direction}).then(success, externalFail)
}
}
}
In this step I define an internal success method which stores the properties, and return a promise which uses composition to form an overall success function (the one from my model runs inside the one passed to the 'then' invocation).
Step 3: handling the request and saving the data
User.qGet(1004).then(function(user) {
var newIdea = new Idea({
'openedOn': new Date()
})
newIdea.setUser(user, function(){})
Idea.qCreate(newIdea).then(function(idea) {
var positionPromises = _.map(positions, function(position) {
var newPosition = Position({
'weight': 1
,'ideaId': idea.id
})
Here I set the foreign keys, and wait for them to complete, before the Position.create is asynchronously kicked off, with a promise being returned.
return Q.all([
newPosition.setDirectionByName(position.direction)
, newPosition.setSecurityByTicker(position.ticker)
]).then(function(noop) {
//newPosition.setIdea(idea)
return Position.qCreate(newPosition)
})
}) // _.map
So now I have an array of promises for Position objects. Here I wait for all of them to be fulfilled before returning the results to the client.
Q.all(positionPromises).then(function(positions){
console.log('RESULTS of POSITION Promises')
//console.log(JSON.stringify(positions))
//This doesn't seem to work as expected
//idea.setPositions(positions, function(e, o){ })
// kludge it
idea.positions = positions
console.log(JSON.stringify(idea))
console.log('/RESULTS of POSITION Promises')
res.send(idea)
})
console.log('FALL THROUGH')
}) // Idea.qCreate
}) // User.find

Resources