Reading all JSONs in a folder and getting their strings - node.js

There is a folder with a lot of JSON files, and each of them has a property called "name".
I want to collect those values and turn them into a single string like this:
name0=UsernameExample;name1=Flowers;name2=Test; ...
The number after name is the index/count of the JSON file, so for name48 the value comes from the 48th JSON.
So far I have only tried to read the JSONs from the folder, but I failed, of course:
let s = "";
fs.readdir('/tmp/userdb/', (files) => {
files.each(file => {
name = file[file.keys()[0]];
})})
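For reference, two things sink this attempt: fs.readdir's callback receives (err, files), not just (files), and plain arrays have forEach rather than each; files is also just a list of file names, so each file still has to be read and parsed. A minimal corrected skeleton of the directory read alone might look like:
const fs = require("fs");

fs.readdir('/tmp/userdb/', (err, files) => {
  if (err) throw err;
  files.forEach(file => {
    // `file` is a name like "53874745.json"; its contents
    // still need to be read and passed through JSON.parse
    console.log(file);
  });
});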
I can already convert this
var other_users = (serialize({
"sid0": 0,
"name0": "user1",
"pays0": "8521",
"avatar0": "357",
"onlinescore0": "50"
}));
to this:
sid0=0;name0=user1;pays0=8521;avatar0=357;onlinescore0=50
with this const
const serialize = obj =>
Object.entries(obj).map(([k, v]) => `${k}=${v}`).join(';')
And I want to send the result to the user this way:
if (req.query.d === 'getRandomPlayers') {
  var sessionid = req.body.player_sid
  let user = require("./tmp/userdb/" + sessionid + ".json")
  var current_user = (serialize({
    player_name: user.name
  }));
  res.send("method_id=1665;" + current_user);
}
It should end up like res.send("method_id=1665;" + current_user + thefinalresult);
thefinalresult is where the output of all this should go. current_user and the other stuff are not related to this question.

Assuming an example JSON file inside /tmp/userdb/ has the following structure,
{
  "53874745": {
    "avatar": "372",
    "name": "BILLY",
    "onlinescore": "1",
    "pays": "8758"
  }
}
you could do something like the following:
const { promisify } = require("util");
const fs = require("fs");
const path = require("path");
const readdir = promisify(fs.readdir);
const readFile = promisify(fs.readFile);
async function process(excludedSessionId) {
try {
const entries = [];
// get a list of all `JSON` files
const jsonFiles = await readdir(
path.join(__dirname, "./tmp/userdb/")
).then(
(files) => files.filter(
(file) => path.extname(file) === ".json" && !file.includes(excludedSessionId)
)
);
// iterate through a list of all `JSON` files & read their content
for (const [index, file] of jsonFiles.entries()) {
const content = await readFile(
path.join(__dirname, "./tmp/userdb/", file)
).then(JSON.parse);
// create an object for a new entry
const key = `sid${index}`;
const keyValue = Object.keys(content)[0];
// use the `spread syntax` to include the rest of the
// properties in a new entry
const entry = {
[key]: keyValue,
...content[keyValue],
};
entries.push(entry);
}
console.log(entries[0]);
// {
// sid0: '53874745',
// avatar: '372',
// name: 'BILLY',
// onlinescore: '1',
// pays: '8758'
// }
const result = entries.map((entry) => serialize(entry)).join(";");
console.log(result);
// sid0=53874745;avatar=372;name=BILLY;onlinescore=1;pays=8758;
// sid1=154261758;avatar=480;name=JESSEY;onlinescore=30;pays=8521;
return result;
} catch (error) {
console.error(error);
throw error;
}
}
process("154261742");
Then, if you'd want to use this function in a callback of your route controller, you could do something like the following:
app.get("/user", (req, res) => {
// ...
const excludedSessionId = req.body.player_sid;
process(excludedSessionId)
.then(result => {
res.send(result);
})
.catch(error => {
res.status(500).send("Something went wrong.");
});
});
References:
Spread syntax (...) - MDN
async function - MDN

Related

Async function to scrape subreddits using Cheerio returns undefined

The script by itself works great (entering the url manually, writing a json file using the fs module, running node script_name.js), but within an Express get request it returns undefined.
So I've built a simple frontend to let the user enter the subreddit name to be scraped.
And here's where the problem is:
Express controller
const run = require("../run");
requestPosts: async (req, res) => {
try {
const { subreddit } = req.body;
const response = await run(subreddit);
//console.log(response);
res.json(response);
} catch (error) {
console.error(error);
}
},
Cheerio functions
const axios = require("axios");
const { load } = require("cheerio");
let posts = [];
async function getImage(postLink) {
const { data } = await axios(postLink);
const $ = load(data);
return $("a.post-link").attr("href");
}
async function run(url) {
try {
console.log(url);
const { data } = await axios(url);
const $ = load(data);
$(".thing.linkflair.link").map(async (i, e) => {
const title = $(e)
.find(".entry.unvoted .top-matter .title .title")
.text();
const user = $(e)
.find(".entry.unvoted .top-matter .tagline .author")
.text();
const profileLink = `https://old.reddit.com/user/${user}`;
const postLink = `https://old.reddit.com/${$(e).find("a").attr("href")}`;
// const thumbail = $(e).find("a img").attr("src");
const image = await getImage(postLink);
posts.push({
id: i + 1,
title,
postLink,
image,
user: { user, profileLink },
});
});
const nextPage = $(".next-button a").attr("href");
if (nextPage) {
await run(nextPage);
} else {
return posts;
}
} catch (error) {
console.error(error);
}
}
module.exports = run;
I've tried working with new Promise((resolve, reject) => {}).
I think it's returning undefined because the code isn't synchronized.
(I don't know if that makes sense, I've just started programming.)
.map() is not promise-aware and does not wait for your promises to finish. So, $(".thing.linkflair.link").map() finishes long before any of the asynchronous functions inside its callback do. Thus you try to return posts BEFORE it has been populated.
Passing an async callback to .map() will produce a collection of promises. You can use Promise.all() on those promises to know when they are done, and once you're doing that, you may as well just return each post object rather than pushing into a higher-scoped shared array, thus making the code more self-contained.
I would suggest this code:
async function run(url) {
  try {
    console.log(url);
    const { data } = await axios(url);
    const $ = load(data);
    // `.get()` unwraps the cheerio collection into a plain array of promises
    const posts = await Promise.all($(".thing.linkflair.link").map(async (i, e) => {
      const title = $(e)
        .find(".entry.unvoted .top-matter .title .title")
        .text();
      const user = $(e)
        .find(".entry.unvoted .top-matter .tagline .author")
        .text();
      const profileLink = `https://old.reddit.com/user/${user}`;
      const postLink = `https://old.reddit.com/${$(e).find("a").attr("href")}`;
      // const thumbail = $(e).find("a img").attr("src");
      const image = await getImage(postLink);
      // return a post object
      return {
        id: i + 1,
        title,
        postLink,
        image,
        user: { user, profileLink },
      };
    }).get());
    const nextPage = $(".next-button a").attr("href");
    if (nextPage) {
      const newPosts = await run(nextPage);
      // add these posts to the ones we already have
      posts.push(...newPosts);
    }
    return posts;
  } catch (error) {
    console.error(error);
  }
}
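With this version, run() resolves to the populated array, so the controller's await run(subreddit) receives the posts directly, and the module-level let posts = [] (which would otherwise keep accumulating entries across requests) can be deleted. The .get() call is there because cheerio's .map() returns a cheerio collection rather than a plain array; .get() converts it into the array of promises that Promise.all() expects.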

res.json returns undefined from express server after retrieving data from firebase realtime database

I currently have a realtime database set up that saves a specific user's list of saved jobs for a job-search application, with React on the front end. However, when I try to return that data through res.json, console.logging it prints out undefined. I would appreciate any help or insight on why it is behaving this way and/or possible fixes.
Thanks in advance.
The data looks something like this:
{
  "users": {
    "<userId>": {
      "savedJobs": [{
        "company": "PepsiCo",
        "id": 7693578,
        "location": [{
          "name": "Valhalla, NY"
        }]
      }]
    }
  }
}
It is initialized like so:
var admin = require("firebase-admin");
let db = admin.database();
var userRef = db.ref("users");
Old Backend logic:
app.get("/fetchSavedJobs/:userId", (req, res) => {
console.log("inside fetch saved jobs");
console.log("fetch saved jobs user id " + req.params.userId);
const userId = req.params.userId;
let list = [];
userRef.orderByChild(userId).limitToLast(1).once('value').then((querySnapshot) => {
if(!querySnapshot.numChildren) {
throw new Error("user not in database, no saved jobs");
}
let dataSnapshot;
querySnapshot.forEach((snap) => {
dataSnapshot = snap;
})
if (!dataSnapshot.exists()) { // value may be null, meaning idToFind doesn't exist
throw new Error(`Entry ${userId} not found.`);
}
const jobsList = dataSnapshot.val().savedJobs;
jobsList.forEach((x) => list.push({company: x.company, id: x.id}));
return res.json(list);
})
I've re-edited my back-end to the following:
console.log(userId, typeof userId)
prints "userId_value, string"
app.get("/fetchSavedJobs/:userId", (req, res) => {
const userId = req.params.userId;
var userRef = db.ref("users/" + userId); //initializing userRef
let list = [];
userRef.once("value").then((snapshot) => {
console.log("snapshot val", snapshot.val());
res.json(snapshot.val().savedJobs);
})
Front end:
export const fetchSavedJobs = (userUid) => {
  return async (dispatch) => {
    const fetchData = async () => {
      console.log("fetch data is called");
      const response = await fetch("/fetchSavedJobs/" + userUid, {
        method: "GET",
        headers: {
          'Accept': 'application/json',
          "Content-Type": "application/json",
        },
      });
      if (!response.ok) {
        throw new Error("Fetching saved cart data failed");
      }
    };
    try {
      const savedJobsData = await fetchData();
      console.log("came back from server with saved jobs");
      console.log("retrieved Saved Jobs data" + savedJobsData); // this prints out undefined
    } catch (error) {} // errors are silently swallowed here
  };
};
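One thing worth noting in the front-end code above: fetchData never returns anything, so await fetchData() is always undefined no matter what the server sends back. A minimal sketch of the fix, assuming the backend route responds with JSON:
const fetchData = async () => {
  const response = await fetch("/fetchSavedJobs/" + userUid);
  if (!response.ok) {
    throw new Error("Fetching saved cart data failed");
  }
  // parse the body and hand it back to the caller
  return response.json();
};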

Why the nodejs heap out of memory for creating Excel file with big data?

I am creating an Excel file on the Node.js end and returning base64 data to ReactJS to download the file. On the Node.js end, I use Promise.all to fetch data from a server in chunks and append the data into Excel with
worksheet.addRows(data);
For around 20-30k rows it works fine, but for something like 100k rows I get a heap out of memory error on the Node.js end.
I have increased the memory allocated to Node.js as well, but I get the same error:
node --max_old_space_size=5000 app.js
What am I doing wrong? Any suggestions?
Nodejs
const axios = require('axios');
var excel = require("exceljs");

const workbook = new excel.Workbook();
const worksheet = workbook.addWorksheet("My Sheet");
worksheet.columns = [
  { header: "TicketId", key: "ticketId" },
  { header: "Email", key: 'user_email' },
  { header: "User", key: 'user_name' },
  { header: "Subject", key: "subject" },
  ...//many more headers
];
exports.getTicketData = async (req, res, next) => {
  res.connection.setTimeout(0);
  const { body } = req;
  const token = body.token;
  const organization_id = body.organization_id;
  const server = body.server;
  const sideFilter = body.sideFilter;
  let baseurl = 'url for server end to fetch data';
  if (baseurl) {
    let data = new Array();
    let limit = 3000;
    const promises = [];
    try {
      let count = await getCount(token, limit, organization_id, baseurl, sideFilter);
      for (var i = 1; i <= count; i++) {
        promises.push(getData(i, limit, organization_id, token, baseurl, sideFilter));
      }
      await Promise.all(promises).then((results) => {
      }).catch((e) => {
        throw e;
      });
      var base64File = await writeExcelAndUpload(workbook);
      return res.status(200).json({ file: base64File });
    } catch (err) {
      return res.status(400).json({ type: 'error', msg: 'File not generated please contact support staff' });
    }
  } else {
    return res.status(400).json({ type: 'error', msg: 'please define server name' });
  }
};
let getData = (page, limit, organization_id, token, baseurl, sideFilter) => {
  return new Promise((resolve, reject) => {
    axios.post(baseurl + `/v2/get-export`, {
      page: page,
      organization_id: organization_id,
      per_page: limit,
      filter: "",
      sorted: "",
      ...sideFilter
    }, { headers: { "Authorization": `Bearer ${token}` } }).then(function (response) {
      let dataTemp = response.data.data.data.map((t, i) => {
        return {
          ...t,
          name: t.name,
          ...//many more columns like 70
        }
      });
      worksheet.addRows(dataTemp);
      resolve(true);
    }).catch(function (error) {
      reject(error);
    });
  });
}

let getCount = (token, limit, organization_id, baseurl, sideFilter) => {
  // run an api and return count against limit
}
let writeExcelAndUpload = async (workbook) => {
  const fileBuffer = await workbook.xlsx.writeBuffer();
  let base64File = Buffer.from(fileBuffer).toString('base64');
  base64File = 'data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,' + base64File;
  return base64File;
}
Client side reactjs
exportLink = () => {
  postData = {
    ...
  };
  return axios.post(`${baseurl}/api/ticketing/get-ticket`, postData).then(function (response) {
    const downloadLink = document.createElement("a");
    const fileName = "export.xlsx";
    downloadLink.href = response.data.file;
    downloadLink.download = fileName;
    downloadLink.click();
  }).catch(function (error) {
    throw error;
  });
}
Well, it is kind of expected that you may run out of heap memory when working with that many entries, something like 100k.
I would suggest you start using pagination: instead of holding e.g. 100k entries at once, fetch 1k entries, do what you need with them, then fetch the next 1k, and repeat until you have processed all entries.
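A minimal sketch of that idea, assuming the question's getData is reworked to resolve with the rows instead of calling worksheet.addRows itself; fetching pages sequentially keeps only one chunk alive at a time, and exceljs's streaming WorkbookWriter flushes committed rows to disk rather than holding the whole workbook in memory:
const excel = require("exceljs");

// streaming writer: rows are written out to the file as they are committed
const workbook = new excel.stream.xlsx.WorkbookWriter({ filename: "/tmp/export.xlsx" });
const worksheet = workbook.addWorksheet("My Sheet");
worksheet.columns = [ /* same headers as above */ ];

async function buildExport(token, limit, organization_id, baseurl, sideFilter) {
  const count = await getCount(token, limit, organization_id, baseurl, sideFilter);
  // sequential loop instead of Promise.all: one page in memory at a time
  for (let page = 1; page <= count; page++) {
    const rows = await getData(page, limit, organization_id, token, baseurl, sideFilter);
    for (const row of rows) {
      worksheet.addRow(row).commit(); // committed rows can be released from memory
    }
  }
  worksheet.commit();
  await workbook.commit(); // finalizes /tmp/export.xlsx on disk
}
Once the file is on disk, sending it with res.download() (or piping a read stream) also avoids re-buffering the whole workbook as a base64 data URI.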

How to download all files in an AWS S3 folder to a local machine using node js

I have an S3 bucket which has a folder with some files. I want to download all the files in that folder to a folder on my local machine. I tried it for a single file and it works; how do I download multiple files?
As per the code below, the key folderA has 10 files. I want to download all ten to the localfolder directory which I mentioned in s3.getObject(params).createReadStream().pipe(ws);
My code :
const downloadObject = () => {
  var params = { Bucket: "Sample", Key: "folderA/" };
  const ws = fs.createWriteStream(`${__dirname}/localfolder/`);
  const s3Stream = s3.getObject(params).createReadStream().pipe(ws);
  s3Stream.on("error", (err) => {
    ws.end();
  });
  s3Stream.on("close", () => {
    console.log(`downloaded successfully from s3 at ${new Date()}`);
    ws.end();
  });
};
expected output:
s3 -> bucket/folderA/<10 files>
localmachine -> localfolder/<need all 10 files in local>
There is quite a lot to it.
As for why your snippet fails: Key: "folderA/" points at the folder placeholder rather than an actual file, and fs.createWriteStream(`${__dirname}/localfolder/`) targets a directory; each object needs its own key and its own destination file path.
Firstly you would need to list all buckets, then loop over all the buckets (if you only want one, fine), creating a local directory if one is not found, etc.
Then find out all the files in the bucket, loop over them, and for each path get the object and store it.
Here is how I would do it with the minio js client (the calls would be the same); tweak it to your needs, obviously the folder paths would be different.
/**
 * S3 images pull script
 */
const fs = require('fs')
const path = require('path')
const util = require('util')

const readFile = util.promisify(fs.readFile)
const writeFile = util.promisify(fs.writeFile)
//
const rootPath = path.join(__dirname, '..')
const publicPath = path.join(rootPath, 'public', 'images')
//
require('dotenv').config({
  path: path.join(rootPath, '.env')
})

// minio client S3
const s3 = new (require('minio')).Client({
  endPoint: process.env.S3_HOST,
  port: parseInt(process.env.S3_PORT, 10),
  useSSL: process.env.S3_USE_SSL === 'true',
  accessKey: process.env.S3_ACCESS_KEY,
  secretKey: process.env.S3_ACCESS_SECRET,
  region: process.env.S3_REGION
})

/**
 * Functions
 */
// create nested local directories, one path segment at a time
const mkdir = dirPath => {
  dirPath.split(path.sep).reduce((prevPath, folder) => {
    const currentPath = path.join(prevPath, folder, path.sep);
    if (!fs.existsSync(currentPath)) {
      fs.mkdirSync(currentPath);
    }
    return currentPath
  }, '')
}

// list objects in bucket
const listObjects = bucket => new Promise((resolve, reject) => {
  //
  bucket.objects = []
  bucket.total_objects = 0
  bucket.total_size = 0
  // listObjectsV2 returns a readable stream of object metadata
  const stream = s3.listObjectsV2(bucket.name, '', true)
  //
  stream.on('data', obj => {
    if (obj && (obj.name || obj.prefix)) {
      bucket.objects.push(obj)
      bucket.total_objects++
      bucket.total_size = bucket.total_size + obj.size
    }
  })
  //
  stream.on('end', () => resolve(bucket))
  stream.on('error', e => reject(e))
})

// get an object's data
const getObject = (bucket, name) => new Promise((resolve, reject) => {
  s3.getObject(bucket, name, (err, stream) => {
    if (err) return reject(err)
    //
    let chunks = []
    stream.on('data', chunk => chunks.push(chunk))
    stream.on('end', () => resolve(Buffer.concat(chunks || [])))
    stream.on('error', e => reject(e))
  })
})

/**
 *
 */
async function main() {
  // get buckets
  console.log(`Fetching buckets from: ${process.env.S3_HOST}`)
  let buckets = []
  try {
    buckets = await s3.listBuckets()
    console.log(buckets.length + ' buckets found')
  } catch (e) {
    return console.error(e)
  }
  // create local folders if not exists
  console.log(`Creating local folders in ./api/public/images/ if not exists`)
  try {
    for (let bucket of buckets) {
      //
      bucket.local = path.join(publicPath, bucket.name)
      try {
        await fs.promises.access(bucket.local)
      } catch (e) {
        if (e.code === 'ENOENT') {
          console.log(`Creating local folder: ${bucket.local}`)
          await fs.promises.mkdir(bucket.local)
        } else
          bucket.error = e.message
      }
    }
  } catch (e) {
    return console.error(e)
  }
  // fetch all bucket objects
  console.log(`Populating bucket objects`)
  try {
    for (let bucket of buckets) {
      bucket = await listObjects(bucket)
    }
  } catch (e) {
    console.log(e)
  }
  // loop over buckets and download all objects
  try {
    for (let bucket of buckets) {
      console.log(`Downloading bucket: ${bucket.name}`)
      // loop over and download
      for (let object of bucket.objects) {
        // if object name has prefix
        let dir = path.dirname(object.name)
        if (dir !== '.') {
          try {
            await fs.promises.access(path.join(bucket.local, dir))
          } catch (e) {
            if (e.code === 'ENOENT') {
              console.log(`Creating local folder: ${path.join(bucket.local, dir)}`)
              mkdir(path.join(bucket.local, dir))
            }
          }
        }
        //
        console.log(`Downloading object[${bucket.name}]: ${object.name}`)
        await writeFile(path.join(bucket.local, object.name), await getObject(bucket.name, object.name))
      }
    }
    console.log(`Completed!`)
  } catch (e) {
    console.log(e)
  }
}

main()
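Since the question itself targets AWS S3 rather than minio, here is a minimal sketch of the same idea with the AWS SDK v3 (@aws-sdk/client-s3), assuming the Sample bucket and folderA/ prefix from the question; it lists the keys under the prefix and streams each object into a local file:
const fs = require("fs");
const path = require("path");
const { pipeline } = require("stream/promises");
const { S3Client, ListObjectsV2Command, GetObjectCommand } = require("@aws-sdk/client-s3");

const s3 = new S3Client({ region: "us-east-1" }); // assumed region

async function downloadFolder(bucket, prefix, localDir) {
  fs.mkdirSync(localDir, { recursive: true });
  // note: ListObjectsV2 returns at most 1000 keys per call; follow
  // NextContinuationToken for larger folders
  const listed = await s3.send(new ListObjectsV2Command({ Bucket: bucket, Prefix: prefix }));
  for (const { Key } of listed.Contents ?? []) {
    if (Key.endsWith("/")) continue; // skip the folder placeholder itself
    const { Body } = await s3.send(new GetObjectCommand({ Bucket: bucket, Key }));
    // stream the object body straight into a local file
    await pipeline(Body, fs.createWriteStream(path.join(localDir, path.basename(Key))));
  }
}

downloadFolder("Sample", "folderA/", path.join(__dirname, "localfolder"))
  .then(() => console.log("done"))
  .catch(console.error);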

Finding the differences between 2 files in nodejs

I have 2 json files which have static data.
File A.json
{
  "data": ["A","B","C","D"]
}
File B.json
{
  "data": ["A","B","C"]
}
Now I want to find the difference between these two files.
I tried using this code to get files by name:
const express = require("express");
const jsonfile = require("jsonfile");
const app = express();
const fs = require("fs");
app.use(express.json());
const BASEPATH = "./data"
app.get("/api/v1/data/:name", async (req,res) => {
const fileName = req.params.name;
const filePath = `${BASEPATH}/${fileName}.json`
try {
const readData = await jsonfile.readFile(filePath);
res.status(200).json(readData);
}catch (e) {
res.status(404).send(e);
}
});
The URL is : localhost:3000/api/v1/data/A
To add data I used,
app.put("/api/v1/data",(req,res) => {
const fileName = req.body.name;
const data = req.body.data;
const filePath = `${BASEPATH}/${fileName}.json`
fs.exists(filePath, async exists => {
if(exists) {
try {
await jsonfile.writeFile(filePath,{data:data});
res.status(200).send();
}catch(e) {
res.send(500).json(e);
}
} else {
try {
await jsonfile.writeFile(filePath,{data:data});
res.status(201).send();
}catch(e) {
res.send(500).json(e);
}
}
})
});
Example of added data:
{ "name":"C", "data":["A","B","Z"]}
URL is: localhost:3000/api/v1/data
You can try something like this:
app.get("/api/v1/data/diff", async (req,res) => {
try {
const file1 = req.query.file1;
const file2 = req.query.file2;
console.log(file1,file2)
if(file1 === undefined || file2 === undefined ) {
res.status(401).send("BAD REQUEST. SEND FILE NAMES");
return;
} else {
const filePath1 = `${BASEPATH}/${file1}.json`
const filePath2 = `${BASEPATH}/${file2}.json`
const file1Data = await jsonfile.readFile(filePath1);
const file2Data = await jsonfile.readFile(filePath2);
let difference = file1Data.data.filter(x => !file2Data.data.includes(x));
let difference2 = file2Data.data.filter(x => !file1Data.data.includes(x));
res.status(200).json({diff: [...difference,...difference2]});
}
}catch(e) {
res.status(500).json(e);
}
});
You have to use a URL like: localhost:3000/api/v1/data/diff?file1=A&file2=B
The output will be:
{
  "diff": [
    "D"
  ]
}
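Two small notes on this. First, register the /api/v1/data/diff route before /api/v1/data/:name, otherwise Express will match "diff" as a :name value and the handler above is never reached. Second, the filter/includes approach is O(n*m); for larger arrays, Sets compute the same symmetric difference in roughly linear time, as in this sketch:
const setA = new Set(file1Data.data);
const setB = new Set(file2Data.data);
// symmetric difference via constant-time Set lookups
const diff = [
  ...file1Data.data.filter(x => !setB.has(x)),
  ...file2Data.data.filter(x => !setA.has(x)),
];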
