Node JS get CSV

I have a Node.js project in which I fetch JSON data over HTTP using the node-fetch module.
This is the way I have found to use node-fetch with async/await; suggestions for improving this function are welcome, as I am new to the module.
This is my code where I read the information:
const fetch = require('node-fetch');

(async () => {
    try {
        const res = await fetch('https://jsonplaceholder.typicode.com/users');
        const headerDate = res.headers && res.headers.get('date') ? res.headers.get('date') : 'no response date';
        const users = await res.json();
        for (const user of users) {
            console.log(`Got user with id: ${user.id}, name: ${user.name}`);
        }
    } catch (err) {
        console.log(err.message); // can be console.error
    }
})();
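One small improvement worth considering, as a sketch rather than the definitive way: node-fetch does not reject on HTTP error statuses (4xx/5xx), only on network failures, so it can be worth checking res.ok before parsing the body.

const fetch = require('node-fetch');

(async () => {
    try {
        const res = await fetch('https://jsonplaceholder.typicode.com/users');
        // node-fetch only rejects on network failure, not on 4xx/5xx,
        // so check the status explicitly before reading the body
        if (!res.ok) {
            throw new Error(`HTTP ${res.status} ${res.statusText}`);
        }
        const users = await res.json();
        for (const user of users) {
            console.log(`Got user with id: ${user.id}, name: ${user.name}`);
        }
    } catch (err) {
        console.error(err.message);
    }
})();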
My problem: how can I extract all the information to CSV files with a limit on the number of lines? That is, if the CSV has a limit of 10 lines (the limit can vary) and the JSON information occupies 30 lines, 3 CSVs would be created to store all the information. I have added the json-2-csv module, but I don't know how to use it, or whether this module is necessary or something else would be better.

const { Parser } = require("json2csv");
const fetch = require("node-fetch");
const fs = require("fs");

const csvLimit = 3;

const getJson = async () => {
    const response = await fetch("https://jsonplaceholder.typicode.com/users");
    const responseJson = await response.json();
    return responseJson;
};

const jsonToCsv = async () => {
    const json = await getJson();
    const json2csvParser = new Parser();
    let i = 0, j = 0;
    while (j < json.length) {
        const temp = [];
        // stop at the end of the data so we don't push undefined entries
        for (j = i * csvLimit; j < (i + 1) * csvLimit && j < json.length; j++) {
            temp.push(json[j]);
        }
        const csv = json2csvParser.parse(temp);
        fs.writeFileSync(`file${i}.csv`, csv);
        i++;
    }
};

jsonToCsv();
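An alternative that some may find easier to read is to split the array into chunks first and then write one file per chunk. A minimal sketch reusing getJson, csvLimit, and the Parser from above (the chunk helper is my own addition, not part of json2csv):

const chunk = (arr, size) =>
    Array.from({ length: Math.ceil(arr.length / size) }, (_, i) =>
        arr.slice(i * size, (i + 1) * size));

const jsonToCsvChunked = async () => {
    const json = await getJson();
    const json2csvParser = new Parser();
    // one CSV file per chunk of csvLimit records
    chunk(json, csvLimit).forEach((part, i) => {
        fs.writeFileSync(`file${i}.csv`, json2csvParser.parse(part));
    });
};

jsonToCsvChunked();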
If you want only specific fields in the CSV file, you can pass the fields as a parameter like this:
const json2csvParser = new Parser({fields})
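For example, to keep only a few of the columns returned by the placeholder users endpoint (the field names below are simply ones that endpoint happens to return):

const fields = ["id", "name", "email"];
const json2csvParser = new Parser({ fields });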

I used the flat package to extract the field names from the keys of the first record of the JSON and then used the json-2-csv package to convert from JSON to CSV.
const converter = require("json-2-csv");
const fetch = require("node-fetch");
const fs = require("fs");
const flatten = require("flat");

const maxRecords = 3;

const getJson = async () => {
    const response = await fetch("https://jsonplaceholder.typicode.com/users");
    const responseJson = await response.json();
    return responseJson;
};

const convertToCSV = async () => {
    const json = await getJson();
    const keys = Object.keys(flatten(json[0]));
    const options = {
        keys: keys
    };
    converter.json2csv(json, json2csvCallback, options);
};

const json2csvCallback = function (err, csv) {
    if (err) throw err;
    const lines = csv.split("\n");
    const headers = lines.slice(0, 1);
    // start at 1 to skip the header row; use maxRecords instead of a hardcoded 3
    for (let i = 1; i < lines.length; i += maxRecords) {
        const dataOut = headers.concat(lines.slice(i, i + maxRecords)).join("\n");
        const id = Math.floor(i / maxRecords) + 1;
        fs.writeFileSync("data" + id + ".csv", dataOut);
    }
};

convertToCSV();
Here's one of the files opened in Excel.

Related

Batching in nodejs

I'm very new to Node.js. I have a file which contains only a single column of job IDs. I iterate over the job IDs one by one, sending a request to a service which gives me the status of the job in JSON format. I read the JSON response, fetch a few values from it, and write them to the database. I want to write when the counter reaches the 100th job ID (I am expecting more than 100 job IDs in the file, and the count is dynamic).
For example, if I have 234 records, it will write 3 times: the first two writes with 100 records each and the third with 34. The jobStatusMetrics array should be cleared after every write.
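The core pattern being asked for is "accumulate, flush every N, then flush the remainder". A minimal sketch of just that pattern, independent of the HTTP and database details (batchSize and writeBatch are placeholder names, not part of any library):

const batchSize = 100;

async function processInBatches(items, writeBatch) {
    let batch = [];
    for (const item of items) {
        batch.push(item);
        if (batch.length === batchSize) {
            await writeBatch(batch); // e.g. the InfluxDB write
            batch = [];              // clear the array after every write
        }
    }
    if (batch.length > 0) {
        await writeBatch(batch);     // remainder (e.g. the last 34 of 234)
    }
}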
const fileStatusprocess = require('../controller/readResultFile');
const config = require('../config/config');
const https = require('https');
const uuid = require('uuid-random');

async function jobProcesser() {
    // note the escaped backslashes; "C:\Support\result.csv" would be mangled by JS escape handling
    const iterator = await fileStatusprocess.processResultFile("C:\\Support\\result.csv");
    console.log('Total jobs are', iterator[0].length);
    let counter = 0;
    for (let i = 0; i < iterator[0].length; i++) {
        counter++;
        const formJobStatusURL = "https://localhost:8091/api/job/" + iterator[0][i] + "/status";
        const option = {
            method: 'GET',
            headers: {
                'X-Message-Created-Ts': `${new Date().toISOString()}`,
                'X-Transaction-Created-Ts': `${new Date().toISOString()}`,
                'X-User-Id': 'PerformanceExecuter',
                'X-Client-Id': `${uuid()}`,
                'X-Message-Id': `${uuid()}`,
                'X-Transaction-Id': `${uuid()}`,
                'Content-Type': 'application/json'
            }
        };
        let content = '';
        let reqGet = https.request(formJobStatusURL, option, function (response) {
            response.on('data', function (data) {
                content += data;
            });
            response.on('end', function () {
                const jsonPayload = JSON.parse(content);
                const jobStatusMetrics = {};
                jobStatusMetrics.job_id = jsonPayload.id;
                jobStatusMetrics.status = jsonPayload.status;
                jobStatusMetrics.initiatedBy = jsonPayload.initiatedBy;
                jobStatusMetrics.product = jsonPayload.product;
                jobStatusMetrics.operation = jsonPayload.operation;
                jobStatusMetrics.startTimestamp = jsonPayload.startTimestamp;
                jobStatusMetrics.endTimestamp = jsonPayload.endTimestamp;
                jobStatusMetrics.totalRecords = jsonPayload.file.totalRecords;
                jobStatusMetrics.failedRecords = jsonPayload.file.totalFailedRecords;
                jobStatusMetrics.sucessRecords = jsonPayload.file.totalSuccessRecords;
                jobStatusMetrics.inprogressRecords = jsonPayload.file.totalInProgressRecords;
                jobStatusMetrics.sucessStatus = jsonPayload.results.successFileAvailable;
                jobStatusMetrics.failureStatus = jsonPayload.results.failureFileAvailable;
                jobStatusMetrics.uploadJob = jsonPayload.actionsAvailable.dataUploadAllowed;
                jobStatusMetrics.abortJob = jsonPayload.actionsAvailable.abortJob;
                if (counter % 100 == 0) {
                    console.log('Writing to the database');
                    // logic for influxdb writer
                }
                //console.log(jobStatusMetrics);
            });
        });
        reqGet.end();
    }
}

jobProcesser();
Rather than using the https module for HTTP requests, please use another library like phin-retry. Please check the code below; I've added the batching logic. Hope this works.
const rp = require('phin-retry');
const fileStatusprocess = require('../controller/readResultFile');
const config = require('../config/config');
const uuid = require('uuid-random');
const influxDBWrite = require('../controller/influxdbWritter');

async function checkFileStatus() {
    const iterator = await fileStatusprocess.processResultFile("C:\\Support\\result.csv");
    const jobIds = iterator[0];
    console.log(jobIds);
    let jobDetailsList = [];
    for (let i = 0; i < jobIds.length; i++) {
        console.log('JobId', i);
        const jobId = jobIds[i];
        const formJobStatusURL = "https://localhost:8091/api/job/" + jobId + "/status";
        const jobDetails = await rp.get({
            url: formJobStatusURL,
            headers: {
                'X-Message-Created-Ts': `${new Date().toISOString()}`,
                'X-Transaction-Created-Ts': `${new Date().toISOString()}`,
                'X-User-Id': 'PerformanceExecuter',
                'X-Client-Id': `${uuid()}`,
                'X-Message-Id': `${uuid()}`,
                'X-Transaction-Id': `${uuid()}`,
                'Content-Type': 'application/json'
            }
        });
        jobDetailsList.push(jobDetails);
        if ((i + 1) % 5 === 0) {
            await saveResultsToInfux(jobDetailsList);
            jobDetailsList = [];
        }
    }
    // flush the remainder, if any, so the last partial batch is not lost
    if (jobDetailsList.length > 0) {
        await saveResultsToInfux(jobDetailsList);
    }
}

async function saveResultsToInfux(jobDetailsList) {
    const metrics = [];
    for (let i = 0; i < jobDetailsList.length; i++) {
        console.log('Metrics', i);
        const jobDetail = jobDetailsList[i];
        const tags = {
            Id: jobDetail.id,
            Status: jobDetail.status.overall,
            Product: jobDetail.product,
            Entity: jobDetail.entity,
            Operation: jobDetail.operation
        };
        const fields = {
            startTimestamp: jobDetail.startTimestamp,
            endTimestamp: jobDetail.endTimestamp,
            totalRecords: jobDetail.file.totalRecords,
            totalFailedRecords: jobDetail.file.totalFailedRecords,
            totalSuccessRecords: jobDetail.file.totalSuccessRecords,
            totalInProgressRecords: jobDetail.file.totalInProgressRecords,
            successFileAvailable: jobDetail.results.successFileAvailable,
            failureFileAvailable: jobDetail.results.failureFileAvailable,
            dataUploadAllowed: jobDetail.actionsAvailable.dataUploadAllowed,
            abortJob: jobDetail.actionsAvailable.abortJob,
        };
        metrics.push({
            measurement: 'BulkData',
            tags,
            fields
        });
    }
    influxDBWrite.dbWritter(metrics);
}

checkFileStatus();

Breaking functions down in Nodejs

In my attempt to become a better developer I'm trying to refactor the below Node code into 2 (or even 3) separate functions.
The code simply takes in a file, parses some of the data, and rewrites the parsed data to another file.
So my question is, can the code below be broken down to 2 functions (one function to read and parse, the other to write)? Can it be broken down even further to 3 functions (one to read and parse, one to write, one that calls the other two)?
I have attempted to refactor the code into 2 functions but am not having any luck.
const neatCsv = require('neat-csv');
const fs = require('fs');
const ObjectsToCsv = require('objects-to-csv');

fs.readFile('./someFile.csv', async (err, data) => {
    if (err) {
        console.error(err);
        return;
    }
    const neat = await neatCsv(data);
    const sortArray = neat.filter((obj) => obj.Type !== 'Name');
    const priceSortArray = sortArray.filter((obj) => +obj.Price > 5);
    const filterSortArray = priceSortArray.sort((a, b) => parseFloat(b.IV) - parseFloat(a.IV));
    (async () => {
        const csv = new ObjectsToCsv(filterSortArray);
        // Save to file:
        await csv.toDisk('./someOtherFile.csv');
    })();
});
Please try the approach below. I hope this is what you were asking.
const neatCsv = require('neat-csv');
const fs = require('fs');
const ObjectsToCsv = require('objects-to-csv');

const readFile = (filePath) => {
    fs.readFile(filePath, async (err, data) => {
        if (err) {
            console.error(err);
            return;
        }
        const neat = await neatCsv(data);
        const sortArray = neat.filter((obj) => obj.Type !== 'Name');
        const priceSortArray = sortArray.filter((obj) => +obj.Price > 5);
        const filterSortArray = priceSortArray.sort((a, b) => parseFloat(b.IV) - parseFloat(a.IV));
        saveFile(filterSortArray);
    });
};

// saveFile must be declared async, since it awaits csv.toDisk
const saveFile = async (filterSortArray) => {
    const csv = new ObjectsToCsv(filterSortArray);
    // Save to file:
    await csv.toDisk('./someOtherFile.csv');
};

readFile('./someFile.csv');
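To go one step further and split it into three functions, as the question asks (one to read and parse, one to write, one that calls the other two), a sketch using fs.promises might look like this (same hypothetical file names as above):

const neatCsv = require('neat-csv');
const fsp = require('fs').promises;
const ObjectsToCsv = require('objects-to-csv');

// 1. read and parse
const readAndParse = async (filePath) => {
    const data = await fsp.readFile(filePath);
    const neat = await neatCsv(data);
    return neat
        .filter((obj) => obj.Type !== 'Name')
        .filter((obj) => +obj.Price > 5)
        .sort((a, b) => parseFloat(b.IV) - parseFloat(a.IV));
};

// 2. write
const writeCsv = async (rows, outPath) => {
    await new ObjectsToCsv(rows).toDisk(outPath);
};

// 3. orchestrate
const main = async () => {
    const rows = await readAndParse('./someFile.csv');
    await writeCsv(rows, './someOtherFile.csv');
};

main().catch(console.error);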

How to set a value to a variable from a csv file (node.js)

I am trying to return a random value from a list of numbers stored in a CSV file. However, when I try to parse the CSV and set a value, a promise is returned instead of a value.
const fs = require('fs')

async function getRemainingItem() {
    await fs.readFileSync('remaining-items.csv', 'utf8', (err, data) => {
        if (err) {}
        let linesExceptFirst = data.split('\n').slice(1)
        let num = Math.floor(Math.random() * linesExceptFirst.length)
        let linesArr = linesExceptFirst.map(line => line.split(','))
        return linesArr[num][0]
    });
}

let item = getRemainingItem()
console.log(item)
How can I return a random number from a csv that contains a list of numbers?
You are using the synchronous readFileSync method, which does not take a callback and does not need await; it returns the file contents directly:
const fs = require('fs')

function getRemainingItem() {
    const data = fs.readFileSync('remaining-items.csv', 'utf8');
    const linesExceptFirst = data.split('\n').slice(1);
    const num = Math.floor(Math.random() * linesExceptFirst.length);
    const linesArr = linesExceptFirst.map(line => line.split(','));
    return linesArr[num][0];
}

const item = getRemainingItem();
console.log(item);
If you want the file to be read asynchronously, without blocking the event loop, you can do it like this:
const fs = require('fs');
const fsp = fs.promises;

async function getRemainingItem() {
    const data = await fsp.readFile('remaining-items.csv', 'utf8');
    const linesExceptFirst = data.split('\n').slice(1);
    const num = Math.floor(Math.random() * linesExceptFirst.length);
    const linesArr = linesExceptFirst.map(line => line.split(','));
    return linesArr[num][0];
}

async function main() {
    const item = await getRemainingItem();
    console.log(item);
}

main();
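If the CSV were too large to hold in memory, a streaming variant using the built-in readline module is an option. Here is a sketch using reservoir sampling to pick one line uniformly at random without loading the whole file (same file name assumed as above; requires a reasonably recent node for async iteration over readline):

const fs = require('fs');
const readline = require('readline');

async function getRandomLine(filePath) {
    const rl = readline.createInterface({
        input: fs.createReadStream(filePath),
        crlfDelay: Infinity
    });
    let chosen = null;
    let count = 0;
    let first = true;
    for await (const line of rl) {
        if (first) { first = false; continue; } // skip the header row
        count++;
        // reservoir sampling: replace the pick with probability 1/count,
        // which leaves every line equally likely once the file is done
        if (Math.random() < 1 / count) {
            chosen = line;
        }
    }
    return chosen === null ? null : chosen.split(',')[0];
}

getRandomLine('remaining-items.csv').then(console.log);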

How to return data from loop in order in node

I am creating a web scraper that scrapes all of the movies coming out over the next year from this site (https://www.imdb.com/movies-coming-soon/). It loops through an array of links that contain all the movies for each month of the next year. It's working, but the only problem is that it's not returning them in order due to Node.js's asynchronous behavior. How do I get it to loop through the array and return the data in order?
I've tried to make a callback function, but I don't know where it would go.
const request = require('request')
const cheerio = require('cheerio')

const movieArray = [ '/movies-coming-soon/2019-09/',
    '/movies-coming-soon/2019-10/',
    '/movies-coming-soon/2019-11/',
    '/movies-coming-soon/2019-12/',
    '/movies-coming-soon/2020-01/',
    '/movies-coming-soon/2020-02/',
    '/movies-coming-soon/2020-03/',
    '/movies-coming-soon/2020-04/',
    '/movies-coming-soon/2020-05/',
    '/movies-coming-soon/2020-06/',
    '/movies-coming-soon/2020-07/',
    '/movies-coming-soon/2020-08/' ]

for (let i = 0; i < movieArray.length; i++) {
    request.get('https://www.imdb.com' + movieArray[i], (err, res, body) => {
        if (!err && res.statusCode == 200) {
            console.log(res.request.href)
            const $ = cheerio.load(body)
            //console.log(next)
            $('h4').each((i, v) => {
                const date = $(v).text()
                console.log(date)
            })
        }
    })
}
I'm expecting it to return the data in order, instead of in an order determined by how fast each response comes back due to Node's asynchronous behavior.
It's the classic async-in-a-for-loop issue, as explained at https://lavrton.com/javascript-loops-how-to-handle-async-await-6252dd3c795/. Below is a solution:
// const request = require('request')
const request = require('request-promise');
const cheerio = require('cheerio');

const movieArray = [
    '/movies-coming-soon/2019-09/',
    '/movies-coming-soon/2019-10/',
    '/movies-coming-soon/2019-11/',
    '/movies-coming-soon/2019-12/',
    '/movies-coming-soon/2020-01/',
    '/movies-coming-soon/2020-02/',
    '/movies-coming-soon/2020-03/',
    '/movies-coming-soon/2020-04/',
    '/movies-coming-soon/2020-05/',
    '/movies-coming-soon/2020-06/',
    '/movies-coming-soon/2020-07/',
    '/movies-coming-soon/2020-08/',
];

async function processMovieArray(array) {
    // awaiting inside for...of keeps the requests strictly in order
    for (const item of array) {
        await getMovie(item);
    }
    console.log('Done');
}

async function getMovie(item) {
    const options = {
        method: 'GET',
        uri: 'https://www.imdb.com' + item,
    };
    // request-promise resolves with the response body by default
    const body = await request(options);
    const $ = cheerio.load(body);
    $('h4').each((i, v) => {
        const date = $(v).text();
        console.log(date);
    });
}

processMovieArray(movieArray);
The low-tech way that deviates least from your current code is to use the index of your for loop to populate a results array. Since let in the for loop makes a separate i for each iteration, we can use that index inside the async callback to reference the right spot in the results array. Then you also use a cntr to know when all the results are done:
const request = require('request');
const cheerio = require('cheerio');

// polyfill Array.prototype.flat for older versions of node
if (!Array.prototype.flat) {
    Array.prototype.flat = function() {
        return this.reduce((acc, val) => acc.concat(val), []);
    }
}

const movieArray = [ '/movies-coming-soon/2019-09/',
    '/movies-coming-soon/2019-10/',
    '/movies-coming-soon/2019-11/',
    '/movies-coming-soon/2019-12/',
    '/movies-coming-soon/2020-01/',
    '/movies-coming-soon/2020-02/',
    '/movies-coming-soon/2020-03/',
    '/movies-coming-soon/2020-04/',
    '/movies-coming-soon/2020-05/',
    '/movies-coming-soon/2020-06/',
    '/movies-coming-soon/2020-07/',
    '/movies-coming-soon/2020-08/' ];

let results = [];
let cntr = 0;
for (let i = 0; i < movieArray.length; i++) {
    request.get('https://www.imdb.com' + movieArray[i], (err, res, body) => {
        ++cntr;
        if (!err && res.statusCode == 200) {
            console.log(res.request.href)
            const $ = cheerio.load(body)
            let textArray = [];
            $('h4').each((i, v) => {
                textArray.push($(v).text());
            });
            results[i] = textArray;
        }
        if (cntr === movieArray.length) {
            // all results are done now
            let allResults = results.flat();
        }
    })
}
A slightly more elegant way is to switch over to promises and let the promise infrastructure keep everything in order for you:
const rp = require('request-promise');
const cheerio = require('cheerio');

// polyfill Array.prototype.flat for older versions of node
if (!Array.prototype.flat) {
    Array.prototype.flat = function() {
        return this.reduce((acc, val) => acc.concat(val), []);
    }
}

const movieArray = [ '/movies-coming-soon/2019-09/',
    '/movies-coming-soon/2019-10/',
    '/movies-coming-soon/2019-11/',
    '/movies-coming-soon/2019-12/',
    '/movies-coming-soon/2020-01/',
    '/movies-coming-soon/2020-02/',
    '/movies-coming-soon/2020-03/',
    '/movies-coming-soon/2020-04/',
    '/movies-coming-soon/2020-05/',
    '/movies-coming-soon/2020-06/',
    '/movies-coming-soon/2020-07/',
    '/movies-coming-soon/2020-08/' ];

Promise.all(movieArray.map(path => {
    return rp('https://www.imdb.com' + path).then(body => {
        const $ = cheerio.load(body);
        let textArray = [];
        $('h4').each((i, v) => {
            textArray.push($(v).text());
        });
        return textArray;
    }).catch(err => {
        // ignore errors on urls that didn't work
        // so we can get the rest of the results without aborting
        console.log(err);
        return undefined;
    });
})).then(results => {
    // flatten the two level array and remove empty items
    let allResults = results.flat().filter(item => !!item);
    console.log(allResults);
}).catch(err => {
    console.log(err);
});
FYI, I tested the 2nd version in node.js version 10.16.0 and it works. (Array.prototype.flat is built into node 11 and later, so the polyfill is only needed on older versions.)

How do you pipe to multiple streams? [duplicate]

This question already has answers here:
How to pipe one readable stream into two writable streams at once in Node.js?
(3 answers)
Closed 4 years ago.
I'm new to Node and I'm trying to download a file, hash it, then save it to disk using the hash as the file name. I'm getting correct hashes (I think) but the files are blank.
const fs = require("fs-extra")
const fetch = require("node-fetch")
const crypto = require('crypto')
const response = await fetch(url)
const sha256 = crypto.createHash("sha256")
sha256.setEncoding('hex')
response.body.pipe(sha256)
await new Promise(resolve => response.body.on("end", resolve))
sha256.end()
const hash = sha256.read()
const file = fs.createWriteStream(hash + ".jpg")
response.body.pipe(file)
The files are blank because by the time you attach the second pipe, the response stream has already ended, so nothing flows into the write stream. My trick for resolving your problem is to store the file under a unique name first (using the current timestamp as the name); you can then hash the stream from the response and rename the file.
I've tested this code and it's working well:
const fs = require("fs-extra")
const path = require('path');
const fetch = require("node-fetch")
const crypto = require('crypto')

const downloadImage = async (url) => {
    try {
        const response = await fetch(url);
        /** Store file with name current timestamp */
        const filename = "IMG_" + Date.now() + ".jpg";
        const dest = path.join(__dirname, filename);
        response.body.pipe(fs.createWriteStream(dest))
        /** Hash file */
        const sha256 = crypto.createHash("sha256")
        sha256.setEncoding('hex')
        response.body.pipe(sha256)
        await new Promise(resolve => response.body.on("end", resolve))
        sha256.end()
        const hash = sha256.read()
        /** Rename file with hash value */
        await fs.rename(dest, path.join(__dirname, hash + ".jpg"))
    } catch (err) {
        console.log(err);
    }
}

const url = "https://i0.wp.com/wptavern.com/wp-content/uploads/2016/07/stack-overflow.png?ssl=1";
downloadImage(url);
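One caveat worth noting about the code above: the 'end' event on the response stream does not guarantee that the write stream has flushed its last bytes to disk, so the rename could in principle race the file write. A more defensive sketch (downloadImageSafe is a hypothetical name) waits for the write stream's 'finish' event before renaming:

const fs = require('fs-extra');
const path = require('path');
const fetch = require('node-fetch');
const crypto = require('crypto');

const downloadImageSafe = async (url) => {
    const response = await fetch(url);
    const dest = path.join(__dirname, 'IMG_' + Date.now() + '.jpg');
    const fileStream = fs.createWriteStream(dest);
    const sha256 = crypto.createHash('sha256');
    sha256.setEncoding('hex');
    response.body.pipe(fileStream);
    response.body.pipe(sha256);
    // wait until the bytes are actually flushed to disk, not just
    // until the response stream has ended
    await new Promise((resolve, reject) => {
        fileStream.on('finish', resolve);
        fileStream.on('error', reject);
    });
    sha256.end();
    const hash = sha256.read();
    await fs.rename(dest, path.join(__dirname, hash + '.jpg'));
};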
But you can create a utility function for hashing a stream (the response you received); this is my recommendation for your code:
const fs = require("fs-extra")
const path = require('path');
const fetch = require("node-fetch")
const crypto = require('crypto')

function streamHash(stream, algorithm = 'md5') {
    return new Promise((resolve, reject) => {
        const shasum = crypto.createHash(algorithm);
        stream.on('data', function (data) {
            shasum.update(data);
        });
        stream.on('end', function () {
            const hash = shasum.digest('hex');
            return resolve(hash);
        });
        // a try/catch cannot catch asynchronous stream errors;
        // listen for the 'error' event instead
        stream.on('error', reject);
    });
}

const downloadImage = async (url) => {
    try {
        const response = await fetch(url);
        /** Store file with name current timestamp */
        const filename = "IMG_" + Date.now() + ".jpg";
        const dest = path.join(__dirname, filename);
        response.body.pipe(fs.createWriteStream(dest))
        /** Hash file */
        const hash = await streamHash(response.body, 'sha256');
        /** Rename file with hash value */
        await fs.rename(dest, path.join(__dirname, hash + ".jpg"))
    } catch (err) {
        console.log(err);
    }
}

const url = "https://i0.wp.com/wptavern.com/wp-content/uploads/2016/07/stack-overflow.png?ssl=1";
downloadImage(url);
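Since streamHash accepts any readable stream, the same utility also works for hashing a local file; for example (the file name here is just for illustration):

streamHash(fs.createReadStream('./someImage.jpg'), 'sha256')
    .then(hash => console.log('sha256:', hash))
    .catch(console.error);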
