I'm trying to download a .xlsx file from a website through web scraping. I've completed the entire process up to the point where I access the temporary URL that the website generates for the download.
When I open the file url in the browser, the download starts automatically (as shown in the image).
excel file download
The problem is that I need to parse this file so I can send it to my front-end later. When I create the file using fs.createWriteStream('result.xlsx') and then populate it with res.pipe(fileStream), the generated file is always empty.
Here's my full code:
const https = require("https");
const fs = require("fs");
const path = require("path");
const xlsx = require("node-xlsx");
/**
 * Downloads `url` over HTTPS, pipes the response into "result.xlsx" and
 * reports back through `callback`.
 *
 * NOTE(review): `callback` is invoked from two places with different
 * arguments: once with the first element of the parsed workbook and once,
 * on the write stream's "close" event, with the base name of the URL.
 *
 * @param {string} url - Address of the file to download.
 * @param {Function} callback - Receives `obj[0]` and, later, `filename`.
 */
function download(url, callback) {
const filename = path.basename(url);
const req = https.get(url, function (res) {
const fileStream = fs.createWriteStream("result.xlsx");
res.pipe(fileStream);
// NOTE(review): the file is parsed immediately after pipe() is called, i.e.
// before the "finish"/"close" handlers registered below have had a chance to
// run, so nothing guarantees the download has been written at this point.
const obj = xlsx.parse('result.xlsx');
callback(obj[0]);
// Write failures are only logged; the caller is not told about them.
fileStream.on("error", function (err) {
console.log("Error writting to the stream.");
console.log(err);
});
// Second callback invocation, this time with the URL's base name.
fileStream.on("close", function () {
callback(filename);
});
// Close the file once every piped chunk has been handed to the stream.
fileStream.on("finish", function () {
fileStream.close();
});
});
// Request-level failures are only logged as well.
req.on("error", function (err) {
console.log("error downloading the file");
console.log(err);
});
}
module.exports.download = download;
My questions are:
Is it possible to parse this data into an array WITHOUT needing to save to a physical file? If yes, how?
If I can't parse the data without populating a physical file, how can I download the spreadsheet and then read and parse the data later?
NOTE: I have already tested the rest of my download function with a valid file entered manually, everything is working perfectly. The only thing that isn't working is the data downloading and reading part of the spreadsheet.
Is it possible to parse this data into an array WITHOUT needing to save to a physical file? Basically no (the file comes from a remote server), unless the server allows you to do it live.
Your code is nearly right, but the order is wrong. You must invoke the callback only after the writing is done; that will fix your empty-file issue.
Here is how:
const https = require("https");
const fs = require("fs");
const path = require("path");
const xlsx = require("node-xlsx");
/**
 * Download a spreadsheet over HTTPS, store it as "result.xlsx" and hand the
 * first parsed worksheet to `callback`.
 *
 * The file is parsed only after the write stream has closed, so the parser
 * never sees a partially written file.
 *
 * @param {string} url - HTTPS URL of the .xlsx file.
 * @param {Function} callback - Called once with the first sheet returned by
 *   `xlsx.parse("result.xlsx")`. It is NOT called when the download, the
 *   write or the parse fails; those failures are logged instead.
 */
function download(url, callback) {
  const req = https.get(url, function (res) {
    // A redirect or an error page is not a spreadsheet; do not save or parse it.
    if (res.statusCode < 200 || res.statusCode >= 300) {
      console.log("error downloading the file");
      console.log(new Error(`Unexpected HTTP status ${res.statusCode}`));
      res.resume(); // drain the response so the socket is released
      return;
    }

    const fileStream = fs.createWriteStream("result.xlsx");
    // "close" is also emitted after a failure, so remember whether one happened.
    let failed = false;

    fileStream.on("error", function (err) {
      failed = true;
      console.log("Error writting to the stream.");
      console.log(err);
    });

    // The write stream closes itself after "finish" (autoClose defaults to
    // true), so no explicit close() call is needed.
    fileStream.on("close", function () {
      if (failed) {
        return;
      }
      let sheets;
      try {
        sheets = xlsx.parse("result.xlsx"); // or whatever you named it
      } catch (err) {
        // A throw inside an event handler would otherwise be uncaught.
        console.log("Error parsing the downloaded file.");
        console.log(err);
        return;
      }
      callback(sheets[0]);
    });

    // pipe() does not end the destination when the source fails, so tear the
    // file stream down explicitly to avoid waiting forever.
    res.on("error", function (err) {
      failed = true;
      console.log("error downloading the file");
      console.log(err);
      fileStream.destroy();
    });

    res.pipe(fileStream);
  });

  req.on("error", function (err) {
    console.log("error downloading the file");
    console.log(err);
  });
}
module.exports.download = download;
I am working on a program that fetches data from the Google Trends API using Node.js. I am getting the desired data, but not in the correct format. I want to parse the JSON so that I can get the data in the correct format.
My code:
//This program is fetching data from google trend api for a particular job title
var googleTrends = require('google-trends-api');
var fs = require('fs');
// Load keywords.txt, split it into lines and start processing them from the
// first line. A read failure is logged and nothing else happens.
fs.readFile('keywords.txt', 'utf8', (err, contents) => {
  if (err) {
    return console.log(err);
  }
  const lines = contents.toString().split("\n");
  recur(0, lines);
});
/**
 * Query interest-by-region for the keyword(s) on line `index` of `data`,
 * print the raw result, then move on to the next line once it has arrived.
 * Processing stops at the end of `data` or at the first failed request.
 *
 * @param {number} index - Position in `data` of the line to process.
 * @param {string[]} data - Lines of comma-separated keywords.
 */
function recur(index, data) {
  // Nothing left to process.
  if (!(index < data.length)) {
    return;
  }

  const keyword = data[index].split(",");
  console.log(keyword);

  const request = googleTrends.interestByRegion({
    keyword: keyword,
    geo: 'US',
    resolution: 'state',
  });

  request
    .then((results) => {
      console.log(results + "\n");
      console.log("***********************************************************************************************");
      // Only start the next lookup after this one has completed.
      recur(index + 1, data);
    })
    .catch((err) => {
      console.error('We have an error!', err);
    });
}
Output :-
[ 'House Cleaning' ]
{"default":{"geoMapData":[{"geoCode":"US-CO","geoName":"Colorado","value":[100],"formattedValue":["100"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-AZ","geoName":"Arizona","value":[96],"formattedValue":["96"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-WA","geoName":"Washington","value":[95],"formattedValue":["95"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-KS","geoName":"Kansas","value":[93],"formattedValue":["93"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-NV","geoName":"Nevada","value":[93],"formattedValue":["93"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-ID","geoName":"Idaho","value":[93],"formattedValue":["93"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-RI","geoName":"Rhode
Island","value":[87],"formattedValue":["87"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-UT","geoName":"Utah","value":[86],"formattedValue":["86"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-OR","geoName":"Oregon","value":[85],"formattedValue":["85"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-OK","geoName":"Oklahoma","value":[83],"formattedValue":["83"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-NC","geoName":"North
Carolina","value":[83],"formattedValue":["83"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-TN","geoName":"Tennessee","value":[80],"formattedValue":["80"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-CA","geoName":"California","value":[79],"formattedValue":["79"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-GA","geoName":"Georgia","value":[78],"formattedValue":["78"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-TX","geoName":"Texas","value":[78],"formattedValue":["78"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-MO","geoName":"Missouri","value":[77],"formattedValue":["77"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-MT","geoName":"Montana","value":[77],"formattedValue":["77"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-NH","geoName":"New
Hampshire","value":[76],"formattedValue":["76"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-SC","geoName":"South
Carolina","value":[76],"formattedValue":["76"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-AL","geoName":"Alabama","value":[75],"formattedValue":["75"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-MA","geoName":"Massachusetts","value":[75],"formattedValue":["75"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-FL","geoName":"Florida","value":[75],"formattedValue":["75"],"maxValueIndex":0,"hasData":[true]},{"geoCode":"US-AR","geoName":"Arkansas","value":
I am reading a CSV file and then writing another one in the format I need. console.log shows the data I need, but the file I create contains [object Object] for each line.
I am not sure why console.log shows the correct data but the file does not. I've read over the Node documentation but I cannot figure this out. Any information you can provide is appreciated.
this is what console.log shows
var fs = require("fs");
var parse = require('csv-parse');
// Read the input CSV, map each parsed record to a new line object, then write
// the collected objects to newtemp3.csv.
var inputFile = 'EXAMPLE_UPSdailyShipment.csv';
// NOTE(review): this call passes a callback as a third argument and assigns an
// undeclared `content` that nothing below reads; the stream that actually
// feeds the parser is created on the last line. Confirm this call is needed.
fs.createReadStream('EXAMPLE_UPSdailyShipment.csv', "utf8", function read(err, data) {
if (err) {
throw err;
}
content = data;
});
// Accumulates one line object per parsed record.
var arr = [];
// Parser for comma-delimited input; the callback receives all parsed records.
// NOTE(review): `err` is not checked before `data` is used.
var parser = parse({
delimiter: ","
}, function(err, data) {
// Despite its name, `column` is indexed like an array of fields, so it is
// presumably one whole record (confirm against csv-parse's output).
data.forEach(function(column) {
// Build the line object from the parsed fields; PackageReference1 is the order number.
var line = {
"BuyerCompanyName": " ",
"OrderNumber": column[8],
"ShipperName": "UPS",
"TrackingNumber": column[9],
"DateShipped": column[0],
"ShipmentCost": column[2],
};
arr.push(line);
});
console.log(arr);
// `arr` (an array of plain objects) is passed as-is; it is not converted to
// text before being written.
fs.writeFileSync("newtemp3.csv", arr, 'utf8');
console.log("file read and new file created")
});
// Feed the input file into the parser.
fs.createReadStream(inputFile).pipe(parser);
I think you just need to stringify the data first:
// Turn the collected objects into a string before writing. Note that the file
// then holds JSON text, not comma-separated rows.
const serializedRows = JSON.stringify(arr);
fs.writeFileSync("newtemp3.csv", serializedRows, 'utf8');
Hopefully this solves your problem.
I am using request js to download a file.
/**
 * Request the attachment identified by the surrounding `id` and
 * `attachmentId` values.
 *
 * @returns {*} Whatever `rp.get` returns for the attachment URI.
 */
function requ() {
  return rp.get({
    uri: `api/tasks/${id}/attachments/${attachmentId}`,
  });
}
My question is:
why piping to "res" like requ().pipe(res) works and returning the result of the request above using "send" like
requ().then(function (body) {
  // here body is the string representing the file
  res.send(body);
});
doesn't?
const fs = require('fs');
// Fetch the attachment, stage it in a temporary file and serve that file.
// Every failure path answers the request instead of throwing, and the
// temporary file is removed once the transfer is over.
requ()
  .then((result) => {
    // here result is the file's representing string
    const tempPath = `${__dirname}/tempFiles${Date.now()}`; // a temporary file to send
    fs.writeFile(tempPath, result, (writeErr) => {
      if (writeErr) {
        // Throwing here would be an uncaught exception: this callback runs
        // outside the promise chain, so nothing could catch it.
        console.error(writeErr);
        res.sendStatus(500);
        return;
      }
      res.sendFile(tempPath, (sendErr) => {
        if (sendErr) {
          console.error(sendErr);
          // Only answer if the failed transfer has not started the response.
          if (!res.headersSent) {
            res.sendStatus(500);
          }
        }
        // Remove the temporary file whether or not the transfer succeeded.
        fs.unlink(tempPath, (unlinkErr) => {
          if (unlinkErr) {
            console.error(unlinkErr);
          }
        });
      });
    });
  })
  .catch((err) => {
    // The download itself failed.
    console.error(err);
    if (!res.headersSent) {
      res.sendStatus(500);
    }
  });
Read More About fs, link 2
My file was being corrupted because request was converting the response body to utf8. Using:
// encoding: null stops `request` from converting the response body to a utf8
// string, which is what corrupted the file. NOTE(review): per the library's
// docs the body is then returned as a Buffer; confirm for the version in use.
const options = {
uri: `api/tasks/${id}/attachments/${attachmentId}`,
encoding:null
}
fixed the problem
The Node.js file is hosted on Heroku, and I'm using PostgreSQL as the database.
I would like to export a CSV file from a view:
// My PostgreSQL query
var copyTo = require('pg-copy-streams').to;
var csv = require('csv');
var fs = require('fs');
// Run COPY ... TO STDOUT for the view, using '|' as the field delimiter.
// (The statement must be a single string literal; it cannot span two lines.)
var stream = client.query(copyTo('COPY (SELECT * FROM v_metric) TO STDOUT WITH CSV HEADER DELIMITER as \'|\''));
// export in csv
// NOTE(review): pipe() returns its destination, so csv().from() presumably
// receives process.stdout here rather than the COPY stream. Confirm; this
// would explain an empty sample.csv.
csv().from(stream.pipe(process.stdout, { end: false })).to(fs.createWriteStream('sample.csv'));
I don't get any result; sample.csv is empty.
Any ideas?
Thank you in advance.
You can use pg-copy-streams npm module made specifically for this:
// Collect the output of COPY (<query>) TO STDOUT into `data` as CSV text.
let data = '';
const copyTo = require('pg-copy-streams').to;

pool.connect(function (pgErr, client, done) {
  if (pgErr) {
    // No client was handed out, so there is nothing to release.
    console.error('Could not get a client from the pool', pgErr);
    return;
  }

  const stream = client.query(copyTo(`COPY (${query}) TO STDOUT With CSV HEADER`));

  // Decode inside the stream so a multi-byte character that is split across
  // two chunks is not mangled by per-chunk string conversion.
  stream.setEncoding('utf8');

  stream.on('data', (chunk) => {
    data += chunk;
  });

  stream.on('end', () => {
    console.log(data);
    done();
  });

  stream.on('error', (err) => {
    console.error('COPY stream failed', err);
    // NOTE(review): a truthy argument asks the pool to discard this client
    // instead of reusing it. Confirm against the pg version in use.
    done(err);
  });
});
Hope this helps.
The SQL server's STDOUT is different from your Node process's STDOUT. You could try streaming the query results instead:
// Called once for every row the query emits.
function handleRow(record) {
  // handle the row here
}

var query = client.query('SELECT * FROM v_metric');
query.on('row', handleRow);
Then in your handler if your data isn't complicated (i.e. no delimiters or double quotes) you could skip using csv and iterate over the columns to convert them into a string that you write to the write stream. May be easier to have the column names in an array which you can pass as the SQL (via joining with ', ') and iterate over in the handler, but you could also extract the column names using Object.keys(row).
UPDATE: Example based on your comment:
// Stream the selected columns row by row into sample.csv, one line per row.
var columns = ['country_cd','product_name','product_lvel','month_year','metric_name','val'];
var ws = fs.createWriteStream('sample.csv');
var query = client.query('SELECT '+columns.join(', ')+' FROM users');
query.on('row', function(row) {
  // Collect the values in column order. If a column needs special formatting
  // (e.g. dates), handle it individually instead of mapping blindly.
  const values = columns.map(function(col) {
    return row[col];
  });
  // Terminate every record with a newline so rows do not run together.
  ws.write(values.join('| ') + '\n');
});
query.on('end', function(result) {
  // end() flushes pending writes before the file is closed.
  ws.end();
});
If you do want to use csv, you can create a stream that you write data to in the handler and pipe that to csv.
One other note: `,` is the default delimiter, so if you want to use something else, like `|`, you will need to specify that in the options.