I'm trying to download a .xlsx file from a website through web scraping, I've done the entire process until I access the temporary url that the website generates for the download.
When I open the file url in the browser, the download starts automatically (as shown in the image).
excel file download
The problem is that I need to parse this file to send later to my front-end. When I try to create the file using fs.createWriteStream('result.xlsx') and later populate it with res.pipe(fileStream); the file is always generated empty.
Here's my full code:
const https = require("https");
const fs = require("fs");
const path = require("path");
const xlsx = require("node-xlsx");
function download(url, callback) {
const filename = path.basename(url);
const req = https.get(url, function (res) {
const fileStream = fs.createWriteStream("result.xlsx");
res.pipe(fileStream);
const obj = xlsx.parse('result.xlsx');
callback(obj[0]);
fileStream.on("error", function (err) {
console.log("Error writting to the stream.");
console.log(err);
});
fileStream.on("close", function () {
callback(filename);
});
fileStream.on("finish", function () {
fileStream.close();
});
});
req.on("error", function (err) {
console.log("error downloading the file");
console.log(err);
});
}
module.exports.download = download;
My questions are:
Is it possible to parse this data into an array WITHOUT needing to save to a physical file? If yes, how?
If I can't parse the data without needing to populate a physical file, how can I download the spreadsheet and then read and parse the data later.
NOTE: I have already tested the rest of my download function with a valid file entered manually, everything is working perfectly. The only thing that isn't working is the data downloading and reading part of the spreadsheet.
Is it possible to parse this data into an array WITHOUT needing to save to a physical file? Basically No (file from remote server). Except the server allowed you to do it live.
Your code is nearly right, except the order is wrong. You must callback after the writing is done, it will fix your empty file issue.
Here is how:
const https = require("https");
const fs = require("fs");
const path = require("path");
const xlsx = require("node-xlsx");
function download(url, callback) {
const filename = path.basename(url);
const req = https.get(url, function (res) {
const fileStream = fs.createWriteStream("result.xlsx");
res.pipe(fileStream);
fileStream.on("error", function (err) {
console.log("Error writting to the stream.");
console.log(err);
});
fileStream.on("close", function () {
const obj = xlsx.parse('result.xlsx');// or whatever you named it
callback(obj[0]);
});
fileStream.on("finish", function () {
fileStream.close();
});
});
req.on("error", function (err) {
console.log("error downloading the file");
console.log(err);
});
}
module.exports.download = download;
Related
Im trying to copy an png file from a folder to another folder in my nodeJs project.i have some problem. new Image file has some problem and cant open.
i use this code
const fs = require('fs');
var inStr = fs.createReadStream(mainDir+"/"+req.body.ExRequestId+".png");
var outStr = fs.createWriteStream(mainDir+"/"+docReq._id + ".png");
inStr.pipe(outStr);
When working with streams, it's good practice to wait for streams to become ready before using them and handle the errors.
The following snippet waits for the ready event on both streams before piping the ReadStream to the WriteStream and handles the errors.
// assuming you're using express and the app instance is bound to a variable named app
const fs = require('fs');
const path = require('path');
// ...
// helper function: returns a promise that gets resolved when the specified event is fired
const waitForEvent = (emitter, event) => new Promise((resolve, reject) => emitter.once(event, resolve));
app.post('/handle-post', async (req, res) => {
// TODO: validate the input
const { ExRequestId } = req.body;
const srcFileName = `${path.join(mainDir, ExRequestId)}.png`;
const destFileName = `${path.join(mainDir, docReq._id)}.png`;
const srcStream = fs.createReadStream(srcFileName);
await waitForEvent(srcStream, "ready");
const destStream = fs.createWriteStream(destFileName);
await waitForEvent(destStream, "ready");
const handleError = err => res.status(500).json(err);
srcStream.on("error", handleError);
destStream.on("error", handleError);
srcStream.pipe(destStream);
await waitForEvent(srcStream, 'end');
res.status(200).json({srcFileName, destFileName});
});
I also put together a minimal working example. It can be found here.
try with this code:
fs.readFile(sourcePath , function (err, data) {
if (err) throw err;
fs.writeFile(destinationPath , data , 'base64' , function (err) {
if (err) throw err;
console.log('It\'s saved!');
});
});
I am using request js to download a file.
function requ(){
const options = {
uri: `api/tasks/${id}/attachments/${attachmentId}`
}
return rp.get(options)
}
My question is:
why piping to "res" like requ().pipe(res) works and returning the result of the request above using "send" like
requ().then((result)=>{
//here result is the file's representing string
res.send(result)
})
don't?
const fs = require('fs');
requ().then((result) => {
//here result is the file's representing string
const path = __dirname + '/tempFiles' + Date.now(); // a temporary file to send it
fs.writeFile(path, result, function(err) {
if(err) throw err;
return res.sendFile(path);
})
});
Read More About fs, link 2
My file was being corrupted because request was converting the response body to utf8. Using:
const options = {
uri: `api/tasks/${id}/attachments/${attachmentId}`,
encoding:null
}
fixed the problem
My issue is this:
I have made a call to someones web service. I get back the file name, extension and the "bytes". Bytes actually come in as an array and at position 0 "Bytes[0]" is the following string:
JVBERi0xLjYKJeLjz9MKMSAwIG9iago8PC9EZWNvZGVQYXJtczw8L0sgLTEvQ29sdW1ucyAyNTUwL1Jvd3MgMzMwMD4+L1R5cGUvWE9iamVjdC9CaXRzUGVyQ29tcG9uZW50IDEvU3VidHlwZS9JbWFnZS9XaWR0aCAyNTUwL0NvbG9yU3BhY2UvRGV2aWNlR3JheS9GaWx0ZXIvQ0NJVFRGYXhEZWNvZGUvTGVuZ3RoIDI4Mzc0L0hlaWdodCAzMzAwPj5zdHJlYW0K////////y2IZ+M8+zOPM/HzLhzkT1NAjCCoEY0CMJNAjCR4c8HigRhBAi1iZ0eGth61tHhraTFbraRaYgQ8zMFyGyGM8ZQZDI8MjMI8M6enp9W6enp+sadIMIIEYwy/ggU0wwgwjWzSBUmwWOt/rY63fraTVNu6C7R7pN6+v///20v6I70vdBaPjptK8HUQfX9/17D/TMet+l06T//0v3/S9v+r98V0nH///7Ff+Ed3/v16X9XX/S/KP0vSb//W88ksdW18lzBEJVpPXT0k9b71///...
The string example above has been cut off for readability.
How do I take that string and save it as a readable file?
This case it's a pdf.
let pdfBytes = '{String shown above in example}'
You can use the Node.js File System Module to save the received buffer.
Assuming the encoding of your data is base64:
const fs = require('fs');
let pdfBytes = 'JVBERi0xLjYKJeLjz9...'
let writeStream = fs.createWriteStream('filename.pdf');
writeStream.write(pdfBytes, 'base64');
writeStream.on('finish', () => {
console.log('saved');
});
writeStream.end();
I am using the fs file system here to create and save the file. I use a lot of try catch in case anything goes wrong. This example shows how you could pass the data to a function that could then create the file for you.
const util = require('util');
const fs = require('fs');
const fsOpen = util.promisify(fs.open);
const fsWriteFile = util.promisify(fs.writeFile);
const fsClose = util.promisify(fs.close);
function saveNewFile(path, data) {
return new Promise((async (resolve, reject) => {
let fileToCreate;
// Open the file for writing
try {
fileToCreate = await fsOpen(path, 'wx');
} catch (err) {
reject('Could not create new file, it may already exist');
return;
}
// Write the new data to the file
try {
await fsWriteFile(fileToCreate, data);
} catch (err) {
reject('Error writing to new file');
return;
}
// Close the file
try {
await fsClose(fileToCreate);
} catch (err) {
reject('Error closing new file');
return;
}
resolve('File created');
}));
};
// Data we want to use to create the file.
let pdfBytes = 'JVBERi0xLjYKJeLj...'
saveNewFile('./filename.pdf', pdfBytes);
I'm trying to upload a large (8.3GB) video to my Node.js (Express) server by chunking using busboy. How to I receive each chunk (busboy is doing this part) and piece it together as one whole video?
I have been looking into readable and writable streams but I'm not ever getting the whole video. I keep overwriting parts of it, resulting in about 1 GB.
Here's my code:
req.busboy.on('file', (fieldname, file, filename) => {
logger.info(`Upload of '${filename}' started`);
const video = fs.createReadStream(path.join(`${process.cwd()}/uploads`, filename));
const fstream = fs.createWriteStream(path.join(`${process.cwd()}/uploads`, filename));
if (video) {
video.pipe(fstream);
}
file.pipe(fstream);
fstream.on('close', () => {
logger.info(`Upload of '${filename}' finished`);
res.status(200).send(`Upload of '${filename}' finished`);
});
});
After 12+ hours, I got it figured out using pieces from this article that was given to me. I came up with this code:
//busboy is middleware on my index.js
const fs = require('fs-extra');
const streamToBuffer = require('fast-stream-to-buffer');
//API function called first
uploadVideoChunks(req, res) {
req.pipe(req.busboy);
req.busboy.on('file', (fieldname, file, filename, encoding, mimetype) => {
const fileNameBase = filename.replace(/\.[^/.]+$/, '');
//save all the chunks to a temp folder with .tmp extensions
streamToBuffer(file, function (error, buffer) {
const chunkDir = `${process.cwd()}/uploads/${fileNameBase}`;
fs.outputFileSync(path.join(chunkDir, `${Date.now()}-${fileNameBase}.tmp`), buffer);
});
});
req.busboy.on('finish', () => {
res.status(200).send(`Finshed uploading chunk`);
});
}
//API function called once all chunks are uploaded
saveToFile(req, res) {
const { filename, profileId, movieId } = req.body;
const uploadDir = `${process.cwd()}/uploads`;
const fileNameBase = filename.replace(/\.[^/.]+$/, '');
const chunkDir = `${uploadDir}/${fileNameBase}`;
let outputFile = fs.createWriteStream(path.join(uploadDir, filename));
fs.readdir(chunkDir, function(error, filenames) {
if (error) {
throw new Error('Cannot get upload chunks!');
}
//loop through the temp dir and write to the stream to create a new file
filenames.forEach(function(tempName) {
const data = fs.readFileSync(`${chunkDir}/${tempName}`);
outputFile.write(data);
//delete the chunk we just handled
fs.removeSync(`${chunkDir}/${tempName}`);
});
outputFile.end();
});
outputFile.on('finish', async function () {
//delete the temp folder once the file is written
fs.removeSync(chunkDir);
}
});
}
Use streams
multer allow you to easily handle file uploads as part of an express route. This works great for small files that don’t leave a significant memory footprint.
The problem with loading a large file into memory is that you can actually run out of memory and cause your application to crash.
use multipart/form-data request. This can be handled by assigning the readStream to that field instead in your request options
streams are extremely valuable for optimizing performance.
Try with this code sample, I think it will work for you.
busboy.on("file", function(fieldName, file, filename, encoding, mimetype){
const writeStream = fs.createWriteStream(writePath);
file.pipe(writeStream);
file.on("data", data => {
totalSize += data.length;
cb(totalSize);
});
file.on("end", () => {
console.log("File "+ fieldName +" finished");
});
});
You can refer this link also for resolve this problem
https://github.com/mscdex/busboy/issues/143
I think multer is good with this, did you try multer?
I am trying to write an app that uploads files to an ftp server in node.js using the npm module ftp. I have a file, foo.txt, whose content is a single line: "This is a test file to upload via ftp." My code is:
var Client = require("ftp");
var fs = require("fs");
var connection = require("./connections.js");
var c = new Client();
const ftpFolder = "./files/";
var fileList = [];
fs.readdir(ftpFolder, (err, files) => {
if(err) {
console.log(err);
} else {
files.forEach(file => {
console.log(file);
fileList.push(file);
});
}
console.log(fileList);
});
c.on("ready", function(){
fileList.forEach(file => {
c.put(file, "/backups/" + file, function(err){
if(err){
console.log(err);
} else {
console.log(file + " was uploaded successfully!");
}
c.end();
});
});
});
// Connect to ftp site
c.connect(connection.server_ftp);
I see the file foo.txt on the ftp server, but when I open it the contents are: "foo.txt". It appears to have written the name of the file to the file rather than uploading it. Any guidance would be appreciated!
When you read a directory, it gives you a list of files. It doesn't read the contents of the file, it just lists the names of the files in the dir.
You will need to use this file name to create a path to read the file from.
const path = require('path')
let filePath = path.join(ftpFolder, file)
let fileContents = fs.readFile(path, 'utf8', (err, data) => {
// do the upload
})
As a side note... While your directory reading may work, you should consider reading the directory after the connection is established. Otherwise, there is a chance you will see it fail because it's a race condition between the client connection and the directory read. You may need to read a directory with so many files that it resolves AFTER the client connects.
You could nest the callbacks, but another way to handle this is Promises. You can kick off both async methods at the same time, and handle the results when both have resolved
var filesPromise = new Promise((resolve, reject) => {
fs.readdir(ftpFolder, (err, files) => {
if(err) reject(err)
else resolve(files)
})
})
var connectionPromise = new Promise((resolve, reject) => {
c.on("ready", () => { resolve(c) }
c.connect(connection.server_ftp)
})
Promise.all([filesPromise, connectionPromise], results => {
results[0] // files
results[1] // client
}).catch(err => {console.error(err)})
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
In my case which is very similar, I put the filename instead of the full path to file in c.put . From your code I think it is the same.