I am reading a large zip file (500 MB) from a URL using request.get(url). The file contains one CSV file which is pretty huge. I am reading the response and writing the stream into a zip file using fs.createWriteStream(zipFile). On the close event of the write stream I have tried adm-zip, with which I got "invalid or unsupported zip format. no end header found", and with the unzipper npm package I am getting "invalid signature: unzip". Below is the code:
const request = require('superagent');
const fs = require('fs');
const unzip = require('unzipper');
const admZip = require('adm-zip');

request.get(url)
  .on('error', function (err) { console.log(err); })
  .pipe(fs.createWriteStream(zipFile))
  .on('close', function () {
    console.log('start unzip');
    var zip = new admZip(zipFile);
    console.log('unzipping to ' + uploadDir);
    zip.extractAllTo(uploadDir, true);
    console.log('finished unzip');
  });
With unzipper:
request.get(url)
  .on('error', function (err) { console.log(err); })
  .pipe(fs.createWriteStream(zipFile))
  .on('close', function () {
    fs.createReadStream(zipFile)
      .pipe(unzip.Extract({ path: uploadDir }));
  });
The error is resolved.
The first step is to pipe the incoming readable response stream into a write stream.
request.get(urlData)
.pipe(writeStream);
Once the read is done, pipe drives the write stream. Then I trigger the unzipping process on the close event of the write stream.
writeStream.on('close', function () {
  fs.createReadStream(zipFile).pipe(unzip.Extract({
    path: uploadDir
  }));
  console.log('finished unzip');
});
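For reference, a minimal end-to-end sketch combining both steps might look like the following (url, zipFile and uploadDir are assumed to be defined elsewhere, and it relies on unzipper's Extract stream emitting 'close' once extraction finishes):

const request = require('superagent');
const fs = require('fs');
const unzip = require('unzipper');

// 1. Download the archive to disk first, so the unzip step sees a complete file.
const writeStream = fs.createWriteStream(zipFile);

request.get(url)
  .on('error', function (err) { console.log(err); })
  .pipe(writeStream);

// 2. Only start extracting once the write stream has closed,
//    i.e. the whole zip has been flushed to disk.
writeStream.on('close', function () {
  fs.createReadStream(zipFile)
    .pipe(unzip.Extract({ path: uploadDir }))
    .on('close', function () {
      console.log('finished unzip');
    });
});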
I am using the json2csv package to let users download data. It works fine for small data sets, but if there are more than 300 records it crashes. Here is my code:
const csvString = json2csv.parse(responseData);
res.setHeader('Content-disposition', 'attachment; filename=shifts-report.csv');
res.set('Content-Type', 'text/csv');
res.status(200).send(csvString);
This code works perfectly fine on small data. How can I stream the data when there is a large amount of it, using the same approach that I followed?
I am trying something like this, but it gives me an error that it cannot set the headers:
const { Writable } = require('stream');

const headers = {
  'Content-type': 'text/csv',
  'Transfer-Encoding': 'chunked',
  'Content-Disposition': 'attachment; filename="file.csv"'
};
res.writeHead(200, headers);
res.flushHeaders();

const stream = new Writable({
  write(chunk, encoding, callback) {
    res.write(chunk);
    callback();
  }
});

try {
  stream.write(file, 'utf-8');
} catch (error) {
  console.log('error', error);
}

res.end();
You should use the json2csv-stream package instead.
npm install json2csv-stream
const fs = require('fs');
const MyStream = require('json2csv-stream');
// create the one-time-use transform stream
const parser = new MyStream();
// create the read and write streams
const reader = fs.createReadStream('data.json');
const writer = fs.createWriteStream('out.csv');
//You can use writer to write it to the FS
// or can stream the content to the response object
// however, you need to write this code in any route handler
// which is sending the CSV data back to the user
reader.pipe(parser).pipe(res);
//reader.pipe(parser).pipe(writer);
For more details, check the json2csv-stream documentation.
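If you want to send the CSV straight back from an Express route handler, a minimal sketch might look like this (the route path and the data.json source file are illustrative, and it assumes app is your Express application):

const fs = require('fs');
const MyStream = require('json2csv-stream');

app.get('/shifts-report', function (req, res) {
  res.setHeader('Content-Disposition', 'attachment; filename=shifts-report.csv');
  res.set('Content-Type', 'text/csv');

  // Pipe the JSON through the transform straight to the response,
  // so the full CSV string is never built up in memory.
  fs.createReadStream('data.json')
    .pipe(new MyStream())
    .pipe(res);
});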
I've been trying to build an update system in my webapp to download and parse some data automatically from this location. When I access the site manually in my browser I have no problem downloading and opening the zip file there, but when I try this implementation (from a similar question) I get a zero-KB file that circularly unzips into a .cpgz and back again:
var http = require('http');
var fs = require('fs');

exports.downloader = function(url, filePath, callback) {
  var file = fs.createWriteStream(filePath);
  var request = http.get(url, function(res) {
    res.pipe(file);
    file.on('finish', function() {
      console.log('downloaded!');
      file.close(callback);
    });
  }).on('error', function(err) {
    fs.unlink(filePath);
    if (callback) callback(err.message);
  });
};
I don't have much experience working with streams or downloading files directly - does this particular site handle download requests abnormally? Is the .cpgz anomaly just because the write stream is creating an empty file?
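One common cause of a zero-KB download (and the .cpgz loop when macOS tries to unzip it) is that http.get does not follow redirects, so the code above can end up piping a 301/302 or error body into the file. Purely as a diagnostic sketch, and without assuming anything about this particular site, you could check the status code before piping:

var http = require('http');
var fs = require('fs');

exports.downloader = function(url, filePath, callback) {
  http.get(url, function(res) {
    // If the server redirects, follow one hop instead of writing the redirect body to disk.
    // Note: if the Location is an https:// URL you would need the https module instead.
    if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
      console.log('redirected to ' + res.headers.location);
      return exports.downloader(res.headers.location, filePath, callback);
    }
    if (res.statusCode !== 200) {
      if (callback) callback('unexpected status code: ' + res.statusCode);
      return;
    }

    var file = fs.createWriteStream(filePath);
    res.pipe(file);
    file.on('finish', function() {
      console.log('downloaded!');
      file.close(callback);
    });
  }).on('error', function(err) {
    if (callback) callback(err.message);
  });
};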
I am uploading a file using NodeJS. My requirement is to read the stream into a variable so that I can store it in AWS SQS. I do not want to store the file on disk. Is this possible? I only need the uploaded file as a stream. The code I am using is (upload.js):
var http = require('http');
var Busboy = require('busboy');

module.exports.UploadImage = function (req, res, next) {
  var busboy = new Busboy({ headers: req.headers });

  // Listen for event when Busboy finds a file to stream.
  busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
    // We are streaming! Handle chunks
    file.on('data', function (data) {
      // Here we can act on the data chunks streamed.
    });
    // Completed streaming the file.
    file.on('end', function (stream) {
      // Here I need to get the stream to send to SQS
    });
  });

  // Listen for event when Busboy finds a non-file field.
  busboy.on('field', function (fieldname, val) {
    // Do something with non-file field.
  });

  // Listen for event when Busboy is finished parsing the form.
  busboy.on('finish', function () {
    res.statusCode = 200;
    res.end();
  });

  // Pipe the HTTP Request into Busboy.
  req.pipe(busboy);
};
How do I get the uploaded stream?
In busboy's 'file' event you get a parameter named 'file', and this is a stream, so you can pipe it.
For example
busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
  file.pipe(streamToSQS);
});
I hope that will help you.
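streamToSQS is not defined in the answer above. Since SQS messages are plain strings rather than streams, one way to build such a target is a small Writable that buffers the chunks and sends the result when the file ends. This is only a sketch under assumptions: the queue URL and region are placeholders, it assumes the AWS SDK v2 (aws-sdk), and note that SQS messages are limited to 256 KB.

const { Writable } = require('stream');
const AWS = require('aws-sdk');

const sqs = new AWS.SQS({ region: 'us-east-1' }); // placeholder region

function makeStreamToSQS(queueUrl) {
  const chunks = [];
  return new Writable({
    write(chunk, encoding, callback) {
      chunks.push(chunk); // collect the uploaded file chunks in memory
      callback();
    },
    final(callback) {
      // All chunks received: build one message body and send it to SQS.
      const body = Buffer.concat(chunks).toString('base64');
      sqs.sendMessage({ QueueUrl: queueUrl, MessageBody: body }, callback);
    }
  });
}

// Usage inside the busboy handler (queue URL is a placeholder):
// busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
//   file.pipe(makeStreamToSQS('https://sqs.us-east-1.amazonaws.com/123456789012/my-queue'));
// });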
busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
  var filename = "filename";
  s3Helper.pdfUploadToS3(file, filename);
});
busboy.on('finish', function () {
  res.status(200).json({ 'message': "File uploaded successfully." });
});
req.pipe(busboy);
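For completeness, a helper like pdfUploadToS3 can hand the busboy file stream directly to S3, since the AWS SDK v2's s3.upload accepts a readable stream as Body. A minimal sketch, with the bucket name and content type as placeholders:

const AWS = require('aws-sdk');
const s3 = new AWS.S3();

// Hypothetical implementation of the s3Helper used above.
module.exports.pdfUploadToS3 = function (fileStream, filename, callback) {
  const params = {
    Bucket: 'my-upload-bucket', // placeholder bucket name
    Key: filename,
    Body: fileStream,           // s3.upload streams this instead of buffering the whole file
    ContentType: 'application/pdf'
  };
  s3.upload(params, function (err, data) {
    if (err) return callback && callback(err);
    if (callback) callback(null, data.Location); // URL of the uploaded object
  });
};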
While the existing answers assume you can simply pipe the stream (file) off to something that can receive it, the actual chunks arrive in the file event handlers you implemented.
From the docs (https://www.npmjs.com/package/busboy):
file.on('data', function(data) {
  // data.length is the number of bytes in this chunk
  console.log('File [' + fieldname + '] got ' + data.length + ' bytes');
});
file.on('end', function() {
  console.log('File [' + fieldname + '] Finished');
});
Update:
Found the docs for the 'file' event: the second argument is a readable stream.
file(< string >fieldname, < ReadableStream >stream, < string >filename, < string >transferEncoding, < string >mimeType) - Emitted for each new file form field found. transferEncoding contains the 'Content-Transfer-Encoding' value for the file stream. mimeType contains the 'Content-Type' value for the file stream.
I'm using NodeJS and Multer to upload files to S3.
On the surface, everything appears to be working: the files get uploaded, and I can see them in the bucket when I log into the AWS console. However, most of the time when I follow the link to the file, the file is broken; often the file size is much smaller than the original file.
When the file reaches the server, the file size is correct if I log it, but on S3 it is much smaller. For example, I just uploaded a file which is 151 kB. The POST request logs the file size correctly, but on S3 the file says it's 81 kB.
Client side:
uploadFile = (file) ->
  formData = new FormData()
  formData.append 'file', file

  xhr = new XMLHttpRequest()
  xhr.open "POST", "/upload-image", true
  # xhr.setRequestHeader("Content-Type","multipart/form-data");
  console.log 'uploadFile'

  xhr.onerror = ->
    alert 'Error uploading file'

  xhr.onreadystatechange = ->
    if xhr.readyState is 4
      console.log xhr.responseText

  xhr.send formData
Server:
app.use(multer({ // https://github.com/expressjs/multer
  inMemory: true,
  limits: { fileSize: 3000000 },
  rename: function (fieldname, filename) {
    var time = new Date().getTime();
    return filename.replace(/\W+/g, '-').toLowerCase() + '_' + time;
  },
  onFileUploadData: function (file, data, req, res) {
    var params = {
      Bucket: creds.awsBucket,
      Key: file.name,
      Body: data,
      ACL: 'public-read'
    };
    var s3 = new aws.S3();
    s3.putObject(params, function (perr, pres) {
      if (perr) {
        console.log("Error uploading data: ", perr);
      } else {
        console.log("Successfully uploaded data", pres);
      }
    });
  }
}));
app.post('/upload-image', function(req, res) {
  if (req.files.file === undefined) {
    res.end("error, no file chosen");
  } else if (req.files.file.truncated) {
    res.end("file too large");
  } else {
    console.log(req.files.file.size); // logs the correct file size
    var path = creds.awsPath + req.files.file.name;
    res.type('text/plain');
    res.write(path);
    res.end();
  }
});
EDIT:
Setting the Body to file.buffer in onFileUploadComplete seems to work, but I have a feeling that this isn't the proper way of doing things, and it may come back to bite me later. Is this approach okay, or are there issues I should be aware of with doing this?
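For what it's worth, the truncated files are most likely because onFileUploadData fires once per chunk, so each putObject call replaces the object with just that chunk; whichever call finishes last wins. A sketch of the onFileUploadComplete approach mentioned in the edit, keeping the other options from the question (treat the exact multer 0.x callback signature as an assumption):

app.use(multer({
  inMemory: true, // keeps the whole upload in file.buffer
  limits: { fileSize: 3000000 },
  rename: function (fieldname, filename) {
    var time = new Date().getTime();
    return filename.replace(/\W+/g, '-').toLowerCase() + '_' + time;
  },
  onFileUploadComplete: function (file, req, res) {
    // file.buffer now holds the complete file, not a single chunk,
    // so a single putObject writes the whole object.
    var s3 = new aws.S3();
    s3.putObject({
      Bucket: creds.awsBucket,
      Key: file.name,
      Body: file.buffer,
      ACL: 'public-read'
    }, function (perr, pres) {
      if (perr) {
        console.log("Error uploading data: ", perr);
      } else {
        console.log("Successfully uploaded data", pres);
      }
    });
  }
}));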
I'm trying to upload files to a server using node.js as backend and angular.js as frontend. I'm using express 4 + busboy for this. I have a table in the frontend where I should display all the files I'm uploading. So if I have 3 files and click on upload, angular should post these files to node.js and after getting the response back, refresh the table with those three files.
This is the function I'm using in angular:
function uploadFiles(files) {
  var fd = new FormData();
  for (var i = 0; i < files.length; i++) {
    fd.append("file", files[i]);
  }
  $http.post('http://localhost:3000/upload', fd, {
    withCredentials: false,
    headers: { 'Content-Type': undefined },
    transformRequest: angular.identity
  }).success(refreshTable()).error(function() {
    console.log("error uploading");
  });
}
and this is from node.js:
app.post('/upload', function(req, res) {
  var busboy = new Busboy({ headers: req.headers });
  busboy.on('file', function (fieldname, file, filename) {
    console.log("Uploading: " + filename);
    var fstream = fs.createWriteStream('./files/' + filename);
    file.pipe(fstream);
  });
  busboy.on('finish', function() {
    res.writeHead(200, { 'Connection': 'close' });
    res.end("");
  });
  return req.pipe(busboy);
});
The problem is that if I upload three files, node.js sends the response as soon as the first file has been uploaded, so the table is updated with only the first file; if I refresh the page, the rest of the files appear.
I think the problem is with this line in node: return req.pipe(busboy);. If I remove that line, the POST response stays pending for a long time and nothing happens. I think this is an async problem. Does anybody know if there's a way to send the response back only when all files have been uploaded?
thanks
A simple and common solution to this particular problem is to use a counter variable and listen for the finish event on each fs Writable stream. For example:
app.post('/upload', function(req, res) {
  var busboy = new Busboy({ headers: req.headers });
  var files = 0, finished = false;
  busboy.on('file', function (fieldname, file, filename) {
    console.log("Uploading: " + filename);
    ++files;
    var fstream = fs.createWriteStream('./files/' + filename);
    fstream.on('finish', function() {
      if (--files === 0 && finished) {
        res.writeHead(200, { 'Connection': 'close' });
        res.end("");
      }
    });
    file.pipe(fstream);
  });
  busboy.on('finish', function() {
    finished = true;
  });
  return req.pipe(busboy);
});
The reason for this is that busboy's finish event is emitted once the entire request has been fully processed, including files. However, there is some delay between when there is no more data to write to a particular file and when the OS/node has flushed its internal buffers to disk (and closed the file descriptor). Listening for the finish event on an fs Writable stream lets you know that the file descriptor has been closed and no more writes are going to occur.