Sails.js Skipper: How to read the uploaded file stream during upload? - node.js

I am using sails version 0.11 which comes bundled with skipper for file uploads. I need to upload big files of over 100 MB in size at least.
Now I don't want the file upload to complete before I start further processing of the file on server. I want to read the file stream while it is being uploaded. Here is how I am trying to achieve the same:
var csv = require("fast-csv");
bulk: function (req, res){
function parseEntry(entry){
return entry
}
var inputStream = req.file('employees');
var csvStream = csv();
inputStream.pipe(csvStream);
csvStream.on("data", function (data){
count++
// skip invalid records
var parsedData = parseEntry(data);
console.log(parsedData);
});
csvStream.on("end", function(){
// console.log("total time taken in minutes:", timeTaken);
return res.json(200, {"status": 1, "message": "successfully uploaded the file"});
});
}
But my logs only show up the end event and no data event is being captured. I read in the documentation that req.file("filename")
will return stream of file streams. But how do I access the particular file stream I need since I am only uploading a single file?

Just published skipper-csv ;)
npm install skipper-csv --save
Use it the same way you would use other adapters.
It uses csv-parse to do the actual parsing. You typically pass csv-parse options to the upload function. Additionally you pass the rowHandler function that is called for each parsed row. The fd (file descriptor) is also passed in case you are uploading multiple files at the same time. Here is an example:
req.file('files').upload({
adapter: require('skipper-csv'),
csvOptions: {delimiter: ',', columns: true},
rowHandler: function(row, fd){
console.log(fd, row);
}
}, function (err, files) {
if (err)
return res.serverError(err);
return res.json({
message: "Uploaded " + files.length + " CSV files!",
files: files
});
});

Related

How to write multiple streams for one file in Node?

Learning how to do large file manipulation with Node and streams I'm stuck in the middle of a file change when passing down the results to a module and I think the process is still in memory when it reaches another module.
I get a zip from an s3 bucket locally and unzip the contents:
try {
const stream = fs.createReadStream(zipFile).pipe(unzipper.Extract({ path }))
stream.on('error', err => console.error(err))
stream.on('close', async () => {
fs.removeSync(zipFile)
try {
const neededFile = await dir(path) // delete files not needed from zip, rename and return named file
await mod1(neededFile) // review file, edit and return info
await mod2(neededFile, data) // pass down data for further changes
return
} catch (err) {
console.log('error')
}
})
} catch (err) {
console.log('stream error')
}
Initial unzip I learned that there is a difference between stream on close and finish because I could pass the file to the first module and start the manipulation but the file, I guess due to the size, output and file never matched. After cleaning the files I dont need I pass the renamed file to mod1 for changes and run a write file sync:
mod1.js:
const fs = require('fs-extra')
module.exports = file => {
fs.readFile(file, 'utf8', (err, data) => {
if (err) return console.log(err)
try {
const result = data.replace(/: /gm, `:`).replace(/(?<=location:")foobar(?=")/gm, '')
fs.writeFileSync(file, result)
} catch (err) {
console.log(err)
return err
}
})
}
when I tried to do the above with:
const readStream = fs.createReadStream(file)
const writeStream = fs.createWriteStream(file)
readStream.on('data', chunk => {
const data = chunk.toString().replace(/: /gm, `:`).replace(/(?<=location:")foobar(?=")/gm, '')
writeStream.write(data)
})
readStream.on('end', () => {
writeStream.close()
})
the file would always be blank. After writeFileSync I proceed with the next module to search for a line ref:
mod2.js:
const fs = require('fs-extra')
module.exports = (file, data) => {
const parseFile = fs.readFileSync(file, 'utf8')
parseFile.split(/\r?\n/).map((line, idx) => {
if (line.includes(data)) console.log(idx + 1)
})
}
but the line number returned is that of the initial unzipped file not the file that was modded from the first module. Because I thought the sync process would be for the file it would appear the file being referenced is in memory? My search results for streams when learning about them:
Working with Node.js Stream API
Stream
How to use stream.pipe
Understanding Streams in Node.js
Node.js Streams: Everything you need to know
Streams, Piping, and Their Error Handling in Node.js
Writing to Files in Node.js
Error handling with node.js streams
Node.js Readable file stream not getting data
Node.js stream 'end' event not firing
NodeJS streams not awaiting async
stream-handbook
How should a file be manipulated after an unzip stream and why does the second module reference the file after it was unzipped and not when it was already manipulated? Is it possible to write multiple streams synchronously?

NodeJS: Download multiple files from GridFS and provide zip to the user on a button click

The front-end is written in ReactJS, more specifically grommet. There are multiple pdf files to be served to the user on clicking the Download button. The files are stored in GridFS. I wish to give the user a zipped folder which contains all these files. How can I achieve this?
Thanks in advance.
I have it!! Super simple solution with archiver. Worked at first time.
Note: I am using sails.js. DBFile is my Model.
const GridFsAdapter = require('../adapters/gridfs-adapter');
const archiver = require('archiver');
async function downloadMultiple (query, res, filename) {
// create a stream for download
const archive = archiver('zip', {
zlib: {level: 9} // Sets the compression level.
});
// catch warnings (ie stat failures and other non-blocking errors)
archive.on('warning', (err) => {
if (err.code === 'ENOENT') {
// log warning
sails.log.warn(err);
} else {
// throw error
throw err;
}
});
archive.on('error', (err) => {
throw err;
});
// set file name
res.attachment(filename);
// pipe the stream to response before appending files/streams
archive.pipe(res);
// add your streams
await DBFile
.stream(query)
// like mongoDBs cursor.forEach() function. Avoids to have all record in memory at once
.eachRecord(async (dbFile) => {
// get the stream from db
const {stream, data} = await GridFsAdapter().read(dbFile.fileId);
// append stream including filename to download stream
archive.append(stream, {name: data.filename});
});
// tell the download stream, you have all your files added
archive.finalize();
}

fs.createWriteStream, no such file or directory, open Nodejs

I want to save files that I am getting from another server on my server but the problem is when I am calling createWriteStream it giving me the error :
no such file or directory, open
E:\pathtoproject\myproject\public\profile_14454.jpg
Here is my code which is in E:\pathtoproject\myproject\modules\dowload.js :
request.head(infos.profile_pic, function(err, res, body) {
const completeFileName = '../public/profile_14454.' + res.headers['content-type'].split('/')[1];
var imageStream = fs.createWriteStream(completeFileName);
imageStream.on('open', function(fd) {
console.log("File open");
request(infos.profile_pic).pipe(imageStream).on('close', function(body) {
consoleLog('Profile pic saved');
console.log('This is the content of body');
console.log(body);
connection.query('UPDATE user set photo=? where id=?', [completeFileName, lastID], function(err, result, fields) {
if (err) {
consoleLog('Error while update the profile pic');
}
});
})
});
});
When I removed the directory ../public/ and leave only the name of the file
profile_14454.' + res.headers['content-type'].split('/')[1] , it worked but the file was saved in the root directory of the project (E:\pathtoproject\myproject\).
What's wrong in what I am doing? How can I have the file saved under public directory?
I am using nodeJS 8.9.4
I tried with my small code .
var fs = require("fs");
var data = 'Simply Easy Learning';
// Create a writable stream
var writerStream = fs.createWriteStream('./airo/output.txt');
// Write the data to stream with encoding to be utf8
writerStream.write(data,'UTF8');
// Mark the end of file
writerStream.end();
// Handle stream events --> finish, and error
writerStream.on('finish', function() {
console.log("Write completed.");
});
writerStream.on('error', function(err){
console.log(err.stack);
});
console.log("Program Ended");
My code is in this path E:\syed ayesha\nodejs\nodejs now I want to store my file in airo folder which is in this path. So I used one dot for storing. Hope this helps.

How to retrieve image file from Mongo using Sails.JS + GridFS?

I'm currently building web using Sails.js and got stuck in retrieving image file from GridFS. I have successfully uploading the file using skipper-gridfs to my mongo gridfs. I have no idea to display the file in the correct way (I'm new in Sails.js and Node system)
Here is my code for retrieving image file from gridfs looks like in FileController.js (I'm using gridfs-stream):
show: function (req, res, next) {
var mongo = require('mongodb');
var Grid = require('gridfs-stream');
var buffer="";
// create or use an existing mongodb-native db instance
var db = new mongo.Db('testDb', new mongo.Server("192.168.0.2", 27017), {safe:true});
var gfs = Grid(db, mongo);
// streaming from gridfs
var readstream = gfs.createReadStream({
filename: 'e1ecfb02-e095-4e2f.png'
});
//check if file exist
gfs.exist({
filename: 'e1ecfb02-e095-4e2f.png'
}, function (err, found) {
if (err) return handleError(err);
found ? console.log('File exists') : console.log('File does not exist');
});
//buffer data
readstream.on("data", function (chunk) {
buffer += chunk;
console.log("adsf", chunk);
});
// dump contents to console when complete
readstream.on("end", function () {
console.log("contents of file:\n\n", buffer);
});
}
When I ran it, the console showed nothing.
There is no error either.
How should I fix this?
Additional Question:
Is it better & easier to store/read file to/from local disk instead of using gridfs?
Am I correct in choosing gridfs-stream to retrieve the file form gridfs?
In the skipper-gridfs codes and there's a 'read' method that accept fd value and returns the required file corresponding to that value. So, you just have to pull that file from mongo by that method and send as a response. It should work file.
download: function (req, res) {
var blobAdapter = require('skipper-gridfs')({
uri: 'mongodb://localhost:27017/mydbname.images'
});
var fd = req.param('fd'); // value of fd comes here from get request
blobAdapter.read(fd, function(error , file) {
if(error) {
res.json(error);
} else {
res.contentType('image/png');
res.send(new Buffer(file));
}
});
}
I hope it helps :)
Additional Questions:
Yes, using gridfs is better both in performance and efficiency. And normally mongodb has a limitation of 16MB probably for binary files, but using gridfs you can store any size file, it breaks them in chunks and stores them.
Retrieving has been shown above.
You can now use skipper-gridfs in sails to manage uploads/downloads.
var blobAdapter = require('skipper-gridfs')({uri: 'mongodb://jimmy#j1mtr0n1xx#mongo.jimmy.com:27017/coolapp.avatar_uploads' });
Upload:
req.file('avatar')
.upload(blobAdapter().receive(), function whenDone(err, uploadedFiles) {
if (err) return res.negotiate(err);
else return res.ok({
files: uploadedFiles,
textParams: req.params.all()
});
});
Download
blobAdapter.read(filename, callback);
Bear in mind the file name will change once you upload it to mongo, you have to use the file name returned in the first response.

How can I convert uploaded file on-the-fly and stream it directly to amazon S3 within sails

I am working on an application that is sending a audio file to a server powered by sails.js.
I need to convert this audio file on-the-fly and send the converted data to amazon S3 using node streams.
I don't want to store data on the server but directly stream the upload file to S3 after it has been converted.
Do you know a way to do that?
I tried using formidable, but I couldn't get it working. Did anyone succeed implementing such a thing?
Thanks
EDIT
As jibsales noticed, it will probably be better if I show you a piece of what I've tried so far. So basically my strategy is to use formidable, fluent-ffmpeg and knox with streams.
I plan to receive the file stream with formidable and write chunks of received data in a first stream (stream 1) that will be the entry point for the conversion with fluent-ffmpeg. Then fluent-ffmpeg writes the output stream into stream2 which is the entry point of Knox.
The first problem I have to face, is the fact that formidable doesn't seem to work. However I'm not sure my strategy is good...
The code so far looks like this:
upload : function(req,res){
//to streams to transfer file data
var stream1 = new stream.Stream(); //stream for the incoming file data
var stream2 = new stream.Stream(); //stream for the converted file data
var client = knox.createClient({
key: 'APIKEY'
, secret: 'SECRET'
, bucket: 'bucket'
});
//Using formidable to acces data chunks
var form = new formidable.IncomingForm();
form.parse(req, function(err, fields, files){ //form.parse is not called
if(err){
return res.json(err);
}else{
return res.send('ok');
}
});
//overriding form.onPart to get the file data chunk
form.onPart = function(part) {
sails.log('getting part...');
if (!part.filename) {
form.handlePart(part);
return;
}
//we put the data chunk in stream1 to convert it
part.on('data', function(chunk) {
stream1.write(chunk[1]);
});
}
form.on('error',function(err){
return sails.log(err);
});
form.on('progress', function(bytesReceived, bytesExpected) {
sails.log(bytesReceived);
});
//conversion process
var proc = new ffmpeg({ source : stream1})
.withAudioCodec('libfdk_aac')
.toFormat('mp3')
.writeToStream(stream2, {end:true}, function(retcode, error){
console.log('file has been converted succesfully');
});
client.putStream(stream2, '/file.mp3', headers, function(err, response){
return res.send(response);
});
},
The reason formidable isn't working is that the default Sails body parser is parsing the request before formidable can get to it. In order to get this to work, you'll have to bypass the Sails body parser for multi-part form uploads. So, in config/express.js:
var express = require('sails/node_modules/express');
module.exports.express = {
bodyParser: function(options) {
return function (req, res, next) {
if (!req.headers['content-type'] || req.headers['content-type'].indexOf('multipart/form-data') === -1) {
return express.bodyParser()(req, res, next);
} else {
return next();
}
}
}
}
This just skips the body parser entirely if the content-type header includes multipart/form-data. Otherwise, it executes the default express body parser. Note that the default Sails body parser is slightly fancier than what comes with Express (if it can't parse the request, it fakes an application/json header and retries), so if you want the extra goodness you'll have to copy/paste the code from the core in to the bodyParser function above. But in most cases you won't miss it ;-)
We're working on a better file parser for Sails that will hopefully take care of some of this for you, but in the mean time this is your best bet!
I've figured out a way to convert files on-the-fly with fluent-ffmpeg and formidable. However it seems impossible for the moment to stream the converted chunks comming out of ffmpeg conversion directly to amazon as you must precise the "Content-Length" header which is unknown during the conversion...
For the first part (client upload) I first had to disable express bodyParser on the upload route in a config/express.js file :
var express = require('sails/node_modules/express');
module.exports.express = {
bodyParser: function() {
return function (req, res, next){
console.log(req.path);
if (!(req.path === '/upload' && req.method === 'POST')) {
return express.bodyParser()(req, res, next);
} else {
return next();
}
}
}
}
For the implementation I used a transform stream that does basically nothing. It just get the right parts of the uploaded data (the ones related to file data). It links formidable parser to fluent-ffmpeg. Then I can only save the converted file on the disk before sending it to amazon...
upload : function(req,res){
var Transform = Stream.Transform; //stream for the incoming file data
var client = knox.createClient({
key: 'KEY'
, secret: 'SECRET'
, bucket: 'BUCKET',
region : 'eu-west-1' //don't forget the region (My bucket is in Europe)
});
function InputStream(options)
{
if(!(this instanceof InputStream))
{
return new InputStream(options);
}
Transform.call(this,options);
return;
};
util.inherits(InputStream, Transform);
var inputDataStream = new InputStream;
var form = new formidable.IncomingForm();
form.parse(req, function(err, fields, files)
{
if(err){
return res.send(err);
}else{
return;
}
});
form.onPart = function(part)
{
if (!part.filename)
{
form.handlePart(part);
return;
}
//we put the data chunk in stream1 to convert it
part.on('data', function (chunk)
{
if(!inputDataStream.write(chunk));
form.pause()
inputDataStream.once('drain', function(){form.resume()});
});
part.on('end', function (chunk){
inputDataStream.end(chunk);
});
}
InputStream.prototype._transform = function (chunk, enc, cb)
{
this.push(chunk);
cb();
}
var proc = new ffmpeg({ source : inputDataStream})
.withAudioBitrate('64k')
.withAudioCodec('libmp3lame')
.toFormat('mp3')
.saveToFile('file.mp3', function (retcode, error){
console.log('file has been converted successfully');
res.send('ok');
var upload = new MultiPartUpload(
{
client : client,
objectName: 'file.mp3',
file: 'file.mp3'
}, function(err,body){
if(err) {
console.log(err);
return;
}
console.log(body);
return;
});
});
},
EDIT
Using knox-mpu you can actually stream data to amazon s3 directly ! You just have to create another transform stream that will be the source of your upload and knox-mpu do the magic. Thanks you all!

Resources