I'm using Busboy to parse multipart/form-data on my server, and I want to store each file in a Buffer without it being automatically converted to UTF-8. Is that possible?
// Accumulates the parsed form: uploaded files go into `files`, and every text
// field becomes a property named after the field.
const result = { files: [] }
const busboy = new Busboy({
headers: req.headers
})
busboy.on('file', (fieldname, file, filename, encoding, mimetype) => {
// Per-file record; its `file` property is filled in by the 'data' handler.
const temp = {}
file.on('data', (data) => {
// NOTE(review): `temp.file` starts out undefined, so `+=` performs string
// concatenation here - every chunk is coerced to a string instead of being
// kept as raw bytes.
temp.file += data
})
file.on('end', () => {
temp.filename = filename
temp.contentType = mimetype
// Append the finished record by building a new array.
result.files = [...result.files, temp]
})
})
busboy.on('field', (fieldname, value) => {
// Text fields are stored directly on `result`, keyed by field name.
result[fieldname] = value
})
busboy.on('error', (error) => {
console.error(error)
})
Currently the file.on('data') handler doesn't work properly: I'm losing information because the += operation automatically converts the buffer to a UTF-8 string.
You can make temp.file an array instead of a string, push each chunk into it, and concatenate the chunks with Buffer.concat at the end.
// Gather every raw chunk of an uploaded file and join them only once the
// stream has ended, so the bytes are never coerced to a string.
busboy.on('file', (fieldname, file, filename, encoding, mimetype) => {
  const chunks = []
  file.on('data', (chunk) => {
    chunks.push(chunk)
  })
  file.on('end', () => {
    const entry = {
      file: Buffer.concat(chunks),
      filename: filename,
      contentType: mimetype,
    }
    result.files = [...result.files, entry]
  })
})
Related
I am trying to upload files through a lambda function and request/response is working fine. Problem is each uploaded file is missing some data so the uploaded file is corrupted. e.g. if I try to upload a 5 Kb file, only 4.5 Kb is getting uploaded. This is confirmed with size variable from logs.
/**
 * Parse a multipart/form-data Lambda event with busboy.
 *
 * @param {object} event - Event providing `headers`, `body` and `isBase64Encoded`.
 * @returns {Promise<object>} Resolves with an object keyed by field name: text
 *   fields map to their string value, file fields map to
 *   `{ data: Buffer, filename, encoding, mimetype }`. Rejects on a parser or
 *   file-stream error.
 */
const parseMultipart = (event) => {
  return new Promise((resolve, reject) => {
    const parsedForm = {};
    const bb = new busboy({
      headers: {
        'content-type': event.headers['Content-Type'] || event.headers['content-type']
      }
    });
    bb.on('file', (fieldname, file, filename, encoding, mimetype) => {
      const bufs = [];
      let size = 0;
      file
        // Stream handlers are plain synchronous callbacks: Array#push returns a
        // number, so there is nothing to await here.
        .on('data', (data) => {
          bufs.push(data);
          size += data.length;
          console.log('size:' + size);
        })
        .on('end', () => {
          console.log('size in end:' + size);
          parsedForm[fieldname] = {
            data: Buffer.concat(bufs),
            filename: filename,
            encoding: encoding,
            mimetype: mimetype
          };
        })
        .on('error', reject);
    })
      .on('field', (fieldname, val) => {
        parsedForm[fieldname] = val;
      })
      .on('finish', () => {
        console.log("in finish:");
        resolve(parsedForm);
      })
      // Settling an already-settled promise is a no-op, so resolving from both
      // 'finish' and 'close' is harmless whichever of them fires.
      .on('close', () => {
        console.log("in close");
        resolve(parsedForm);
      })
      .on('error', error => reject(error));
    // NOTE(review): a body that is not base64-encoded has already been decoded
    // to a string upstream, so raw binary uploads presumably need
    // isBase64Encoded === true to survive intact - confirm the gateway config.
    bb.write(event.body, event.isBase64Encoded ? 'base64' : 'binary');
    bb.end();
  });
};
What is it that I am missing or doing differently? I have already checked relevant questions on SO for busboy.
I am currently trying to develop a google cloud function to parse multipart files (excel format or csv) in order to populate the firestore database.
I am using busboy in a helper function to parse the file, convert it to json and return it to the main function.
Everything goes well until I try to return the parsed data. I thought the most logical approach was to return the data from the busboy 'finish' event, but that does not seem to work: back in the main function the result is undefined. I first suspected an issue with asynchronous code execution, but when I only printed the data inside the busboy 'finish' event it was displayed properly.
I've tried to find related content online, but unfortunately without success. Here is my helper function:
// Takes a multipart request and sends back readable data
// NOTE(review): the function body has no return statement of its own, so a
// caller receives undefined; the `return finalData` further down only returns
// from the 'finish' callback.
const processRequest = (req) => {
const busboy = Busboy({headers: req.headers});
// Accepted MIME types (assigned without a declaration keyword)
formats = ['application/vnd.ms-excel', 'text/csv', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'];
// Holds the parsed result; assigned inside the file 'end' handler below
var finalData;
// fieldname is the request key name of the file
// file is the stream
// fname is the name of the file
// NOTE(review): the code reads fname.mimeType, so this argument is used as an
// info object rather than as a plain file name
busboy.on('file', (fieldname, file, fname) => {
// Checks if file is right format
if(!formats.includes(fname.mimeType)) throw new FileFormatError('File must be excel or csv');
bytes = [];
// Checks that the request key is the right one
if(fieldname == 'file') {
// Data is the actual bytes, adds it to the buffer each time received
file.on('data', (data) => {
bytes.push(data);
});
// Concatenates the bytes into a buffer and reads data given mimetype
file.on('end', async () => {
buffer = Buffer.concat(bytes);
if(fname.mimeType === 'application/vnd.ms-excel' ||
fname.mimeType === 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet') {
workbook = XLSX.read(buffer, {type: 'buffer'});
json = excelToJson(workbook);
console.log(json);
finalData = json;
}
if (fname.mimeType === 'text/csv') {
var csv = [];
const stream = Readable.from(buffer.toString());
// NOTE(review): finalData is only assigned in the parser's 'end' callback
// below, which may not have run yet when busboy emits 'finish' - confirm
stream.pipe(CSV.parse({delimiter: ','}))
.on('error', (err) => {
console.log('csv parsing error');
console.log(err.message);
}).on('data', (row) => {
csv.push(row);
}).on('end', () => {
console.log('csv file properly processed');
console.log(csv);
// CSV PARSING LOGIC TO COME, JUST TESTING RIGHT NOW
finalData = csv;
});
}
});
}
});
busboy.on('finish', () => {
console.log('busboy finished');
// This value is returned to the event emitter, not to processRequest's caller
return finalData;
// WHEN ONLY PRINTED THE DATA IS PRESENT AND DISPLAYS PROPERLY HERE
})
// Processes request body bytes
busboy.end(req.rawBody);
}
There must be something I am misunderstanding but as of yet I cannot point out what.
Thanks in advance for your time :)
You're not waiting for your CSV parsing to actually finish.
It would be better to refactor your async code to use async/await.
Since you're using libraries that might only support callback-style async, you'll need to do some new Promise wrapping yourself.
Understandably, I haven't tested the below code, but something like this...
/**
 * Parse the given buffer as a CSV, return a promise of rows.
 *
 * @param {Buffer} buffer - Raw CSV content.
 * @returns {Promise<Array>} Resolves with every row emitted by the parser, in
 *   order; rejects if the source stream or the parser emits an error.
 */
function parseCSV(buffer) {
  return new Promise((resolve, reject) => {
    const csv = [];
    const stream = Readable.from(buffer.toString());
    stream
      // pipe() does not forward errors, so watch the source stream as well.
      .on("error", reject)
      // The parser comes from the CSV module; a string literal such as
      // "text/csv" has no parse() method.
      .pipe(CSV.parse({ delimiter: "," }))
      .on("error", reject)
      .on("data", (row) => csv.push(row))
      .on("end", () => resolve(csv));
  });
}
/**
 * Convert an uploaded spreadsheet buffer into parsed data, choosing the
 * parser from the MIME type.
 *
 * @param {string} mimeType - MIME type reported for the upload.
 * @param {Buffer} buffer - Raw file content.
 * @returns {Promise<*>} The excelToJson result for Excel types, or the
 *   parseCSV result for "text/csv".
 * @throws {Error} (as a rejection) when the MIME type is not one of the above.
 */
async function parseSpreadsheet(mimeType, buffer) {
  switch (mimeType) {
    case "application/vnd.ms-excel":
    case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
      return excelToJson(XLSX.read(buffer, { type: "buffer" }));
    case "text/csv":
      return parseCSV(buffer);
    default:
      throw new Error(`Unknown mime type ${mimeType}`);
  }
}
/**
 * Get the bytes of the field `fieldName` in the request.
 *
 * @param {object} req - Request providing `headers` and the raw body in `rawBody`.
 * @param {string} fieldName - Name of the multipart file field to extract.
 * @returns {Promise<{info: object, buffer: Buffer}>} Resolves with the info
 *   object busboy reported for the file and its complete content. Rejects on a
 *   parser or stream error, or when parsing ends without that field.
 */
function getFileFromRequest(req, fieldName) {
  return new Promise((resolve, reject) => {
    const busboy = Busboy({ headers: req.headers });
    busboy.on("file", (name, file, info) => {
      // Only process the field we care about
      if (name !== fieldName) {
        // Every file stream must still be consumed, otherwise the parser
        // never reaches the end of the request.
        file.resume();
        return;
      }
      const bytes = [];
      file.on("data", (data) => bytes.push(data));
      file.on("end", () =>
        resolve({
          info,
          buffer: Buffer.concat(bytes),
        }),
      );
      file.on("error", reject);
    });
    busboy.on("error", reject);
    // Reached once parsing is over. If the wanted file already resolved the
    // promise this is a no-op; otherwise fail instead of hanging forever.
    busboy.on("close", () =>
      reject(new Error(`No file field named "${fieldName}" in the request`)),
    );
    busboy.end(req.rawBody);
  });
}
/**
 * Read the upload in the "file" field of the request and parse it as a
 * spreadsheet.
 *
 * @param {object} req - Incoming multipart request.
 * @returns {Promise<*>} Whatever parseSpreadsheet produces for the upload.
 */
async function parseRequest(req) {
  // Pull the uploaded file (metadata + bytes) out of the request...
  const upload = await getFileFromRequest(req, "file");
  // ...then hand it to the parser that matches its MIME type.
  const parsed = await parseSpreadsheet(upload.info.mimeType, upload.buffer);
  return parsed;
}
I'm trying to implement an API endpoint that allows for multiple file uploads.
I don't want to write any file to disk, but to buffer them and pipe to S3.
Here's my code for uploading a single file. Once I attempt to post multiple files to the endpoint in route.js, it doesn't work.
route.js - I'll keep this as framework agnostic as possible
import Busboy from 'busboy'
// or const Busboy = require('busboy')
/**
 * Buffer a single uploaded file from a multipart request.
 *
 * @param {object} req - Readable request stream exposing `headers`.
 * @returns {Promise<{fileBuffer: Buffer, fileType: string, fileName: string, fileEnc: string}>}
 *   Resolves when parsing finishes; rejects on a parser error.
 */
const parseForm = req => {
  return new Promise((resolve, reject) => {
    const form = new Busboy({ headers: req.headers })
    const chunks = []
    // The file metadata is only available inside the 'file' callback, so keep
    // it in the enclosing scope for the 'finish' handler to read.
    let fileType
    let fileName
    let fileEnc
    form.on('file', (field, file, filename, enc, mime) => {
      fileType = mime
      fileName = filename
      fileEnc = enc
      file.on('data', data => {
        chunks.push(data)
      })
    })
    form.on('error', err => {
      reject(err)
    })
    form.on('finish', () => {
      resolve({
        fileBuffer: Buffer.concat(chunks),
        fileType,
        fileName,
        fileEnc,
      })
    })
    req.pipe(form)
  })
}
/**
 * Route handler: parse the uploaded file, push it to storage and answer with
 * its URL. Any failure is logged and reported as a 500 response.
 */
export default async (req, res) => {
  // or module.exports = async (req, res) => {
  try {
    const { fileBuffer, ...fileParams } = await parseForm(req)
    // uploadFile returns a promise; without await `result` would be the
    // pending promise and `result.Location` would be undefined.
    const result = await uploadFile(fileBuffer, fileParams)
    res.status(200).json({ success: true, fileUrl: result.Location })
  } catch (err) {
    console.error(err)
    res.status(500).json({ success: false, error: err.message })
  }
}
upload.js
import S3 from 'aws-sdk/clients/s3'
// or const S3 = require('aws-sdk/clients/s3')
// The import above only provides the S3 class; the upload call needs a client
// instance.
const s3 = new S3()

/**
 * Upload a buffered file to S3.
 *
 * @param {Buffer} buffer - File content.
 * @param {{fileName: string, fileType: string, fileEnc: string}} fileParams -
 *   Metadata collected while parsing the form.
 * @returns {Promise<object>} The promise returned by `s3.upload(...).promise()`.
 */
export default (buffer, fileParams) => {
  // or module.exports = (buffer, fileParams) => {
  const params = {
    Bucket: 'my-s3-bucket',
    Key: fileParams.fileName,
    Body: buffer,
    ContentType: fileParams.fileType,
    // NOTE(review): fileEnc is the encoding reported by the multipart parser,
    // which is presumably a transfer encoding such as "7bit" rather than an
    // HTTP Content-Encoding - confirm this header is really wanted.
    ContentEncoding: fileParams.fileEnc,
  }
  return s3.upload(params).promise()
}
I couldn't find a lot of documentation for this but I think I've patched together a solution.
Most implementations appear to write the file to disk before uploading it to S3, but I wanted to be able to buffer the files and upload to S3 without writing to disk.
I created this implementation that could handle a single file upload, but when I attempted to provide multiple files, it merged the buffers together into one file.
The one limitation I can't seem to overcome is the field name. For example, you could setup the FormData() like this:
// Send three files under one repeated field name.
const formData = new FormData()
// Append to the object declared above (the name used to be mismatched).
formData.append('file[]', form.firstFile[0])
formData.append('file[]', form.secondFile[0])
formData.append('file[]', form.thirdFile[0])
await fetch('/api/upload', {
  method: 'POST',
  body: formData,
})
This structure is laid out in the FormData.append() MDN example. However, I'm not certain how to process it on the server. In the end, I set up my FormData() like this:
Form Data
// Send three files, each under its own field name.
const formData = new FormData()
// Append to the object declared above (the name used to be mismatched).
formData.append('file1', form.firstFile[0])
formData.append('file2', form.secondFile[0])
formData.append('file3', form.thirdFile[0])
await fetch('/api/upload', {
  method: 'POST',
  body: formData,
})
As far as I can tell, this isn't explicitly wrong, but it's not the preferred method.
Here's my updated code
route.js
import Busboy from 'busboy'
// or const Busboy = require('busboy')
/**
 * Buffer every uploaded file of a multipart request.
 *
 * @param {object} req - Readable request stream exposing `headers`.
 * @returns {Promise<Array<{fileBuffer: Buffer, fileType: string, fileName: string, fileEnc: string}>>}
 *   Resolves with one entry per uploaded file once parsing finishes; rejects
 *   on a parser error.
 */
const parseForm = req => {
  return new Promise((resolve, reject) => {
    const form = new Busboy({ headers: req.headers })
    const files = [] // create an empty array to hold the processed files
    form.on('file', (field, file, filename, enc, mime) => {
      // One chunk list per file stream rather than per field name, so several
      // files sharing a field name (e.g. 'file[]') cannot overwrite each other.
      const chunks = []
      file.on('data', data => {
        chunks.push(data)
      })
      file.on('end', () => {
        files.push({
          fileBuffer: Buffer.concat(chunks),
          fileType: mime,
          fileName: filename,
          fileEnc: enc,
        })
      })
    })
    form.on('error', err => {
      reject(err)
    })
    form.on('finish', () => {
      resolve(files)
    })
    req.pipe(form) // pipe the request to the form handler
  })
}
/**
 * Route handler: parse every uploaded file, push each one to storage and
 * answer with the resulting URLs. Any failure is logged and reported as a 500
 * response.
 */
export default async (req, res) => {
  // or module.exports = async (req, res) => {
  try {
    const files = await parseForm(req)
    // Start all uploads and wait for every one of them; the result order
    // matches the order of `files`.
    const fileUrls = await Promise.all(
      files.map(async file => {
        const { fileBuffer, ...fileParams } = file
        // uploadFile returns a promise, so it has to be awaited before its
        // result fields can be read.
        const result = await uploadFile(fileBuffer, fileParams)
        return { filename: result.key, url: result.Location }
      })
    )
    res.status(200).json({ success: true, fileUrls })
  } catch (err) {
    console.error(err)
    res.status(500).json({ success: false, error: err.message })
  }
}
upload.js
import S3 from 'aws-sdk/clients/s3'
// or const S3 = require('aws-sdk/clients/s3')
// The import above only provides the S3 class; the upload call needs a client
// instance.
const s3 = new S3()

/**
 * Upload a buffered file to S3.
 *
 * @param {Buffer} buffer - File content.
 * @param {{fileName: string, fileType: string, fileEnc: string}} fileParams -
 *   Metadata collected while parsing the form.
 * @returns {Promise<object>} The promise returned by `s3.upload(...).promise()`.
 */
export default (buffer, fileParams) => {
  // or module.exports = (buffer, fileParams) => {
  const params = {
    Bucket: 'my-s3-bucket',
    Key: fileParams.fileName,
    Body: buffer,
    ContentType: fileParams.fileType,
    // NOTE(review): fileEnc is the encoding reported by the multipart parser,
    // which is presumably a transfer encoding such as "7bit" rather than an
    // HTTP Content-Encoding - confirm this header is really wanted.
    ContentEncoding: fileParams.fileEnc,
  }
  return s3.upload(params).promise()
}
Using the help of busboy I am attempting to save FileStream into a Firebase bucket.
code:
const admin = require('firebase-admin');
// Bucket handle used by the upload helper; the bucket name constant is defined elsewhere.
const userFilesBucket = admin.storage().bucket(USER_FILES_BUCKET_NAME);
// Handles a multipart POST by handing each uploaded file to uploadFile.
// NOTE(review): the visible code never feeds `req` into the parser (no pipe/end call) - confirm.
function handlePost(req, res){
const busboy = new Busboy({ headers: req.headers })
busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
// Empty handler: the chunks are read off the stream here but not kept.
file.on('data', function(data) {
});
file.on('end', function() {
// Passes the stream object itself (not a Buffer), and only after it has ended.
uploadFile({filename: filename, file:file, mimetype:mimetype})
.catch(err => {
console.log("attemptFileUpload | err:", err)
// NOTE(review): no `reject` is defined anywhere in the visible code.
reject(err)
});
});
});
}
/**
 * Save uploaded content into the user files bucket.
 *
 * @param {{filename: string, file: *, mimetype: string}} fileContainer -
 *   Target path in the bucket (`filename`) and the content to store (`file`).
 * @returns {Promise<void>} Settles once the save has completed. Callers chain
 *   `.catch()` on the result, so a promise must be returned and save errors
 *   must reach it instead of being dropped.
 */
function uploadFile(fileContainer){
  const filePath = fileContainer.filename
  const file = userFilesBucket.file(filePath);
  // Without a callback argument save() returns a promise.
  return file.save(fileContainer.file).then(function() {
    console.log('Sucess | uploaded a blob or file!');
  });
}
This will succeed and the file is saved to bucket but at the same time the above Promise catches exception:
The "chunk" argument must be one of type string or Buffer. Received type object
as well as the files are corrupt.
This error tells me I should convert the FileStream to Buffer?
I should also note that fileContainer.file is of type FileStream.
Thanks.
The solution was very simple: a misreading of the busboy docs on my part.
Needed to use busboy's file.on(data) listener to access the file data as Buffer, not the original incoming file as FileStream.
busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
  // Each 'data' event delivers only one Buffer chunk of the file. Uploading
  // per chunk would save the same path once per chunk with partial content,
  // so collect the chunks and upload the complete Buffer once the stream ends.
  const chunks = []
  file.on('data', data => {
    chunks.push(data)
  });
  file.on('end', function() {
    // Promise.resolve keeps the .catch() valid whatever uploadFile returns.
    Promise.resolve(uploadFile({filename: filename, file: Buffer.concat(chunks), mimetype: mimetype}))
      .catch(err => {
        console.log("attemptFileUpload | err:", err)
      });
  });
});
I'm glad to see AWS now supports multipart/form-data on AWS Lambda, but now that the raw data is in my lambda function how do I process it?
I see multiparty is a good multipart library in Node for multipart processing, but its constructor expects a request, not a raw string.
The input message I am receiving on my Lambda function (after the body mapping template has been applied) is:
{ "rawBody": "--ce0741b2-93d4-4865-a7d6-20ca51fe2689\r\nContent-Disposition: form-data; name=\"Content-Type\"\r\n\r\nmultipart/mixed; boundary=\"------------020601070403020003080006\"\r\n--ce0741b2-93d4-4865-a7d6-20ca51fe2689\r\nContent-Disposition: form-data; name=\"Date\"\r\n\r\nFri, 26 Apr 2013 11:50:29 -0700\r\n--ce0741b2-93d4-4865-a7d6-20ca51fe2689\r\nContent-Disposition: form-data; name=\"From\"\r\n\r\nBob <bob#mg.mydomain.io>\r\n--ce0741b2-93d4-4865-a7d6-20ca51fe2689\r\nContent-Disposition: form-data; name=\"In-Reply-To\"\r...
etc and some file data.
The body mapping template I'm using is
{
"rawBody" : "$util.escapeJavaScript($input.body).replaceAll("\\'", "'")"
}
How can I parse this data to access the fields and files posted to my Lambda function?
busboy doesn't work for me in the "file" case. It didn't throw an exception so I couldn't handle exception in lambda at all.
I'm using the aws-lambda-multipart-parser library instead, which wasn't hard to set up. It just parses the data from event.body and returns it as a Buffer or as text.
Usage:
const multipart = require('aws-lambda-multipart-parser');
const result = multipart.parse(event, spotText) // spotText === true response file will be Buffer and spotText === false: String
Response data:
{
"file": {
"type": "file",
"filename": "lorem.txt",
"contentType": "text/plain",
"content": {
"type": "Buffer",
"data": [ ... byte array ... ]
} or String
},
"field": "value"
}
This worked for me - using busboy
credits owed to Parse multipart/form-data from Buffer in Node.js which I copied most of this from.
const busboy = require('busboy');
// Response headers sent with every reply.
const headers = {
  'Content-Type': 'application/json',
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Methods': 'OPTIONS, POST',
  'Access-Control-Allow-Headers': 'Content-Type'
};

/**
 * Lambda handler that parses a multipart/form-data body with busboy and logs
 * every field and file it encounters.
 *
 * @param {object} event - Event providing `headers`, `body` and optionally
 *   `isBase64Encoded`.
 * @param {object} context - Lambda context; `succeed` is called once parsing
 *   is done, `fail` on a parser error.
 */
function handler(event, context) {
  const contentType = event.headers['Content-Type'] || event.headers['content-type'];
  const bb = new busboy({ headers: { 'content-type': contentType }});
  bb.on('file', function (fieldname, file, filename, encoding, mimetype) {
    console.log('File [%s]: filename=%j; encoding=%j; mimetype=%j', fieldname, filename, encoding, mimetype);
    file
      .on('data', data => console.log('File [%s] got %d bytes', fieldname, data.length))
      .on('end', () => console.log('File [%s] Finished', fieldname));
  })
    .on('field', (fieldname, val) => console.log('Field [%s]: value: %j', fieldname, val))
    .on('finish', () => {
      console.log('Done parsing form!');
      context.succeed({ statusCode: 200, body: 'all done', headers });
    })
    .on('error', err => {
      console.log('failed', err);
      context.fail({ statusCode: 500, body: err, headers });
    });
  // A body flagged as base64-encoded has to be decoded back to bytes before
  // parsing; otherwise the parser would be fed the base64 text itself.
  if (event.isBase64Encoded) {
    bb.end(Buffer.from(event.body, 'base64'));
  } else {
    bb.end(event.body);
  }
}
module.exports = { handler };
Building on @AvnerSo's answer, here's a simpler version: a function that takes the request body and headers as parameters and returns a promise of an object containing the form fields and their values (skipping files):
/**
 * Collect the text fields of a multipart body.
 *
 * @param {string|Buffer} body - Raw request body.
 * @param {object} headers - Request headers; the content type is read from
 *   `Content-Type` or `content-type`.
 * @returns {Promise<object>} Resolves with a field-name -> value object once
 *   parsing finishes; rejects on a parser error.
 */
const parseForm = (body, headers) => new Promise((resolve, reject) => {
  const type = headers['Content-Type'] || headers['content-type'];
  const parser = new busboy({ headers: { 'content-type': type }});
  const fields = {};

  parser.on('field', (name, value) => {
    fields[name] = value;
  });
  parser.on('finish', () => {
    resolve(fields);
  });
  parser.on('error', (err) => {
    reject(err);
  });

  parser.end(body);
});
If you want to get a ready to use object, here is the function I use. It returns a promise of it and handle errors:
import Busboy from 'busboy';
import YError from 'yerror';
import getRawBody from 'raw-body';
/**
 * Parse a multipart body into a ready-to-use object.
 *
 * @param {string|Buffer} content - Raw request body.
 * @param {object} headers - Request headers handed to the parser.
 * @returns {Promise<object>} Resolves with an object keyed by field name: text
 *   fields map to their value, file fields map to
 *   `{ filename, encoding, mimetype, content }`. Rejects with a wrapped error
 *   when the parser fails, or when reading one of the files fails.
 */
const getBody = (content, headers) =>
  new Promise((resolve, reject) => {
    const filePromises = [];
    const data = {};
    // The options literal used to carry a stray `},`, which made the whole
    // snippet a syntax error.
    const parser = new Busboy({
      headers,
    });
    parser.on('field', (name, value) => {
      data[name] = value;
    });
    parser.on('file', (name, file, filename, encoding, mimetype) => {
      data[name] = {
        filename,
        encoding,
        mimetype,
      };
      // Each file is read in full asynchronously; keep the promise so the
      // final result waits for it.
      filePromises.push(
        getRawBody(file).then(rawFile => (data[name].content = rawFile))
      );
    });
    parser.on('error', err => reject(YError.wrap(err)));
    // Resolving with a promise makes the outer promise follow it, so the
    // result is only delivered once every file has been read.
    parser.on('finish', () =>
      resolve(Promise.all(filePromises).then(() => data))
    );
    parser.write(content);
    parser.end();
  });