hashing a streaming file before uploading to S3 - node.js

I am trying to stream a file to S3 without storing the file to disk/SSD. I would like to include part of the file's hash in the filename when uploading to S3.
EDIT_v1:
I've been trying to follow this post, using busboy as the parser: Calculate a file hash and save the file. I took an example from the busboy docs and adapted it with an answer from the post:
const http = require('http');
const fs = require('fs');
const crypto = require('crypto');
const { PassThrough } = require('stream');
const busboy = require('busboy');

const server = http.createServer();

server.on('request', async (req, res) => {
  if (req.method === 'POST') {
    const bb = busboy({ headers: req.headers });

    bb.on('file', (name, file, info) => {
      const { filename, encoding, mimeType } = info;
      console.log(
        `File [${name}]: filename: %j, encoding: %j, mimeType: %j`,
        filename,
        encoding,
        mimeType
      );

      // Tee the incoming file into two PassThrough streams: one for hashing,
      // one for writing to disk once the hash is known.
      const fileHashSource = new PassThrough();
      const writeSource = new PassThrough();
      file.pipe(fileHashSource);
      file.pipe(writeSource);
      fileHashSource.resume();
      writeSource.resume();

      createFileHash(fileHashSource, (err, hash) => {
        if (err) {
          console.log('err', err);
          return res.end('some err');
        }

        const writeStream = fs.createWriteStream(`test_${hash.slice(0, 8)}.png`);
        writeStream.on('error', function (err) {
          console.log('write error', err);
          return res.end('write error');
        });
        writeStream.on('finish', function () {
          console.log('write finished');
          return res.end('done');
        });

        writeSource.pipe(writeStream);
      });
    });

    bb.on('field', (name, val, info) => {
      console.log(`Field [${name}]: value: %j`, val);
    });

    bb.on('close', () => {
      console.log('Done parsing form!');
      req.unpipe(bb);
      res.writeHead(201, { Connection: 'close' });
      res.end('done!');
    });

    req.pipe(bb);
  } else if (req.method === 'GET') {
    res.writeHead(200, { Connection: 'close' });
    res.end(`
      <body style="background-color: black">
        <form enctype="multipart/form-data" method="post">
          <label>file name
            <input type="text" name="textfield" />
          </label><br />
          <label>single file
            <input type="file" name="filefield" />
          </label><br />
          <br />
          <button type="submit">Upload</button>
        </form>
      </body>
    `);
  }
});

server.listen(3000, () => {
  console.info(`NodeJS process: ${process.pid}`);
  console.info(`Listening on port: 3000`);
});

function createFileHash(readStream, next) {
  const hash = crypto.createHash('sha1');
  hash.setEncoding('hex');
  hash.on('error', function (err) {
    console.log('hash error');
    return next(err);
  });
  hash.on('finish', function () {
    console.log('hash finished');
    return next(null, hash.read());
  });
  readStream.pipe(hash);
}
EDIT_v2:
See the first answer below for a solution.

I put the task flow into a pipeline, implemented late piping with PassThrough, and finally used a function that returns an async generator that uploads to S3:
const { fileStream, mimeType } = createFromBusBoy();

// Late piping: copy the incoming file stream into a second PassThrough that
// will be handed to the S3 uploader.
const s3Source = new PassThrough();
fileStream.on('data', chunk => {
  s3Source.write(chunk);
});
fileStream.on('end', () => {
  s3Source.end();
});

const hash = createHash('sha256');
hash.setEncoding('hex');

try {
  await pipeline(
    fileStream,
    hash,
    uploadImage(s3Source, mimeType),
  );
} catch (err) {
  console.log(err);
  throw err;
}

function uploadImage(fileStream, mimeType) {
  return async function* (source, signal) {
    let hash;
    // `source` is the hash stream; with hex encoding it yields the digest
    // as a single chunk once the input has ended.
    for await (const chunk of source) {
      hash = chunk;
    }
    yield await uploadToS3(fileStream, hash, mimeType);
  };
}
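For completeness, here is a minimal sketch of the uploadToS3 helper referenced above, assuming the AWS SDK v3 (@aws-sdk/client-s3 plus @aws-sdk/lib-storage); the bucket name, key prefix, and region are placeholders.

const { S3Client } = require('@aws-sdk/client-s3');
const { Upload } = require('@aws-sdk/lib-storage');

const s3 = new S3Client({ region: 'us-east-1' }); // placeholder region

// body: readable stream with the file data, hash: hex digest string
async function uploadToS3(body, hash, mimeType) {
  const upload = new Upload({
    client: s3,
    params: {
      Bucket: 'my-bucket',                // placeholder bucket
      Key: `images/${hash.slice(0, 8)}`,  // part of the hash becomes the key
      Body: body,                         // a stream of unknown length is fine here
      ContentType: mimeType,
    },
  });
  return upload.done();
}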

Related

Upload a stream to s3

I read Pipe a stream to s3.upload(), but I'm having difficulty with it and I'm not sure it actually solves my problem, though I have tried.
What I am doing is a GET call to www.example.com. This returns a stream, and I want to upload that stream to S3.
Here's my attempt:
fetch('https://www.example.com/' + fileName, {
  method: 'GET',
  headers: {
    'Authorization': "Bearer " + myAccessToken,
  },
})
  .then(function(response) {
    return response.text();
  })
  .then(function(data) {
    uploadToS3(data);
  });
const uploadToS3 = (data) => {
  // Setting up S3 upload parameters
  const params = {
    Bucket: myBucket,
    Key: "fileName",
    Body: data
  };

  // Uploading files to the bucket
  s3.upload(params, function(err, data) {
    if (err) {
      throw err;
    }
    console.log(`File uploaded successfully. ${data.Location}`);
  });
};
Output: File uploaded successfully. https://exampleBucket.s3.amazonaws.com/fileName.pdf
However, the uploaded file is blank.
I figured it out, but I did not keep using fetch.
Instead I download the file, then upload it, then delete the file.
function getNewFilesFromExampleDotCom(myAccessToken, fileName, fileKey) {
  let url2 = 'https://example.com' + fileKey;
  axios
    .get(url2, {
      headers: { 'Authorization': "Bearer " + myAccessToken },
      responseType: 'stream',
    })
    .then(response => {
      let file = fileName;
      response.data.pipe(fs.createWriteStream(file));
      let myFileInfo = [];
      if (myFileInfo.length > 0) {
        myFileInfo.splice(0, myFileInfo.length);
      }
      myFileInfo.push(file);
      processArray(myFileInfo);
      console.log(file + " saved");
    })
    .catch(error => console.log(error));
}

async function processArray(array) {
  for (const item of array) {
    await delayedLog(item);
  }
  console.log('Downloaded!');
  console.log('Uploading to s3!');
}

function delay() {
  return new Promise(resolve => setTimeout(resolve, 300));
}

async function delayedLog(item) {
  await delay();
  uploadFiles(item);
}

async function uploadFiles(file) {
  uploadToS3List(file);
  await new Promise((resolve, reject) => setTimeout(resolve, 1000));
  deleteMyFiles(file);
}

const uploadToS3List = (fileName) => {
  // Read content from the file
  const fileContent = fs.readFileSync(fileName);

  // Setting up S3 upload parameters
  const params = {
    Bucket: "myBucketName",
    Key: fileName,
    Body: fileContent
  };

  // Uploading files to the bucket
  s3.upload(params, function(err, data) {
    if (err) {
      throw err;
    }
    console.log(`File uploaded successfully. ${data.Location}`);
  });
};

function deleteMyFiles(path) {
  fs.unlink(path, (err) => {
    console.log(path + " has been deleted");
    if (err) {
      console.error(err);
      return;
    }
  });
}
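For reference, the detour through the filesystem can usually be avoided: s3.upload() in the AWS SDK v2 accepts a readable stream as Body, so the axios response can be piped straight into the upload. A minimal sketch under that assumption, reusing the names from the code above (bucket and key are placeholders):

function streamToS3(myAccessToken, fileKey, fileName) {
  return axios
    .get('https://example.com' + fileKey, {
      headers: { 'Authorization': "Bearer " + myAccessToken },
      responseType: 'stream',
    })
    .then(response =>
      s3.upload({
        Bucket: "myBucketName",     // placeholder bucket
        Key: fileName,
        Body: response.data,        // readable stream straight from axios
      }).promise()
    )
    .then(data => console.log(`File uploaded successfully. ${data.Location}`))
    .catch(error => console.log(error));
}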

busboy on field and on file not firing

It worked yesterday, and now it stopped without any changes made to the code. What is going on?
Client
async function uploadFile(file) {
  let formData = new FormData();
  formData.append("recordUid", recordUid);
  formData.append("fieldUid", fieldUid);
  formData.append("file", file);
  await fetchPostFormData("/api/files", formData);
}

async function fetchPostFormData(url, formData) {
  try {
    let result = await (
      await fetch(url, {
        method: "POST",
        withCredentials: true,
        credentials: "include",
        headers: {
          Authorization: localStorage.getItem("token"),
        },
        body: formData,
      })
    ).json();
    return result;
  } catch (err) {
    return err;
  }
}
Server
router.post("/api/files", async (req, res, next) => {
try {
console.log("starting upload..."); // <------------------- THIS ONE IS LOGGED
let bb = busboy({
headers: req.headers,
limits: {
fileSize: 20 * 1024 * 1024, // 20 mb
},
});
let fields = {};
// Get any text values
bb.on("field", (fieldname, val, fieldnameTruncated, valTruncated) => {
console.log("on.field", fieldname, val); // <------------------ NOT FIRING
fields[fieldname] = val;
});
bb.on("file", (fieldname, file, filename, encoding, mimetype) => {
console.log("on.file"); // <----------------------------------- NOT FIRING
let parts = filename.filename.split(".");
let name = parts[0];
let extension = parts[parts.length - 1];
let finalName = `${+new Date()}-${name}.${extension}`;
let filePath = `${filesFolderPath}${finalName}`;
// Open writeable stream to path
let writeStream = fs.createWriteStream(filePath);
// Pipe the file to the opened stream
file.pipe(writeStream);
// Check for errors
writeStream.on("error", (err) => {
console.log(err);
});
writeStream.on("close", async (err) => {
let sizeBytes = fs.statSync(filePath).size;
});
});
bb.on("finish", () => {
res.status(200).send({ success: true });
});
} catch (err) {
next(err);
}
});
Managed to solve it.
The problem was the missing req.pipe(bb) at the very end: busboy only parses data that is piped (or written) into it, so without this line the 'field' and 'file' handlers never fire.
    // previous code... ^^^^^

    bb.on("finish", () => {
      res.status(200).send({ success: true });
    });

    req.pipe(bb); // <------------- THIS RIGHT HERE
  } catch (err) {
    next(err);
  }
});

how to assemble a stream pipeline with node-formidable fileWriteStreamHandler?

I'm trying to upload a file to S3 using the node-formidable fileWriteStreamHandler option. Before the upload to S3 I want to create a hash of the file. This means implementing a stream pipeline that first passes the data through a hash and then passes that data on to the S3 upload.
When trying to implement the pipeline I kept running into issues, so below is a simplified function that more or less represents what I want to do.
formHandler.js
const form = formidable({
  encoding: 'utf-8',
  keepExtensions: true,
  allowEmptyFiles: false,
  maxFiles, maxFileSize, maxTotalFileSize,
  maxFields, maxFieldsSize, minFileSize,
  multiples: true,
  fileWriteStreamHandler: streamUploadImage,
});
streamUploadImage.js
function streamUploadImage() {
  const firstStream = new PassThrough();
  const lastStream = new PassThrough();

  const hash = createHash('SHA2-256');
  hash.setEncoding('hex');

  const transform = new Transform({
    transform(chunk, encoding, cb) {
      hash.write(chunk);
      cb();
    },
    flush(cb) {
      hash.end();
      console.log('all done', hash.read());
      cb();
    }
  });

  firstStream.on('data', () => console.log('first'));
  lastStream.on('data', () => console.log('last'));

  return firstStream.pipe(transform).pipe(lastStream);
};
When using the above streamUploadImage, only lastStream receives data; firstStream and transform are never called.
Why is that? Is the pipeline not implemented correctly? Does formidable's fileWriteStreamHandler not work with pipes?
I'm using formidable#3.2.1.
UPDATE:
see below for a quick reproduction of my issue:
// assumed imports (formidable v3 ships as an ES module)
import http from 'http';
import { PassThrough, Transform } from 'stream';
import { createHash } from 'crypto';
import formidable from 'formidable';

var server = http.createServer(function (req, res) {
  if (req.url == '/') {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.end(`
      <form action="/upload" enctype="multipart/form-data" method="post">
        <label>file name<input type="text" name="file_name" autofocus /></label><br />
        <label>single file<input type="file" name="file_single" /></label><br />
        <label>multiple files<input type="file" name="filearray_with_multiple[]" multiple /></label><br />
        <br />
        <button>Upload</button>
      </form>
    `);
    res.end();
  } else if (req.url === '/upload') {
    const form = formidable({
      encoding: 'utf-8',
      keepExtensions: true,
      allowEmptyFiles: false,
      multiples: true,
      fileWriteStreamHandler: streamUploadImage,
    });

    form.parse(req, (err, fields, files) => {
      if (err) throw err;
      console.log('parsed file upload');
      console.log({ fields, files });
      res.writeHead(201, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ err, fields, files }, null, 2));
    });
  }
});

function streamUploadImage() {
  const firstStream = new PassThrough();
  const lastStream = new PassThrough();

  const hash = createHash('SHA2-256');
  hash.setEncoding('hex');

  const transform = new Transform({
    transform(chunk, encoding, cb) {
      hash.write(chunk);
      cb();
    },
    flush(cb) {
      hash.end();
      console.log('all done', hash.read());
      cb();
    }
  });

  firstStream.on('data', () => console.log('first'));
  lastStream.on('data', () => console.log('last'));

  return firstStream.pipe(transform).pipe(lastStream);
};

server.listen(5000);
stream.pipe() returns the destination stream to allow for chaining.
You need to return the head of the pipeline from streamUploadImage() (firstStream in your example), rather than the tail.
function streamUploadImage() {
  const firstStream = new PassThrough();
  const lastStream = new PassThrough();

  // *snip*

  // Wire up the pipeline
  firstStream.pipe(transform).pipe(lastStream);

  // Return the head of the pipeline
  return firstStream;
};
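Building on that, here is a rough sketch of a fileWriteStreamHandler that hashes the file while streaming it to S3, assuming the AWS SDK v3 (@aws-sdk/lib-storage); the bucket, key prefix, and region are placeholders. Because S3 needs the key before the upload starts, the object goes up under a temporary key; renaming it to a hash-based key afterwards would take an extra CopyObject/DeleteObject step.

const { PassThrough, Transform } = require('stream');
const { createHash, randomUUID } = require('crypto');
const { S3Client } = require('@aws-sdk/client-s3');
const { Upload } = require('@aws-sdk/lib-storage');

const s3 = new S3Client({ region: 'us-east-1' }); // placeholder region

function streamUploadImage() {
  const hash = createHash('sha256');
  const body = new PassThrough();

  // Head of the pipeline: hash each chunk, then forward it unchanged.
  const head = new Transform({
    transform(chunk, encoding, cb) {
      hash.update(chunk);
      cb(null, chunk);
    },
  });
  head.pipe(body);

  // Start the upload immediately so `body` is drained as data arrives.
  new Upload({
    client: s3,
    params: { Bucket: 'my-bucket', Key: `tmp/${randomUUID()}`, Body: body },
  })
    .done()
    .then(() => console.log('uploaded, sha256 =', hash.digest('hex')))
    .catch((err) => console.error('upload failed', err));

  // formidable writes the incoming file into the stream we return (the head).
  return head;
}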

Uploading files in a folder with react/express/multer

I'm trying to upload big files with express and multer.
The request callback sends 200, but I don't really know how to upload my files into a specific folder. I tried two ways without success.
The first one:
app.post('/upload', (req, res, next) => {
  let myFile = req.files;
  let i = 0;
  for (i; i < myFile.length; i++) {
    let fileName = myFile[i].name;
    myFile[i].mv(`${__dirname}/uploads/${fileName}`, function(err) {
      if (err) {
        return res.status(500).send(err);
      }
      res.json({ file: `uploads/${fileName}` });
    });
  }
});
This code returns 500.
The second way (tried with only one file):
app.post('/uploader', (req, res, next) => {
  var file = req.files.file;
  // file.mv(`${__dirname}/uploads/${file.name}`), function(err) {
  //   if (err) {
  //     return res.status(500).send(err);
  //   }
  // }
  console.log('file');
  console.log(file[0]);
  fs.rename(req.file[0], '~/dev/file-upload/backend/uploads' + file[0].name, function (err) {
    if (err) throw err;
    console.log('Move complete');
  });
});
This code doesn't return any error, but it doesn't put the file in the folder.
And finally, my client-side code:
handleUploadFile(event) {
  event.preventDefault();
  const data = new FormData();
  let c = this.uploadInput.files.length;
  console.log("c = ", c);
  console.log("mydata :", this.uploadInput.files);
  for (var i = 0; i < c; i++) {
    data.append('filename', this.uploadInput.files[i].name);
    data.append('file', this.uploadInput.files[i]);
  }
  var options = {
    method: 'post',
    body: data,
  };
  fetch(apiBaseUrl + '/uploader', options).then((response) => {
    console.log('res', response);
  }).catch(function (err) {
    console.error("!!! error :", err);
  });
}

render() {
  return (
    <form onSubmit={this.handleUploadFile}>
      <div>
        <input ref={(ref) => { this.uploadInput = ref; }} type="file" multiple="multiple" />
      </div>
      <br />
      <div>
        <button>Upload</button>
      </div>
    </form>
  );
}
Thanks in advance for your help :)
I fixed my issue.
body-parser doesn’t support parsing the body of a multipart/form-data request.
The following link gave me all the answers needed.
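For reference, a minimal multer setup that stores uploads in a specific folder might look like the sketch below; the route and field name mirror the client code above, and the paths are assumptions (the uploads directory must already exist, since multer does not create it when destination is a function).

const express = require('express');
const multer = require('multer');
const path = require('path');

const app = express();

// Store uploads on disk under ./uploads with a timestamped original name.
const storage = multer.diskStorage({
  destination: (req, file, cb) => cb(null, path.join(__dirname, 'uploads')),
  filename: (req, file, cb) => cb(null, `${Date.now()}-${file.originalname}`),
});
const upload = multer({ storage });

// The 'file' field name matches the FormData key used by the client above.
app.post('/uploader', upload.array('file'), (req, res) => {
  res.json({ files: req.files.map((f) => `uploads/${f.filename}`) });
});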

Calculate a file hash and save the file

Users upload files into my express app. I need to calculate the hash of the uploaded file and then write the file to disk using the calculated hash as the filename. I am trying to do it using the following code:
function storeFileStream(file, next) {
  createFileHash(file, function(err, hash) {
    if (err) {
      return next(err);
    }
    var fileName = path.join(config.storagePath, hash),
        stream = fs.createWriteStream(fileName);
    stream.on('error', function(err) {
      return next(err);
    });
    stream.on('finish', function() {
      return next();
    });
    file.pipe(stream);
  });
}

function createFileHash(file, next) {
  var hash = crypto.createHash('sha1');
  hash.setEncoding('hex');
  file.on('error', function(err) {
    return next(err);
  });
  file.on('end', function(data) {
    hash.end();
    return next(null, hash.read());
  });
  file.pipe(hash);
}
The problem is that after I calculate the file hash, the size of the written file is 0. What is the best way to solve this task?
Update
Following #poke's suggestion, I tried to duplicate my stream. Now my code is:
function storeFileStream(file, next) {
  var s1 = new pass;
  var s2 = new pass;
  file.pipe(s1);
  file.pipe(s2);

  createFileHash(s1, function(err, hash) {
    if (err) {
      return next(err);
    }
    var fileName = path.join(config.storagePath, hash),
        stream = fs.createWriteStream(fileName);
    stream.on('error', function(err) {
      return next(err);
    });
    stream.on('finish', function() {
      return next();
    });
    s2.pipe(stream);
  });
}

function createFileHash(file, next) {
  var hash = crypto.createHash('sha1');
  hash.setEncoding('hex');
  file.on('error', function(err) {
    return next(err);
  });
  file.on('end', function(data) {
    hash.end();
    return next(null, hash.read());
  });
  file.pipe(hash);
}
The problem with this code is that the end and finish events are not emitted. If I comment out file.pipe(s2);, the events are emitted, but then I get my original problem again.
This code fixes the problem:
var s1 = new passThrough,
    s2 = new passThrough;

file.on('data', function(data) {
  s1.write(data);
  s2.write(data);
});

file.on('end', function() {
  s1.end();
  s2.end();
});
The correct and simple way should be as follows:
we should resume the PassThrough streams that the file is piped into
function storeFileStream(file, directory, version, reject, resolve) {
  const fileHashSource = new PassThrough();
  const writeSource = new PassThrough();
  file.pipe(fileHashSource);
  file.pipe(writeSource);

  // this is the key point, see https://nodejs.org/api/stream.html#stream_three_states
  fileHashSource.resume();
  writeSource.resume();

  createFileHash(fileHashSource, function(err, hash) {
    if (err) {
      return reject(err);
    }
    const fileName = path.join(directory, version + '_' + hash.slice(0, 8) + '.zip');
    const writeStream = fs.createWriteStream(fileName);
    writeStream.on('error', function(err) {
      return reject(err);
    });
    writeStream.on('finish', function() {
      return resolve();
    });
    writeSource.pipe(writeStream);
  });
}

function createFileHash(readStream, next) {
  const hash = crypto.createHash('sha1');
  hash.setEncoding('hex');
  hash.on('error', function(err) {
    return next(err);
  });
  hash.on('finish', function(data) {
    return next(null, hash.read());
  });
  readStream.pipe(hash);
}
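A hypothetical usage example, wrapping the callback-style helper above in a Promise; the directory and version values are placeholders, and bb is assumed to be a busboy instance as in the first snippet on this page.

function storeFile(file, directory, version) {
  return new Promise((resolve, reject) => {
    storeFileStream(file, directory, version, reject, resolve);
  });
}

bb.on('file', (name, file, info) => {
  storeFile(file, './uploads', 'v1')
    .then(() => console.log('file stored'))
    .catch((err) => console.error('store failed', err));
});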
You could use the async module (not tested but should work):
async.waterfall([
  function(done) {
    var hash = crypto.createHash('sha1');
    hash.setEncoding('hex');
    file.on('error', function(err) {
      done(err);
    });
    file.on('end', function(data) {
      hash.end();
      done(null, hash.read());
    });
    file.pipe(hash);
  },
  function(hash, done) {
    var fileName = path.join(config.storagePath, hash),
        stream = fs.createWriteStream(fileName);
    stream.on('error', function(err) {
      done(err);
    });
    stream.on('finish', function() {
      done(null);
    });
    file.pipe(stream);
  }
], function (err) {
  console.log("Everything is done!");
});
