How do I pipe the stream from my downloadWithBuffers function into the res of my sequence_stream API? I can get the stream to work when I run the downloadWithBuffers function directly in the file passing in the params with correct bucket and object key but I cant seem to figure out how to pipe the results to the APIs res
I appreciate any help
const S3 = require('aws-sdk').S3
const awsSettings = {
region: 'us-east-2',
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
}
const s3 = new S3(awsSettings)
function downloadWithBuffers(params) {
s3.getObject(params)
.on('httpData', function (chunk) {
console.log(chunk)
})
.on('httpDone', function () {
console.log('Done streaming file')
})
.send()
}
module.exports = { downloadWithBuffers }
Controller for route and API
const getStream = required('../functions/getStream')
// Serve stream for a specific image file.
exports.sequence_stream = (req, res) => {
try {
const params = { Bucket: '----', Key: '----' }
getStream.downloadWithBuffers(params).pipe(res)
} catch (error) {
res.send(error)
}
}
Related
I have a Lambda Node function which is called by a webhook from a thirdparty server. The TP server sends a file download URL and some other data.
The download URL is temporary, so I need to push the file to an S3 for long term storage.
The rudimentary function below, downloads the file and then tries to upload to the S3.
This works when the file is a plain text, but images/pdfs etcs are corrupted when they reach the S3.
const AWS = require("aws-sdk");
const https = require('https');
const path = require('path');
const s3 = new AWS.S3({apiVersion: '2006-03-01'});
exports.handler = async (event, context, callback) => {
var payload = event.body;
const url_host = payload.host;
const url_path = payload.path; //URL of file which needs to be downloaded
const get_params = {
host: url_host,
path: url_path,
port: 443,
method: 'GET',
headers: { }
};
var resp = await https_get_processor(get_params); //File downloaded here
var uploadParams = {
Bucket: "bucket_name",
Key: '',
Body: resp //Preparing to upload the received file
};
uploadParams.Key = path.basename(url_path); //Generating filename
s3.upload (uploadParams, function (err, data) {
if (err) {
console.log("Error", err);
} if (data) {
console.log("Upload Success", data.Location);
}
});
response = {...} //Generic Response
return response;
};
async function https_get_processor(get_params)
{
return await new Promise((resolve, reject) =>
{
var data = "";
const req = https.request(get_params, res => {
res.on('data', chunk => { data += chunk })
res.on('end', () =>
{
resolve(data);
})
});
req.on('error', (err) => {
reject(err);
});
req.end();
});
}
Response is a Buffer in such case, so try changing request processing by pushing each chunk into an array, and then merge Buffer chunks and pass them.
Try this:
var data = [];
const req = https.request(get_params, res => {
res.on('data', chunk => data.push(chunk))
res.on('end', () =>
{
resolve(Buffer.concat(data));
})
I am building a NodeJS Express API that accepts a filepath as a parameter, gets an object from S3 and then sends the file back in the API response.
I am using the NodeJS Amazon AWS S3 SDK #aws-sdk/client-s3.
I have everything working up to returning the file in the API response.
Here is the controller. This is working to get the object as a readable stream from S3.
const consoleLogger = require('../logger/logger.js').console;
const { S3Client, GetObjectCommand } = require('#aws-sdk/client-s3');
const s3client = new S3Client({ region: process.env.AWS_REGION || 'us-east-1' });
const awsCtrl = {};
awsCtrl.getObject = async (key) => {
// Get object from Amazon S3 bucket
let data;
try {
// Data is returned as a ReadableStream
data = await s3client.send(new GetObjectCommand({ Bucket: process.env.AWS_BUCKET, Key: key }));
console.log("Success", data);
} catch (e) {
consoleLogger.error("AWS S3 error: ", e);
const awsS3Error = {
name: e.name || null,
status: e.$metadata.httpStatusCode || 500
};
throw awsS3Error;
}
return data;
}
module.exports = awsCtrl;
Here is where the controller is called.
router.get('/:fileId', (req, res, next) => {
return filesCtrl.deliverFile(req, res);
});
filesCtrl.deliverFile = async (req, res) => {
/* Get object from AWS S3 */
let fileStream;
try {
fileStream = await awsCtrl.getObject(filePath);
} catch (e) {
consoleLogger.error(`Unable to get object from AWS S3`, e);
if (e.status && e.status === 404) {
result.error = `Not found`;
result.status = 404;
return res.status(result.status).json(result);
}
return res.status(e.status || 500).json(result);
}
// Set file header
res.attachment(filename);
// HOW TO RETURN THE FILE IN API RESPONSE TO DOWNLOAD?
// not working
//return fileStream.pipe(res);
}
Question:
How do I return the file in the Node JS Express API response to download?
Note: I've already tried other suggestions in a few other questions but none of the solutions have worked to return the file in the API response.
I tried the answer in this question here but the file is downloaded on the server side, the file needs to be returned in the API response. How to save file from S3 using aws-sdk v3
Also the answer in this question here is using the v2 version of the SDK and this is v3. There is no .createReadStream() method anymore. NodeJS download file from AWS S3 Bucket
Server Side
const aws = require('aws-sdk');
router.get('/getfilefroms3', async (req, res, next) => {
aws.config.update({
secretAccessKey: config.secret_access_key,
accessKeyId: config.access_key_id,
signatureVersion: config.signature_version,
region: config.region
})
const s3 = new aws.S3({ });
var params = { Bucket: config.sample_bucket_name, Key: req.query.filename };
s3.getObject(params, function (err, data) {
if (err) {
res.status(200);
res.end('Error Fetching File');
}
else {
res.attachment(params.Key); // Set Filename
res.type(data.ContentType); // Set FileType
res.send(data.Body); // Send File Buffer
}
});
})
Client Side
If you are using Web Application you can use any HTTP REST API Client like Axios or Fetch, The Download Manager will capture the file.
curl --location --request GET 'http://localhost:5001/getfilefroms3?filename=sample.pdf'
If you are using NodeJS Application
var http = require('http');
var fs = require('fs');
var download = function (url, destination, callback) {
var file = fs.createWriteStream(destination);
http.get(url, function (response) {
response.pipe(file);
file.on('finish', function () {
file.close(callback);
});
});
}
var fileToDownload = "sample.pdf"
download("http://localhost:5001/getfilefroms3?filename=" + fileToDownload, "./" + fileToDownload, () => { console.log("File Downloaded") })
I'm trying to upload a base64 encoded image to S3 through this route, but the callbacks get completely ignored and the code jumps straight to res.json("SUCCESS");
route
AWS.config.update({
accessKeyId: "xxxxxxxxxxxxxx",
secetAccessKey: "xxxxxxxxxxxxxxxxxxxxxx",
region: "us-east-1"
});
const s3 = new AWS.S3();
....
router.post("/imageupload", async (req, res) => {
const base64 = req.body.base64;
try {
const params = {
Bucket: process.env.bucketName,
Key: "images/newImage",
Body: base64
};
await s3.putObject(params, function(err, data) {
if (err) res.json(err);
else res.json(data);
});
res.json("SUCCESS");
} catch (e) {
console.log(e.message);
res.status(500).json(e.message);
}
});
Any help is much appreciated thanks!
EDIT FIXED:
I figured out what the problem was:
I had recently reformatted my computer which meant I had to reinstall AWS cli AND reconfigure aws creds.
That was it.
The AWS documentation for using-promises.
var s3 = new AWS.S3({apiVersion: '2006-03-01', region: 'us-west-2'});
var params = {
Bucket: 'bucket',
Key: 'example2.txt',
Body: 'Uploaded text using the promise-based method!'
};
var putObjectPromise = s3.putObject(params).promise();
putObjectPromise.then(function(data) {
console.log('Success');
}).catch(function(err) {
console.log(err);
});
You can also promisify all functions by using a library such as bluebird
AWS.config.setPromisesDependency(require('bluebird'));
Here's an example using your code
router.post("/imageupload", async (req, res) => {
const base64 = req.body.base64;
try {
const params = {
Bucket: process.env.bucketName,
Key: "images/newImage",
Body: base64
};
const data = await s3.putObject(params).promise()
res.json(data);
} catch (e) {
console.log(e.message);
res.status(500).json(e.message);
}
});
I am trying to download a file from s3 and directly put into into a file on the filesystem using a writeStream in nodejs. This is my code:
downloadFile = function(bucketName, fileName, localFileName) {
//Donwload the file
var bucket = new AWS.S3({
params: { Bucket: bucketName },
signatureVersion: 'v4'
});
var file = require('fs').createWriteStream(localFileName);
var request = bucket.getObject({ Key: fileName });
request.createReadStream().pipe(file);
request.send();
return request.promise();
}
Running this function I get this error:
Uncaught Error: write after end
What is happening? Is the file closed before the write is finished? Why?
var s3 = new AWS.S3({
accessKeyId: accessKeyId,
secretAccessKey: secretAccessKey
}),
file = fs.createWriteStream(localFileName);
s3
.getObject({
Bucket: bucketName,
Key: fileName
})
.on('error', function (err) {
console.log(err);
})
.on('httpData', function (chunk) {
file.write(chunk);
})
.on('httpDone', function () {
file.end();
})
.send();
Also AWS notes an example of using promises like this:
const s3 = new aws.S3({apiVersion: '2006-03-01'});
const params = { Bucket: 'yourBucket', Key: 'yourKey' };
const file = require('fs').createWriteStream('./local_file_path');
const s3Promise = s3.getObject(params).promise();
s3Promise.then((data) => {
file.write(data.Body, () => {
file.end();
fooCallbackFunction();
});
}).catch((err) => {
console.log(err);
});
This works perfect for me.
https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/using-promises.html
EDIT: (15 Feb 2018) Updated the code, as you have to end the write stream (file.end()).
I have combined the above response with a typical gunzip operation in pipe:
var s3DestFile = "./archive.gz";
var s3UnzippedFile = './deflated.csv';
var gunzip = zlib.createGunzip();
var file = fs.createWriteStream( s3DestFile );
s3.getObject({ Bucket: Bucket, Key: Key })
.on('error', function (err) {
console.log(err);
})
.on('httpData', function (chunk) {
file.write(chunk);
})
.on('httpDone', function () {
file.end();
console.log("downloaded file to" + s3DestFile);
fs.createReadStream( s3DestFile )
.on('error', console.error)
.on('end', () => {
console.log("deflated to "+s3UnzippedFile)
})
.pipe(gunzip)
.pipe(fs.createWriteStream( s3UnzippedFile ))
})
.send();
I am attempting to read a file that is in a aws s3 bucket using
fs.readFile(file, function (err, contents) {
var myLines = contents.Body.toString().split('\n')
})
I've been able to download and upload a file using the node aws-sdk, but I am at a loss as to how to simply read it and parse the contents.
Here is an example of how I am reading the file from s3:
var s3 = new AWS.S3();
var params = {Bucket: 'myBucket', Key: 'myKey.csv'}
var s3file = s3.getObject(params)
You have a couple options. You can include a callback as a second argument, which will be invoked with any error message and the object. This example is straight from the AWS documentation:
s3.getObject(params, function(err, data) {
if (err) console.log(err, err.stack); // an error occurred
else console.log(data); // successful response
});
Alternatively, you can convert the output to a stream. There's also an example in the AWS documentation:
var s3 = new AWS.S3({apiVersion: '2006-03-01'});
var params = {Bucket: 'myBucket', Key: 'myImageFile.jpg'};
var file = require('fs').createWriteStream('/path/to/file.jpg');
s3.getObject(params).createReadStream().pipe(file);
This will do it:
new AWS.S3().getObject({ Bucket: this.awsBucketName, Key: keyName }, function(err, data)
{
if (!err)
console.log(data.Body.toString());
});
Since you seem to want to process an S3 text file line-by-line. Here is a Node version that uses the standard readline module and AWS' createReadStream()
const readline = require('readline');
const rl = readline.createInterface({
input: s3.getObject(params).createReadStream()
});
rl.on('line', function(line) {
console.log(line);
})
.on('close', function() {
});
If you are looking to avoid the callbacks you can take advantage of the sdk .promise() function like this:
const s3 = new AWS.S3();
const params = {Bucket: 'myBucket', Key: 'myKey.csv'}
const response = await s3.getObject(params).promise() // await the promise
const fileContent = response.Body.toString('utf-8'); // can also do 'base64' here if desired
I'm sure the other ways mentioned here have their advantages but this works great for me. Sourced from this thread (see the last response from AWS): https://forums.aws.amazon.com/thread.jspa?threadID=116788
here is the example which i used to retrive and parse json data from s3.
var params = {Bucket: BUCKET_NAME, Key: KEY_NAME};
new AWS.S3().getObject(params, function(err, json_data)
{
if (!err) {
var json = JSON.parse(new Buffer(json_data.Body).toString("utf8"));
// PROCESS JSON DATA
......
}
});
I couldn't figure why yet, but the createReadStream/pipe approach didn't work for me. I was trying to download a large CSV file (300MB+) and I got duplicated lines. It seemed a random issue. The final file size varied in each attempt to download it.
I ended up using another way, based on AWS JS SDK examples:
var s3 = new AWS.S3();
var params = {Bucket: 'myBucket', Key: 'myImageFile.jpg'};
var file = require('fs').createWriteStream('/path/to/file.jpg');
s3.getObject(params).
on('httpData', function(chunk) { file.write(chunk); }).
on('httpDone', function() { file.end(); }).
send();
This way, it worked like a charm.
I prefer Buffer.from(data.Body).toString('utf8'). It supports encoding parameters. With other AWS services (ex. Kinesis Streams) someone may want to replace 'utf8' encoding with 'base64'.
new AWS.S3().getObject(
{ Bucket: this.awsBucketName, Key: keyName },
function(err, data) {
if (!err) {
const body = Buffer.from(data.Body).toString('utf8');
console.log(body);
}
}
);
I had exactly the same issue when downloading from S3 very large files.
The example solution from AWS docs just does not work:
var file = fs.createWriteStream(options.filePath);
file.on('close', function(){
if(self.logger) self.logger.info("S3Dataset file download saved to %s", options.filePath );
return callback(null,done);
});
s3.getObject({ Key: documentKey }).createReadStream().on('error', function(err) {
if(self.logger) self.logger.error("S3Dataset download error key:%s error:%#", options.fileName, error);
return callback(error);
}).pipe(file);
While this solution will work:
var file = fs.createWriteStream(options.filePath);
s3.getObject({ Bucket: this._options.s3.Bucket, Key: documentKey })
.on('error', function(err) {
if(self.logger) self.logger.error("S3Dataset download error key:%s error:%#", options.fileName, error);
return callback(error);
})
.on('httpData', function(chunk) { file.write(chunk); })
.on('httpDone', function() {
file.end();
if(self.logger) self.logger.info("S3Dataset file download saved to %s", options.filePath );
return callback(null,done);
})
.send();
The createReadStream attempt just does not fire the end, close or error callback for some reason. See here about this.
I'm using that solution also for writing down archives to gzip, since the first one (AWS example) does not work in this case either:
var gunzip = zlib.createGunzip();
var file = fs.createWriteStream( options.filePath );
s3.getObject({ Bucket: this._options.s3.Bucket, Key: documentKey })
.on('error', function (error) {
if(self.logger) self.logger.error("%#",error);
return callback(error);
})
.on('httpData', function (chunk) {
file.write(chunk);
})
.on('httpDone', function () {
file.end();
if(self.logger) self.logger.info("downloadArchive downloaded %s", options.filePath);
fs.createReadStream( options.filePath )
.on('error', (error) => {
return callback(error);
})
.on('end', () => {
if(self.logger) self.logger.info("downloadArchive unarchived %s", options.fileDest);
return callback(null, options.fileDest);
})
.pipe(gunzip)
.pipe(fs.createWriteStream(options.fileDest))
})
.send();
With the new version of sdk, the accepted answer does not work - it does not wait for the object to be downloaded. The following code snippet will help with the new version:
// dependencies
const AWS = require('aws-sdk');
// get reference to S3 client
const s3 = new AWS.S3();
exports.handler = async (event, context, callback) => {
var bucket = "TestBucket"
var key = "TestKey"
try {
const params = {
Bucket: Bucket,
Key: Key
};
var theObject = await s3.getObject(params).promise();
} catch (error) {
console.log(error);
return;
}
}
If you want to save memory and want to obtain each row as a json object, then you can use fast-csv to create readstream and can read each row as a json object as follows:
const csv = require('fast-csv');
const AWS = require('aws-sdk');
const credentials = new AWS.Credentials("ACCESSKEY", "SECRETEKEY", "SESSIONTOKEN");
AWS.config.update({
credentials: credentials, // credentials required for local execution
region: 'your_region'
});
const dynamoS3Bucket = new AWS.S3();
const stream = dynamoS3Bucket.getObject({ Bucket: 'your_bucket', Key: 'example.csv' }).createReadStream();
var parser = csv.fromStream(stream, { headers: true }).on("data", function (data) {
parser.pause(); //can pause reading using this at a particular row
parser.resume(); // to continue reading
console.log(data);
}).on("end", function () {
console.log('process finished');
});
var fileStream = fs.createWriteStream('/path/to/file.jpg');
var s3Stream = s3.getObject({Bucket: 'myBucket', Key: 'myImageFile.jpg'}).createReadStream();
// Listen for errors returned by the service
s3Stream.on('error', function(err) {
// NoSuchKey: The specified key does not exist
console.error(err);
});
s3Stream.pipe(fileStream).on('error', function(err) {
// capture any errors that occur when writing data to the file
console.error('File Stream:', err);
}).on('close', function() {
console.log('Done.');
});
Reference: https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/requests-using-stream-objects.html