Trying to download a file from aws s3 into nodejs writeStream - node.js

I am trying to download a file from S3 and directly put it into a file on the filesystem using a writeStream in Node.js. This is my code:
// Downloads an S3 object straight into a local file via streams.
// Returns a promise that settles once the file has been fully written.
downloadFile = function(bucketName, fileName, localFileName) {
  var bucket = new AWS.S3({
    params: { Bucket: bucketName },
    signatureVersion: 'v4'
  });
  var file = require('fs').createWriteStream(localFileName);
  return new Promise(function(resolve, reject) {
    // createReadStream() sends the request by itself. The original also
    // called request.send() and request.promise(), which re-sent the same
    // request and caused the "write after end" error on the file stream.
    var readStream = bucket.getObject({ Key: fileName }).createReadStream();
    readStream.on('error', reject);
    file.on('error', reject);
    // Resolve on the writer's 'finish' so the data is flushed before callers proceed.
    file.on('finish', resolve);
    readStream.pipe(file);
  });
}
Running this function I get this error:
Uncaught Error: write after end
What is happening? Is the file closed before the write is finished? Why?

// Stream the S3 object to disk by writing each HTTP chunk as it arrives.
var s3 = new AWS.S3({
    accessKeyId: accessKeyId,
    secretAccessKey: secretAccessKey
});
var file = fs.createWriteStream(localFileName);

var request = s3.getObject({
    Bucket: bucketName,
    Key: fileName
});
request.on('error', function (err) {
    console.log(err);
});
request.on('httpData', function (chunk) {
    file.write(chunk);
});
request.on('httpDone', function () {
    // The response is complete: close the file.
    file.end();
});
request.send();

Also AWS notes an example of using promises like this:
// Buffer the whole object via .promise(), then write it out in one go.
const s3 = new aws.S3({apiVersion: '2006-03-01'});
const params = { Bucket: 'yourBucket', Key: 'yourKey' };
const file = require('fs').createWriteStream('./local_file_path');

const s3Promise = s3.getObject(params).promise();
s3Promise
  .then((data) => {
    // data.Body holds the full object; close the stream once the write lands.
    file.write(data.Body, () => {
      file.end();
      fooCallbackFunction();
    });
  })
  .catch((err) => {
    console.log(err);
  });
This works perfectly for me.
https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/using-promises.html
EDIT: (15 Feb 2018) Updated the code, as you have to end the write stream (file.end()).

I have combined the above response with a typical gunzip operation in pipe:
// Download a gzipped object from S3 to disk, then decompress it to CSV.
var s3DestFile = "./archive.gz";
var s3UnzippedFile = './deflated.csv';
var gunzip = zlib.createGunzip();
var file = fs.createWriteStream( s3DestFile );

s3.getObject({ Bucket: Bucket, Key: Key })
    .on('error', function (err) {
        console.log(err);
    })
    .on('httpData', function (chunk) {
        file.write(chunk);
    })
    .on('httpDone', function () {
        file.end();
        // Fixed log message: the original lacked a space before the path.
        console.log("downloaded file to " + s3DestFile);
        // Only start inflating once the archive is fully on disk.
        fs.createReadStream( s3DestFile )
            .on('error', console.error)
            .on('end', () => {
                console.log("deflated to " + s3UnzippedFile)
            })
            .pipe(gunzip)
            .pipe(fs.createWriteStream( s3UnzippedFile ))
    })
    .send();

Related

Nodejs express how to pipe stream from function to API res

How do I pipe the stream from my downloadWithBuffers function into the res of my sequence_stream API? I can get the stream to work when I run the downloadWithBuffers function directly in the file, passing in params with the correct bucket and object key, but I can't seem to figure out how to pipe the results to the API's res.
I appreciate any help
const S3 = require('aws-sdk').S3
const awsSettings = {
    region: 'us-east-2',
    accessKeyId: process.env.AWS_ACCESS_KEY_ID,
    secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
}
const s3 = new S3(awsSettings)

// Returns a readable stream of the object body so callers can .pipe() it.
// The original returned nothing, so `downloadWithBuffers(params).pipe(res)`
// threw "Cannot read property 'pipe' of undefined".
function downloadWithBuffers(params) {
    const request = s3.getObject(params)
    request.on('httpData', function (chunk) {
        console.log(chunk)
    })
    request.on('httpDone', function () {
        console.log('Done streaming file')
    })
    // createReadStream() sends the request and returns the body stream.
    return request.createReadStream()
}
module.exports = { downloadWithBuffers }
Controller for route and API
const getStream = required('../functions/getStream')
// Serve stream for a specific image file.
exports.sequence_stream = (req, res) => {
try {
const params = { Bucket: '----', Key: '----' }
getStream.downloadWithBuffers(params).pipe(res)
} catch (error) {
res.send(error)
}
}

SFTP createReadStream not writing into s3 bucket

I'm trying to read a file from an sftp server and stream that file into an s3 bucket. I'm not able to stream the file into the s3 bucket. Yes the file path is exactly correct. I'm not sure what I am doing wrong. When I run the code, it doesn't even try to upload the stream into the bucket because I don't get any upload console logs.
const aws = require('aws-sdk');
const s3 = new aws.S3();
const Client = require('ssh2').Client;
const conn = new Client();
const connSettings = {
host: event.serverHost,
port: event.port,
username: event.username,
password: event.password
};
exports.handler = function(event) {
conn.on('ready', function() {
conn.sftp(function(err, sftp) {
if (err) {
console.log("Errror in connection", err);
conn.end()
} else {
console.log("Connection established");
let readStream = sftp.createReadStream(remoteFilePath);
console.log(`Read Stream ${readStream}`)
// readStream outputs [object Object] to the console
const uploadParams = {
Bucket: s3Bucket,
Key: 'fileName',
Body: readStream
}
s3.upload (uploadParams, function (err, data) {
if (err) {
console.log("Error", err);
} if (data) {
console.log("Upload Success", data.Location);
}
});
conn.end()
}
});
}).connect(connSettings);
}
I want to be able to stream the readStream from sftp server into s3 bucket.
conn.end() ends the connection immediately. Move that to inside your s3.upload() callback so that your data actually gets transferred before the connection is closed.
This is a working Node 12 example of what I believe you are trying to accomplish:
const aws = require('aws-sdk');
const s3 = new aws.S3();
const Client = require('ssh2').Client;
const conn = new Client();
const { PassThrough } = require('stream');

conn.on('ready', function () {
    conn.sftp(function (err, sftp) {
        // Bridge SFTP -> S3 through an in-memory pass-through stream.
        const transferStream = new PassThrough();

        const uploadParams = {
            Bucket: s3Bucket,
            Key: 'test_file.txt',
            Body: transferStream
        };
        s3.upload(uploadParams, function (err, data) {
            if (err) {
                console.log(`Upload error: ${err}`);
            }
            if (data) {
                console.log(`Uploaded to [${data.Location}].`);
            }
        });

        const remoteRead = sftp.createReadStream(remoteFilePath);
        // pipe() returns transferStream; 'end' fires once all data flowed.
        remoteRead.pipe(transferStream).on('end', function () {
            transferStream.end();
            conn.end();
        });
    });
}).connect(connectionSettings);

aws s3 upload uploads only 128 KB

I need to upload a File from a SFTP to a S3 Bucket.
I'm using aws-sdk, defining a bucket S3 and then upload a Stream.
The problem is the file on the bucket is only 128 KB of the starting 8 MB.
Am I missing something?
I think the problem is that the upload method doesn't pass through the whole stream and stops at the first "step" of 128 KB.
Maybe some "await" put wrong?
// Uploads a stream to S3; the returned promise settles only after the
// upload has actually completed (or failed).
export const saveFileToS3 = (stream: any, filePath : string) => {
    return new Promise<string>(async (resolve: Function, reject: Function) => {
        try {
            const fileName = filePath.split('/')[3];
            const s3 = new AWS.S3({
                endpoint: 's3-eu-central-1.amazonaws.com',
                signatureVersion: 'v4',
                accessKeyId: process.env.AWS_ACCESS_KEY_ID,
                secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY
            });
            let params = {
                Bucket: process.env.S3_BUCKET_NAME,
                Key: process.env.S3_PATH + fileName,
                Body: stream
            };
            var options = {
                partSize: 1500 * 1024 * 1024,
                queueSize: 100
            };
            // Use .promise() instead of a callback: the original mixed
            // `await` with a callback, so the outer promise resolved before
            // the upload finished, and a `throw s3Err` inside the callback
            // could never reach this try/catch.
            const data = await s3.upload(params, options).promise();
            console.log(`File uploaded successfully at ${data.Location}`)
            resolve(params);
        } catch (error) {
            reject(error);
        }
    });
};
// Connects to the SFTP server and resolves with a stream of `filePath`.
// Rewritten as a plain async function: the original wrapped an async
// executor in `new Promise(...)` (the explicit-construction antipattern),
// where a throw in the executor could be silently swallowed.
export const getFileFromFtp = async (filePath: string, ftpConf: Appp.FTPConf): Promise<Stream> => {
    logger.info(`getFileFromFtp - file path: ${filePath}`);
    let client: SFTP;
    try {
        client = new SFTP();
        const msg = await client.connect(ftpConf);
        logger.info(`ftp client connected to ${ftpConf.host}`, msg);
        const stream: Stream = await client.get(filePath);
        // NOTE(review): ending the client here assumes `get` has fully
        // buffered the data; if the stream is lazy, end() may truncate it.
        client.end();
        return stream;
    } catch (error) {
        logger.error("error: " + error.message);
        client && client.end();
        throw error;
    }
};
I expect the s3.upload to upload all the stream.

Download images from s3 bucket using Promise in node.js

I want to download some image files from s3 bucket on my local system using Promises in node.js.
var params = {
    Bucket: 'bucket_name', // fixed: the original's unbalanced quote was a syntax error
    Key: 'key'
};
var fileStream = fs.createWriteStream('path/to/file.jpg');
I tried this which is working
s3.getObject(params).createReadStream.pipe(fileStream);
But I want my code look like this
// Fetches the entire object into memory; `data.Body` is a Buffer.
return s3.getObject(params).promise()
.then(function(data) {
//console.log(data.Body);
// No idea about this section
// NOTE(review): data.Body (a Buffer) would be written to disk here,
// e.g. via fs.writeFile or fileStream.write.
})
.catch(function(err) {
throw err;
});
I have to use Promise to ensure all images should be downloaded.
One possible solution is to use bluebird and create a function that returns a promise on the end of the stream:
const B = require('bluebird');

// Streams an S3 object into `fileStream`, resolving when the download ends.
function downloadFromS3 (object) {
    // Fixed: use the function's own `object` parameter (the original ignored
    // it and read an outer `params`), and replace the deprecated
    // B.Promise.defer() with a plain promise constructor.
    return new B.Promise((resolve, reject) => {
        var stream = s3.getObject(object).createReadStream()
        stream.pipe(fileStream);
        stream.on('error', (e) => reject(e))
        stream.on('end', () => resolve())
    });
}
downloadFromS3(params)
.then(() => console.log('finished'))
.catch(() => console.log('failed'))
Not sure if this code specifically would work, but it may give you a direction to look into.
The below snippet worked for me;
// Downloads an S3 object to `dest`, resolving once the file is fully written.
async function getObjectStreamSync(params, dest) {
    return new Promise((resolve, reject) => {
        // create read stream for object
        let stream = s3.getObject(params).createReadStream();
        var fileStream = fs.createWriteStream(dest);
        stream.pipe(fileStream);
        // Reject with the original error object — wrapping it in
        // `new Error(err)` stringified it and lost code/stack.
        stream.on('error', (err) => reject(err));
        fileStream.on('error', (err) => reject(err));
        // Resolve on the writer's 'finish' so bytes are flushed to disk;
        // the reader's 'end' fires before the write side is done.
        fileStream.on('finish', () => resolve());
    });
}
await getObjectStreamSync(params, "path/to/file/file.ext");
Here, the stream is wrapped in a Promise, which is resolved or rejected by listening to the emitted events.
streamToPromise = require('stream-to-promise');

var fileStream = fs.createWriteStream('path/to/file.jpg');
streamToPromise(fileStream).then(function () {
    console.log('Image saved to file.');
});
// Fixed: createReadStream must be *called* — as a bare property reference
// it is a function object and `.pipe` on it is undefined.
s3.getObject(params).createReadStream().pipe(fileStream);
Here's a native promise solution with error detection on the read stream and on the write stream.
/**
 * Resolves with the event name when `stream` emits 'end' or 'finish';
 * rejects with the emitted error on the first 'error'.
 */
function streamPromise(stream) {
    return new Promise((resolve, reject) => {
        const settle = (label) => () => resolve(label);
        stream.on('end', settle('end'));
        stream.on('finish', settle('finish'));
        stream.on('error', reject);
    });
}
// Streams an S3 object to `outputPath`; returns a promise (via the file-wide
// streamPromise helper) that settles when the write stream finishes.
async function s3Download(srcBucket, srcKey, outputPath) {
    const objReq = s3.getObject({
        Bucket: srcBucket,
        Key: srcKey
    });
    const outStream = fs.createWriteStream(outputPath);
    const readStream = objReq.createReadStream();
    // Forward read-side failures onto the write stream so a single
    // streamPromise(outStream) observes errors from either side.
    readStream.on('error', (err) => {
        console.warn('s3download error', err);
        outStream.emit("error", err);
    });
    readStream.pipe(outStream);
    return streamPromise(outStream);
}
Here is a snippet to use async/await with NodeJS 8:
// NOTE(review): the bare `await`s below require an enclosing async function
// (or ESM top-level await in modern Node) — not shown in this snippet.
const AWS = require('aws-sdk');
const fs = require('fs-extra');
const decompress = require('decompress');
const s3 = new AWS.S3();
const s3Params = {
Bucket: s3Location.bucketName,
Key: s3Location.objectKey,
};
// getObject().promise() buffers the whole object; s3Object.Body is a Buffer.
const s3Object = await s3.getObject(s3Params).promise();
await fs.writeFile('myfile.zip', s3Object.Body);
await decompress('myfile.zip', 'myFileDir');
/* The compressed file is retrieved as "myfile.zip".
Content will be extracted in myFileDir directory */

Read file from aws s3 bucket using node fs

I am attempting to read a file that is in a aws s3 bucket using
// fs.readFile yields the raw contents (a Buffer) — it has no `.Body`
// property; that field only exists on an S3 getObject response.
fs.readFile(file, function (err, contents) {
    var myLines = contents.toString().split('\n')
})
I've been able to download and upload a file using the node aws-sdk, but I am at a loss as to how to simply read it and parse the contents.
Here is an example of how I am reading the file from s3:
// Build the GetObject request; nothing is sent until a callback,
// .send(), .promise(), or createReadStream() consumes it.
var s3 = new AWS.S3();
var params = { Bucket: 'myBucket', Key: 'myKey.csv' };
var s3file = s3.getObject(params);
You have a couple options. You can include a callback as a second argument, which will be invoked with any error message and the object. This example is straight from the AWS documentation:
// Callback form: err on failure, the full response object on success.
s3.getObject(params, function (err, data) {
    if (err) {
        console.log(err, err.stack); // an error occurred
    } else {
        console.log(data); // successful response
    }
});
Alternatively, you can convert the output to a stream. There's also an example in the AWS documentation:
// Stream the object body straight into a local file.
var s3 = new AWS.S3({ apiVersion: '2006-03-01' });
var params = { Bucket: 'myBucket', Key: 'myImageFile.jpg' };
var file = require('fs').createWriteStream('/path/to/file.jpg');
s3.getObject(params).createReadStream().pipe(file);
This will do it:
// Fetch the object and print its body as a string.
new AWS.S3().getObject({ Bucket: this.awsBucketName, Key: keyName }, function (err, data) {
    if (!err) {
        console.log(data.Body.toString());
    }
});
It seems you want to process an S3 text file line-by-line. Here is a Node version that uses the standard readline module and AWS' createReadStream():
const readline = require('readline');

// Read the S3 object line by line without buffering the whole file.
const rl = readline.createInterface({
    input: s3.getObject(params).createReadStream()
});
rl.on('line', function (line) {
    console.log(line);
});
rl.on('close', function () {
});
If you are looking to avoid the callbacks you can take advantage of the sdk .promise() function like this:
// Buffer the whole object, then decode it (assumes it fits in memory;
// needs an enclosing async context for the await).
const s3 = new AWS.S3();
const params = { Bucket: 'myBucket', Key: 'myKey.csv' };
const response = await s3.getObject(params).promise(); // await the promise
const fileContent = response.Body.toString('utf-8'); // can also do 'base64' here if desired
I'm sure the other ways mentioned here have their advantages but this works great for me. Sourced from this thread (see the last response from AWS): https://forums.aws.amazon.com/thread.jspa?threadID=116788
Here is an example which I used to retrieve and parse JSON data from S3.
// Retrieve and parse JSON data from S3.
var params = { Bucket: BUCKET_NAME, Key: KEY_NAME };
new AWS.S3().getObject(params, function (err, json_data) {
    if (!err) {
        // Buffer.from replaces the deprecated `new Buffer(...)` constructor.
        var json = JSON.parse(Buffer.from(json_data.Body).toString("utf8"));
        // PROCESS JSON DATA
        // ...... (the original's bare `......` placeholder was a syntax error)
    }
});
I couldn't figure out why yet, but the createReadStream/pipe approach didn't work for me. I was trying to download a large CSV file (300MB+) and I got duplicated lines. It seemed to be a random issue. The final file size varied with each attempt to download it.
I ended up using another way, based on AWS JS SDK examples:
// Write each raw HTTP chunk as it arrives; end the file when the response completes.
var s3 = new AWS.S3();
var params = { Bucket: 'myBucket', Key: 'myImageFile.jpg' };
var file = require('fs').createWriteStream('/path/to/file.jpg');

var request = s3.getObject(params);
request.on('httpData', function (chunk) { file.write(chunk); });
request.on('httpDone', function () { file.end(); });
request.send();
This way, it worked like a charm.
I prefer Buffer.from(data.Body).toString('utf8'). It supports encoding parameters. With other AWS services (ex. Kinesis Streams) someone may want to replace 'utf8' encoding with 'base64'.
new AWS.S3().getObject(
{ Bucket: this.awsBucketName, Key: keyName },
function(err, data) {
if (!err) {
const body = Buffer.from(data.Body).toString('utf8');
console.log(body);
}
}
);
I had exactly the same issue when downloading from S3 very large files.
The example solution from AWS docs just does not work:
// This is the snippet the answer presents as NOT working (no 'end'/'close'
// firing on the createReadStream pipeline) — kept as-is for illustration.
var file = fs.createWriteStream(options.filePath);
file.on('close', function(){
if(self.logger) self.logger.info("S3Dataset file download saved to %s", options.filePath );
return callback(null,done);
});
// NOTE(review): the handler receives `err` but logs/returns `error`, which
// is not defined here; also no Bucket is passed in the getObject params.
s3.getObject({ Key: documentKey }).createReadStream().on('error', function(err) {
if(self.logger) self.logger.error("S3Dataset download error key:%s error:%#", options.fileName, error);
return callback(error);
}).pipe(file);
While this solution will work:
// Download via httpData/httpDone events, ending the file on completion.
var file = fs.createWriteStream(options.filePath);
s3.getObject({ Bucket: this._options.s3.Bucket, Key: documentKey })
    .on('error', function(err) {
        // Fixed: use the handler's own `err` — the original referenced an
        // undefined `error` binding and would throw inside the handler.
        if(self.logger) self.logger.error("S3Dataset download error key:%s error:%#", options.fileName, err);
        return callback(err);
    })
    .on('httpData', function(chunk) { file.write(chunk); })
    .on('httpDone', function() {
        file.end();
        if(self.logger) self.logger.info("S3Dataset file download saved to %s", options.filePath );
        return callback(null,done);
    })
    .send();
The createReadStream attempt just does not fire the end, close or error callback for some reason. See here about this.
I'm using that solution also for writing down archives to gzip, since the first one (AWS example) does not work in this case either:
var gunzip = zlib.createGunzip();
var file = fs.createWriteStream( options.filePath );

// First persist the archive to disk, then inflate it in a second pass.
s3.getObject({ Bucket: this._options.s3.Bucket, Key: documentKey })
    .on('error', function (error) {
        if(self.logger) self.logger.error("%#",error);
        return callback(error);
    })
    .on('httpData', function (chunk) {
        file.write(chunk);
    })
    .on('httpDone', function () {
        file.end();
        if(self.logger) self.logger.info("downloadArchive downloaded %s", options.filePath);
        fs.createReadStream( options.filePath )
            .on('error', function (error) {
                return callback(error);
            })
            .on('end', function () {
                if(self.logger) self.logger.info("downloadArchive unarchived %s", options.fileDest);
                return callback(null, options.fileDest);
            })
            .pipe(gunzip)
            .pipe(fs.createWriteStream(options.fileDest))
    })
    .send();
With the new version of sdk, the accepted answer does not work - it does not wait for the object to be downloaded. The following code snippet will help with the new version:
// dependencies
const AWS = require('aws-sdk');
// get reference to S3 client
const s3 = new AWS.S3();
exports.handler = async (event, context, callback) => {
var bucket = "TestBucket"
var key = "TestKey"
try {
const params = {
Bucket: Bucket,
Key: Key
};
var theObject = await s3.getObject(params).promise();
} catch (error) {
console.log(error);
return;
}
}
If you want to save memory and want to obtain each row as a json object, then you can use fast-csv to create readstream and can read each row as a json object as follows:
const csv = require('fast-csv');
const AWS = require('aws-sdk');

const credentials = new AWS.Credentials("ACCESSKEY", "SECRETEKEY", "SESSIONTOKEN");
AWS.config.update({
    credentials: credentials, // credentials required for local execution
    region: 'your_region'
});

const dynamoS3Bucket = new AWS.S3();
const stream = dynamoS3Bucket.getObject({ Bucket: 'your_bucket', Key: 'example.csv' }).createReadStream();

// Parse row-by-row; each "data" event delivers one row as a JSON object.
var parser = csv.fromStream(stream, { headers: true });
parser.on("data", function (data) {
    parser.pause(); //can pause reading using this at a particular row
    parser.resume(); // to continue reading
    console.log(data);
});
parser.on("end", function () {
    console.log('process finished');
});
var fileStream = fs.createWriteStream('/path/to/file.jpg');
var s3Stream = s3.getObject({ Bucket: 'myBucket', Key: 'myImageFile.jpg' }).createReadStream();

// Listen for errors returned by the service
s3Stream.on('error', function (err) {
    // NoSuchKey: The specified key does not exist
    console.error(err);
});

var pipeline = s3Stream.pipe(fileStream);
pipeline.on('error', function (err) {
    // capture any errors that occur when writing data to the file
    console.error('File Stream:', err);
});
pipeline.on('close', function () {
    console.log('Done.');
});
Reference: https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/requests-using-stream-objects.html

Resources