Saving file to AWS S3 - node.js

I've been following some examples here and on Google of how to write a Nodejs Lambda function to fetch an (audio) file from a URL and save it to S3. I've arrived at this so far:
var AWS = require('aws-sdk');
var https = require('https');
var s3 = new AWS.S3();
const querystring = require('querystring');
// Lambda handler: reads `audioUrl` from the URL-encoded request body, downloads
// it over HTTPS and stores whatever was accumulated in S3 as 'filename.wav'.
exports.handler = function(event, context) {
const params = querystring.parse(event.body);
const audioUrl = params['audioUrl'];
https.get(audioUrl, function(res) {
// The accumulator starts as a string, so every `+=` below produces a string.
var body = '';
res.on('data', function(chunk) {
// Aggregates chunks
body += chunk;
});
res.on('end', function() {
// Once you received all chunks, send to S3
// Note: this `params` shadows the parsed query-string `params` declared above.
var params = {
Bucket: 'bucket_name',
Key: 'filename.wav',
Body: body
};
s3.putObject(params, function(err, data) {
if (err) {
console.error("ERROR: ", err, err.stack);
} else {
console.log("DATA:", data);
}
});
});
});
};
This results in file with filename filename.wav being saved to S3 bucket but the file is not a WAV audio file which is what I want. It seems that in the process of retrieving the file from audioUrl and saving it to S3 the format/encoding of the file is lost.
Any hints or solutions to this would be appreciated!
Thanks!

You're converting a binary file into a string; that's why your uploaded file is corrupted. What you need to do is work with a Buffer instead of a string.
// Collect the response chunks in an array instead of appending them to a string.
const chunks = [];
res.on('data', function(chunk) {
// Aggregates chunks
chunks.push(chunk)
});
res.on('end', function() {
// Once you received all chunks, send to S3
var params = {
Bucket: 'bucket_name',
Key: 'filename.wav',
// Join the collected chunks into one Buffer for the upload body.
Body: Buffer.concat(chunks)
};
s3.putObject(params, function(err, data) {
if (err) {
console.error("ERROR: ", err, err.stack);
} else {
console.log("DATA:", data);
}
});
});
In any case, instead of buffering the chunks, it is better to just pass the stream directly and let the S3 SDK handle it.
var params = {
Bucket: 'bucket_name',
Key: 'filename.wav',
Body: res // pass the readable stream directly
};
s3.putObject(params, function(err, data) {});
The only caveat of passing a stream directly, is that the library can only work with streams that it can determine the length of. If that request sets the content-length correctly, you won't have any issue.
S3.putObject only accepts streams that it can determine the length of
streaming images doesnt work
You can bypass this limitation, by using s3.upload instead.
Note: Haven't worked with the S3 SDK in a while, but they didn't support pipe last time I used it, to do directly: res.pipe(s3.putObject())

Related

Download file from third party server and upload to S3

I have a Lambda Node function which is called by a webhook from a thirdparty server. The TP server sends a file download URL and some other data.
The download URL is temporary, so I need to push the file to an S3 for long term storage.
The rudimentary function below, downloads the file and then tries to upload to the S3.
This works when the file is plain text, but images, PDFs, etc. are corrupted when they reach S3.
const AWS = require("aws-sdk");
const https = require('https');
const path = require('path');
const s3 = new AWS.S3({apiVersion: '2006-03-01'});
// Lambda handler invoked by the third-party webhook: downloads the file named
// in the payload and uploads it to S3 under the file's base name.
exports.handler = async (event, context, callback) => {
var payload = event.body;
const url_host = payload.host;
const url_path = payload.path; //URL of file which needs to be downloaded
const get_params = {
host: url_host,
path: url_path,
port: 443,
method: 'GET',
headers: { }
};
var resp = await https_get_processor(get_params); //File downloaded here
var uploadParams = {
Bucket: "bucket_name",
Key: '',
Body: resp //Preparing to upload the received file
};
uploadParams.Key = path.basename(url_path); //Generating filename
// The upload is started with a callback and is not awaited, so the handler
// reaches its `return` below without waiting for the upload to complete.
s3.upload (uploadParams, function (err, data) {
if (err) {
console.log("Error", err);
} if (data) {
console.log("Upload Success", data.Location);
}
});
// `{...}` is a placeholder from the original post, not valid JavaScript.
response = {...} //Generic Response
return response;
};
/**
 * Performs the HTTPS request described by `get_params` and resolves with the
 * response body accumulated by string concatenation.
 *
 * @param {object} get_params - Options handed straight to `https.request`.
 * @returns {Promise<string>} Resolves once the response emits 'end'; rejects
 *   with the error emitted by the request.
 */
async function https_get_processor(get_params) {
  const download = new Promise((resolve, reject) => {
    let received = "";

    const onResponse = (res) => {
      res.on('data', (piece) => {
        received += piece;
      });
      res.on('end', () => {
        resolve(received);
      });
    };

    const req = https.request(get_params, onResponse);
    req.on('error', (failure) => {
      reject(failure);
    });
    req.end();
  });

  return await download;
}
Response is a Buffer in such case, so try changing request processing by pushing each chunk into an array, and then merge Buffer chunks and pass them.
Try this:
var data = [];
const req = https.request(get_params, res => {
res.on('data', chunk => data.push(chunk))
res.on('end', () =>
{
resolve(Buffer.concat(data));
})

Trying to send readable stream (audio) to amazon s3 - runtime error

I am trying to write a function that takes the mp3 URL of the recording and then uploads that to S3. However, I keep getting a runtime error and the callback is never reached. If I move the callback below s3.upload(...), then the statement "attempting to upload mp3" is never logged.
exports.handler = function(context, event, callback) {
const twiml = new Twilio.twiml.VoiceResponse();
var AWS = require('aws-sdk');
var s3 = new AWS.S3();
var getUri = require('get-uri');
AWS.config.update({
accessKeyId: "...",
secretAccessKey: "..."
});
var client = context.getTwilioClient();
const recording_id = event.RecordingSid;
const uri = event.RecordingUrl + ".mp3";
getUri(uri, function (err, rs) {
if (err) {
console.log(err.message);
throw err;
}
var params = {
ACL: "public-read",
Body: rs,
Bucket: "...",
Key: "audio.mp3",
ContentType: 'audio/mp3'
};
s3.upload(params, function(err,data) {
console.log("attempting to upload mp3");
if (err) {
console.log("there is an error");
console.log(err.status);
throw err.message;
}
else {
console.log("Your upload has been successful.");
}
callback(null, twiml);
});
});
console.log("at the end");
};
Is there any other way to access the recording and put them in my public s3 bucket? Why is this never executing s3.upload(...).
Any insights into this is helpful! Thanks in advance!
// GET /uploadsong: downloads a fixed MP3 over HTTP, buffers it fully in memory
// and stores it in S3 as 'sample.mp3'. Responds 'done' on success and a 500 on
// any failure (assumes an Express-style `res`, as the existing `res.send` implies).
app.get('/uploadsong', function (req, res) {
  console.log("Hi there");
  var URI = 'http://sensongsmp3download.info/Kaala%20(2018)%20-%20Sensongsmp3.info/Thanga%20Sela%20--%20Sensongsmp3.Info.mp3';
  var chunks = [];

  // Sends a single failure response; guards against answering twice if both
  // the download and a later step report an error.
  function fail(message, err) {
    console.log(message, err);
    if (!res.headersSent) {
      res.status(500).send(message);
    }
  }

  request
    .get(URI)
    .on('error', function (err) {
      // Previously only logged "error", leaving the client waiting forever.
      fail('Error downloading data: ', err);
    })
    .on('data', function (data) {
      chunks.push(data);
    })
    .on('end', function () {
      var completeSong = Buffer.concat(chunks);
      var params = {
        Body: completeSong,
        Key: 'sample.mp3',
        ContentType: 'audio/mp3'
      };
      s3Bucket.putObject(params, function (err, data) {
        if (err) {
          // Log the error itself; `data` is not the failure information.
          fail('Error uploading data: ', err);
          return;
        }
        console.log('upload successfull');
        res.send('done');
      });
    });
});
here are the modules i have used
var request = require('request');
I contacted Twilio regarding this they responded that Twilio Functions have a strict 5 second time-out and the upload from the Twilio Function to S3 Bucket takes more than 5 seconds. My workaround was sending a string with all mp3 URLs separated by comma and a space. The lambda function would then parse through all the links and store all links in an array which would be used for audio playback.

NodeJS - reading Image Binary File

I am working on an API to store a file uploaded by the user.
function uploadPhoto(req, res) {
var imagedata = new Buffer('');
req.body.on('data', function (chunk) {
imagedata = Buffer.concat([imagedata, chunk]);
});
req.body.on('end', function (chunk) {
fs.writeFile('success.jpeg', imagedata, 'binary', function (err) {
if (err) throw err
console.log('File saved.')
})
});
}
There is an error when I used req.body.on('data').
The data from postman
When I print out the value of req.body with console.log("message: "+req.body), it was string and has value:
I tried to write to file by using Buffer like this
var writeFile = function (type, cb) {
var data = new Buffer(req.body, type);
fs.writeFile(type + '.jpeg', data, type, function (err) {
cb(null, data.length);
});
}
async.parallel([
writeFile.bind(null, 'binary'),
writeFile.bind(null, 'utf8'),
writeFile.bind(null, 'ascii'),
writeFile.bind(null, 'ucs2'),
writeFile.bind(null, 'base64')
], function (err, results) {
response.status(200).send({});
})
This will create some jpeg files with different size but can't read them as an image.
How can I store this image from the user?
Thank you very much.
This looks like a good case for streams.
/**
 * Streams the request body straight to `success.jpeg` next to this script and
 * ends the HTTP response once the file has been written (or has failed).
 *
 * @param {object} req - Incoming request; must be a readable stream.
 * @param {object} res - Response that is ended when the write finishes.
 */
function uploadPhoto(req, res) {
  var file = fs.createWriteStream(__dirname + '/success.jpeg');

  // Ends the response with a 500 exactly once and stops writing to disk.
  function fail(err) {
    console.log(err);
    file.destroy();
    if (!res.headersSent) {
      res.statusCode = 500;
      res.end();
    }
  }

  // pipe() does not forward source errors to the destination, so both ends
  // need their own handler.
  req.on('error', fail);
  file.on('error', fail);

  // Without this the client never receives a response after a successful upload.
  file.on('finish', function () {
    res.end();
  });

  req.pipe(file);
}
Headers could also help determine what file type and character encoding it has.
var file = fs.createWriteStream(__dirname + '/success.jpeg', {defaultEncoding: req.headers.encoding || 'utf8'})

aws lambda how to store an image retrieved via https in S3

I am trying to write a lambda script that can pull an image from a site and store it in S3. The problem I'm having is what kind of object to pass as the Body attribute into the S3.putObject method. In the documentation here it says it should be either new Buffer('...') || 'STRING_VALUE' || streamObject, but I'm not sure how to convert the https response into one of those. Here is what I've tried:
var AWS = require('aws-sdk');
var https = require('https');
var Readable = require('stream').Readable;
var s3 = new AWS.S3();
var fs = require('fs');
var url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/1/1d/AmazonWebservices_Logo.svg/500px-AmazonWebservices_Logo.svg.png';
exports.handler = function(event, context) {
https.get(url, function(response) {
var params = {
Bucket: 'example',
Key: 'aws-logo.png',
Body: response // fs.createReadStream(response); doesn't work, arg should be a path to a file...
// just putting response errors out with "Cannot determine length of [object Object]"
};
s3.putObject(params, function(err, data) {
if (err) {
console.error(err, err.stack);
} else {
console.log(data);
}
});
});
};
As indicated in the comments, Lambda allows to save files in /tmp. But you don't really need it...
response does not contain the content of the file, but the http response (with its status code and headers).
You could try something like this:
var AWS = require('aws-sdk');
var https = require('https');
var s3 = new AWS.S3();
var url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/1/1d/AmazonWebservices_Logo.svg/500px-AmazonWebservices_Logo.svg.png';
exports.handler = function(event, context) {
https.get(url, function(res) {
var body = '';
res.on('data', function(chunk) {
// Agregates chunks
body += chunk;
});
res.on('end', function() {
// Once you received all chunks, send to S3
var params = {
Bucket: 'example',
Key: 'aws-logo.png',
Body: body
};
s3.putObject(params, function(err, data) {
if (err) {
console.error(err, err.stack);
} else {
console.log(data);
}
});
});
});
};
try this package https://www.npmjs.com/package/request
var request = require('request');
exports.handler = function (event, context) {
s3.putObject({
Bucket: 'example',
Key: 'aws-logo.png',
Body: request.get(url, {followRedirect: false})
}, function (err, data) {
if (err) console.error(err, err.stack);
else console.log(data);
})
}

Read file from aws s3 bucket using node fs

I am attempting to read a file that is in a aws s3 bucket using
fs.readFile(file, function (err, contents) {
var myLines = contents.Body.toString().split('\n')
})
I've been able to download and upload a file using the node aws-sdk, but I am at a loss as to how to simply read it and parse the contents.
Here is an example of how I am reading the file from s3:
var s3 = new AWS.S3();
var params = {Bucket: 'myBucket', Key: 'myKey.csv'}
var s3file = s3.getObject(params)
You have a couple options. You can include a callback as a second argument, which will be invoked with any error message and the object. This example is straight from the AWS documentation:
s3.getObject(params, function(err, data) {
if (err) console.log(err, err.stack); // an error occurred
else console.log(data); // successful response
});
Alternatively, you can convert the output to a stream. There's also an example in the AWS documentation:
var s3 = new AWS.S3({apiVersion: '2006-03-01'});
var params = {Bucket: 'myBucket', Key: 'myImageFile.jpg'};
var file = require('fs').createWriteStream('/path/to/file.jpg');
s3.getObject(params).createReadStream().pipe(file);
This will do it:
new AWS.S3().getObject({ Bucket: this.awsBucketName, Key: keyName }, function(err, data)
{
if (!err)
console.log(data.Body.toString());
});
Since you seem to want to process an S3 text file line by line, here is a Node version that uses the standard readline module and AWS's createReadStream():
const readline = require('readline');
const rl = readline.createInterface({
input: s3.getObject(params).createReadStream()
});
rl.on('line', function(line) {
console.log(line);
})
.on('close', function() {
});
If you are looking to avoid the callbacks you can take advantage of the sdk .promise() function like this:
const s3 = new AWS.S3();
const params = {Bucket: 'myBucket', Key: 'myKey.csv'}
const response = await s3.getObject(params).promise() // await the promise
const fileContent = response.Body.toString('utf-8'); // can also do 'base64' here if desired
I'm sure the other ways mentioned here have their advantages but this works great for me. Sourced from this thread (see the last response from AWS): https://forums.aws.amazon.com/thread.jspa?threadID=116788
Here is the example which I used to retrieve and parse JSON data from S3.
// Fetches an object from S3 and parses its body as UTF-8 encoded JSON.
var params = {Bucket: BUCKET_NAME, Key: KEY_NAME};
new AWS.S3().getObject(params, function(err, json_data) {
  if (err) {
    // Surface the failure instead of silently doing nothing.
    console.error(err, err.stack);
    return;
  }
  var json;
  try {
    // Buffer.from replaces the deprecated `new Buffer(...)` constructor.
    json = JSON.parse(Buffer.from(json_data.Body).toString("utf8"));
  } catch (parseErr) {
    // The object exists but does not contain valid JSON.
    console.error(parseErr, parseErr.stack);
    return;
  }
  // PROCESS JSON DATA (use `json` here)
});
I couldn't figure why yet, but the createReadStream/pipe approach didn't work for me. I was trying to download a large CSV file (300MB+) and I got duplicated lines. It seemed a random issue. The final file size varied in each attempt to download it.
I ended up using another way, based on AWS JS SDK examples:
var s3 = new AWS.S3();
var params = {Bucket: 'myBucket', Key: 'myImageFile.jpg'};
var file = require('fs').createWriteStream('/path/to/file.jpg');
s3.getObject(params).
on('httpData', function(chunk) { file.write(chunk); }).
on('httpDone', function() { file.end(); }).
send();
This way, it worked like a charm.
I prefer Buffer.from(data.Body).toString('utf8'). It supports encoding parameters. With other AWS services (ex. Kinesis Streams) someone may want to replace 'utf8' encoding with 'base64'.
new AWS.S3().getObject(
{ Bucket: this.awsBucketName, Key: keyName },
function(err, data) {
if (!err) {
const body = Buffer.from(data.Body).toString('utf8');
console.log(body);
}
}
);
I had exactly the same issue when downloading from S3 very large files.
The example solution from AWS docs just does not work:
var file = fs.createWriteStream(options.filePath);
file.on('close', function(){
if(self.logger) self.logger.info("S3Dataset file download saved to %s", options.filePath );
return callback(null,done);
});
s3.getObject({ Key: documentKey }).createReadStream().on('error', function(err) {
if(self.logger) self.logger.error("S3Dataset download error key:%s error:%#", options.fileName, error);
return callback(error);
}).pipe(file);
While this solution will work:
// Downloads an S3 object to `options.filePath` by writing each HTTP data chunk
// to the file as it arrives, then reports completion through `callback`.
var file = fs.createWriteStream(options.filePath);
s3.getObject({ Bucket: this._options.s3.Bucket, Key: documentKey })
  // The parameter is named `error` so it matches the name used in the body;
  // with the previous name `err` the handler itself threw a ReferenceError.
  .on('error', function(error) {
    if(self.logger) self.logger.error("S3Dataset download error key:%s error:%#", options.fileName, error);
    return callback(error);
  })
  .on('httpData', function(chunk) { file.write(chunk); })
  .on('httpDone', function() {
    file.end();
    if(self.logger) self.logger.info("S3Dataset file download saved to %s", options.filePath );
    // NOTE(review): `done` is not defined in this excerpt — confirm it exists in the enclosing scope.
    return callback(null,done);
  })
  .send();
The createReadStream attempt just does not fire the end, close or error callback for some reason. See here about this.
I'm using that solution also for writing down archives to gzip, since the first one (AWS example) does not work in this case either:
// Downloads a gzip archive from S3 to `options.filePath`, then decompresses it
// into `options.fileDest`. `callback` is invoked exactly once: with the error
// of whichever stage failed first, or with `options.fileDest` once the
// decompressed file has been completely written.
var gunzip = zlib.createGunzip();
var file = fs.createWriteStream( options.filePath );
var settled = false;

// Ensures the callback fires only once even if several streams report errors.
function finish(error, result) {
  if (settled) return;
  settled = true;
  callback(error, result);
}

file.on('error', finish);

// Start decompressing only after the download has been flushed to disk;
// reading the file right after file.end() can observe a partially written file.
file.on('finish', function () {
  if (settled) return; // the download already failed, nothing to unpack
  if(self.logger) self.logger.info("downloadArchive downloaded %s", options.filePath);

  var dest = fs.createWriteStream(options.fileDest);
  // pipe() does not propagate errors, so every stage needs its own handler.
  gunzip.on('error', finish);
  dest.on('error', finish);
  // 'finish' on the destination (not 'end' on the source) signals that all
  // decompressed bytes have actually been written.
  dest.on('finish', function () {
    if(self.logger) self.logger.info("downloadArchive unarchived %s", options.fileDest);
    finish(null, options.fileDest);
  });

  fs.createReadStream( options.filePath )
    .on('error', finish)
    .pipe(gunzip)
    .pipe(dest);
});

s3.getObject({ Bucket: this._options.s3.Bucket, Key: documentKey })
  .on('error', function (error) {
    if(self.logger) self.logger.error("%#",error);
    finish(error);
  })
  .on('httpData', function (chunk) {
    file.write(chunk);
  })
  .on('httpDone', function () {
    file.end();
  })
  .send();
With the new version of sdk, the accepted answer does not work - it does not wait for the object to be downloaded. The following code snippet will help with the new version:
// dependencies
const AWS = require('aws-sdk');
// get reference to S3 client
const s3 = new AWS.S3();
exports.handler = async (event, context, callback) => {
var bucket = "TestBucket"
var key = "TestKey"
try {
const params = {
Bucket: Bucket,
Key: Key
};
var theObject = await s3.getObject(params).promise();
} catch (error) {
console.log(error);
return;
}
}
If you want to save memory and want to obtain each row as a json object, then you can use fast-csv to create readstream and can read each row as a json object as follows:
const csv = require('fast-csv');
const AWS = require('aws-sdk');
const credentials = new AWS.Credentials("ACCESSKEY", "SECRETEKEY", "SESSIONTOKEN");
AWS.config.update({
credentials: credentials, // credentials required for local execution
region: 'your_region'
});
const dynamoS3Bucket = new AWS.S3();
const stream = dynamoS3Bucket.getObject({ Bucket: 'your_bucket', Key: 'example.csv' }).createReadStream();
var parser = csv.fromStream(stream, { headers: true }).on("data", function (data) {
parser.pause(); //can pause reading using this at a particular row
parser.resume(); // to continue reading
console.log(data);
}).on("end", function () {
console.log('process finished');
});
var fileStream = fs.createWriteStream('/path/to/file.jpg');
var s3Stream = s3.getObject({Bucket: 'myBucket', Key: 'myImageFile.jpg'}).createReadStream();
// Listen for errors returned by the service
s3Stream.on('error', function(err) {
// NoSuchKey: The specified key does not exist
console.error(err);
});
s3Stream.pipe(fileStream).on('error', function(err) {
// capture any errors that occur when writing data to the file
console.error('File Stream:', err);
}).on('close', function() {
console.log('Done.');
});
Reference: https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/requests-using-stream-objects.html

Resources