The S3 folder contains a large number of files (e.g. 1 million files); each file contains a single JSON record compressed in gz format.
How can we read all the data/files one by one?
we have tried
const AWS = require('aws-sdk');
var s3 = new AWS.S3();
var LineStream = require('byline').LineStream;

// Object to read. BUG FIX: the original passed `params: 'xxxxx'` instead of
// `Key: 'xxxxx'`, so the SDK never knew which object to fetch.
var params = {
  Bucket: 'xxxx',
  Key: 'xxxxx'
};

// headObject verifies the object exists before streaming its body.
s3.headObject(params, function (err, data) {
  if (err) {
    // Surface the failure instead of silently streaming nothing.
    console.log(err);
    return;
  }
  var stream = s3.getObject(params).createReadStream();
  stream.on('data', (data) => {
    console.log('Served by Amazon S3: ' + data);
  });
});
any other approach is also fine
thanks in advance
Sundar
Related
I am able to download the file from s3 bucket like so:
// Stream the S3 object straight into a local file.
const fileStream = s3.getObject(options).createReadStream();
const writableStream = createWriteStream(
  "./files/master_driver_profile_pic/image.jpeg"
);
// BUG FIX: the original chained `fileStream.pipe(fileStream).pipe(writableStream)`,
// piping the read stream into itself — which is why only a fragment of the
// image was written. Pipe the read stream directly into the write stream.
fileStream.pipe(writableStream);
But the image is not getting written properly. Only a little bit of the image is visible and the rest is blank.
I think you should first createWriteStream and then createReadStream. (Check the docs)
// Download an S3 object by piping its read stream into a local file.
var s3 = new AWS.S3();
var params = {
  Bucket: 'myBucket',
  Key: 'myImageFile.jpg'
};
var fs = require('fs');
var file = fs.createWriteStream('/path/to/file.jpg');
var download = s3.getObject(params).createReadStream();
download.pipe(file);
OR
you can go without streams:
// Download file: getObject(...).promise() resolves to the full response;
// the bytes are in `.Body` (a Buffer). The original double `await`
// (`await (await ...).Body`) was redundant — Body is not a promise.
let content = (await s3.getObject(params).promise()).Body;
// Write file
fs.writeFile(downloadPath, content, (err) => {
  if (err) { console.log(err); }
});
I would like to read the content of a .txt file stored within an s3 bucket.
I tried :
var s3 = new AWS.S3({apiVersion: '2006-03-01'});
var params = {Bucket: 'My-Bucket', Key: 'MyFile.txt'};
// NOTE(review): getObject() without a callback only constructs an AWS.Request;
// the request is never sent, so s3file holds no file content — this is the
// asker's bug, explained by the answers below.
var s3file = s3.getObject(params)
But the s3file object that i get does not contain the content of the file.
Do you have an idea on what to do ?
Agree with zishone and here is the code with exception handling:
// Fetch the object and print its contents; data.Body is a Buffer.
var s3 = new AWS.S3({apiVersion: '2006-03-01'});
var params = {Bucket: 'My-Bucket', Key: 'MyFile.txt'};
s3.getObject(params, (err, data) => {
  if (err) {
    console.log(err);
    return;
  }
  console.log(data.Body.toString());
});
According to the docs the contents of your file will be in the Body field of the result and it will be a Buffer.
Another problem is that s3.getObject() should be given a callback:
s3.getObject(params, (err, s3file) => {
  // Handle the error case — otherwise a failed request leaves s3file
  // undefined and `.Body` throws a confusing TypeError.
  if (err) {
    console.log(err);
    return;
  }
  const text = s3file.Body.toString();
})
How can I upload single file to multiple path on aws-s3 using nodeJS?
Problem: I have one image file, images.jpg, and I want to upload it to AWS S3 under several different paths.
To upload a file to different directories in S3, provide your accessKeyId and secretAccessKey:
const fs = require('fs'),
AWS = require('aws-sdk'),
{promisify} = require('util')
/**
 * Read a local file and upload it to S3 under `${folder}/file.js`.
 * @param {string} file_path - path of the local file to upload
 * @param {string} folder - target "directory" (key prefix) in the bucket
 * @returns {Promise<object|undefined>} the S3 upload response, or undefined on error
 */
const uploadFile = async (file_path, folder) => {
  try {
    var s3 = new AWS.S3({
      accessKeyId: '',
      secretAccessKey: ''
    });
    // Read the file into a Buffer.
    const readFile = promisify(fs.readFile);
    const data = await readFile(file_path);
    if (!data) throw "reading file failed";
    const params = {
      Bucket: 'testbucketnayan', // bucket name
      Key: `${folder}/file.js`,
      // BUG FIX: pass the Buffer as-is. The original sent data.toString(),
      // which mangles binary content (e.g. the images.jpg from the question).
      Body: data
    };
    const upload = promisify(s3.upload).bind(s3);
    const up_data = await upload(params);
    if (!up_data) throw "Upload failed";
    console.log(up_data);
    return up_data;
  } catch (err) {
    console.log(err);
  }
}
The upload_location array holds the directory names where the file will be uploaded. Instead of looping through the array and uploading one file at a time, you can use Promise.all to upload them in parallel, which will improve performance.
// Upload the same file sequentially, once per target folder.
const upload_location = ['abc', 'def'];
(async () => {
  for (let i = 0; i < upload_location.length; i += 1) {
    await uploadFile('resposetwi.js', upload_location[i]);
  }
})();
Please include the code you tried when asking your question; it makes it easier to understand where the issue is.
So I'm trying to retrieve an mp3 audio file (approximately 9 MB) from s3, pipe the data to a write stream, and then upload to another destination s3 bucket using a readStream from the /tmp/ file. This is a lambda function that receives an s3 upload event and attempts to write the data from the created object to another bucket.
const fs = require('fs');
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
exports.handler = (event, context, callback) => {
var sourceBucket = event.Records[0].s3.bucket.name;
var sourceKey = event.Records[0].s3.object.key;
var getParams = {
Bucket: sourceBucket,
Key: sourceKey
};
const inputFilename = '/tmp/' + sourceKey;
//writing and reading streams
const writeStream = fs.createWriteStream(inputFilename);
s3.getObject(getParams).createReadStream().pipe(writeStream);
var putParams = {
Body: fs.createReadStream(inputFilename),
Bucket: "example-destination-bucket",
Key: 'transfer-' + sourceKey
};
s3.upload(putParams, function(err, data) {
if (err) console.log(err, err.stack); // an error occurred
else console.log('logging data' + data); // successful response
});
};
This results in the key successfully being put to the s3 bucket, but the file uploaded is 0 bytes in size. Any idea why this may result in an empty upload?
The file needs to be downloaded first, which takes some time, so you need to wait for the write stream's 'finish' event, like this:
const writeStream = fs.createWriteStream(inputFilename);
s3.getObject(getParams).createReadStream().pipe(writeStream);
// Wait until the download is fully written before touching the file.
// (The original snippet was missing the closing `)` of the .on() call.)
writeStream.on('finish', function() {
  //upload to S3 code
});
Instead of writing a lambda to copy from one s3 bucket to another, why not set a replication rule on the source s3 bucket? It'll automatically copy over any files that get uploaded, and you can do it cross-account.
I want to concatenate the files uploaded on Amazon S3 server.
How can I do this.
Concatenation on local machine i can do using following code.
// Concatenate every clip in ./files into ./concatfile.mp3, one clip at a
// time, keeping the output stream open ({end: false}) until the last one.
const fs = require('fs');

// The first six characters of each directory entry name a clip.
const clips = fs.readdirSync('./files').map(function (file) {
  return file.substring(0, 6);
});
const output = fs.createWriteStream('./concatfile.mp3');

// Pipe the next clip; when none remain, close the output stream.
function main() {
  if (clips.length === 0) {
    output.end("Done");
    return;
  }
  const nextPath = './files/' + clips.shift() + '.mp3';
  const input = fs.createReadStream(nextPath);
  input.pipe(output, {end: false});
  input.on("end", function () {
    main();
  });
}
main();
You can achieve what you want by breaking it into two steps:
Manipulating files on s3
Since s3 is a remote file storage, you can't run code on s3 server to do the operation locally (as #Andrey mentioned).
what you will need to do in your code is to fetch each input file, process them locally and upload the results back to s3. checkout the code examples from amazon:
// Fetch one input clip from S3 and save it locally for processing.
var s3 = new AWS.S3();
var params = { Bucket: 'myBucket', Key: 'mp3-input1.mp3' };
var localFile = require('fs').createWriteStream('/path/to/input.mp3');
var objectStream = s3.getObject(params).createReadStream();
objectStream.pipe(localFile);
at this stage you'll run your concatenation code, and upload the results back:
// Gzip the result file on the fly and stream it up to S3.
var fs = require('fs');
var zlib = require('zlib');

var gzipped = fs.createReadStream('bigfile.mp3').pipe(zlib.createGzip());
var s3obj = new AWS.S3({params: {Bucket: 'myBucket', Key: 'myKey'}});
s3obj.upload({Body: gzipped})
  .on('httpUploadProgress', function (evt) { console.log(evt); })
  .send(function (err, data) { console.log(err, data); });
Merging two (or more) mp3 files
Since MP3 file include a header that specifies some information like bitrate, simply concatenating them together might introduce playback issues.
See: https://stackoverflow.com/a/5364985/1265980
What you want is a tool for that. One approach is to save your input MP3 files in a tmp folder, then execute an external program (such as ffmpeg) to change the bitrate, concatenate the files, and fix the header.
Alternatively, you can use a library that allows you to use ffmpeg within Node.js.
In their code example, you can see how they merge two files together within the Node API:
// Merge part1 and part2 into a single output file.
// BUG FIX: the original chained .input('/path/to/part2.avi') twice, so the
// second part would have been merged into the output two times.
ffmpeg('/path/to/part1.avi')
  .input('/path/to/part2.avi')
  .on('error', function(err) {
    console.log('An error occurred: ' + err.message);
  })
  .on('end', function() {
    console.log('Merging finished !');
  })
  .mergeToFile('/path/to/merged.avi', '/path/to/tempDir');
Here's my quick take on the problem of downloading and processing S3 objects. My example is focused mostly on getting the data local and then processing it once it's all downloaded. I suggest you use one of the ffmpeg approaches mentioned above.
var RSVP = require('rsvp');
var s3 = new AWS.S3();
var bucket = '<your bucket name>';

/**
 * Download one S3 object to a local file.
 * @param {string} key - S3 object key
 * @param {string} filePath - local destination path
 * @returns {Promise<string>} resolves with filePath once the file is flushed
 */
var getFile = function(key, filePath) {
  return new RSVP.Promise(function(resolve, reject) {
    var file = require('fs').createWriteStream(filePath);
    // The original `if(!file)` check was dead code — createWriteStream
    // always returns a stream and reports problems via 'error' events.
    file.on('error', reject);
    // Resolve only after the data has been flushed and the file closed, so
    // callers can safely read the file in their .then() handler.
    file.on('finish', function() {
      resolve(filePath);
    });
    s3.getObject({
      Bucket: bucket,
      Key: key
    }).on('httpData', function(chunk) {
      file.write(chunk);
    }).on('httpDone', function() {
      file.end();
    }).on('error', reject)
      // NOTE(review): the event-based AWS.Request API does not start the
      // request by itself — an explicit send() is required.
      .send();
  });
};
// Start every download in parallel and wait for all of them to finish.
var tempFiles = ['<local temp filename 1>', '<local temp filename 2>'];
var keys = ['<s3 object key 1>', '<s3 object key 2>'];
var promises = keys.map(function (key, idx) {
  return getFile(key, tempFiles[idx]);
});
RSVP.all(promises).then(function (data) {
  //do something with your files
}).catch(function (error) {
  //handle errors
});