Javascript AWS SDK S3 upload method with Body stream generating empty file - node.js

I'm trying to use the method upload from s3 using a ReadableStream from the module fs.
The documentation says that a ReadableStream can be used at Bodyparam:
Body — (Buffer, Typed Array, Blob, String, ReadableStream) Object data.
Also the upload method description is:
Uploads an arbitrarily sized buffer, blob, or stream, using intelligent concurrent handling of parts if the payload is large enough.
Also, here: Upload pdf generated to AWS S3 using nodejs aws sdk the #shivendra says he can use a ReadableStream and it works.
This is my code:
const fs = require('fs')
const S3 = require('aws-sdk/clients/s3')
const s3 = new S3()
const send = async () => {
const rs = fs.createReadStream('/home/osman/Downloads/input.txt')
rs.on('open', () => {
console.log('OPEN')
})
rs.on('end', () => {
console.log('END')
})
rs.on('close', () => {
console.log('CLOSE')
})
rs.on('data', (chunk) => {
console.log('DATA: ', chunk)
})
console.log('START UPLOAD')
const response = await s3.upload({
Bucket: 'test-bucket',
Key: 'output.txt',
Body: rs,
}).promise()
console.log('response:')
console.log(response)
}
send().catch(err => { console.log(err) })
It's getting this output:
START UPLOAD
OPEN
DATA: <Buffer 73 6f 6d 65 74 68 69 6e 67>
END
CLOSE
response:
{ ETag: '"d41d8cd98f00b204e9800998ecf8427e"',
Location: 'https://test-bucket.s3.amazonaws.com/output.txt',
key: 'output.txt',
Key: 'output.txt',
Bucket: 'test-bucket' }
The problem is that my file generated at S3 (output.txt) has 0 Bytes.
Someone know what am I doing wrong?
If I pass a buffer on Body it works.
Body: Buffer.alloc(8 * 1024 * 1024, 'something'),
But it's not what I want to do. I'd like to do this using a stream to generate a file and pipe a stream to S3 as long as I generate it.

It's an API interface issue using NodeJS ReadableStreams.
Just comment the code related to listen event 'data', solves the problem.
const fs = require('fs')
const S3 = require('aws-sdk/clients/s3')
const s3 = new S3()
const send = async () => {
const rs = fs.createReadStream('/home/osman/Downloads/input.txt')
rs.on('open', () => {
console.log('OPEN')
})
rs.on('end', () => {
console.log('END')
})
rs.on('close', () => {
console.log('CLOSE')
})
// rs.on('data', (chunk) => {
// console.log('DATA: ', chunk)
// })
console.log('START UPLOAD')
const response = await s3.upload({
Bucket: 'test-bucket',
Key: 'output.txt',
Body: rs,
}).promise()
console.log('response:')
console.log(response)
}
send().catch(err => { console.log(err) })
Though it's an strange API, when we listen to 'data' event, the ReadableStream starts the flowing mode (listening to an event changing publisher/EventEmitter state? Yes, very error prone...). For some reason the S3 need a paused ReadableStream. If whe put rs.on('data'...) after await s3.upload(...) it works. If we put rs.pause() after rs.on('data'...) and befote await s3.upload(...), it works too.
Now, what does it happen? I don't know yet...
But the problem was solved, even it isn't completely explained.

Check if file /home/osman/Downloads/input.txt actually exists and accessible by node.js process
Consider to use putObject method
Example:
const fs = require('fs');
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3();
s3.putObject({
Bucket: 'test-bucket',
Key: 'output.txt',
Body: fs.createReadStream('/home/osman/Downloads/input.txt'),
}, (err, response) => {
if (err) {
throw err;
}
console.log('response:')
console.log(response)
});
Not sure how this will work with async .. await, better to make upload to AWS:S3 work first, then change the flow.
UPDATE:
Try to implement upload directly via ManagedUpload
const fs = require('fs');
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3();
const upload = new S3.ManagedUpload({
service: s3,
params: {
Bucket: 'test-bucket',
Key: 'output.txt',
Body: fs.createReadStream('/home/osman/Downloads/input.txt')
}
});
upload.send((err, response) => {
if (err) {
throw err;
}
console.log('response:')
console.log(response)
});

Related

How do I upload a large Audio file longer than 30sec direct from the browser to AwS S3?

I would like to save audio recording to S3. I am using the functions below to load direct to awsS3 direct from the browser. It works for short audio recordings of up to around 25 seconds but fails for larger files.
Currently the functions is as follows: I speak into the microphone using recorder.js. Once the recording is complete I press stop which then saves the file to AWS
From the browser:
getSignedRequest(file,fileLoc);
function getFetchSignedRequest(file,fileLoc){
const fetchUrl = `/xxxxxxxxx?file-name=${file.name}&file-type=${file.type}&fileLoc=${fileLoc}`;
fetch(fetchUrl )
.then((response) => {
console.log('response',response)
if(!response.ok){
console.log('Network response was not OK',response.ok)
} else {
putAudioFetchFile(file, response.signedRequest, response.url)
}
})
.catch((error) => {
console.error('Could not get signed URL:', error);
})
}
This send a get request to the NodeJs server which calls this :
const aws = require('aws-sdk');
const fs = require('fs');
aws.config.region = 'xxxxxx';
const S3_BUCKET = process.env.AWS_S3_BUCKET
this.uploadToAWSDrive =
async function uploadToAWSDrive(req,res){
const s3 = new aws.S3();
const URL_EXPIRATION_SECONDS = 3000;
const subFolderName = req.query['fileLoc'];
const fileName = req.query['file-name'];
const fileType = req.query['file-type'];
const fileLocName = subFolderName + fileName;
const s3Params = {
Bucket: S3_BUCKET,
Key: fileLocName,
Expires: URL_EXPIRATION_SECONDS,
ContentType: fileType,
ACL: 'public-read'
};
await s3.getSignedUrl('putObject', s3Params, (err, data) => {
if(err){
console.log(err);
return res.end();
}
const returnData = {
signedRequest: data,
url: `https://${S3_BUCKET}.s3.amazonaws.com/${fileLocName}`
};
console.log('audio uploaded',returnData)
res.write(JSON.stringify(returnData));
res.end();
});
}
Which then calls this:
function uploadFile(file, signedRequest, url){
const xhr = new XMLHttpRequest();
xhr.open('PUT', signedRequest);
xhr.onreadystatechange = () => {
if(xhr.readyState === 4){
if(xhr.status === 200){
console.log('destination url= ', url,xhr.readyState,xhr.status)
}
else{
alert('Could not upload file.');
}
}
};
xhr.send(file);
}
This then sends the file to the awsS3 server. Ok for audio less than 30secs, but fails for longer audio files.
What do I need to do to enable this to work with audio files of greater than 20secs and upto 3 mins?
Any help most appreciated
Not very elegant but the issue was resolved by adding a timer to the origanal function call. A function that followed also needed to be delayed to I think allow processor time. I am sure there will be better ways to do this.
setTimeout( getSignedRequest( myAudioFile,fileLoc), proccessTime) ;

Reading multiple files and uploading to AWS S3

Requirement:
I have multiple files in a folder on my express server. I pass these file names as an API call, the backend function needs to read all these files, upload them to AWS S3 and then return an array of public URLs.
const s3 = new aws.S3();
const fs = require("fs");
module.exports = {
upload: async function (req, res, next) {
console.log("Inside upload controller");
let publicUrls = [];
const result = await Promise.all(
req.body.files.map(async (uploadFile) => {
fs.promises.readFile(uploadFile).then((file) => {
let body = fs.createReadStream(uploadFile);
const s3PutParams = {
Bucket: process.env.S3_BUCKET_NAME,
Key: uploadFile.substr(15),
Body: body,
ACL: "public-read",
};
s3.upload(s3PutParams)
.promise()
.then((response) => {
console.log(response.Location);
publicUrls.push(response.Location);
});
});
})
);
if (result) {
console.log("Result", result);
res.json(publicUrls);
}
},
};
Observed Output:
Inside upload controller
Result [ undefined, undefined, undefined, undefined ]
https://xxx.s3.amazonaws.com/2_30062022.pdf
https://xxx.s3.amazonaws.com/1_30062022.pdf
https://xxx.s3.amazonaws.com/1_30062022.pdf
https://xxx.s3.amazonaws.com/2_30062022.pdf
I am passing an array of 4 file names, hence 4 "undefined" while logging "result"
Issue:
The code is not awaiting for the Promise.all to be completed.
It right away returns the json response, which is an empty array at that point.
How can this be resolved?
Solved by referring to NodeJS write file to AWS S3 - Promise.All with async/await not waiting

How to pipe a s3 getSignedUrl

I'm trying to pipe a signed url of an image I got stored in a bucket in S3.
When using regular "getObject" method I can do it like this
app.get("/images/:key", (req, res) => {
const key = req.params.key;
const downloadParams = {
Key: key,
Bucket: bucketName,
};
const readStream = s3.getObject(downloadParams).createReadStream();
readStream.pipe(res);
});
But when I try with getSignedUrlPromise, I can't use the createReadStream method because it says it's not a function.
const readStreamSigned = await s3
.getSignedUrlPromise("getObject", downloadParams).createdReadStream // throws createReadStream is not a function
readStreamSigned.pipe(res)
How can I achieve that with getSignedUrl or getSignedUrlPromise?
Found a solution! inspired by this answer https://stackoverflow.com/a/65976684/4179240 in a high level what we want to do is this:
Get the key of the item to search.
Pass it with the params to getSignedUrlPromises.
Get the generated url from the promise.
Pipe the result from the callback of the get().
I ended up doing it like this
app.get("/images/:key", async (req, res) => {
const key = req.params.key;
const downloadParams = {
Key: key,
Bucket: bucketName,
};
const url = await s3.getSignedUrlPromise("getObject", downloadParams);
https.get(readStream, (stream) => {
stream.pipe(res);
});
});
If you find a better way let me know! 😃

Use Cloud Function download a JSON file from URL then upload to a Cloud Storage bucket, status 200 but JSON file uploaded is only 20 bytes and empty

I'm trying to use the cloud function to download a JSON file from here: http://jsonplaceholder.typicode.com/posts? then upload it to Cloud Storage bucket.
Log of function execution seems fine, the status returns 200. However, the JSON file uploaded to the bucket is only 20 Bytes and it is empty (while the original file is ~27 KB)
So please help me if I missed something, there is code and logs:
index.js
const {Storage} = require('#google-cloud/storage');
exports.writeToBucket = (req, res) => {
const http = require('http');
const fs = require('fs');
const file = fs.createWriteStream("/tmp/post.json");
const request = http.get("http://jsonplaceholder.typicode.com/posts?", function(response) {
response.pipe(file);
});
console.log('file downloaded');
// Imports the Google Cloud client library
const {Storage} = require('#google-cloud/storage');
// Creates a client
const storage = new Storage();
const bucketName = 'tft-test-48c87.appspot.com';
const filename = '/tmp/post.json';
// Uploads a local file to the bucket
storage.bucket(bucketName).upload(filename, {
gzip: true,
metadata: {
cacheControl: 'no-cache',
},
});
res.status(200).send(`${filename} uploaded to ${bucketName}.`);
};
package.json
{
"name": "sample-http",
"version": "0.0.1",
"dependencies": {
"#google-cloud/storage": "^3.0.3"
}
}
Result:
Log:
As pointed by #DazWilkin, there are issues with asynchronous code. You must wait for onfinish() to trigger and then proceed. Also the upload() method returns a promise too. Try refactoring your function in async-await syntax as shown below:
exports.writeToBucket = async (req, res) => {
const http = require('http');
const fs = require('fs');
// Imports the Google Cloud client library
const {Storage} = require('#google-cloud/storage');
// Creates a client
const storage = new Storage();
const bucketName = 'tft-test-48c87.appspot.com';
const filename = '/tmp/post.json';
await downloadJson()
// Uploads a local file to the bucket
await storage.bucket(bucketName).upload(filename, {
gzip: true,
metadata: {
cacheControl: 'no-cache',
},
});
res.status(200).send(`${filename} uploaded to ${bucketName}.`);
}
const downloadJson = async () => {
const Axios = require('axios')
const fs = require("fs")
const writer = fs.createWriteStream("/tmp/post.json")
const response = await Axios({
url: "http://jsonplaceholder.typicode.com/posts",
method: 'GET',
responseType: 'stream'
})
response.data.pipe(writer)
return new Promise((resolve, reject) => {
writer.on('finish', resolve)
writer.on('error', reject)
})
}
This example uses Axios but you can do the same with http.
Do note that you can directly upload the fetched JSON as a file like this:
exports.writeToBucket = async (req, res) => {
const Axios = require("axios");
const { Storage } = require("#google-cloud/storage");
const storage = new Storage();
const bucketName = "tft-test-48c87.appspot.com";
const filename = "/tmp/post.json";
const { data } = await Axios.get("http://jsonplaceholder.typicode.com/posts");
const file = storage.bucket(bucketName).file("file.json");
const contents = JSON.stringify(data);
await file.save(contents);
res.status(200).send(`${filename} uploaded to ${bucketName}.`);
};
You can read more about the save() method in the documentation.
I don't write much NodeJS but I think your issue is with async code.
You create the stream and then issue the http.get but you don't block on the callback (piping the file) completing before you start the GCS upload.
You may want to attach an .on("finish", () => {...}) to the pipe and in that callback, upload the file to GCS.
NOTE IIRC GCS has a method that will let you write a stream directly from memory rather than going through a file.
NOTE if you pull the storage object up into the global namespace, it will only be created whenever the instance is created and not every time the function is invoked.
You don't need a write stream to get the URL data, fetch the URL, await the response to resolve, call the appropriate response.toJson() method.
Personally, I prefer to use Fetch and Axios over http as they are cleaner to work with. But with Nodes http you can do the following:
https.get(url,(res) => {
let body = "";
res.on("data", (chunk) => {
body += chunk;
});
res.on("end", () => {
try {
let json = JSON.parse(body);
// do something with JSON
} catch (error) {
console.error(error.message);
};
});
}).on("error", (error) => {
console.error(error.message);
});
Once you have that, you can pass it directly to a storage method as a data blob or byte array.
byte[] byteArray = resultJson.toString().getBytes("UTF-8");

Serverless lambda trigger read json file

I have lambda (Node) which has trigger to fire when a new JSON file added to our S3 bucket. Here is my lambda code
module.exports.bookInfo = (event, context) => {
console.log('Events ', JSON.stringify(event));
event.Records.forEach((record) => {
const filename = record.s3.object.key;
const bucketname = record.s3.bucket.name;
let logMsg = [];
const s3File = `BucketName: [${bucketname}] FileName: [${filename}]`;
console.log(s3File)
logMsg.push(`Lambda execution started for ${s3File}, Trying to download file from S3`);
try {
s3.getObject({
Bucket: bucketname,
Key: filename
}, function(err, data) {
logMsg.push('Data is ', JSON.stringify(data.Body))
if (err) {
logMsg.push('Generate Error :', err);
console.log(logMsg)
return null;
}
logMsg.push(`File downloaded successfully. Processing started for ${s3File}`);
logMsg.push('Data is ', JSON.stringify(data.Body))
});
} catch (e) {console.log(e)}
});
}
When i run this, i don't get file content and i suspect that lambda finishes execution before file read operation complete. I tried with async await without success. What i am missing here ? I was able to read small file of 1 kb but when my file grows like 100 MB, it causes issue.
Thanks in advance
I was able to do it through async/await. Here is my code
module.exports.bookInfo = (event, context) => {
event.Records.forEach(async(record) => {
const filename = record.s3.object.key;
const bucketname = record.s3.bucket.name;
const s3File = `BucketName: [${bucketname}] FileName: [${filename}]`;
logMsg.push(`Lambda execution started for ${s3File}, Trying to download file from S3`);
let response = await s3.getObject({
Bucket: bucketname,
Key: filename
}).promise();
})
}

Resources