How to save a .wav file from a URL to an S3 bucket in a Lambda function - Node.js

I have a URL for a .wav file. I'd like to save it to an S3 bucket from a Lambda function.
Do I have to download it first?
What's the best way to do this?
exports.handler = async (event) => {
  // imports
  const fs = require('fs');
  const AWS = require('aws-sdk');
  AWS.config.update({ region: process.env.REGION || 'us-east-1' })
  const s3 = new AWS.S3();
  // get URL
  const body = parseBody(event['body']);
  const url = body.url;
  // download file?
  // HOW TO DO THIS using async?
  const file_name = magic_save(url)
  // upload to S3
  var bucketName = `some_bucket`;
  var keyName = 'audio.wav';
  const fileContent = fs.readFileSync(file_name);
  var params = { 'Bucket': bucketName, 'Key': keyName, 'Body': fileContent };
  try {
    console.log('saving...');
    const data = await s3.putObject(params).promise();
    console.log("Successfully saved object to " + bucketName + "/" + keyName);
  } catch (err) {
    console.log(err);
  }
};

The best approach is to stream the file directly to S3, like this:
const got = require("got");
const aws = require("aws-sdk");

const s3Client = new aws.S3();
const Bucket = 'somebucket';
const Key = "some/audio.wav";

exports.handler = async (event) => {
  // get URL
  const body = parseBody(event['body']);
  const url = body.url;
  const stream = got.stream(url);
  const response = await s3Client.upload({ Bucket, Key, Body: stream }).promise();
  console.log(response);
};
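s3.upload() accepts a readable stream of unknown length (it falls back to a managed multipart upload internally), which is why no temporary file is needed. If the function is ever migrated to AWS SDK v3, the managed uploader lives in @aws-sdk/lib-storage instead; here is a minimal sketch, assuming the v3 packages are bundled with the function, that the URL arrives as JSON in the request body, and placeholder bucket/key names:
const { S3Client } = require("@aws-sdk/client-s3");
const { Upload } = require("@aws-sdk/lib-storage");
const got = require("got");

exports.handler = async (event) => {
  const { url } = JSON.parse(event.body); // assumption: the .wav URL is posted as { "url": "..." }
  const upload = new Upload({
    client: new S3Client({}),
    params: { Bucket: "somebucket", Key: "some/audio.wav", Body: got.stream(url) },
  });
  return upload.done(); // resolves once the streamed upload has completed
};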

Related

Is there any way to upload fluent-ffmpeg converted videos directly to S3 without storing them locally?

Is it possible to store ffmpeg output directly to S3 without saving it locally or to any other storage?
Below is my code, which uses ffmpeg to convert the format of a video. I have the conversion part done, but I need to store its output directly in an S3 bucket. Does anyone have an idea how to do this?
const AWS = require('aws-sdk');
const fs = require('fs');
const ffmpeg = require('fluent-ffmpeg');
const axios = require('axios');

const s3 = new AWS.S3({
  endpoint: 's3-ap-south-1.amazonaws.com', // Put your region's endpoint
  accessKeyId: S3_ACCESS_KEY_ID,           // Put your accessKeyId
  secretAccessKey: S3_ACCESS_SECRET_KEY,   // Put your secretAccessKey
  Bucket: S3_BUCKET_NAME,                  // Put your bucket name
  signatureVersion: 'v4',
  region: 'ap-south-1'                     // Put your region
});

var params = {
  Bucket: S3_BUCKET_NAME,
  Delimiter: '',
  Prefix: S3_STORE_PATH
};

s3.listObjects(params, function (err, data) {
  if (err) throw err;
  console.log(data);
  data.Contents.forEach(function (obj, index) {
    const file_name = obj.Key;
    const type = "mp4";
    console.log(obj.Key);
    const url = s3.getSignedUrl('getObject', {
      Bucket: S3_BUCKET_NAME,
      Key: obj.Key,
      Expires: signedUrlExpireSeconds
    });
    console.log("SIGNED URL= ", url);
    const filename = file_name.split('.').slice(0, -1).join('.');
    const localFileOutput = `${filename}.${type}`;
    // const localFileOutput = `${bucket_url}${filename}.${type}`;
    console.log(localFileOutput);
    const key = `${filename}.${type}`;
    const convert_video = async (req, res) => {
      await new Promise((resolve, reject) => {
        ffmpeg().input(url)
          .toFormat('mp4')
          .output(localFileOutput)
          .on('end', async () => {
            const params = {
              Bucket: S3_BUCKET_NAME,
              Key: key,
              Body: localFileOutput
            };
            // const fileContent = await fs.readFileSync(localFileOutput);
            await s3.putObject(params).promise();
            resolve();
          }).run();
      });
      // res.send("success")
    };
    convert_video();
  });
});
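For what it's worth, fluent-ffmpeg can pipe its output into a writable stream, and s3.upload() accepts a stream of unknown length, so the intermediate local file can be skipped entirely. A rough sketch (assumptions: the input is a signed URL as above, and the output is a fragmented MP4, which is required because ffmpeg cannot seek when writing to a pipe):
const AWS = require('aws-sdk');
const ffmpeg = require('fluent-ffmpeg');
const { PassThrough } = require('stream');

const s3 = new AWS.S3({ region: 'ap-south-1' });

function convertAndUpload(signedUrl, bucket, key) {
  // ffmpeg writes into the PassThrough while s3.upload() reads from it as a managed multipart upload.
  const pass = new PassThrough();
  ffmpeg(signedUrl)
    .outputOptions(['-movflags', 'frag_keyframe+empty_moov']) // make the MP4 streamable
    .format('mp4')
    .on('error', (err) => pass.destroy(err))
    .pipe(pass, { end: true });
  return s3.upload({ Bucket: bucket, Key: key, Body: pass }).promise();
}
Something like convertAndUpload(url, S3_BUCKET_NAME, key) could then replace the output-file step inside the listObjects loop above.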

In NodeJS, how to download files from S3

In ExpressJS, I would like to download files previously uploaded to an Amazon S3 bucket.
Here is my current route:
const express = require('express');
const AWS = require('aws-sdk');
const mammoth = require('mammoth');
const fs = require('fs').promises;
const path = require('path');

const router = express.Router();

router.put('/:id/download', async (req, res, next) => {
  console.log('hitting download route');
  var id = req.params.id;
  let upload = await Upload.query().findById(id).eager('user');
  console.log("file to download is: ", upload.name);
  AWS.config.update({
    accessKeyId: process.env.AWS_ACCESS_KEY_ID,
    secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
  });
  const s3 = new AWS.S3();
  // var fileStream = fs.createWriteStream('/tmp/file.docx');
  // var s3Stream = s3.getObject(params).createReadStream();
  const downloadFromS3 = async () => {
    const params = {
      Bucket: process.env.AWS_BUCKET,
      Key: upload.file_url.split("com/").reverse()[0]
    };
    const { Body } = await s3.getObject(params).promise();
    await fs.writeFile(`${__dirname}/download.docx`, Body);
    return Body;
  };
  // mammoth.convertToHtml({ path: '/Users/dariusgoore/Downloads/1585930968750.docx' })
  //   .then(async function(result) {
  //     await Upload.query().findById(id)
  //       .patch({
  //         html: result.value,
  //         conversion_messages: result.messages
  //       })
  //     res.json(result);
  //   })
  //   .done();
  res.send(downloadFromS3);
});
I get no errors, but the file is not created, or if I manually create the file, it remains empty.
If I've understood you correctly, the issue is that downloadFromS3 is never actually called or awaited, so nothing waits for the file to be written to the local file system; the route just passes the function itself to the Express response.
Give this code a go:
const express = require('express')
const AWS = require('aws-sdk')
const mammoth = require('mammoth')
const fs = require('fs').promises
const path = require('path')

const router = express.Router()

// Configure credentials before constructing the S3 client
AWS.config.update({
  accessKeyId: process.env.AWS_ACCESS_KEY_ID,
  secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
})
const s3 = new AWS.S3()

const downloadFromS3 = async (key, location) => {
  const params = {
    Bucket: process.env.AWS_BUCKET,
    Key: key,
  }
  const { Body } = await s3.getObject(params).promise()
  await fs.writeFile(location, Body)
  return true
}

router.put('/:id/download', async (req, res, next) => {
  console.log('hitting download route')
  const upload = await Upload.query()
    .findById(req.params.id)
    .eager('user')
  console.log('file to download is: ', upload.name)
  const key = upload.file_url.split('com/').reverse()[0]
  const location = `${__dirname}/${key}.docx`
  await downloadFromS3(key, location)
  res.send({ key, location })
})
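If the goal is to hand the file straight back to the client rather than keep it on the server, the object can also be piped into the Express response. A rough sketch reusing the router, s3 client, Upload model, and key parsing from the snippet above (the route path is made up):
router.get('/:id/stream', async (req, res, next) => {
  const upload = await Upload.query().findById(req.params.id).eager('user')
  const key = upload.file_url.split('com/').reverse()[0]
  res.attachment(key) // sets Content-Disposition so the browser treats it as a download
  s3.getObject({ Bucket: process.env.AWS_BUCKET, Key: key })
    .createReadStream()
    .on('error', next)
    .pipe(res)
})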
Or, as a reusable TypeScript helper that streams the object from S3 to a local file:
import { S3 } from 'aws-sdk';
import fs from 'fs';

export default class S3Service {
  s3: S3;

  constructor() {
    this.s3 = new S3({
      apiVersion: *****,
      region: ********
    });
  }

  // Download File
  async download(bucketName: string, keyName: string, localDest?: string): Promise<any> {
    if (typeof localDest == 'undefined') {
      localDest = keyName;
    }
    const params = {
      Bucket: bucketName,
      Key: keyName
    };
    console.log("params: ", params);
    let writeStream = fs.createWriteStream(localDest);
    return new Promise<any>((resolve, reject) => {
      const readStream = this.s3.getObject(params).createReadStream();
      // Error handling in read stream
      readStream.on("error", (e) => {
        console.error(e);
        reject(e);
      });
      // Resolve only if we are done writing
      writeStream.once('finish', () => {
        resolve(keyName);
      });
      // pipe will automatically finish the write stream once done
      readStream.pipe(writeStream);
    });
  }
}
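A usage sketch for the class above (the module path, bucket, key, and destination are placeholders):
import S3Service from './S3Service';

const service = new S3Service();
service.download('my-bucket', 'uploads/report.docx', '/tmp/report.docx')
  .then((key) => console.log(`downloaded ${key}`))
  .catch(console.error);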

Read excel file uploaded to s3 via node lambda function

I am trying to parse an Excel file that is uploaded to S3, using read-excel-file in a Node Lambda function that triggers on any S3 put. Here is my code, which currently doesn't work. Can somebody tell me where I am going wrong?
const aws = require("aws-sdk");
const s3 = new aws.S3({ apiVersion: "2006-03-01" });
const readXlsxFile = require("read-excel-file/node");

exports.handler = async (event, context) => {
  // Get the object from the event and show its content type
  const bucket = event.Records[0].s3.bucket.name;
  const key = decodeURIComponent(
    event.Records[0].s3.object.key.replace(/\+/g, " ")
  );
  const params = {
    Bucket: bucket,
    Key: key
  };
  try {
    const doc = await s3.getObject(params);
    const parsedDoc = await readXlsxFile(doc);
    console.log(parsedDoc);
  } catch (err) {
    console.log(err);
    const message = `Error getting object ${key} from bucket ${bucket}. Make sure they exist and your bucket is in the same region as this function.`;
    console.log(message);
    throw new Error(message);
  }
};
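Incidentally, the usual culprits with this snippet are that s3.getObject(params) on its own returns a pending AWS.Request rather than the object (it needs .promise()), and that read-excel-file/node expects a buffer or stream rather than the whole response. A minimal sketch of that fix, assuming a version of the library that accepts a Buffer:
const aws = require("aws-sdk");
const s3 = new aws.S3({ apiVersion: "2006-03-01" });
const readXlsxFile = require("read-excel-file/node");

exports.handler = async (event) => {
  const bucket = event.Records[0].s3.bucket.name;
  const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
  // .promise() resolves to the response object; hand its Body (a Buffer) to the parser.
  const { Body } = await s3.getObject({ Bucket: bucket, Key: key }).promise();
  const rows = await readXlsxFile(Body);
  console.log(rows);
  return rows;
};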
I haven't used Lambda functions, but I have done something very similar in Firebase Functions. I used convert-excel-to-json.
I first downloaded the Excel file from Firebase Storage to the Functions machine, then used this npm module to extract the information.
I don't have time to format the code, but I can leave it here for reference:
// imports needed by this snippet (assumed: firebase-functions, firebase-admin, convert-excel-to-json)
const functions = require("firebase-functions");
const admin = require("firebase-admin");
const path = require("path");
const os = require("os");
const excelToJson = require("convert-excel-to-json");

// Runs when excel file is uploaded to storage
exports.uploadOrder = functions.storage.object().onFinalize(async (file) => {
  const fileBucket = file.bucket;
  const filePath = file.name || "null";
  const filePathList = filePath?.split("/") || ["null"];
  const fileName = path.basename(filePath);
  if (filePathList[0] !== "excel_orders") {
    return;
  }
  const uid = filePathList[1];
  console.log("User ID: " + uid);
  const bucket = admin.storage().bucket(fileBucket);
  const tempFilePath = path.join(os.tmpdir(), fileName);
  console.log(tempFilePath);
  await bucket.file(filePath).download({ destination: tempFilePath });
  const result = excelToJson({
    sourceFile: tempFilePath,
  });
  var ordersObj: any[] = result.Sheet1;
  ordersObj.shift();
  console.log(ordersObj);
  var orders: any[] = [];
  for (let i = 0; i < ordersObj.length; i++) {
    const order: Order = {
      package_description: ordersObj[i].A,
      package_type: ordersObj[i].B,
      country: ordersObj[i].C,
      address: ordersObj[i].D,
      curstomer_name: ordersObj[i].E,
      customer_phone: ordersObj[i].F,
      collection_ammount: ordersObj[i].G,
      order_date: ordersObj[i].H,
      delivery_date: ordersObj[i].I,
      delivery_time: ordersObj[i].J,
      status: "pending",
      assignedTo: "",
      merchantID: uid,
    };
    orders.push(order);
  }
});
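Translating the same idea back to the original Lambda setup is mostly mechanical: copy the object into os.tmpdir() and hand the temp file to convert-excel-to-json. A rough sketch, untested against the poster's data:
const aws = require("aws-sdk");
const excelToJson = require("convert-excel-to-json");
const fs = require("fs").promises;
const os = require("os");
const path = require("path");

const s3 = new aws.S3();

exports.handler = async (event) => {
  const bucket = event.Records[0].s3.bucket.name;
  const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
  // Lambda only allows writes under /tmp, so stage the workbook there before converting it.
  const { Body } = await s3.getObject({ Bucket: bucket, Key: key }).promise();
  const tempFilePath = path.join(os.tmpdir(), path.basename(key));
  await fs.writeFile(tempFilePath, Body);
  const result = excelToJson({ sourceFile: tempFilePath });
  console.log(result);
  return result;
};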

How to chain writeFile() and OCR with NodeJS in Google Cloud Functions?

The scenario is as follows:
A file is fetched from an Amazon S3 bucket, then stored in a temporary folder, and then Optical Character Recognition is to be performed on it using the Google Cloud Vision API.
Unfortunately, this doesn't work. I think it's due to the asynchronous/synchronous execution, but I've already tried several variants with callbacks/promises and didn't get any further.
If someone can give me a hint on how to construct this scenario I would be grateful!
The current error is:
TypeError: Cannot read property 'writeFile' of undefined at Response.<anonymous> (/srv/index.js:38:32) (it's the 'await fs.writeFile(dir,data);' line)
/**
 * Responds to any HTTP request.
 *
 * @param {!express:Request} req HTTP request context.
 * @param {!express:Response} res HTTP response context.
 */
const AWS = require('aws-sdk');
const fs = require('fs').promises;
const Vision = require('@google-cloud/vision');
var os = require('os');

exports.helloWorld = async (req, res) => {
  var bucket, fileName, fileUrl;
  req.on('data', chunk => {
    body += chunk.toString();
    data.push(chunk);
  });
  req.on('end', () => {
    bucket = JSON.parse(data).value1;
    fileName = JSON.parse(data).value2;
    fileUrl = JSON.parse(data).value3;
    var s3 = new AWS.S3();
    s3.getObject({
        Bucket: bucket,
        Key: fileName
      },
      async function (error, data) {
        if (error != null) {
          console.log("Failed to retrieve an object: " + error);
        } else {
          console.log("Loaded " + data.ContentType + " bytes");
          var tmpdir = os.tmpdir();
          var dir = tmpdir + '/' + fileName;
          try {
            await fs.writeFile(dir, data);
            const vision = new Vision.ImageAnnotatorClient();
            let text;
            await vision
              .textDetection('/tmp/' + fileName)
              .then(([detections]) => {
                const annotation = detections.textAnnotations[0];
                console.log(1);
                text = annotation ? annotation.description : '';
                console.log(`Extracted text from image (${text.length} chars)`);
                console.log(1);
                console.log(text);
                resolve("Finished ocr successfully");
              })
              .catch(error => {
                console.log(error);
                reject("Error with OCR");
              });
          } catch (error) {
            console.log(error);
          }
        }
      },
    );
    let message = bucket + fileName + fileUrl;
    res.status(200).send(message);
  });
};
You're getting that error because you're running on an older version of Node (< 10.0.0), where fs.promises is not available. That's why fs is undefined, and you're getting:
TypeError: Cannot read property 'writeFile' of undefined at Response.<anonymous> (/srv/index.js:38:32) (it's the 'await fs.writeFile(dir,data);' line)
Either use a newer version, or just promisify the code.
const { promisify } = require('util');
const fs = require('fs');
// const fs = require('fs').promises
const writeFile = promisify(fs.writeFile);
And now use writeFile instead of fs.writeFile in your code.
Aside from that, there are a few issues with your code.
req.on('data', chunk => {
  body += chunk.toString();
  data.push(chunk);
});
data is not defined anywhere, and it doesn't make sense to push chunks into an array and then run JSON.parse on that array, given the next few lines.
bucket = JSON.parse(data).value1;
fileName = JSON.parse(data).value2;
fileUrl = JSON.parse(data).value3;
Furthermore, JSON.parse should be called only once, instead of parsing the same string (which is an array in your code, and will yield an error) 3 times.
const values = JSON.parse(body); // should be body instead of data with the posted code
bucket = values.value1;
fileName = values.value2;
fileUrl = values.value3;
This can be improved greatly by just posting bucket, fileName & fileUrl in the JSON instead of valueN.
const { bucket, fileName, fileUrl } = JSON.parse(body);
The whole code can be rewritten into:
const AWS = require('aws-sdk');
const { promisify } = require('util');
const fs = require('fs');
const Vision = require('@google-cloud/vision');
const os = require('os');
const path = require('path');

const writeFile = promisify(fs.writeFile);

exports.helloWorld = async (req, res) => {
  let body = '';
  req.on('data', chunk => {
    body += chunk.toString();
  });
  req.on('end', async () => {
    // post { "bucket": "x", "fileName": "x", "fileUrl": "x" }
    const { bucket, fileName, fileUrl } = JSON.parse(body);
    var s3 = new AWS.S3();
    try {
      const data = await s3.getObject({
        Bucket: bucket,
        Key: fileName
      }).promise();
      const tmpdir = os.tmpdir();
      const filePath = path.join(tmpdir, fileName);
      // write the object's Body (a Buffer), not the whole response
      await writeFile(filePath, data.Body);
      const vision = new Vision.ImageAnnotatorClient();
      const [detections] = await vision.textDetection(filePath);
      const annotation = detections.textAnnotations[0];
      const text = annotation ? annotation.description : '';
      console.log(`Extracted text from image (${text.length} chars)`);
      let message = bucket + fileName + fileUrl;
      res.status(200).send(message);
    } catch (e) {
      console.error(e);
      res.status(500).send(e.message);
    }
  });
};
NOTE: I don't know if the Vision API works exactly like this, but I used the same logic and parameters that you're using.

stream the contents of an S3 object into hash algorithm node.js

I'm new to node.js and I'm trying to write an AWS Lambda function that streams the content of an S3 object into Node's crypto module to create an MD5 checksum of the object. Not sure why, but every time I run the code it generates a different hash value in the console.log. Can anyone point me in the right direction to fix my code? Appreciate the help!
var crypto = require('crypto');
var fs = require('fs');
var AWS = require('aws-sdk');
var s3 = new AWS.S3();

exports.handler = (event, context, callback) => {
  var params = {
    Bucket: 'bucket_name',
    Key: 'key',
  };
  var hash = crypto.createHash('md5');
  var stream = s3.getObject(params, function (err, data) {
    if (err) {
      console.log(err);
      return;
    }
  }).createReadStream();
  stream.on('data', function (data) {
    hash.update(data, 'utf-8');
  });
  stream.on('end', function () {
    console.log(hash.digest('hex'));
  });
};
You were close. You are mixing the "callback" style method signature with a "createReadStream" signature. Try this:
const crypto = require('crypto');
const fs = require('fs');
const AWS = require('aws-sdk');
const s3 = new AWS.S3();

exports.handler = (event, context, callback) => {
  let params = {
    Bucket: 'bucket_name',
    Key: 'key',
  };
  let hash = crypto.createHash('md5');
  let stream = s3.getObject(params).createReadStream();
  stream.on('data', (data) => {
    hash.update(data);
  });
  stream.on('end', () => {
    let digest = hash.digest('hex');
    console.log(digest);
    callback(null, digest);
  });
};
Not directly an answer, but you can also send the MD5 hash (as ContentMD5) when uploading a file to S3; for a single-part upload the object's ETag will then be that MD5 digest.
const crypt = require('crypto');
const fs = require('fs').promises;
const aws = require('aws-sdk');

async function uploadFileToS3WithMd5Hash(bucket, filename, s3Key = null) {
  const data = await fs.readFile(filename);
  const md5Base64 = crypt.createHash("md5").update(data).digest('base64');
  if (!s3Key) {
    s3Key = filename;
  }
  /** Should you want to get the MD5 in hex format: */
  // const md5Hex = Buffer.from(md5Base64, 'base64').toString('hex');
  return new Promise((res, rej) => {
    const s3 = new aws.S3();
    s3.putObject({
      Bucket: bucket,
      Key: s3Key,
      Body: data,
      ContentMD5: md5Base64,
    }, (err, resp) => err ? rej(err) : res(resp));
  });
}

uploadFileToS3WithMd5Hash('your-own-bucket', 'file.txt')
  .then(console.log)
  .catch(console.error);
So by checking the ETag for an object on S3, you would get the hex string of the file's MD5 hash.
In some cases (see this post by Dennis), the MD5 checksum is computed automatically upon upload.
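To illustrate the ETag check described above, a small sketch (assuming a single-part upload; multipart ETags are not plain MD5 digests):
const crypto = require('crypto');
const fs = require('fs').promises;
const aws = require('aws-sdk');

async function etagMatchesLocalFile(bucket, key, filename) {
  const s3 = new aws.S3();
  // headObject returns the ETag wrapped in double quotes, so strip them before comparing.
  const { ETag } = await s3.headObject({ Bucket: bucket, Key: key }).promise();
  const localMd5 = crypto.createHash('md5').update(await fs.readFile(filename)).digest('hex');
  return ETag.replace(/"/g, '') === localMd5;
}

etagMatchesLocalFile('your-own-bucket', 'file.txt', 'file.txt')
  .then((ok) => console.log(ok ? 'MD5 matches ETag' : 'mismatch'))
  .catch(console.error);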
