How to run a binary from a Lambda properly? (Node.js)

I'm trying to get the sox audio binary to run from a Lambda. I followed this guide: http://marcelog.github.io/articles/static_sox_transcoding_lambda_mp3.html
I built the sox binary in Docker using the latest Amazon Linux image and deployed the Lambda as a zip package with this structure:
sox
index.js
Lambda code:
exports.handler = async (event) => {
  initializeContext();
  try {
    const object = await getFile("test.mp3");
    const fs = require('fs');
    fs.writeFileSync("/tmp/test.mp3", object.Body);
    let result = child_process.execFileSync('sox', ['/tmp/test.mp3', '/tmp/testOutput.mp3', ... <filter params here>], {
      encoding: 'ascii'
      // shell: true
    });
    const file = fs.readFileSync("/tmp/testOutput.mp3");
    await putFile("testOutput.mp3", file);
  }
  catch(err) {
    try {
      await log("error", err);
    }
    catch(err) {}
  }
};
let getFile = async function(fileName) {
  const params = {
    Bucket: bucket,
    Key: fileName
  };
  return await s3.getObject(params).promise();
};

let putFile = async function(fileName, body) {
  const params = {
    Bucket: bucket,
    Key: fileName,
    Body: body
  };
  await s3.putObject(params).promise();
};
So, I need to get the file from S3, process it, and upload the result. The S3 loading is tested and works. However, I get an "EACCES" error when trying to start the "sox" process.
What could I be missing? Could it come from the fact that I used the latest Amazon Linux image while Lambda runs an older version?
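One thing worth checking first (an assumption on my part, not something from the linked guide): an EACCES from execFileSync usually means the execute bit on the bundled sox binary was lost when the zip was built. A common workaround is to copy the binary into /tmp and mark it executable before calling it; a minimal sketch:

// Sketch (assumption): restore the execute bit at runtime by copying the
// bundled binary into /tmp, which is writable in Lambda.
const fs = require('fs');
const child_process = require('child_process');

const bundledSox = `${process.env.LAMBDA_TASK_ROOT}/sox`; // binary shipped in the zip root
const execSox = '/tmp/sox';

fs.copyFileSync(bundledSox, execSox);
fs.chmodSync(execSox, 0o755); // make it executable
const version = child_process.execFileSync(execSox, ['--version'], { encoding: 'ascii' });
console.log(version);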

I was struggling with the same thing for processing audio files. This npm package came in handy:
https://github.com/stojanovic/lambda-audio
If your command is
sox input.mp3 -c 1 output.wav
your Node code will look like the following,
const lambdaAudio = require('lambda-audio')

lambdaAudio.sox('./input.mp3 -c 1 /tmp/output.wav')
  .then(response => {
    // Do something when the file was converted
  })
  .catch(errorResponse => {
    console.log('Error from the sox command:', errorResponse)
  })
and leave all the complex issues to the npm package.
Hope it helps.

Finally, I found that to get the file permissions right I have to create the zip package inside an Amazon Linux Docker image. So you need to run
npm i lambda-audio
there, then zip node_modules along with your index.js.
My Lambda package zip root folder is:
node_modules // result of npm i lambda-audio
index.js
Working solution for Node 8.10 Runtime with memory consumption optimizations:
process.env["PATH"] = process.env["PATH"] + ":" + process.env["LAMBDA_TASK_ROOT"];
const AWS = require("aws-sdk");
const s3 = new AWS.S3();
const lambdaAudio = require("lambda-audio");
let bucket;
exports.handler = async (event) => {
try {
bucket = event.bucket;
const { inputFileName, outputFileName } = event;
const fs = require("fs");
const inputStream = fs.createWriteStream("/tmp/input.mp3");
await getFileToStream(inputFileName, inputStream);
await lambdaAudio.sox("/tmp/input.mp3 /tmp/output.mp3 <YOUR FILTERS HERE>);
fs.unlinkSync("/tmp/input.mp3"); // removing unused input file
const outputStream = fs.createReadStream("/tmp/output.mp3");
await uploadFileFromStream(outputFileName, outputStream);
}
catch(err) {
// Logging
}
};
let getFileToStream = async function(fileName, writeStream) {
  const params = {
    Bucket: bucket,
    Key: fileName
  };
  const readStream = s3.getObject(params).createReadStream();
  const end = new Promise((resolve, reject) => {
    writeStream.on('close', () => resolve());
    writeStream.on('error', (err) => reject(err));
    readStream.on('error', (err) => reject(err));
  });
  readStream.pipe(writeStream);
  return end;
};

let uploadFileFromStream = async function(fileName, readStream) {
  const params = {
    Bucket: bucket,
    Key: fileName,
    Body: readStream
  };
  return await s3.upload(params).promise();
};
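For reference, the handler above is driven entirely by the event object, so a test event only needs the three fields it destructures (the field names come from the code above; the values below are placeholders):

// Hypothetical test event for the handler above; values are placeholders.
const sampleEvent = {
  bucket: "my-audio-bucket",
  inputFileName: "input.mp3",
  outputFileName: "output.mp3"
};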

How to read JSON from S3 by AWS Lambda Node.js 18.x runtime?

@TBA gave the solution.
The root cause is not the runtime; it came from SDK v3.
Lesson: do not update the code with several things mixed together (like both the runtime and the SDK version at once 🥲).
Thanks again, TBA.
I was using a Node.js 14.x runtime Lambda to read a JSON file from S3.
The brief code is below:
const AWS = require("aws-sdk");
const s3 = new AWS.S3();
exports.handler = (event) => {
const { bucketName, objKey } = event
const params = {
Bucket: bucketName,
Key: objKey
};
return new Promise((resolve) => {
s3.getObject(params, async (err, data) =>{
if (err) console.log(err, err.stack);
else {
const contents = JSON.parse(data.Body)
resolve(contents);
}
});
})
};
and it returned the JSON data as I expected.
Today I tried to create a new Lambda with the Node.js 18.x runtime, but it returned null or some errors...
Q) Could you give me some advice to solve this 🥲 ?
+) I used the same JSON file for each Lambda.
+) Not sure why, but in my case data.Body.toString() didn't work (I saw some answers on Stack Overflow suggesting that and tried it, but no luck).
Thanks in advance!
Case A (returns null)
import { S3Client, GetObjectCommand } from "@aws-sdk/client-s3";

const s3Client = new S3Client({ region: "ap-northeast-2" });

export const handler = (event) => {
  const { objKey, bucketName } = event;
  const params = {
    Bucket: bucketName,
    Key: objKey
  };
  const getObjCommand = new GetObjectCommand(params);
  return new Promise((resolve) => {
    s3Client.send(getObjCommand, async (err, data) => {
      if (err) console.log(err, err.stack);
      else {
        const list = JSON.parse(data.Body);
        resolve(list);
      }
    });
  });
};
Case B (returns "Unexpected token o in JSON at position 1")
export const handler = async (event) => {
  const { objKey, bucketName } = event;
  const params = {
    Bucket: bucketName,
    Key: objKey
  };
  const getObjCommand = new GetObjectCommand(params);
  const response = await s3Client.send(getObjCommand);
  console.log("JSON.parse(response.Body)", JSON.parse(response.Body));
};
Case C (returns "TypeError: Converting circular structure to JSON")
export const handler = async (event) => {
  const { objKey, bucketName } = event;
  const params = {
    Bucket: bucketName,
    Key: objKey
  };
  const getObjCommand = new GetObjectCommand(params);
  try {
    const response = await s3Client.send(getObjCommand);
    return JSON.stringify(response.Body);
  } catch(err) {
    console.log("error", err);
    return err;
  }
};
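For anyone else hitting this: in SDK v3 the GetObject response Body is a stream, not a Buffer, which is why JSON.parse(response.Body) fails. A minimal sketch of the kind of fix that addresses the SDK v3 Body change, using transformToString() (available on the Body stream in recent v3 releases); treat it as a sketch, not the exact accepted code:

import { S3Client, GetObjectCommand } from "@aws-sdk/client-s3";

const s3Client = new S3Client({ region: "ap-northeast-2" });

export const handler = async (event) => {
  const { objKey, bucketName } = event;
  const response = await s3Client.send(
    new GetObjectCommand({ Bucket: bucketName, Key: objKey })
  );
  // In SDK v3, Body is a readable stream; convert it to a string before parsing.
  const bodyString = await response.Body.transformToString();
  return JSON.parse(bodyString);
};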

Node.js async calls do not run in sequence

I am completely new to Node.js.
I am trying to code the steps below:
Download a file from an AWS S3 folder.
Then upload it to some other AWS S3 folder.
I searched online and put together the similar code in Node.js below.
What I see is that the downloadFile and uploadFile functions run in parallel, and uploadFile seems to run first.
How can I run them in sequence?
const aws = require('aws-sdk');
var s3 = new aws.S3();
var fs = require('fs');

// TODO implement
var params = { Bucket: "buckets3", Key: "input_pdf_img/Gas_bill_sample.pdf" };
const filename = 'Gas_bill_sample.pdf';
const bucketName = "translation-bucket-qa-v1";
const key = "input_pdf_img/Gas_bill_sample.pdf";
const key2 = "output_pdf2docx_img/" + filename;
//console.log(filename);
const tmp_filename = "/tmp/Gas_bill_sample.pdf";
console.log(filename);

const downloadFile = (tmp_filename, bucketName, key) => {
  const params2 = {
    Bucket: bucketName,
    Key: key
  };
  s3.getObject(params, (err, data) => {
    if (err) console.error(err);
    fs.writeFileSync(tmp_filename, data.Body.toString());
    //console.log(`${filePath} has been created!`);
  });
};

//downloadFile(tmp_filename, bucketName, key);
//console.log('download done');
//await sleep(1000);

//upload
const uploadFile = (tmp_filename) => {
  // Read content from the file
  const fileContent = fs.readFileSync(tmp_filename);
  // Setting up S3 upload parameters
  const params2 = {
    Bucket: bucketName,
    Key: key2, // File name you want to save as in S3
    Body: fileContent
  };
  // Uploading files to the bucket
  s3.upload(params2, function(err, data) {
    if (err) {
      throw err;
    }
    console.log(`File uploaded successfully. ${data.Location}`);
  });
};

downloadFile(tmp_filename, bucketName, key);
console.log('download done');
//setTimeout(() => {console.log("Let the download finish")}, 6000);
uploadFile(tmp_filename);
//setTimeout(() => {console.log("Let the download finish")}, 6000);
I tried timeouts and other approaches, but no luck.
Because the calls run in parallel, the error is "No such file or directory", since uploadFile runs before downloadFile has written the file.
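One way to make the two steps run in sequence (a sketch against AWS SDK v2, reusing the bucket/key constants above and replacing the two helpers; not the asker's final code) is to use the SDK's .promise() interface and await each call:

// Sketch: promise-based versions of the two helpers, run strictly in sequence.
const downloadFile = async (tmp_filename, bucketName, key) => {
  const data = await s3.getObject({ Bucket: bucketName, Key: key }).promise();
  fs.writeFileSync(tmp_filename, data.Body); // write the raw Buffer; .toString() would corrupt a PDF
};

const uploadFile = async (tmp_filename) => {
  const fileContent = fs.readFileSync(tmp_filename);
  const data = await s3
    .upload({ Bucket: bucketName, Key: key2, Body: fileContent })
    .promise();
  console.log(`File uploaded successfully. ${data.Location}`);
};

const run = async () => {
  await downloadFile(tmp_filename, bucketName, key); // completes before the next line runs
  console.log('download done');
  await uploadFile(tmp_filename);
};

run().catch(console.error);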

AWS Lambda unzip from S3 to S3

I'm trying to write a Lambda function that unzips zip files in one S3 directory and extracts them into another. I had this working in Python, but nobody else in my group likes Python, so I'm converting it to Node.js, which I'm not very good at.
I'm trying to use the unzipper package, and I'm able to get a list of files in the zip using unzipper.Open.s3, but I can't figure out how to stream the files in the zip into S3.
The meat of the code looks like this:
const directory = await unzipper.Open.s3(s3, { Bucket: bucket, Key: zip_file });
directory.files.forEach(file => {
  console.log("file name = " + file.path + ", type = " + file.type);
  const key = dir[0] + "/output/" + file.path;
  const params = { Bucket: bucket, Key: key };
  const { writeStream, promise } = uploadStream(params);
  file.stream().pipe(writeStream);
  promise.then(() => {
    console.log('upload completed successfully');
  }).catch((err) => {
    console.log('upload failed.', err.message);
  });
});
const uploadStream = ({ Bucket, Key }) => {
  const pass = new stream.PassThrough();
  return {
    writeStream: pass,
    promise: s3.upload({ Bucket, Key, Body: pass }).promise()
  };
};
I get the console.log for each file, but none of the logs in promise.then or .catch show up, and no new files appear in S3 (most likely because nothing awaits those upload promises, so the Lambda finishes before they resolve).
Never mind, I found this code, which works better:
// unzipper and the aws-sdk S3 client are required at module scope;
// zip_directory, zip_file, input_bucket, output_bucket and output_directory
// are configuration values defined elsewhere.
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
const unzipper = require('unzipper');

exports.handler = async (event) => {
  const params = {
    Key: zip_directory + "/" + zip_file,
    Bucket: input_bucket
  };
  const zip = s3
    .getObject(params)
    .createReadStream()
    .pipe(unzipper.Parse({ forceStream: true }));
  const promises = [];
  let num = 0;
  for await (const e of zip) {
    const entry = e;
    const fileName = entry.path;
    const type = entry.type;
    if (type === 'File') {
      const uploadParams = {
        Bucket: output_bucket,
        Key: output_directory + fileName,
        Body: entry,
      };
      promises.push(s3.upload(uploadParams).promise());
      num++;
    } else {
      entry.autodrain();
    }
  }
  await Promise.all(promises);
};

application/octet-stream issue while using google moderate images trigger (blur image)

I'm using the "moderate images" solution trigger from Google.
I took this solution from here.
I asked someone to upgrade this solution for me, and here is the code:
'use strict'

const gm = require('gm').subClass({ imageMagick: true })
const functions = require('firebase-functions')
const admin = require('firebase-admin')
admin.initializeApp()
const Vision = require('@google-cloud/vision')
const vision = new Vision.ImageAnnotatorClient()
const spawn = require('child-process-promise').spawn
const path = require('path')
const fs = require('fs')

const { Storage } = require('@google-cloud/storage')
const gcs = new Storage({
  projectId: xxxxxxxxxxx,
})

exports.blurOffensiveImages = functions.storage
  .object()
  .onFinalize(async (object) => {
    const file = gcs.bucket(object.bucket).file(object.name)
    const filePath = `gs://${object.bucket}/${object.name}`
    console.log(`Analyzing ${file.name}.`)
    try {
      const [result] = await vision.safeSearchDetection(filePath)
      const detections = result.safeSearchAnnotation || {}
      if (
        detections.adult === 'VERY_LIKELY' ||
        detections.violence === 'VERY_LIKELY'
      ) {
        console.log(`Detected ${file.name} as inappropriate.`)
        await blurImage(file, object.bucket, object.metadata)
        console.log('Deleted local file', file)
        return null
      } else {
        console.log(`Detected ${file.name} as OK.`)
      }
    } catch (err) {
      console.error(`Failed to analyze ${file.name}.`, err)
      throw err
    }
  })
async function blurImage(file, bucketName, metadata) {
  const tempLocalPath = `/tmp/${path.parse(file.name).base}`
  const bucket = gcs.bucket(bucketName)
  await file.download({ destination: tempLocalPath })
  console.log('The file has been downloaded to', tempLocalPath)
  // Blur the image using ImageMagick.
  await new Promise((resolve, reject) => {
    gm(tempLocalPath)
      .blur(0, 20)
      .write(tempLocalPath, (err, stdout) => {
        if (err) {
          console.error('Failed to blur image.', err);
          reject(err);
        } else {
          console.log(`Blurred image: ${file.name}`);
          resolve(stdout);
        }
      });
  });
  console.log('Blurred image created at', tempLocalPath)
  await bucket.upload(tempLocalPath, {
    destination: file.name,
    metadata: { metadata: metadata },
  })
  console.log('Blurred image uploaded to Storage at', file)
  return fs.unlink(tempLocalPath, (e) => { if (e) { console.log(e) } })
}
And it works perfectly, with one bad issue.
Sometimes when a user sends a list of photos I get an "application/octet-stream" file type, but it should be "image/jpg"; all media files in my project should be image/jpg.
(Example: one user's publication with the wrong image content type.)
It looks like this trigger gets stuck while executing.
I added a delay to the image uploading in my project, but it doesn't help.
I tested it: when I delete this trigger, all uploaded photos are fine and there are no issues at all.
Help me fix it.
P.S. I also want to note that after uploading, the image should keep all its original data (destination, name, etc.).
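One thing worth checking (an assumption on my part, not a confirmed cause): the re-upload of the blurred file does not set a contentType, so Storage may fall back to application/octet-stream if it cannot infer the type. Passing the original object's contentType through to the upload would keep the re-uploaded image as image/jpeg; a sketch of that change:

// Sketch (assumption, not a confirmed fix): keep the original MIME type when
// re-uploading the blurred file. `object` is the onFinalize trigger object.
const { Storage } = require('@google-cloud/storage')
const gcs = new Storage()

async function uploadBlurred(tempLocalPath, object) {
  const bucket = gcs.bucket(object.bucket)
  await bucket.upload(tempLocalPath, {
    destination: object.name,
    metadata: {
      contentType: object.contentType || 'image/jpeg', // original type from the trigger
      metadata: object.metadata,                       // keep the custom metadata too
    },
  })
}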

AWS S3 Loading many files

I need to upload a lot of files (about 65,000) split across subdirectories.
I tried to iterate and upload every single file like this:
const fs = require("fs");
const path = require("path");
const async = require("async");
const AWS = require("aws-sdk");
const readdir = require("recursive-readdir");
const slash = require("slash");
const { BUCKET, KEY, SECRET } = process.env;
const rootFolder = path.resolve(__dirname, "./");
const uploadFolder = "./test_files/15";
const s3 = new AWS.S3({
signatureVersion: "v4",
accessKeyId: KEY,
secretAccessKey: SECRET,
});
function getFiles(dirPath) {
return fs.existsSync(dirPath) ? readdir(dirPath) : [];
}
async function deploy(upload) {
if (!BUCKET || !KEY || !SECRET) {
throw new Error("you must provide env. variables: [BUCKET, KEY, SECRET]");
}
const filesToUpload = await getFiles(path.resolve(__dirname, upload));
return new Promise((resolve, reject) => {
async.eachOfLimit(
filesToUpload,
10,
async.asyncify(async (file) => {
const Key = file.replace(rootFolder + path.sep, "");
console.log(`uploading: [${slash(Key)}]`);
var options = { partSize: 5 * 1024 * 1024, queueSize: 4 };
return new Promise((res, rej) => {
s3.upload(
{
Key: slash(Key),
Bucket: BUCKET,
Body: fs.readFileSync(file),
},
(err) => {
if (err) {
return rej(new Error(err));
}
res({ result: true });
}
);
});
}),
(err) => {
if (err) {
return reject(new Error(err));
}
resolve({ result: true });
}
);
});
}
deploy(uploadFolder)
.then(() => {
console.log("task complete");
process.exit(0);
})
.catch((err) => {
console.error(err);
process.exit(1);
});
but after a considerable number of uploads I get this:
Error: Error: NetworkingError: connect ETIMEDOUT IP_S3_AWS
I need to upload this set of files from an EC2 instance (because it is the result of image processing). I see this behavior from my PC; I don't know whether EC2 has the same problem.
I considered zipping everything and uploading the archive, but I need to keep the original directory structure.
I'm also open to a different way of solving the problem.
Sorry for my bad English.
It would probably be much simpler to use the AWS CLI aws s3 sync command instead of building this yourself.
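For example (bucket name and prefix are placeholders), a single command uploads the whole tree while preserving the directory structure, and the CLI handles retries and parallelism itself:

# Sketch: push the local tree to S3, keeping the directory layout.
aws s3 sync ./test_files s3://my-bucket/test_files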
