Amazon Polly Character Limit Increase - node.js

According to a recent AWS announcement, the new character limit for Polly is 3,000, double the previous limit of 1,500.
Previously, my application had a character limit of 1,450 to account for the characters I add programmatically to the user's input. In view of the above announcement, I thought increasing my character limit to 2,950 would be safe. However, my audio files come out empty (0 KB) any time I exceed about 2,450 characters. I'm baffled by this, and of course I would like to use the extra 500 characters if possible.
Here is my code:
var AWS = require('aws-sdk'),
    fs = require('fs');
const Fs = require('fs')
const path = require('path');
AWS.config.loadFromPath(path.join(__dirname, 'config.json'));
var mysql = require('mysql');
var localfile = path.join(__dirname, 'myverse.mp3');

var connection = mysql.createConnection({
    connectionLimit : 10,
    host : '...',
    user : '...',
    password : '...',
    database: '...',
});

[some irrelevant code omitted here.]

connection.query('SELECT versetext, book, mp3, id, reference, userid FROM publicverses where mp3 = "empty" limit 1',
    function (error, results, fields) {
        console.log(error);
        var scripture = results[0].versetext + ".";
        var userid = results[0].userid;
        var book = results[0].book;
        var reference = results[0].reference.replace(":", " verse ").replace(",", " and ");
        if (reference.includes("-")) {
            var reference = reference.replace("verse", "verses");
        }
        console.log(scripture + " " + book.replace("1", "first").replace("2", "second").replace("3", "third") + " " + reference);
        var myverse = "<speak><prosody volume='x-loud'><break time='1s'/>" + scripture + " " + book.replace("1", "first").replace("2", "second").replace("3", "third") + " " + reference + "<break time='1s'/></prosody></speak>";
        var link = "https://s3.amazonaws.com/publicverses/" + book.replace(/ /g, "") + reference.replace(/ /g, "") + "_user" + userid + "_" + randomnumber + ".mp3";
        writeit();
        fs.createWriteStream(localfile);
        var myvalue = fs.createReadStream(localfile);
        setTimeout(uploadit, 2000)

        function linkit() {
            'use strict';
            connection.query('update publicverses set mp3 = ? where mp3 = "empty" limit 1', [link],
                function (error, results, fields) {
                    console.log(error)
                })
        }

        function writeit() {
            'use strict';
            const Polly = new AWS.Polly({
                signatureVersion: 'v4',
                region: 'us-east-1'
            })
            let params = {
                'Text': myverse.replace(" Job ", " Jobe "),
                'LexiconNames': [ 'kjv' ],
                'TextType': 'ssml',
                'OutputFormat': 'mp3',
                'VoiceId': 'Matthew'
            }
            Polly.synthesizeSpeech(params, (err, data) => {
                if (err) {
                    console.log(err.code)
                } else if (data) {
                    if (data.AudioStream instanceof Buffer) {
                        Fs.writeFile("./myverse.mp3", data.AudioStream, function(err) {
                            if (err) {
                                return console.log(err)
                            }
                            console.log("Verse recorded successfully!")
                        })
                    }
                }
            })
        }

        function uploadit() {
            'use strict';
            console.log('Preparing to upload the verse.')
            var s3 = new AWS.S3({apiVersion: '2006-03-01'});
            var uploadParams = {Bucket: 'publicverses', key: '/test.mp3', Body: myvalue, ACL: 'public-read'};
            var file = 'MyVerse.mp3';
            var fs = require('fs');
            var fileStream = fs.createReadStream(file);
            fileStream.on('error', function(err) {
                console.log('File Error', err);
            });
            uploadParams.Body = fileStream;
            var path = require('path');
            uploadParams.Key = book.replace(/ /g, "") + reference.replace(/ /g, "") + "_user" + userid + "_" + randomnumber + ".mp3";
            // call S3 to retrieve upload file to specified bucket
            s3.upload(uploadParams, function (err, data) {
                if (err) {
                    console.log("Error", err);
                } if (data) {
                    console.log("Upload Success", data.Location);
                    linkit();
                    addanother();
                }
            });
        }
    });
}
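One thing I have been wondering is whether the fixed setTimeout(uploadit, 2000) matters here, since a longer passage presumably takes Polly longer to synthesize and write to disk. As a sketch of what I mean (same variables and params as above), the upload could be started from the write callback instead of a timer:
// Sketch only: start the upload once the MP3 is actually on disk,
// instead of assuming it is ready after a fixed 2 seconds.
function writeit(done) {
    'use strict';
    const Polly = new AWS.Polly({ signatureVersion: 'v4', region: 'us-east-1' });
    let params = {
        'Text': myverse.replace(" Job ", " Jobe "),
        'LexiconNames': [ 'kjv' ],
        'TextType': 'ssml',
        'OutputFormat': 'mp3',
        'VoiceId': 'Matthew'
    };
    Polly.synthesizeSpeech(params, (err, data) => {
        if (err) return console.log(err.code);
        if (data && data.AudioStream instanceof Buffer) {
            Fs.writeFile("./myverse.mp3", data.AudioStream, function (err) {
                if (err) return console.log(err);
                console.log("Verse recorded successfully!");
                done();  // the file is complete on disk at this point
            });
        }
    });
}
// usage: writeit(uploadit);   // instead of writeit(); ... setTimeout(uploadit, 2000)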

Related

Unsupported body payload object

I want to upload a large file using Node.js. I do as described in the AWS documentation, like this:
var fs = require('fs');
var zlib = require('zlib');
var body = fs.createReadStream('bigfile').pipe(zlib.createGzip());
var s3obj = new AWS.S3({params: {Bucket: 'myBucket', Key: 'myKey'}});
s3obj.upload({Body: body}).
    on('httpUploadProgress', function(evt) {
        console.log('Progress:', evt.loaded, '/', evt.total);
    }).
    send(function(err, data) { console.log(err, data) });
Or as shown in the aws-sdk example
var AWS = require('aws-sdk')
var uploadParams = {Bucket: process.argv[2], Key: '', Body: ''}
var file = process.argv[3]
var fs = require('fs')
var fileStream = fs.createReadStream(file)
fileStream.on('error', function(err) {
    console.log('File Error', err)
})
uploadParams.Body = fileStream
var path = require('path')
uploadParams.Key = path.basename(file)
s3.upload (uploadParams, function (err, data) {
    if (err) {
        console.log("Error", err)
    } if (data) {
        console.log("Upload Success", data.Location)
    }
})
But either way I get the error:
Unsupported body payload object
I have searched the entire Internet and did not find an answer. What could be the problem? Does anyone have any idea why it does not work?

An issue with reading a gzipped file (.gz) with IBM Cloud Function (Action: Node.js 12)

I can read the data.json.gz file on my local machine with the code mentioned below (node --version: v14.15.0). But when I try to use the same code in IBM Cloud with an Action (Node.js 12) to read the same file from an Object Store bucket, I get the error below:
["stderr: ERROR: undefined - input_buf.on is not a function"].
I am very new to Node.js; can someone help identify the issue here? I appreciate your support.
Code that works on my local machine (Windows 10):
function decompressFile(filename) {
    var fs = require("fs"),
        zlib = require("zlib");
    var input = fs.createReadStream(filename);
    var data = [];
    input.on('data', function(chunk){
        data.push(chunk);
    }).on('end', function(){
        var buf = Buffer.concat(data);
        zlib.gunzip(buf, function(err, buffer) {
            if (!err) {
                var dataString = buffer.toString()
                console.log(dataString, dataString+'\n');
                var dataJSON = JSON.parse(dataString.toString('utf8'));
            } else {
                console.log(err);
            }
        });
    });
}
decompressFile("data.json.gz");
Code that does not work in an IBM Cloud Function with an Object Store bucket:
// Get file contents of gzipped item
async function getGzippedItem(cosClient, bucketName, itemName) {  // <<< async keyword added
    const fs = require('fs');
    const zlib = require('zlib');
    return await cosClient.getObject({  // <<< turned into assignment with await
        Bucket: bucketName,
        Key: itemName
    }).promise()
        .then((instream=fs.createReadStream(itemName)) => {
            if (instream != null) {
                var data = [];
                var input_buf = instream.Body
                input_buf.on('data', function(chunk){
                    data.push(chunk);
                }).on('end', function() {
                    var buf = Buffer.concat(data);
                    zlib.gunzip(buf, function (err, buffer) {
                        if (!err) {
                            var dataString = buffer.toString()
                            var dataJSON = JSON.parse(dataString.toString('utf8'));
                        } else {
                            console.log(err);
                        }
                    });
                });
                return buf
            }
        })
        .catch((e) => {
            console.error(`ERROR: ${e.code} - ${e.message}\n`);
        });
};
async function main(params) {
    bucketName = 'bucket'
    itemName = 'data.json.gz'
    var ibm = require('ibm-cos-sdk');
    var util = require('util');
    var fs = require('fs');

    // Initializing configuration
    const myCOS = require('ibm-cos-sdk');
    var config = {
        endpoint: 'endpoint',
        apiKeyId: 'apiKeyId',
        ibmAuthEndpoint: 'ibmAuthEndpoint',
        serviceInstanceId: 'serviceInstanceId',
    };
    var cosClient = new myCOS.S3(config);
    gzippedItemContent = await getGzippedItem(cosClient, bucketName, itemName)  // <<< await keyword added
    console.log(">>>>>>>>>>>>>>>: ", typeof gzippedItemContent, gzippedItemContent)
}
The message is telling you that your input_buf object is not of the type you expect it to be. The result of your createReadStream() call is just a stream:
[Stream] the readable stream object that can be piped or read from (by registering 'data' event listeners).
So you should be able to access the value directly
(not declaring var input_buf = instream.Body):
var getObjectStream = cosClient.getObject({
    Bucket: 'BUCKET',
    Key: 'KEY'
}).createReadStream();

getObjectStream.on('data', function(c) {
    data += c.toString();
});
Have a look at the test section of the ibm-cos-sdk-js project, it is describing how to use the API.
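Putting that together with the gunzip logic from the question, a minimal sketch could look like this (bucket and item names are whatever you pass in; it assumes the same cosClient as above):
// Sketch: stream the object, buffer the chunks, then gunzip and parse the JSON.
const zlib = require('zlib');

function getGzippedItem(cosClient, bucketName, itemName) {
    return new Promise((resolve, reject) => {
        const chunks = [];
        cosClient.getObject({ Bucket: bucketName, Key: itemName })
            .createReadStream()
            .on('data', (chunk) => chunks.push(chunk))
            .on('error', reject)
            .on('end', () => {
                zlib.gunzip(Buffer.concat(chunks), (err, buffer) => {
                    if (err) return reject(err);
                    try {
                        resolve(JSON.parse(buffer.toString('utf8')));
                    } catch (parseErr) {
                        reject(parseErr);
                    }
                });
            });
    });
}
// usage (inside main): const dataJSON = await getGzippedItem(cosClient, bucketName, itemName);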

When piping a movie from S3 the file isn't seekable

We have an application that sometimes serves an MP4 file stored on S3. Since only specific people should be able to see each file, the files are private, and inside our service we only show them to authorised people.
The movie starts playing correctly (in the browser's built-in video tag); however, if we seek to a point in the movie that hasn't been buffered yet, the player buffers for a bit and then stops playing. Afterwards, clicking Play causes the movie to start from the beginning. If I make the file public and access it directly from S3, seeking to an unbuffered point works correctly.
I created a standalone Node program that reproduces this problem. I tried to make the response headers identical to those that S3 sends, but the problem remains.
const http = require("http");
const AWS = require("aws-sdk");
const proxy = require("proxy-agent");
Object.assign(process.env, {
AWS_ACCESS_KEY_ID: "REDACTED",
AWS_SECRET_ACCESS_KEY: "REDACTED",
AWS_EC2_REGION: "us-west-2"
});
const s3 = new AWS.S3({
s3ForcePathStyle: 'true',
signatureVersion: 'v4',
httpOptions: { timeout: 300000 },
endpoint: 'https://s3.us-west-2.amazonaws.com',
region: 'us-west-2'
});
const objectParams = {
Bucket: 'REDACTED',
Key: 'some-movie.mp4'
};
let request = 0;
function serve(req, res) {
console.log("Handling request", ++request, req.url);
s3.headObject(objectParams, (err, data) => {
if (err)
throw err;
const { ContentType: type, ContentLength: length} = data;
console.log("Got", data);
if (data.ETag)
res.setHeader("ETag", data.ETag);
const range = req.headers.range;
if (range) {
console.log("Serving range", range);
const parts = range.replace("bytes=", "").split("-");
const start = parseInt(parts[0], 10);
const end = parts[1]? parseInt(parts[1], 10): length -1;
let headers = {
"Content-Range": `bytes ${start}-${end}/${length}`,
"Accept-Ranges": "bytes",
"Content-Type": type,
"Content-Length": end - start + 1,
"Last-Modified": data.LastModified,
};
if (req.headers["if-range"]) {
console.log("Setting if-range to", req.headers["if-range"]);
headers["If-Range"] = req.headers["if-range"];
}
res.writeHead(206, headers);
}
else {
console.log("Whole file");
res.setHeader("Accept-Ranges", "bytes");
res.setHeader("Content-Type", type);
res.setHeader("Content-Length", length);
res.setHeader("Last-Modified", data.LastModified);
}
const stream = s3.getObject(objectParams).createReadStream();
stream.on("error", err => console.error("stream error:", err));
stream.pipe(res).on("finish", data => {
console.log("Finished streaming");
});
});
}
http.createServer(serve).listen(1234);
What am I missing?
Here is code with the seek bar working just fine. You can test it by integrating the code below and opening the API URL in the browser.
import mime from 'mime-types';

const key = 'S3_BUCKET KEY';
const params = { Key: key, Bucket: AWS_BUCKET };
// s3 here refers to the AWS.S3 object.
s3.headObject(params, function (err, data) {
    if (err) {
        console.error(err);
        return next(err);
    }
    if (req.headers.range) {
        const range = req.headers.range;
        const bytes = range.replace(/bytes=/, '').split('-');
        const start = parseInt(bytes[0], 10);
        const total = data.ContentLength;
        const end = bytes[1] ? parseInt(bytes[1], 10) : total - 1;
        const chunkSize = end - start + 1;
        res.set('Content-Range', 'bytes ' + start + '-' + end + '/' + total);
        res.set('Accept-Ranges', 'bytes');
        res.set('Content-Length', chunkSize.toString());
        params['Range'] = range;
        console.log('video buffering - range, total, start, end ,params', range, total, start, end, params);
    } else {
        res.set('Content-Length', data.ContentLength.toString());
        console.log('video buffering - ,params', params);
    }
    res.status(206);
    res.set('Content-Type', mime.lookup(key));
    res.set('Last-Modified', data.LastModified.toString());
    res.set('ETag', data.ETag);
    const stream = s3.getObject(params).createReadStream();
    stream.on('error', function error(err) {
        return next(err);
    });
    stream.on('end', () => {
        console.log('Served by Amazon S3: ' + key);
    });
    stream.pipe(res);
});
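The part that seems to make the difference compared with the code in the question is forwarding the browser's Range header on to S3 (params['Range'] = range), so S3 itself returns only the requested bytes and the 206 response matches what the player asked for. Reduced to a sketch:
// Forward the client's Range header to S3 so only that byte range is streamed back.
if (req.headers.range) {
    params['Range'] = req.headers.range;   // e.g. "bytes=1048576-"
}
const stream = s3.getObject(params).createReadStream();
stream.pipe(res);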

s3 bucket upload not working

var async = require('async');
var AWS = require('aws-sdk');
var util = require('util');
var im = require('imagemagick');
var fs = require('fs');

// constants
var MAX_WIDTH = 100;
var MAX_HEIGHT = 100;

var s3 = require('s3');
var client = s3.createClient({
    maxAsyncS3: 20,                     // this is the default
    s3RetryCount: 3,                    // this is the default
    s3RetryDelay: 1000,                 // this is the default
    multipartUploadThreshold: 20971520, // this is the default (20 MB)
    multipartUploadSize: 15728640,      // this is the default (15 MB)
    s3Options: {
        accessKeyId: "xx",
        secretAccessKey: "xx",
    },
});

exports.handler = function(event, context, callback) {
    // Read options from the event.
    console.log("Reading options from event:\n", util.inspect(event, {depth: 5}));
    var srcBucket = event.Records[0].s3.bucket.name;
    // Object key may have spaces or unicode non-ASCII characters.
    var srcKey = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
    var dstBucket = srcBucket + "resized";
    var dstKey = "resized-" + srcKey;

    // Sanity check: validate that source and destination are different buckets.
    if (srcBucket == dstBucket) {
        callback("Source and destination buckets are the same.");
        return;
    }

    // Infer the image type.
    var typeMatch = srcKey.match(/\.([^.]*)$/);
    if (!typeMatch) {
        callback("Could not determine the image type.");
        return;
    }
    var imageType = typeMatch[1];
    if (imageType != "jpg" && imageType != "png") {
        callback('Unsupported image type: ${imageType}');
        return;
    }

    // Download the image from S3, transform, and upload to a different S3 bucket.
    async.waterfall([
        function download(next) {
            var params = {
                localFile: "/tmp/" + srcKey,
                s3Params: {
                    Bucket: srcBucket,
                    Key: srcKey,
                },
            };
            var downloader = client.downloadFile(params);
            downloader.on('error', function(err) {
                console.error("unable to download:", err.stack);
            });
            downloader.on('progress', function() {
                console.log("progress", downloader.progressAmount, downloader.progressTotal);
            });
            downloader.on('end', function() {
                console.log("done downloading");
            });

            // upload a file
            var uploadparams = {
                localFile: "/tmp/" + srcKey,
                s3Params: {
                    Bucket: dstBucket,
                    Key: dstKey,
                },
            };
            var uploader = client.uploadFile(uploadparams);
            uploader.on('error', function(err) {
                console.error("unable to upload:", err.stack);
            });
            uploader.on('progress', function() {
                console.log("progress", uploader.progressMd5Amount,
                    uploader.progressAmount, uploader.progressTotal);
            });
            uploader.on('end', function() {
                console.log("done uploading");
            });
        }
    ], function (err) {
        if (err) {
            console.error(
                'Unable to resize ' + srcBucket + '/' + srcKey +
                ' and upload to ' + destBucket + '/' + destKey +
                ' due to an error: ' + err
            );
        } else {
            console.log(
                'Successfully resized ' + srcBucket + '/' + srcKey +
                ' and uploaded to ' + destBucket + '/' + destKey
            );
        }
    });
};
I am trying to download a file from one S3 bucket and upload it to a different S3 bucket. I need to do some other conversions before uploading, so I just want to try downloading and uploading first. On execution, it says "done downloading", but I am unable to upload the file. I'm not sure what the problem is. I have followed the advice from https://github.com/andrewrk/node-s3-client/blob/master/README.md
Uploading is not working at all. Can you please help? Thanks.
You are trying to upload at the same time as you are downloading...
You need to call the upload inside the downloader.on('end', ...) handler.
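Roughly like this (a sketch reusing the params and uploadparams from your handler):
// Kick off the upload only after the download has finished writing /tmp/<srcKey>.
var downloader = client.downloadFile(params);
downloader.on('error', function(err) {
    console.error("unable to download:", err.stack);
});
downloader.on('end', function() {
    console.log("done downloading");
    var uploader = client.uploadFile(uploadparams);
    uploader.on('error', function(err) {
        console.error("unable to upload:", err.stack);
    });
    uploader.on('end', function() {
        console.log("done uploading");
        next();  // let the async.waterfall continue once both steps are done
    });
});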

AWS S3 performance using Node.js SDK

I am trying to push the upload (and download) performance of my program to its limits.
I get about 1000 Mbps when uploading 256 MB files using AWS's command line interface.
But I get stuck at about 600 Mbps upload with the following program:
if (process.argv.length < 7) {
    console.log ("usage: " + process.argv [0] + " " + process.argv[1] + " <config> <region> <bucket> <key> <file>")
    return -1
}

var config = process.argv[2]
var region = process.argv[3]
var bucketName = process.argv[4]
var key = process.argv[5]
var file = process.argv[6]

var multipartMap = { Parts: [] }
var uploadStartTime // = new Date()
var partSize = 1024 * 1024 * 8 // at least 5MB, specified by amazon
var partNum
var multipartParams = {
    Bucket: bucketName,
    Key: key,
    ContentType: "binary",
    StorageClass: "REDUCED_REDUNDANCY",
}
var part = 0
var maxRetry = 3

var fs = require ('fs')
var aws = require ('aws-sdk')

function upload (bucket, multipart, partParams, trial) {
    var trial = trial || 1;
    bucket.uploadPart (partParams, function (err, data) {
        if (err) {
            console.log ("failed: ", err)
            if (trial < maxRetry) {
                console.log ("retrying part: ", partParams.PartNumber)
                upload (bucket, multipart, partParams, trial + 1)
            } else {
                console.log ("failed: ", err, " unable to upload part: ", partParams.PartNumber)
            }
            return;
        }
        multipartMap.Parts[this.request.params.PartNumber - 1] = {
            ETag: data.ETag,
            PartNumber: Number (this.request.params.PartNumber)
        }
        if (--partNum > 0) return;
        var doneParams = {
            Bucket: bucketName,
            Key: key,
            MultipartUpload: multipartMap,
            UploadId: multipart.UploadId
        }
        console.log ("success")
        bucket.completeMultipartUpload (doneParams, function (err, data) {
            if (err) {
                console.log("An error occurred while completing the multipart upload");
                console.log(err);
            } else {
                var delta = (new Date() - uploadStartTime) / 1000;
                console.log('Completed upload in', delta, 'seconds');
                console.log('Final upload data:', data);
            }
        })
    })
}

var kickoffTime = new Date ()
aws.config.loadFromPath (config)
aws.config.region = region

var bucket = new aws.S3 ({params: {Bucket: bucketName}})

console.log ("filename: ", file)
buffer = fs.readFileSync (file)

partNum = Math.ceil (buffer.length / partSize) // number of parts
var totalPart = partNum

uploadStartTime = new Date ()
bucket.createMultipartUpload (multipartParams, function (err, multipart) {
    if (err) {
        console.log ("cannot create multipart upload: ", err)
        return -1
    }
    for (var i = 0; i < buffer.length; i += partSize) {
        ++part
        var end = Math.min (i + partSize, buffer.length)
        var body = buffer.slice (i, end)
        var partParams = {
            Body: body,
            Bucket: bucketName,
            Key: key,
            PartNumber: String (part),
            UploadId: multipart.UploadId,
            ContentLength: end - i
        }
        upload (bucket, multipart, partParams);
    }
})
var kickoffTimeDelta = (new Date () - kickoffTime) / 1000
console.log ("Kickoff time: ", kickoffTimeDelta)
This program will not work for empty files, but please ignore this case. The above program is coded with reference to this.
As for downloading, the speed is also stuck at about 600 Mbps. Here is the code:
if (process.argv.length < 7) {
    console.log ("usage: " + process.argv [0] + " " + process.argv[1] + " <config> <region> <bucket> <key> <file>")
    return -1
}

var config = process.argv[2]
var region = process.argv[3]
var bucketName = process.argv[4]
var key = process.argv[5]
var file = process.argv[6]

var fs = require ('fs')
var aws = require ('aws-sdk')

fs.readFile (config, "utf8", function (err, configFile) {
    if (err) {
        console.log ("Config file cannot be read: ", err)
        return -1
    }
    aws.config = JSON.parse (configFile)
    aws.config.region = region
    var bucket = new aws.S3 ({params: {Bucket: bucketName}})
    bucket.createBucket (function () {
        var data = {Key: key}
        bucket.getObject (data, function (err, fileData) {
            if (err) {
                console.log ("Error downloading data: ", err)
            } else {
                fs.writeFile (file, fileData.Body, function (err) {
                    if (err) {
                        console.log ("Error writing data: ", err)
                    } else {
                        console.log ("Successfully downloaded!")
                    }
                })
            }
        })
    })
})
I am new to Node.js and the AWS SDK. Is there anything I'm missing to achieve better throughput?
Thanks
Hmm...had a clarifying question but don't have the reputation to post as such.
How many requests per second are you seeing on both ends? If you're regularly hitting S3 with more than 100 requests per second, you'll get better performance by randomizing the start of your key name.
See this article for an explanation and some suggestions:
http://docs.aws.amazon.com/AmazonS3/latest/dev/request-rate-perf-considerations.html
Basically, if you have a bunch of files whose keys (subdirectories) start with the same characters, you can overwhelm the index partition, so for high-volume read/write activity, randomized key names speed up performance.
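For example, a short hash prefix is one common way to keep keys from all starting with the same characters (a sketch; nothing here is specific to your code):
// Sketch: prepend a few hex characters derived from the key itself,
// so objects spread across S3's index partitions instead of clustering.
var crypto = require('crypto');

function randomizedKey(originalKey) {
    var prefix = crypto.createHash('md5').update(originalKey).digest('hex').slice(0, 4);
    return prefix + '-' + originalKey;   // e.g. "<4 hex chars>-myfolder/myfile.bin"
}

// usage: Key: randomizedKey(key) when uploading, deriving the same key again for reads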
