Get download progress in Node.js with request

I'm creating an updater that downloads application files using the Node module request. How can I use chunk.length to estimate the remaining file size? Here's part of my code:
var file_url = 'http://foo.com/bar.zip';
var out = fs.createWriteStream('baz.zip');
var req = request({
method: 'GET',
uri: file_url
});
req.pipe(out);
req.on('data', function (chunk) {
console.log(chunk.length);
});
req.on('end', function() {
//Do something
});

This should get you the total you want:
req.on( 'response', function ( data ) {
console.log( data.headers[ 'content-length' ] );
} );
I get a content length of 9404541
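Combining that content-length header with the chunk lengths from the question gives the remaining size directly. A minimal sketch (same fs/request setup as the question; the variable names are mine):
var file_url = 'http://foo.com/bar.zip';
var out = fs.createWriteStream('baz.zip');
var req = request({ method: 'GET', uri: file_url });

var totalBytes = 0;     // filled in once the response headers arrive
var receivedBytes = 0;  // running total of chunk lengths

req.on('response', function (res) {
    totalBytes = parseInt(res.headers['content-length'], 10);
});
req.on('data', function (chunk) {
    receivedBytes += chunk.length;
    console.log('Remaining: ' + (totalBytes - receivedBytes) + ' bytes');
});
req.pipe(out);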

function download(url, callback, encoding){
var request = http.get(url, function(response) {
if (encoding){
response.setEncoding(encoding);
}
var len = parseInt(response.headers['content-length'], 10);
var body = "";
var cur = 0;
var obj = document.getElementById('js-progress'); // assumes a DOM element, i.e. this snippet targets a browser/Electron renderer
var total = len / 1048576; //1048576 - bytes in 1Megabyte
response.on("data", function(chunk) {
body += chunk;
cur += chunk.length;
obj.innerHTML = "Downloading " + (100.0 * cur / len).toFixed(2) + "% " + (cur / 1048576).toFixed(2) + " mb\r" + ".<br/> Total size: " + total.toFixed(2) + " mb";
});
response.on("end", function() {
callback(body);
obj.innerHTML = "Downloading complete";
});
request.on("error", function(e){
console.log("Error: " + e.message);
});
});
};

If you are using the "request" module and want to display the download percentage without using any extra module, you can use the following code:
function getInstallerFile (installerfileURL,installerfilename) {
// Variable to save downloading progress
var received_bytes = 0;
var total_bytes = 0;
var outStream = fs.createWriteStream(installerfilename);
request
.get(installerfileURL)
.on('error', function(err) {
console.log(err);
})
.on('response', function(data) {
total_bytes = parseInt(data.headers['content-length']);
})
.on('data', function(chunk) {
received_bytes += chunk.length;
showDownloadingProgress(received_bytes, total_bytes);
})
.pipe(outStream);
};
function showDownloadingProgress(received, total) {
var platform = "win32"; // On Windows use "win32"; otherwise leave it empty
var percentage = ((received * 100) / total).toFixed(2);
process.stdout.write((platform == 'win32') ? "\033[0G": "\r");
process.stdout.write(percentage + "% | " + received + " bytes downloaded out of " + total + " bytes.");
}
Usage :
getInstallerFile("http://example.com/bar.zip","bar.zip");

Using the cool node-request-progress module, you could do something like this in ES2015:
import { createWriteStream } from 'fs'
import request from 'request'
import progress from 'request-progress'
progress(request('http://foo.com/bar.zip'))
.on('progress', state => {
console.log(state)
/*
{
percentage: 0.5, // Overall percentage (between 0 and 1)
speed: 554732, // The download speed in bytes/sec
size: {
total: 90044871, // The total payload size in bytes
transferred: 27610959 // The transferred payload size in bytes
},
time: {
elapsed: 36.235, // The total elapsed seconds since the start (3 decimals)
remaining: 81.403 // The remaining seconds to finish (3 decimals)
}
}
*/
})
.on('error', err => console.log(err))
.on('end', () => {})
.pipe(createWriteStream('bar.zip'))

In case someone wants to track the progress without using any other library, just request, you can use the following method:
function downloadFile(file_url , targetPath){
// Save variable to know progress
var received_bytes = 0;
var total_bytes = 0;
var req = request({
method: 'GET',
uri: file_url
});
var out = fs.createWriteStream(targetPath);
req.pipe(out);
req.on('response', function ( data ) {
// Change the total bytes value to get progress later.
total_bytes = parseInt(data.headers['content-length' ]);
});
req.on('data', function(chunk) {
// Update the received bytes
received_bytes += chunk.length;
showProgress(received_bytes, total_bytes);
});
req.on('end', function() {
alert("File succesfully downloaded");
});
}
function showProgress(received,total){
var percentage = (received * 100) / total;
console.log(percentage + "% | " + received + " bytes out of " + total + " bytes.");
// 50% | 50000 bytes received out of 100000 bytes.
}
downloadFile("https://static.pexels.com/photos/36487/above-adventure-aerial-air.jpg","c:/path/to/local-image.jpg");
The received_bytes variable accumulates the length of every chunk received, and the progress is computed against total_bytes.
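One caveat worth adding (my note, not part of the original answer): servers that respond with chunked transfer encoding may omit the Content-Length header, so total_bytes can end up NaN. A defensive variant of showProgress could fall back to reporting bytes only:
function showProgress(received, total) {
    // total may be NaN or 0 if the server did not send Content-Length
    if (!total || isNaN(total)) {
        console.log(received + " bytes received (total size unknown)");
        return;
    }
    var percentage = ((received * 100) / total).toFixed(2);
    console.log(percentage + "% | " + received + " bytes out of " + total + " bytes.");
}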

I wrote a module that just does what you want: status-bar.
var bar = statusBar.create ({ total: res.headers["content-length"] })
.on ("render", function (stats){
websockets.send (stats);
})
req.pipe (bar);
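A rough sketch of how that snippet might be wired up end to end with the request module (assuming only the status-bar API shown above, statusBar.create and the render event, and logging the stats object rather than sending it over websockets):
var fs = require('fs');
var request = require('request');
var statusBar = require('status-bar');

var req = request('http://foo.com/bar.zip');
req.on('response', function (res) {
    // total comes from the Content-Length header, exactly as in the snippet above
    var bar = statusBar.create({ total: res.headers['content-length'] })
        .on('render', function (stats) {
            // forward or print the stats object; its exact shape is defined by status-bar
            console.log(stats);
        });
    req.pipe(bar);
    req.pipe(fs.createWriteStream('bar.zip'));
});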

Related

Lambda function taking >3 seconds to run + 5-10 secs warmup each time

I have a simple Node.js function with 2 REST API calls and a socket connection output, hosted in an AWS Lambda. It takes 5-10 secs of warmup time and >3 secs of execution time.
When the code runs locally it executes both requests and the socket connection and completes in about ~1300 ms. Why does AWS more than double the execution time? I have set the timeout to 120 seconds and memory at 128 MB (default).
I appreciate the code is not very tidy; I am working on cleaning it but needed something going for the time being.
The project simply gets info from ServiceM8 via API when called by a webhook subscription, then formats the info into ZPL strings and forwards them to a tcp server for printing via thermal printer.
My questions are:
Is my code the bottleneck?
Can it be optimized to run faster?
Do I simply need to employ a warming plugin for my function to allow hot starting?
My function:
'use strict';
//Require libraries
var request = require("request");
var net = require('net');
exports.handler = (event, context, callback) => {
if (event.eventName != 'webhook_subscription') {
callback(null, {});
}
//Global Variables
var strAssetUUID;
var strAssetURL;
var strFormUUID;
var strTestDate;
var strRetestDate;
var appliancePass = true;
var strAccessToken;
var strResponseUUID;
//Printer Access
const tcpUrl = 'example.com';
const tcpPort = 12345;
var client = new net.Socket();
//UUID of Appliance Test Form.
const strTestFormUUID = 'UUID_of_form';
//Begin function
/**
* Inspect the `eventArgs.entry` argument to get details of the change that caused the webhook
* to fire.
*/
strResponseUUID = event.eventArgs.entry[0].uuid;
strAccessToken = event.auth.accessToken;
console.log('Response UUID: ' + strResponseUUID);
console.log('Access Token: ' + strAccessToken);
//URL Options for FormResponse UUID query
const urlFormResponse = {
url: 'https://api.servicem8.com/api_1.0/formresponse.json?%24filter=uuid%20eq%20' + strResponseUUID,
headers: {
// Use the temporary Access Token that was issued for this event
'Authorization': 'Bearer ' + strAccessToken
}
};
//Query form Response UUID to get information required.
request.get(urlFormResponse, function(err, res, body) {
//Check response code from API query
if (res.statusCode != 200) {
// Unable to query form response records
callback(null, {err: "Unable to query form response records, received HTTP " + res.statusCode + "\n\n" + body});
return;
}
//If we do receive a 200 status code, begin
var arrRecords = JSON.parse(body);
//Store the UUID of the form used for the form response.
strFormUUID = arrRecords[0].form_uuid;
console.log('Form UUID: ' + strFormUUID);
//Store the UUID of the asset the form response relates to.
strAssetUUID = arrRecords[0].asset_uuid;
console.log('Asset UUID: ' + strAssetUUID);
if (strFormUUID == strTestFormUUID){
//Get the edited date and parse it into a JSON date object.
var strEditDate = new Date(arrRecords[0].edit_date);
//Reassemble JSON date to dd-mm-yyyy.
strTestDate = strEditDate.getDate() + '/' + (strEditDate.getMonth() + 1) + '/' + strEditDate.getFullYear();
//Extract the response for retest period.
var strRetestAnswer = JSON.parse(arrRecords[0].field_data);
strRetestAnswer = strRetestAnswer[0].Response;
//Appropriate function based on retest response.
switch(strRetestAnswer) {
case '3 Months':
//Add x months to current test date object
strEditDate.setMonth(strEditDate.getMonth() + 3);
strRetestDate = strEditDate.getDate() + '/' + (strEditDate.getMonth() + 1) + '/' + strEditDate.getFullYear();
break;
case '6 Months':
strEditDate.setMonth(strEditDate.getMonth() + 6);
strRetestDate = strEditDate.getDate() + '/' + (strEditDate.getMonth() + 1) + '/' + strEditDate.getFullYear();
break;
case '12 Months':
strEditDate.setMonth(strEditDate.getMonth() + 12);
strRetestDate = strEditDate.getDate() + '/' + (strEditDate.getMonth() + 1) + '/' + strEditDate.getFullYear();
break;
case '2 Years':
strEditDate.setMonth(strEditDate.getMonth() + 24);
strRetestDate = strEditDate.getDate() + '/' + (strEditDate.getMonth() + 1) + '/' + strEditDate.getFullYear();
break;
case '5 Years':
strEditDate.setMonth(strEditDate.getMonth() + 60);
strRetestDate = strEditDate.getDate() + '/' + (strEditDate.getMonth() + 1) + '/' + strEditDate.getFullYear();
break;
default:
strRetestDate = "FAIL";
appliancePass = false;
}
console.log('Appliance Pass: ' + appliancePass);
console.log('Test Date: ' + strTestDate);
console.log('Retest Period: ' + strRetestAnswer);
console.log('Retest Date: ' + strRetestDate);
//URL Options for Asset UUID query
const urlAssetResponse = {
url: 'https://api.servicem8.com/api_1.0/asset/' + strAssetUUID + '.json',
headers: {
// Use the temporary Access Token that was issued for this event
'Authorization': 'Bearer ' + strAccessToken
}
};
//Query the api for the asset URL of the provided asset UUID.
request.get(urlAssetResponse, function(err, res, body) {
//Check response code from API query
if (res.statusCode != 200) {
// Unable to query asset records
callback(null, {err: "Unable to query asset records, received HTTP " + res.statusCode + "\n\n" + body});
return;
}
//If we do receive a 200 status code, begin
var strAssetResponse = JSON.parse(body);
//Store the asset URL
strAssetURL = 'https://sm8.io/' + strAssetResponse.asset_code;
console.log('Asset URL: ' + strAssetURL);
//generate tag and send to printer
var strZPLPass = ('^XA....^XZ\n');
var strZPLFail = ('^XA....^XZ\n');
//Now that we have our ZPL generated from our dates and URLs
//Send the correct ZPL to the printer.
client.connect(tcpPort, tcpUrl, function() {
console.log('Connected');
//Send Appropriate ZPL
if (appliancePass) {
client.write(strZPLPass);
}else {
client.write(strZPLFail);
}
console.log('Tag Successfully Printed!');
//As the tcp server receiving the string does not return any communication
//there is no way to know when the data has been successfully received in full.
//So we simply timeout the connection after 750ms which is generally long enough
//to ensure complete transmission.
setTimeout(function () {
console.log('Timeout, connection closing...');
client.destroy();
}, 750);
});
});
}
});
};
First of all, I would suggest you stop using the request module and switch to the native http/https modules. Everything can be done without tons of lines these days; request is a module with 48 total dependencies, and if you do the math, that's thousands of lines for a simple GET request.
You should always minimize the complexity of your dependencies. I use a Lambda to check the health of my sites, grabbing the whole response and checking the HTML, on completely different servers. The VPS is located in Frankfurt, AWS in Ireland, and my time per request ranges between 100 and 150 ms.
Here's a simple promise-based request function I'm using:
function request(obj, timeout) {
return new Promise(function(res, rej) {
if (typeof obj !== "object") {
rej("Argument must be a valid http request options object")
}
obj.timeout = timeout;
obj.rejectUnauthorized = false;
let request = http.get(obj, (response) => {
if (response.statusCode !== 200) {
rej("Connection error");
}
var body = '';
response.on('data', (chunk) => {
body += chunk;
});
response.on('end', () => {
res(body);
});
response.on('error', (error) => {
rej(error);
});
});
request.setTimeout(timeout);
request.on('error', (error) => {
rej(error);
})
request.on('timeout', () => {
request.abort();
rej("Timeout!")
})
});
}
Example
const reqOpts = {
hostname: 'www.example.com',
port: 443,
path: '/hello',
method: 'GET',
headers: {
handshake: "eXTNxFMxQL4pRrj6JfzQycn3obHL",
remoteIpAddress: event.sourceIp || "lambda"
}
}
try {
httpTestCall = await request(reqOpts, 250);
}
catch (e) {
console.error(e);
}
Now, based on that change, switch your handler to async using exports.handler = async (event, context, callback) => {} and measure the execution time of every request with console.time() and console.timeEnd(). From there you can see what is slowing your code down in the CloudWatch logs. Here's another example based on your code:
let reqOpts = {
hostname: 'api.servicem8.com',
port: 443,
path: '/api_1.0/formresponse.json?%24filter=uuid%20eq%20' + strResponseUUID,
method: 'GET',
headers: {
// Use the temporary Access Token that was issued for this event
'Authorization': 'Bearer ' + strAccessToken
}
}
console.time("=========MEASURE_servicem8=========")
let error = null;
await request(reqOpts, 5555).catch((e)=>{
error = e;
})
console.timeEnd("=========MEASURE_servicem8=========")
if (error){
callback(null, {err: "Unable to query form response records, received HTTP" + error}); /* or anything similar */
}
References
https://docs.aws.amazon.com/lambda/latest/dg/best-practices.html
https://docs.aws.amazon.com/lambda/latest/dg/nodejs-prog-model-handler.html
AWS Lambdas are not fast by nature (as of writing this answer). The startup time is not guaranteed and is known to be high.
If you need performance, you will not get it this way.

Azure BlobStorage: 400 (One of the request inputs is out of range.)

When I try to upload blobs to my azure storage account I get the following error response
<?xml version="1.0" encoding="utf-8"?>
<Error>
<Code>OutOfRangeInput</Code>
<Message>One of the request inputs is out of range.
RequestId:--------------------------
Time:2017-10-29T07:13:37.4218874Z
</Message>
</Error>
I am uploading multiple blobs, of which some are uploaded successfully while others are not. The ones that throw the error have long blob names (about 100 characters), so I assume it may be due to the blob name length. But according to https://blogs.msdn.microsoft.com/jmstall/2014/06/12/azure-storage-naming-rules/ the maximum blob name length is 1024 characters, and my blob names are well below that limit.
An example blob-name would be "65/36/aluminium_03_group67_product_02pCube1_product_02group2_product_02Flow000_Albedo.png"
Edit: Code to upload the blob.
The code to upload is in Javascript. I am breaking the file into multiple chunks and uploading. Here is the function responsible for uploading files
function AzureFileUpload(file, uploadUrl, successCallback, progressCallback, errorCallback){
this.file = file;
this.uploadUrl = uploadUrl;
this.successCallback = successCallback;
this.progressCallback = progressCallback;
this.errorCallback = errorCallback;
this.reader = new FileReader();
this.maxBlockSize = 256 * 1024;
this.blockIds = [];
this.totalBytesRemaining = this.file.size;
this.currentFilePointer = 0;
this.bytesUploaded = 0;
this.uploadFlag = true;
var self = this;
this.reader.onloadend = function(evt) {
if (evt.target.readyState == FileReader.DONE) { // DONE == 2
var uri = self.uploadUrl + '&comp=block&blockid=' + self.blockIds[self.blockIds.length - 1];
var requestData = new Uint8Array(evt.target.result);
self.ReadBlock();
if(self.uploadFlag){
self.UploadBlock(requestData, uri);
}
}
};
this.ReadBlock();
}
AzureFileUpload.prototype.UploadBlock = function(requestData, blockUrl){
var self = this;
$.ajax({
url: blockUrl,
type: "PUT",
data: requestData,
processData: false,
beforeSend: function(xhr) {
xhr.setRequestHeader('x-ms-blob-type', 'BlockBlob');
xhr.setRequestHeader('x-ms-blob-cache-control', "public, max-age=864000");
},
success: function(data, status) {
self.UpdateProgress(requestData.length);
self.bytesUploaded += requestData.length;
if (parseFloat(self.bytesUploaded) == parseFloat(self.file.size)) {
self.CommitBlocks();
}
},
error: function(xhr, desc, err) {
// console.log(desc);
// console.log(err);
self.Error("Unexpected error occured while uploading model. Plaese try after some time");
}
});
};
AzureFileUpload.prototype.pad = function(number, length){
var str = '' + number;
while (str.length < length) {
str = '0' + str;
}
return str;
};
AzureFileUpload.prototype.ReadBlock = function(){
if (this.totalBytesRemaining > 0) {
var fileContent = this.file.slice(this.currentFilePointer, this.currentFilePointer + this.maxBlockSize);
var blockId = "block-" + this.file.name + "-" + this.pad(this.blockIds.length, 6);
this.blockIds.push(btoa(blockId));
this.reader.readAsArrayBuffer(fileContent);
this.currentFilePointer += this.maxBlockSize;
this.totalBytesRemaining -= this.maxBlockSize;
if (this.totalBytesRemaining < this.maxBlockSize) {
this.maxBlockSize = this.totalBytesRemaining;
}
}
};
AzureFileUpload.prototype.UpdateProgress = function(bytesUploaded){
console.log("Progress",bytesUploaded);
if(this.progressCallback){
this.progressCallback(bytesUploaded);
}
};
AzureFileUpload.prototype.CommitBlocks = function(){
var self = this;
var uri = this.uploadUrl + '&comp=blocklist';
var request = '<?xml version="1.0" encoding="utf-8"?><BlockList>';
for (var i = 0; i < this.blockIds.length; i++) {
request += '<Latest>' + this.blockIds[i] + '</Latest>';
}
request += '</BlockList>';
$.ajax({
url: uri,
type: "PUT",
data: request,
beforeSend: function(xhr) {
xhr.setRequestHeader('x-ms-blob-content-type', self.file.type);
xhr.setRequestHeader('x-ms-blob-cache-control', "public, max-age=864000");
},
success: function(data, status) {
console.log("Block Commited", data);
if(self.successCallback){
self.successCallback();
}
},
error: function(xhr, desc, err) {
self.Error("Unexpected error occured while uploading model. Plaese try after some time");
}
});
};
AzureFileUpload.prototype.Error = function(msg){
this.CancelUpload();
if(this.errorCallback){
this.errorCallback(msg);
}
};
AzureFileUpload.prototype.CancelUpload = function(){
this.uploadFlag = false;
};
The problem is with the following line of code:
var blockId = "block-" + this.file.name + "-" + this.pad(this.blockIds.length, 6);
Essentially the max length of a block id can be 64 bytes (Ref: https://learn.microsoft.com/en-us/rest/api/storageservices/put-block - see URI parameters section). Because you're including file name in block id computation and your file name is large, you're exceeding this limitation.
Please try with the following line of code and you should not get this error:
var blockId = "block-" + this.pad(this.blockIds.length, 6);
Please note that block ids are scoped to a blob, so it is not really necessary to include the blob name to make them unique.
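For illustration, a block id built only from the padded index (a sketch based on the suggestion above, reusing the pad helper from the question) stays well under the 64-byte limit:
AzureFileUpload.prototype.getBlockId = function(index) {
    // e.g. index 42 -> btoa("block-000042"), a short Base64 string well under 64 bytes
    return btoa("block-" + this.pad(index, 6));
};
// ...and in ReadBlock:
// this.blockIds.push(this.getBlockId(this.blockIds.length));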
If you're using a connection string, this could also be an issue; double-check it (including the casing), as container names and similar identifiers are case sensitive. You can read more on naming rules here: https://learn.microsoft.com/en-us/rest/api/storageservices/Naming-and-Referencing-Containers--Blobs--and-Metadata?redirectedfrom=MSDN
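As a quick sanity check on that naming-rules point (my sketch, not from the answer above): container names must be 3-63 characters of lowercase letters, digits and hyphens, so a small validator can catch casing problems early:
function isValidContainerName(name) {
    // 3-63 chars, lowercase letters/digits/hyphens, starts and ends with a letter
    // or digit, and no consecutive hyphens
    return /^[a-z0-9](?!.*--)[a-z0-9-]{1,61}[a-z0-9]$/.test(name);
}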

Nodejs generating duplicate http-requests

Hope someone can help with my issue. I'm using the Node.js code below from this SAP Tutorial to read sensor values and post them via HTTP. All works pretty fine, except that every record is posted twice (see screenshot). I'm not versed in server-side JS and don't know why the duplicates occur. Granted, the values aren't always the same, but for further processing I'd like to have a single dataset per timestamp. Could someone please help me locate the issue and, if possible, provide a solution/workaround?
Also, the script reads and transmits the data every 10 s. I'm looking for a way to set the interval to maybe 3 minutes. I would appreciate every bit of help here as well.
/* sensorTag IR Temperature sensor example
* Craig Cmehil, SAP SE (c) 2015
*/
/* Choose the proper HTTP or HTTPS; SAP Cloud Platform requires HTTPS */
var http = require('https');
var SensorTag = require('sensortag');
var lv_temp;
var lv_humid;
var lv_deviceid = "";
var DEBUG_VALUE = true;
var xtimestamp;
var date = new Date();
var time = date.getTime ();
// SAP Cloud Platform connection details
var portIoT = 443;
var pathIoT = '/com.sap.iotservices.mms/v1/api/http/data/';
var hostIoT = 'iotmmsXXXXXXXXXXtrial.hanatrial.ondemand.com';
var authStrIoT = 'Bearer XXXXXXXXXXXX';
var deviceId = 'XXXXXX-XXXX-XXXX-XXXX-XXXXXXXXX';
var messageTypeID = 'XXXXXXXXXXXX';
var options = {
host: hostIoT,
port: portIoT,
path: pathIoT + deviceId,
agent: false,
headers: {
'Authorization': authStrIoT,
'Content-Type': 'application/json;charset=utf-8',
'Accept': '*/*'
},
method: 'POST',
};
/***************************************************************/
/* Coding to access TI SensorTag and values of various sensors */
/***************************************************************/
console.log("If not yet activated, then press the power button.");
SensorTag.discover(function(tag) {
tag.on('disconnect', function() {
console.log('disconnected!');
process.exit(0);
});
function connectExecute() {
console.log('Connect Device and Execute Sensors');
tag.connectAndSetUp(enableSensors);
}
function enableSensors() {
/* Read device specifics */
tag.readDeviceName(function(error, deviceName) {
console.log('Device Name = ' + deviceName);
});
tag.readSystemId(function(error, systemId) {
console.log('System ID = ' + systemId);
lv_deviceid = systemId;
});
tag.readSerialNumber(function(error, serialNumber) {
console.log('Serial Number = ' + serialNumber);
});
tag.readFirmwareRevision(function(error, firmwareRevision) {
console.log('Firmware Rev = ' + firmwareRevision);
});
tag.readHardwareRevision(function(error, hardwareRevision) {
console.log('Hardware Rev = ' + hardwareRevision);
});
tag.readHardwareRevision(function(error, softwareRevision) {
console.log('Software Revision = ' + softwareRevision);
});
tag.readManufacturerName(function(error, manufacturerName) {
console.log('Manufacturer = ' + manufacturerName);
});
/* Enable Sensors */
console.log("Enabling sensors:");
console.log('\tenableIRTemperatureSensor');
tag.enableIrTemperature(notifyMe);
console.log('\tenableHumidity');
tag.enableHumidity(notifyMe);
console.log("*********************************************");
console.log(" To stop press both buttons on the SensorTag ");
console.log("*********************************************");
}
function notifyMe() {
tag.notifySimpleKey(listenForButton);
setImmediate(function loop () {
tag.readIrTemperature(function(error, objectTemperature, ambientTemperature){
lv_obj = objectTemperature.toFixed(1);
lv_ambient = ambientTemperature.toFixed(1);
});
tag.readHumidity(function(error, temperature, humidity) {
lv_temp = temperature.toFixed(1);
lv_humid = humidity.toFixed(1);
});
if(DEBUG_VALUE)
console.log("Sending Data: " + lv_deviceid + " " + lv_temp + " " + lv_humid);
setSensorData(lv_temp, lv_humid);
setTimeout(loop, 10000);
});
}
function listenForButton() {
tag.on('simpleKeyChange', function(left, right) {
if (left && right) {
tag.disconnect();
}
});
}
connectExecute();
});
/******************************************************************/
/* FUNCTION to get Temperature from the Sensor & update into HANA */
/******************************************************************/
function setSensorData(lv_temp,lv_humid){
date = new Date();
time =date.getTime();
var data = {
"mode":"sync",
"messageType": messageTypeID,
"messages": [{
"timestamp": time,
"temperature": lv_temp,
"humidity": lv_humid
}]
};
var strData = JSON.stringify(data);
if(DEBUG_VALUE)
console.log("Data: " + strData);
if(strData.length > 46){
if(DEBUG_VALUE)
console.log("Sending Data to server");
/* Process HTTP or HTTPS request */
options.agent = new http.Agent(options);
var request_callback = function(response) {
var body = '';
response.on('data', function (data) {
body += data;
});
response.on('end', function () {
if(DEBUG_VALUE)
console.log("REQUEST END:", response.statusCode);
});
response.on('error', function(e) {
console.error(e);
});
}
var request = http.request(options, request_callback);
request.on('error', function(e) {
console.error(e);
});
request.write(strData);
request.end();
}else{
if(DEBUG_VALUE)
console.log("Incomplete Data");
}
}
It should only be posting once to the system but twice to the screen.
In the function notifyMe you need to change the line
setTimeout(loop, 10000);
Change the number to the interval (in milliseconds) you want it to wait before posting again.
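For example, to post roughly every 3 minutes as asked, the delay would be 3 * 60 * 1000 ms:
setTimeout(loop, 180000); // 3 minutes = 3 * 60 * 1000 ms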

Multipart upload of zip file to AWS Glacier freezes midway

I'm trying to upload a 600 MB .zip file to Glacier using the multipart upload functions of the Node version of the aws-sdk. I figured out how to read the file as a buffer and start the upload using a script from the AWS docs.
The script starts an upload for each part of the file but each one fails with a 400 error.
Uploading part 0 = bytes 0-2097151/*
Uploading part 2097152 = bytes 2097152-4194303/*
Uploading part 4194304 = bytes 4194304-6291455/*
Uploading part 6291456 = bytes 6291456-8388607/*
....
Uploading part 591396864 = bytes 591396864-591798963/*
//stops logging, then a couple seconds later, it starts returning an error message like this for each upload part:
{ [UnknownError: 400]
message: '400',
code: 'UnknownError',
statusCode: 400,
time: Tue Jan 10 2017 20:54:29 GMT-0500 (EST),
requestId: 'F16FEDE011D3039A',
retryable: false,
retryDelay: 91.54012566432357 }
Below is the upload script I'm using
var AWS = require('aws-sdk');
var creds = <path to creds>
var fs = require('fs');
var filePath = <path to file>;
var encoding = "utf8";
var myConfig = new AWS.Config({
accessKeyId: creds.AccessKeyID,
secretAccessKey: creds.SecretAccessKey,
region: 'us-west-1'
});
var glacier = new AWS.Glacier(myConfig)
var buffer = fs.readFileSync(filePath);
// var buffer = new Buffer(2.5 * 1024 * 1024); // 2.5MB buffer
var partSize = 1024 * 1024; // 1MB chunks,
var numPartsLeft = Math.ceil(buffer.length / partSize);
var startTime = new Date();
var params = {
accountId: '-',
vaultName: <vault name>,
archiveDescription: '100media',
partSize: partSize.toString(),
};
// Compute the complete SHA-256 tree hash so we can pass it
// to completeMultipartUpload request at the end
var treeHash = glacier.computeChecksums(buffer).treeHash;
// Initiate the multipart upload
console.log('Initiating upload to', params.vaultName);
glacier.initiateMultipartUpload(params, function (mpErr, multipart) {
if (mpErr) { console.log('Error!', mpErr.stack); return; }
console.log("Got upload ID", multipart.uploadId);
// Grab each partSize chunk and upload it as a part
for (var i = 0; i < buffer.length; i += partSize) {
var end = Math.min(i + partSize, buffer.length),
partParams = {
vaultName: params.vaultName,
uploadId: multipart.uploadId,
range: 'bytes ' + i + '-' + (end-1) + '/*',
body: buffer.slice(i, end)
};
// Send a single part
console.log('Uploading part', i, '=', partParams.range);
glacier.uploadMultipartPart(partParams, function(multiErr, mData) {
if (multiErr) return;
console.log("Completed part", this.request.params.range);
if (--numPartsLeft > 0) return; // complete only when all parts uploaded
var doneParams = {
vaultName: params.vaultName,
uploadId: multipart.uploadId,
archiveSize: buffer.length.toString(),
checksum: treeHash // the computed tree hash
};
console.log("Completing upload...");
glacier.completeMultipartUpload(doneParams, function(err, data) {
if (err) {
console.log("An error occurred while uploading the archive");
console.log(err);
} else {
var delta = (new Date() - startTime) / 1000;
console.log('Completed upload in', delta, 'seconds');
console.log('Archive ID:', data.archiveId);
console.log('Checksum: ', data.checksum);
}
});
});
}
});
Any thoughts on where the 400 errors are coming from would be greatly appreciated! I have not worked with buffers or binary data before so I might be messing up the format for this. The other suspect is that I am just formatting the glacier request or params wrong.
Here is a script I created that uploads the parts one at a time. It could be reworked to be concurrent, but it works as is, retrying whenever a part upload fails:
var minm = require('minimist');
var argv = require('minimist')(process.argv.slice(2));
var AWS = require('aws-sdk');
var creds = <path to local json creds>
var fs = require('fs');
var encoding = "utf8";
var partSize = 1024 * 1024; // 1MB chunks,
var startTime = new Date();
var byteIncrementer = 0;
var MBcounter = 0;
var multipart;
//move these out to args
var filePath = argv.filepath;
var vaultName = argv.vaultname
var archiveDescription = argv.description
if (!filePath) {
throw "ERROR: must pass file path via --filepath <filepath>"
}
if (!archiveDescription) {
throw "ERROR: must pass description path via --description <description>"
}
var myConfig = new AWS.Config({
accessKeyId: creds.AccessKeyID,
secretAccessKey: creds.SecretAccessKey,
region: <region>
});
var params = {
accountId: '-',
vaultName: vaultName,
archiveDescription: archiveDescription,
partSize: partSize.toString(),
};
var buffer = fs.readFileSync(filePath);
var numPartsLeft = Math.ceil(buffer.length / partSize);
var glacier = new AWS.Glacier(myConfig)
var treeHash = glacier.computeChecksums(buffer).treeHash;
new Promise(function (resolve, reject) {
glacier.initiateMultipartUpload(params, function (mpErr, multi) {
if (mpErr) { console.log('Error!', mpErr.stack); return; }
console.log("Got upload ID", multi.uploadId);
multipart = multi
resolve();
});
}).then(function () {
console.log("total upload size: ", buffer.length);
recursivelyUploadPart(byteIncrementer)
}).catch(function (err) {console.log(err)});
function recursivelyUploadPart() {
var end = Math.min(byteIncrementer + partSize, buffer.length);
var partParams = {
accountId: '-',
uploadId: multipart.uploadId,
vaultName: params.vaultName,
range: 'bytes ' + byteIncrementer + '-' + (end-1) + '/*',
body: buffer.slice(byteIncrementer, end)
};
console.log('Uploading part', byteIncrementer, '=', partParams.range);
glacier.uploadMultipartPart(partParams, function(multiErr, mData) {
if (multiErr) {
console.log('part upload error: ', multiErr)
console.log('retrying')
return recursivelyUploadPart(byteIncrementer)
} else {
console.log("Completed part", this.request.params.range);
if (--numPartsLeft > 0) {
MBcounter++;
console.log("MB Uploaded: ", MBcounter);
byteIncrementer += partSize;
console.log('recursing');
return recursivelyUploadPart(byteIncrementer);
} else {
var doneParams = {
vaultName: params.vaultName,
uploadId: multipart.uploadId,
archiveSize: buffer.length.toString(),
checksum: treeHash // the computed tree hash
};
console.log("Completing upload...");
glacier.completeMultipartUpload(doneParams, function(err, data) {
if (err) {
console.log("An error occurred while uploading the archive: ", err);
} else {
var delta = (new Date() - startTime) / 1000;
console.log('Completed upload in', delta, 'seconds');
console.log('Archive ID:', data.archiveId);
console.log('Checksum: ', data.checksum);
console.log("==============================");
console.log('COMPLETED');
console.log("==============================");
}
});
}
}
});
};
As mentioned in the comments, it looks like I was opening a ton of HTTP connections and trying to do everything concurrently, which won't work.
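If some parallelism is still wanted, one option (a sketch only, untested; it reuses the glacier, buffer, partSize, params and multipart variables from the script above, and finishUpload is a hypothetical stand-in for the completeMultipartUpload block already shown) is to cap the number of in-flight part uploads with a small worker pool instead of firing every part at once:
var MAX_IN_FLIGHT = 4;                          // cap on concurrent part uploads
var nextByte = 0;
var partsLeft = Math.ceil(buffer.length / partSize);

function uploadPart(start) {
    var end = Math.min(start + partSize, buffer.length);
    glacier.uploadMultipartPart({
        vaultName: params.vaultName,
        uploadId: multipart.uploadId,
        range: 'bytes ' + start + '-' + (end - 1) + '/*',
        body: buffer.slice(start, end)
    }, function(err) {
        if (err) return uploadPart(start);        // naive retry of the same range
        if (--partsLeft === 0) return finishUpload(); // hypothetical: the completeMultipartUpload call shown above
        startNext();                              // this worker picks up the next part
    });
}

function startNext() {
    if (nextByte >= buffer.length) return;        // nothing left to start
    var start = nextByte;
    nextByte += partSize;
    uploadPart(start);
}

// kick off a handful of workers instead of one request per part
for (var w = 0; w < MAX_IN_FLIGHT; w++) startNext();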

Time web requests in node.js

Given Node's async nature it is difficult to time a series of web requests. How would I fire off 100 web requests and figure out how long each individual request takes? Knowing the OS will only allow a few concurrent web requests, how do I get the timing for each individual web request, removing the time spent waiting for the other connections to complete? I was hoping the socket event fired when the request launched, but it seems the socket event fires only after the connection has been established.
var http = require('http');
var urls = [
'/cameron',
'/sara',
'...'
];
// Time a url collection.
function timeUrl(url, callback) {
var options = {
host: 'www.examplesite.com',
port: 80,
path: ''
};
var times = [];
times.push({'text': 'start', 'time':Date.now()});
http.get(options, function(res) {
times.push({'text': 'response', 'time':Date.now()});
var result = '';
res.on('data', function(chunk) {
result += chunk.length ;
// result += chunk;
});
res.on('end', function() {
times.push({'text': 'end', 'time': Date.now(), 'body': result, 'statusCode': res.statusCode}); // ,
callback(times);
});
}).on('error', function(e) {
callback();
console.log("Got error: " + e.message);
times.push({'error':Date.now()});
}).on('socket', function (response) {
times.push({'text': 'socket', 'time':Date.now()});
});
}
for (var i = 0; i < urls.length; i++) {
var url = urls[i];
timeUrl(url, function(times) {
console.log(url);
for (var i = 0; i < times.length; i++) {
console.log(times[i].text, times[i].time - times[1].time , 'ms');
}
console.log('statusCode:', times[times.length -1].statusCode, 'Response Size:', times[times.length -1].body);
console.log('-');
});
}
If you're worried about OS concurrency, just introduce a maximum concurrency (throttling) into your requests instead of trying to guess exactly when the OS has started each one. I'm skipping over some minor details like error handling, and using the excellent async.js library:
var http = require('http')
, async = require('async')
, CONCURRENCY = 5 // edit to fit your OS concurrency limit
, results = {}
, urls = [
'/cameron',
'/sara',
'/...'
];
// Time a url collection.
function timeUrl(url, callback) {
var options = { host: 'www.examplesite.com', port: 80 }
, start = Date.now()
, socket = null;
options.path = url;
http.get(options, function(res) {
var response = Date.now()
, size = 0;
res.on('data', function(chunk) { size += chunk.length; });
res.on('end', function() {
var end = Date.now();
results[url] = { start: start, socket: socket, response: response, end: end, size: size };
callback();
});
}).on('error', function(e) {
results[url] = { start: start, socket: socket, error: Date.now(), stack: e };
callback();
}).on('socket', function () {
socket = Date.now();
});
}
async.forEachLimit(urls, CONCURRENCY, timeUrl, function() {
console.log(JSON.stringify(results));
});
For ease of use in doing what you seem to want to do, I've not seen anything beat Nodetime.
