Multipart upload of zip file to AWS Glacier freezes midway - node.js

I'm trying to upload a 600mb .zip file to glacier using the multipartupload function of the node version of the aws-sdk. I figured out how to read the file as a buffer and start the upload using a script from the aws docs.
The script starts an upload for each part of the file but each one fails with a 400 error.
Uploading part 0 = bytes 0-2097151/*
Uploading part 2097152 = bytes 2097152-4194303/*
Uploading part 4194304 = bytes 4194304-6291455/*
Uploading part 6291456 = bytes 6291456-8388607/*
....
Uploading part 591396864 = bytes 591396864-591798963/*
//stops logging, then a couple seconds later, it starts returning an error message like this for each upload part:
{ [UnknownError: 400]
message: '400',
code: 'UnknownError',
statusCode: 400,
time: Tue Jan 10 2017 20:54:29 GMT-0500 (EST),
requestId: 'F16FEDE011D3039A',
retryable: false,
retryDelay: 91.54012566432357 }
Below is the upload script I'm using
var AWS = require('aws-sdk');
var creds = <path to creds>
var fs = require('fs');
var filePath = <path to file>;
var encoding = "utf8";
var myConfig = new AWS.Config({
accessKeyId: creds.AccessKeyID,
secretAccessKey: creds.SecretAccessKey,
region: 'us-west-1'
});
var glacier = new AWS.Glacier(myConfig)
var buffer = fs.readFileSync(filePath);
// var buffer = new Buffer(2.5 * 1024 * 1024); // 2.5MB buffer
var partSize = 1024 * 1024; // 1MB chunks,
var numPartsLeft = Math.ceil(buffer.length / partSize);
var startTime = new Date();
var params = {
accountId: '-',
vaultName: <vault name>
archiveDescription: '100media',
partSize: partSize.toString(),
};
// Compute the complete SHA-256 tree hash so we can pass it
// to completeMultipartUpload request at the end
var treeHash = glacier.computeChecksums(buffer).treeHash;
// Initiate the multipart upload
console.log('Initiating upload to', params.vaultName);
glacier.initiateMultipartUpload(params, function (mpErr, multipart) {
if (mpErr) { console.log('Error!', mpErr.stack); return; }
console.log("Got upload ID", multipart.uploadId);
// Grab each partSize chunk and upload it as a part
for (var i = 0; i < buffer.length; i += partSize) {
var end = Math.min(i + partSize, buffer.length),
partParams = {
vaultName: params.vaultName,
uploadId: multipart.uploadId,
range: 'bytes ' + i + '-' + (end-1) + '/*',
body: buffer.slice(i, end)
};
// Send a single part
console.log('Uploading part', i, '=', partParams.range);
glacier.uploadMultipartPart(partParams, function(multiErr, mData) {
if (multiErr) return;
console.log("Completed part", this.request.params.range);
if (--numPartsLeft > 0) return; // complete only when all parts uploaded
var doneParams = {
vaultName: params.vaultName,
uploadId: multipart.uploadId,
archiveSize: buffer.length.toString(),
checksum: treeHash // the computed tree hash
};
console.log("Completing upload...");
glacier.completeMultipartUpload(doneParams, function(err, data) {
if (err) {
console.log("An error occurred while uploading the archive");
console.log(err);
} else {
var delta = (new Date() - startTime) / 1000;
console.log('Completed upload in', delta, 'seconds');
console.log('Archive ID:', data.archiveId);
console.log('Checksum: ', data.checksum);
}
});
});
}
});
Any thoughts on where the 400 errors are coming from would be greatly appreciated! I have not worked with buffers or binary data before so I might be messing up the format for this. The other suspect is that I am just formatting the glacier request or params wrong.

Here is a script I created that uploads the parts one at a time. It could be reworked to upload concurrently, but it works as is, retrying whenever a part upload fails:
var minm = require('minimist');
var argv = require('minimist')(process.argv.slice(2));
var AWS = require('aws-sdk');
var creds = <path to local json creds>
var fs = require('fs');
var encoding = "utf8";
var partSize = 1024 * 1024; // 1MB chunks,
var startTime = new Date();
var byteIncrementer = 0;
var MBcounter = 0;
var multipart;
//move these out to args
var filePath = argv.filepath;
var vaultName = argv.vaultname
var archiveDescription = argv.description
if (!filePath) {
throw "ERROR: must pass file path via --filepath <filepath>"
}
if (!archiveDescription) {
throw "ERROR: must pass description path via --description <description>"
}
var myConfig = new AWS.Config({
accessKeyId: creds.AccessKeyID,
secretAccessKey: creds.SecretAccessKey,
region: <region>
});
var params = {
accountId: '-',
vaultName: vaultName,
archiveDescription: archiveDescription,
partSize: partSize.toString(),
};
var buffer = fs.readFileSync(filePath);
var numPartsLeft = Math.ceil(buffer.length / partSize);
var glacier = new AWS.Glacier(myConfig)
var treeHash = glacier.computeChecksums(buffer).treeHash;
new Promise(function (resolve, reject) {
glacier.initiateMultipartUpload(params, function (mpErr, multi) {
if (mpErr) { console.log('Error!', mpErr.stack); return; }
console.log("Got upload ID", multi.uploadId);
multipart = multi
resolve();
});
}).then(function () {
console.log("total upload size: ", buffer.length);
recursivelyUploadPart(byteIncrementer)
}).catch(function (err) {console.log(err)});
/**
 * Uploads the part that starts at the module-level `byteIncrementer`, then
 * recurses to upload the next part, and finally completes the multipart
 * upload once `numPartsLeft` reaches zero.
 *
 * Parts are sent strictly one at a time: the next request is only issued from
 * the completion callback of the previous one.
 *
 * @param {number} [offset]  Accepted for compatibility with existing callers;
 *                           the current position is always read from
 *                           `byteIncrementer`.
 * @param {number} [attempt] 1-based attempt counter for the current part.
 *                           Callers normally omit it.
 */
function recursivelyUploadPart(offset, attempt) {
  // Retrying without a limit loops forever on a permanent failure (for
  // example a 400 caused by bad parameters), so cap the attempts per part.
  var MAX_ATTEMPTS = 5;
  var attemptNo = attempt || 1;

  var end = Math.min(byteIncrementer + partSize, buffer.length);
  var partParams = {
    accountId: '-',
    uploadId: multipart.uploadId,
    vaultName: params.vaultName,
    range: 'bytes ' + byteIncrementer + '-' + (end - 1) + '/*',
    body: buffer.slice(byteIncrementer, end)
  };

  console.log('Uploading part', byteIncrementer, '=', partParams.range);
  // Must stay a `function` expression: the callback reads `this.request`.
  glacier.uploadMultipartPart(partParams, function (multiErr, mData) {
    if (multiErr) {
      console.log('part upload error: ', multiErr);
      if (attemptNo >= MAX_ATTEMPTS) {
        console.log('giving up on part', partParams.range, 'after', attemptNo,
          'attempts; upload ID', multipart.uploadId, 'was left incomplete');
        return;
      }
      console.log('retrying');
      return recursivelyUploadPart(byteIncrementer, attemptNo + 1);
    }

    console.log("Completed part", this.request.params.range);
    if (--numPartsLeft > 0) {
      MBcounter++;
      console.log("MB Uploaded: ", MBcounter);
      byteIncrementer += partSize;
      console.log('recursing');
      // Fresh part, so the attempt counter starts over.
      return recursivelyUploadPart(byteIncrementer);
    }

    // Every part is uploaded: finish the archive with the full tree hash.
    var doneParams = {
      vaultName: params.vaultName,
      uploadId: multipart.uploadId,
      archiveSize: buffer.length.toString(),
      checksum: treeHash // the computed tree hash
    };
    console.log("Completing upload...");
    glacier.completeMultipartUpload(doneParams, function (err, data) {
      if (err) {
        console.log("An error occurred while uploading the archive: ", err);
        return;
      }
      var delta = (new Date() - startTime) / 1000;
      console.log('Completed upload in', delta, 'seconds');
      console.log('Archive ID:', data.archiveId);
      console.log('Checksum: ', data.checksum);
      console.log("==============================");
      console.log('COMPLETED');
      console.log("==============================");
    });
  });
}
As mentioned in the comment, it looks like I was opening a huge number of HTTP connections and trying to upload every part concurrently, which doesn't work.

Related

File chunk upload to azure storage blob, file seems broken

I'm trying to upload an Excel file to an Azure Storage blob in chunks, using stageBlock and commitBlockList from the BlockBlobClient class. The upload seems to succeed, but when I download the file and try to open it, it appears to be corrupted.
I'm using react and node js to do this. Code follows below
In UI
const chunkSize = (1024 * 1024) * 25; // file chunk size
// here slicing the file and sending it to api method
const fileReader = new FileReader();
const from = currentChunkIndexRef.current * chunkSize;
const to = from + chunkSize;
const blob = file.slice(from, to);
fileReader.onload = ((e: any) => uploadChunksToBlob(e, file, obj));
fileReader.readAsDataURL(blob);
// api method
const uploadChunksToBlob = async (event: any, file: File, obj: any) => {
try {
const totalChunks = Math.ceil(file.size / chunkSize);
const uploadChunkURL = `/upload?currentChunk=${currentChunkIndexRef.current}&totalChunks=${totalChunks}&file=${file.name}&type=${file.type}`;
console.log(event.target.result)
const fileUpload = await fetch(uploadChunkURL, {
method: "POST",
headers: { "Content-Type": "application/octet-stream" },
body: JSON.stringify(event.target.result),
});
const fileUploadJson = await fileUpload.json();
const isLastChunk = (totalChunks - 1) === currentChunkIndexRef.current;
if(!isLastChunk) {
console.log({ Chunk: currentChunkIndexRef.current });
currentChunkIndexRef.current = currentChunkIndexRef.current + 1;
// eslint-disable-next-line #typescript-eslint/no-use-before-define
uploadFileToAzureBlob(file, obj);
} else {
console.log("File Uploaded")
}
//
} catch (error) {
console.log("uploadFileToAzureBlob Catch Error" + error);
}
}
// In Node
const sharedKeyCredential = new StorageSharedKeyCredential(
config.StorageAccountName,
config.StorageAccountAccessKey
);
const pipeline = newPipeline(sharedKeyCredential);
const blobServiceClient = new BlobServiceClient(
`https://${config.StorageAccountName}.blob.core.windows.net`,
pipeline
);
const containerName = getContainerName(req.headers.key, req.headers.clientcode);
const identifier = uuid.v4();
const blobName = getBlobName(identifier, file);
const containerClient = blobServiceClient.getContainerClient(containerName);
const blockBlobClient = containerClient.getBlockBlobClient(blobName);
try {
let bufferObj = Buffer.from(`${file}_${Number(currentChunk)}`, "utf8"); // Create buffer object, specifying utf8 as encoding
let base64String = bufferObj.toString("base64"); // Encode the Buffer as a base64 string
blockIds = [...blockIds, base64String];
const bufferedData = Buffer.from(req.body);
let resultOfUnitArray = new Uint8Array(bufferedData.length);
for (let j = 0; j < bufferedData.length; j++) {
resultOfUnitArray[j] = bufferedData.toString().charCodeAt(j);
} // Converting string to bytes
const stageBlockResponse = await blockBlobClient.stageBlock(base64String, resultOfUnitArray, resultOfUnitArray.length, {
onProgress: (e) => {
console.log("bytes sent: " + e.loadedBytes);
}
});
if ((Number(totalChunks) - 1) === (Number(currentChunk))) {
const commitblockResponse = await blockBlobClient.commitBlockList(blockIds, {blobHTTPHeaders: req.headers});
res.json({ uuid: identifier, message: 'File uploaded to Azure Blob storage.' });
} else {
res.json({ message: `Current Chunks ${currentChunk} is Successfully Uploaded` });
}
} catch (err) {
console.log({ err })
res.json({ message: err.message });
}
I don't know what I'm doing wrong here.
Any help would be appreciated
Thank you
The problem is that you convert it into dataURL, that’s where things break.
It appears to me that you're under the wrong impression that you need to encode a blob into a string before you can send it. You don't have to: the browser fetch API is capable of handling a raw binary payload.
So on the client (browser) side, you don’t need to go through FileReader. Just send the chunk blob directly.
const blob = file.slice(from, to);
// ...
fetch(uploadChunkURL, {
method: "POST",
headers: { "Content-Type": "application/octet-stream" },
body: blob,
});
On the server (node.js) side, you'll receive the blob in raw binary form, so you can simply forward that blob untouched to azure storage. There's no need to decode from string and move bytes onto resultOfUnitArray like you currently do.
const base64String = Buffer.from(`${file}_${Number(currentChunk)}`, "utf8").toString("base64");
const bufferedData = Buffer.from(req.body);
const stageBlockResponse = await blockBlobClient.stageBlock(
base64String,
bufferedData,
bufferedData.length
);

How to download large files from aws s3 with restart on network loss

I am trying to implement a "network safe" downloader from aws s3 bucket.
the downloader should be able to download a single .zip file from s3 and write it to a local .zip file.
My current approach is using node with a readStream and a writeStream as follows
const download = async () => {
AWS.config.update(
{
accessKeyId: "",
secretAccessKey: "",
region: ""
}
);
const s3 = new AWS.S3();
const params = {
Bucket: '',
Key: ''
};
const { ContentLength: contentLength } = await s3.headObject(params).promise();
const rs = s3.getObject(params).createReadStream()
const ws = fs.createWriteStream(path.join('./', 'file.zip'));
let progress = 0;
rs.on('data', function (chunk) {
progress += chunk.length;
console.log(`Progress: ${progress / contentLength * 100}%`);
});
rs.pipe(ws);
}
What I need is a way to catch (or create) an event for network errors that will allow me to pause the download and restart it when the network is back,
or even better, to automatically restart the download when the network is restored.
Currently I can't find any events for network errors, and it seems that losing the network while a download is in progress does not trigger the 'error' event.
Any solutions in node/python would be much appreciated.
Short update: I found a workaround that uses wget to download a pre-signed URL for the object I want. It is still not the native experience I wanted, so please share your thoughts.
export class Downloader {
constructor(dest_dir, file_name_to_save) {
this.dest_dir = dest_dir;
this.file_name = file_name_to_save;
this.progress = "61%"
}
async download_file(on_progress, on_done, socket, io) {
let client = new S3Client({
region: 'eu-west-1',
credentials: {
accessKeyId: "",
secretAccessKey: "",
}
});
const command = new GetObjectCommand({ Bucket: "s", Key: "" });
const url = await getSignedUrl(client, command, { expiresIn: 3600 });
let child = spawn('wget', ['-c', '-O', this.dest_dir + this.file_name, url]);
socket.on('cancel', () => {
child.kill()
console.log("killed child")
})
child.stderr.on('data', (e) => {
let stdout = e.toString();
let p = stdout.match(/([0-9]+?\%)+/g);
if (p && p.length > 0 && p[0] != this.progress) {
on_progress && on_progress(p[0])
console.log(p[0])
this.progress = p[0]
}
});
child.stdout.on('end', function (data) {
on_done && on_done("end");
console.log("end: ", data)
child.kill();
});
child.on('exit', function (code) {
if (code != 0) console.log('Failed: ' + code);
else console.log("success!", code)
on_done && on_done("exit");
});
}
}

Does Azure blockBlobURL.download() have a limit to file size?

I'm using Azure's blockBlobURL.download() to download an image but am only receiving the top portion of the image. Is there a limit to how much I can download from an Azure blob to a readable stream? The content length is 172628 and there is a property highWaterMark: 16384. Are these two related?
async function compareToBaseline(imageData, blobName, metadata){
const baselineBlobName = "MacOSX10.12/chrome/initial"
const containerURL = ContainerURL.fromServiceURL(serviceURL, "baselines")
const blockBlobURL = BlockBlobURL.fromContainerURL(containerURL, baselineBlobName );
let baseLineImage = await blockBlobURL.download(aborter, 0)
baseLineImage = baseLineImage.originalResponse.readableStreamBody.read()
console.log(baseLineImage.length);
baseLineImage = new Buffer(baseLineImage, 'base64');
await fs.writeFile('./newest.png', baseLineImage, 'binary', function(err){
console.log('written');
})
}
The result is only the top portion of an image.
There's a 4-MB limit for each call to the Azure Storage service. If your file is larger than 4 MB, you must break it in chunks. For more information, see Azure Storage scalability and performance targets.
Here is sample C# code that downloads very large files in 1 MB chunks. It is written with performance in mind, too.
private static void DownloadLargeFile()
{
string connectionString = "connString"; //ConfigurationSettings.AppSettings["StorageConnectionString"]; //blob connection string
#pragma warning restore CS0618 // Type or member is obsolete
#pragma warning disable CS0618 // Type or member is obsolete
string sourceContainerName = "quickstartblob"; //ConfigurationSettings.AppSettings["sourcecontainerName"]; //source blob container name
#pragma warning restore CS0618 // Type or member is obsolete
string sourceBlobFileName = "QuickStart1.txt"; //source blob name
CloudStorageAccount account = CloudStorageAccount.Parse(connectionString);
var blobClient = account.CreateCloudBlobClient();
var container = blobClient.GetContainerReference(sourceContainerName);
var file = sourceBlobFileName;
var blob = container.GetBlockBlobReference(file);
//First fetch the size of the blob. We use this to create an empty file with size = blob's size
blob.FetchAttributes();
var blobSize = blob.Properties.Length;
long blockSize = (1 * 1024 * 1024);//1 MB chunk;
blockSize = Math.Min(blobSize, blockSize);
//Create an empty file of blob size
using (FileStream fs = new FileStream(file, FileMode.Create))//Create empty file.
{
fs.SetLength(blobSize);//Set its size
}
var blobRequestOptions = new BlobRequestOptions
{
RetryPolicy = new ExponentialRetry(TimeSpan.FromSeconds(5), 3),
MaximumExecutionTime = TimeSpan.FromMinutes(60),
ServerTimeout = TimeSpan.FromMinutes(60)
};
long currentPointer = 0;
long bytesRemaining = blobSize;
do
{
var bytesToFetch = Math.Min(blockSize, bytesRemaining);
using (MemoryStream ms = new MemoryStream())
{
//Download range (by default 1 MB)
blob.DownloadRangeToStream(ms, currentPointer, bytesToFetch, null, blobRequestOptions);
ms.Position = 0;
var contents = ms.ToArray();
using (var fs = new FileStream(file, FileMode.Open))//Open that file
{
fs.Position = currentPointer;//Move the cursor to the end of file.
fs.Write(contents, 0, contents.Length);//Write the contents to the end of file.
}
currentPointer += contents.Length;//Update pointer
bytesRemaining -= contents.Length;//Update bytes to fetch
}
}
while (bytesRemaining > 0);
}
Something like below in node js
var azure = require('azure-storage');
var fs = require('fs');
module.exports = function (context, input) {
context.done();
var accessKey = 'myaccesskey';
var storageAccount = 'mystorageaccount';
var containerName = 'mycontainer';
var blobService = azure.createBlobService(storageAccount, accessKey);
var recordName = "a_large_movie.mov";
var blobName = "standard/mov/" + recordName;
var blobSize;
var chunkSize = (1024 * 512) * 8; // I'm experimenting with this variable
var startPos = 0;
var fullPath = "D:/home/site/wwwroot/myAzureFunction/input/";
var blobProperties = blobService.getBlobProperties(containerName, blobName, null, function (error, blob) {
if (error) {
throw error;
}
else {
blobSize = blob.contentLength;
context.log('Registered length: ' + blobSize);
fullPath = fullPath + recordName;
console.log(fullPath);
doDownload();
}
}
);
function doDownload() {
var stream = fs.createWriteStream(fullPath, {flags: 'a'});
var endPos = startPos + chunkSize;
if (endPos > blobSize) {
endPos = blobSize;
context.log('Reached end of file endPos: ' + endPos);
}
context.log("Downloading " + (endPos - startPos) + " bytes starting from " + startPos + " marker.");
blobService.getBlobToStream(
containerName,
blobName,
stream,
{
"rangeStart": startPos,
"rangeEnd": endPos-1
},
function(error) {
if (error) {
throw error;
}
else if (!error) {
startPos = endPos;
if (startPos <= blobSize - 1) {
doDownload();
}
}
}
);
}
};
Hope it helps.
blockBlobURL.download() doesn't have a file size limit. However, read() returning null doesn't mean there is no more data in the stream. You need to follow Node.js practice and collect all the data by listening for the data or readable event.
For example, the data event posted by Peter Pan. Or the readable event posted by Node.js official documents:
readable.on('readable', () => {
let chunk;
while (null !== (chunk = readable.read())) {
console.log(`Received ${chunk.length} bytes of data.`);
}
});
Please always call read() inside readable event callback.
This issue seems similar to your other thread, "Unable to read readableStreamBody from downloaded blob".
Here is my function to help saving the baseLineImage.readableStreamBody to a file, as below.
/**
 * Writes everything from `readableStream` to the file `filename`.
 *
 * The returned promise settles only once the data has been flushed to the
 * file, so `await streamToFs(...)` really waits for the write. It rejects if
 * either the source stream or the file stream fails.
 *
 * @param {string} filename Path of the file to create or overwrite.
 * @param {import('stream').Readable} readableStream Source of the data.
 * @returns {Promise<void>} Resolves after the write stream emits 'finish'.
 */
async function streamToFs(filename, readableStream) {
    return new Promise((resolve, reject) => {
        const ws = fs.createWriteStream(filename);

        // pipe() does not forward source errors to the destination, so tear
        // the file stream down ourselves and surface the error.
        readableStream.on("error", (err) => {
            ws.destroy();
            reject(err);
        });
        ws.on("error", reject);
        ws.on("finish", () => {
            console.log('written');
            resolve();
        });

        // pipe() handles backpressure and ends `ws` when the source ends.
        readableStream.pipe(ws);
    });
}
And change your code as below.
async function compareToBaseline(imageData, blobName, metadata){
const baselineBlobName = "MacOSX10.12/chrome/initial"
const containerURL = ContainerURL.fromServiceURL(serviceURL, "baselines");
const blockBlobURL = BlockBlobURL.fromContainerURL(containerURL, baselineBlobName );
let baseLineImage = await blockBlobURL.download(aborter, 0);
await streamToFs('./newest.png', baseLineImage.readableStreamBody);
}
It works. Hope it helps.

Azure BlobStorage: 400 (One of the request inputs is out of range.)

When I try to upload blobs to my azure storage account I get the following error response
<?xml version="1.0" encoding="utf-8"?>
<Error>
<Code>OutOfRangeInput</Code>
<Message>One of the request inputs is out of range.
RequestId:--------------------------
Time:2017-10-29T07:13:37.4218874Z
</Message>
</Error>
I am uploading multiple blobs of which some are uploaded successfully while others are not. The ones that throw the error have large blob-names (about 100 characters) so assume it may be due to blob-names size. But according to https://blogs.msdn.microsoft.com/jmstall/2014/06/12/azure-storage-naming-rules/ the maximum blob-names can be 1024 and my blob-names are way less than that limit.
An example blob-name would be "65/36/aluminium_03_group67_product_02pCube1_product_02group2_product_02Flow000_Albedo.png"
Edit Code to upload the blob.
The code to upload is in Javascript. I am breaking the file into multiple chunks and uploading. Here is the function responsible for uploading files
function AzureFileUpload(file, uploadUrl, successCallback, progressCallback, errorCallback){
this.file = file;
this.uploadUrl = uploadUrl;
this.successCallback = successCallback;
this.progressCallback = progressCallback;
this.errorCallback = errorCallback;
this.reader = new FileReader();
this.maxBlockSize = 256 * 1024;
this.blockIds = [];
this.totalBytesRemaining = this.file.size;
this.currentFilePointer = 0;
this.bytesUploaded = 0;
this.uploadFlag = true;
var self = this;
this.reader.onloadend = function(evt) {
if (evt.target.readyState == FileReader.DONE) { // DONE == 2
var uri = self.uploadUrl + '&comp=block&blockid=' + self.blockIds[self.blockIds.length - 1];
var requestData = new Uint8Array(evt.target.result);
self.ReadBlock();
if(self.uploadFlag){
self.UploadBlock(requestData, uri);
}
}
};
this.ReadBlock();
}
AzureFileUpload.prototype.UploadBlock = function(requestData, blockUrl){
var self = this;
$.ajax({
url: blockUrl,
type: "PUT",
data: requestData,
processData: false,
beforeSend: function(xhr) {
xhr.setRequestHeader('x-ms-blob-type', 'BlockBlob');
xhr.setRequestHeader('x-ms-blob-cache-control', "public, max-age=864000");
},
success: function(data, status) {
self.UpdateProgress(requestData.length);
self.bytesUploaded += requestData.length;
if (parseFloat(self.bytesUploaded) == parseFloat(self.file.size)) {
self.CommitBlocks();
}
},
error: function(xhr, desc, err) {
// console.log(desc);
// console.log(err);
self.Error("Unexpected error occured while uploading model. Plaese try after some time");
}
});
};
/**
 * Left-pads the string form of `number` with '0' characters until it is at
 * least `length` characters long. Values that are already long enough are
 * returned unchanged (never truncated).
 *
 * @param {*} number  Value to format; converted with string concatenation.
 * @param {number} length Minimum width of the result.
 * @returns {string} The zero-padded text.
 */
AzureFileUpload.prototype.pad = function(number, length){
    var digits = '' + number;
    var missing = Math.ceil(length - digits.length);
    // Also covers a NaN width (e.g. `length` undefined): nothing to add.
    if (!(missing > 0)) {
        return digits;
    }
    // Joining N + 1 empty slots with '0' yields exactly N zeros.
    return new Array(missing + 1).join('0') + digits;
};
AzureFileUpload.prototype.ReadBlock = function(){
if (this.totalBytesRemaining > 0) {
var fileContent = this.file.slice(this.currentFilePointer, this.currentFilePointer + this.maxBlockSize);
var blockId = "block-" + this.file.name + "-" + this.pad(this.blockIds.length, 6);
this.blockIds.push(btoa(blockId));
this.reader.readAsArrayBuffer(fileContent);
this.currentFilePointer += this.maxBlockSize;
this.totalBytesRemaining -= this.maxBlockSize;
if (this.totalBytesRemaining < this.maxBlockSize) {
this.maxBlockSize = this.totalBytesRemaining;
}
}
};
AzureFileUpload.prototype.UpdateProgress = function(bytesUploaded){
console.log("Progress",bytesUploaded);
if(this.progressCallback){
this.progressCallback(bytesUploaded);
}
};
AzureFileUpload.prototype.CommitBlocks = function(){
var self = this;
var uri = this.uploadUrl + '&comp=blocklist';
var request = '<?xml version="1.0" encoding="utf-8"?><BlockList>';
for (var i = 0; i < this.blockIds.length; i++) {
request += '<Latest>' + this.blockIds[i] + '</Latest>';
}
request += '</BlockList>';
$.ajax({
url: uri,
type: "PUT",
data: request,
beforeSend: function(xhr) {
xhr.setRequestHeader('x-ms-blob-content-type', self.file.type);
xhr.setRequestHeader('x-ms-blob-cache-control', "public, max-age=864000");
},
success: function(data, status) {
console.log("Block Commited", data);
if(self.successCallback){
self.successCallback();
}
},
error: function(xhr, desc, err) {
self.Error("Unexpected error occured while uploading model. Plaese try after some time");
}
});
};
AzureFileUpload.prototype.Error = function(msg){
this.CancelUpload();
if(this.errorCallback){
this.errorCallback(msg);
}
};
AzureFileUpload.prototype.CancelUpload = function(){
this.uploadFlag = false;
};
The problem is with the following line of code:
var blockId = "block-" + this.file.name + "-" + this.pad(this.blockIds.length, 6);
Essentially the max length of a block id can be 64 bytes (Ref: https://learn.microsoft.com/en-us/rest/api/storageservices/put-block - see URI parameters section). Because you're including file name in block id computation and your file name is large, you're exceeding this limitation.
Please try with the following line of code and you should not get this error:
var blockId = "block-" + this.pad(this.blockIds.length, 6);
Please note that block ids are scoped to a blob so it is not really necessary for you to include the blob name to make the block ids unique to a blob.
If you're using a connection string, that could also be the cause: double-check it (including the casing), since container names and the like are case sensitive. You can read more on naming rules here https://learn.microsoft.com/en-us/rest/api/storageservices/Naming-and-Referencing-Containers--Blobs--and-Metadata?redirectedfrom=MSDN

Create Thumbnail Image using Windows Azure Blob Storage

I am trying to use the azure-sdk-for-node to save a streamed image to Windows Azure blob storage but without success. Below is the function that I call and pass the video object with the thumbnail property. Initially I fetch the image using the request object which fetches the image from another website and turn that into a base64 object which in turn gets converted into a stream object because Azure blob service uses createBlockBlobFromStream method as I couldn't use createBlockBlobFromFile or createBlockBlobFromText to upload the image to blob storage.
var azure = require('azure')
, uuid = require('node-uuid')
, http = require('http')
, url = require('url')
, mmm = require('mmmagic')
, Magic = mmm.Magic
, stream = require('stream');
function createVideoThumbnail(video, callback){
var bs = azure.createBlobService(config.storageAccount, config.storageAccessKey, config.blobHost);
var sURL = video.Thumbnail;
var oURL = url.parse(sURL);
var client = http.createClient(80, oURL.hostname);
var request = client.request('GET', oURL.pathname, {'host': oURL.hostname});
request.end();
request.on('response', function (response) {
var type = response.headers["content-type"];
var prefix = "data:" + type + ";base64,";
var body = "";
response.setEncoding('binary');
response.on('end', function () {
var base64 = new Buffer(body, 'binary').toString('base64');
var data = prefix + base64;
console.log('base64 image data ' + video.Thumbnail + ': ' + data + '\n');
var decodedImage = new Buffer(data, 'base64');
var magic = new Magic(mmm.MAGIC_MIME_TYPE);
magic.detect(decodedImage, function(err, result) {
if(err) {
throw err;
}
var bytes = 0;
var imageStream = new stream.Stream();
imageStream.writable = true;
imageStream.write = function(buf) {
bytes += buf.length;
imageStream.emit('data', buf);
};
imageStream.end = function(buf) {
//if(arguments.length) {
imageStream.write(buf);
//}
imageStream.writable = false;
imageStream.emit('end');
console.log(bytes + ' bytes written');
};
var options = {}
console.log('mmm = ' + result + '\n');
options.contentType = result;
options.contentTypeHeader = result;
console.log('\n');
bs.createBlockBlobFromStream(config.imageContainer, uuid().replace(/-/gi, "").toLowerCase() + '.jpg', imageStream, decodedImage.length, options, function(error, blobResult, response) {
if (error)
console.log('got error = ' + JSON.stringify(error) + '\n');
if (blobResult)
console.log('blobResult = ' + JSON.stringify(blobResult) + '\n');
if (response)
console.log('response = ' + JSON.stringify(response) + '\n');
// now store in Azure blob storage
callback();
});
imageStream.end(decodedImage);
});
});
response.on('data', function (chunk) {
if (response.statusCode == 200) body += chunk;
});
});
}
and this is how I call it:
createVideoThumbnail(video, function(){
console.log("returning from create thumbnails\n\n");
});
The function is not working: it hangs and never prints the final log statement:
console.log("returning from create thumbnails\n\n");
However the base64 does seem to work as I am getting this for the encoding:

mmm = application/octet-stream
5283 bytes written
But I am not getting any of these log statements being printed:
if (error)
console.log('got error = ' + JSON.stringify(error) + '\n');
if (blobResult)
console.log('blobResult = ' + JSON.stringify(blobResult) + '\n');
if (response)
console.log('response = ' + JSON.stringify(response) + '\n');
So I am presuming it is hanging somewhere or I have not structured my code properly. Can anybody see what I am doing wrong ?
Cheers
Rob
My sources:
http://social.msdn.microsoft.com/Forums/en-US/wavirtualmachinesforlinux/thread/47bfe142-c459-4815-b09e-bd0a07ca18d5
Node.js base64 encode a downloaded image for use in data URI
Here's my simplified version. Since magic needs to operate on the whole file, there's no point in using the streaming blob API; instead, this is written for the text API. body will be a buffer of the image. I suspect Azure will be happy with it without any encoding; call toString('encoding') if it does need one.
var azure = require('azure')
, uuid = require('node-uuid')
, request = require('request')
, mmm = require('mmmagic')
, Magic = mmm.Magic
, stream = require('stream')
, bs = azure.createBlobService(config.storageAccount, config.storageAccessKey, config.blobHost);
/**
 * Downloads `video.Thumbnail`, detects its MIME type and stores the image in
 * the configured blob container under a freshly generated name.
 *
 * @param {{Thumbnail: string}} video Object whose Thumbnail is the image URL.
 * @param {function(Error=)} callback Called exactly once when finished. It
 *        receives the error if any step failed; callers that take no
 *        arguments keep working unchanged.
 */
function createVideoThumbnail(video, callback){
    var sURL = video.Thumbnail;
    // A detector instance has to be created before use; referencing an
    // undeclared `magic` throws a ReferenceError.
    var magic = new Magic(mmm.MAGIC_MIME_TYPE);

    request(sURL, {encoding:null}, function (err, res, body) {
        // encoding:null makes request return a buffer, which is ideal to run magic.detect on
        if (err) {
            console.log('got error =', err);
            return callback(err);
        }
        if (res.statusCode !== 200) {
            // Don't store an error page as if it were the thumbnail.
            var statusErr = new Error('Unexpected status ' + res.statusCode + ' fetching ' + sURL);
            console.log('got error =', statusErr);
            return callback(statusErr);
        }

        magic.detect(body, function (detectErr, mimeType) {
            if (detectErr) {
                console.log('got error =', detectErr);
                return callback(detectErr);
            }
            console.log(mimeType);
            var container = config.imageContainer;
            var blob = uuid().replace(/-/gi, "").toLowerCase() + '.jpg';
            var text = body; //might need to be converted into a string, I don't have azure setup to test
            var options = {
                contentType: mimeType,
                contentTypeHeader: mimeType
            };
            bs.createBlockBlobFromText(container, blob, text, options, function(error, blobResult, response) {
                if (error)
                    console.log('got error =', error);
                // if you give console.log multiple arguments, it will format each of them,
                // no need to manipulate objects into strings manually
                if (blobResult)
                    console.log('blobResult =', blobResult);
                if (response)
                    console.log('response =', response);
                // now store in Azure blob storage
                callback(error);
            });
        });
    });
}
edit: temp file version
var azure = require('azure')
, uuid = require('node-uuid')
, request = require('request')
, mmm = require('mmmagic')
, Magic = mmm.Magic
, fs = require('fs')
, bs = azure.createBlobService(config.storageAccount, config.storageAccessKey, config.blobHost);
/**
 * Temp-file variant: downloads `video.Thumbnail` to ./tmp, detects its MIME
 * type from the file, uploads the file to the configured blob container and
 * then removes the temporary copy.
 *
 * @param {{Thumbnail: string}} video Object whose Thumbnail is the image URL.
 * @param {function(Error=)} callback Called exactly once when finished. It
 *        receives the error if any step failed; callers that take no
 *        arguments keep working unchanged.
 */
function createVideoThumbnail(video, callback){
    var sURL = video.Thumbnail;
    var name = uuid().replace(/-/gi, "").toLowerCase() + '.jpg';
    var tmpPath = './tmp/' + name;
    // A detector instance has to be created before use; referencing an
    // undeclared `magic` throws a ReferenceError.
    var magic = new Magic(mmm.MAGIC_MIME_TYPE);
    var finished = false;

    // Reports the outcome once, after a best-effort removal of the temp file.
    function done(err) {
        if (finished) return;
        finished = true;
        if (err) console.log('got error =', err);
        fs.unlink(tmpPath, function () {
            // A failed cleanup must not mask the real result.
            callback(err);
        });
    }

    var ws = fs.createWriteStream(tmpPath);
    ws.on('error', done);
    request(sURL, {encoding:null})
        .on('error', done)
        .pipe(ws).on('close', function () {
            if (finished) return; // the download already failed
            console.log('downloaded');
            magic.detectFile(tmpPath, function (err, res) {
                if (err) return done(err);
                var container = config.imageContainer;
                var options = {
                    contentType: res,
                    contentTypeHeader: res
                };
                // Pass the detected content type along; the earlier version
                // built `options` but never handed them to the upload call.
                bs.createBlockBlobFromFile(container, name, tmpPath, options, function (uploadErr) {
                    done(uploadErr);
                });
            });
        });
}
Thanks to Valery Jacobs on msdn forums was able to come up with the answer. It's a nice clean solid solution using the stream, request and util object.
:)
http://social.msdn.microsoft.com/Forums/en-US/windowsazurepurchasing/thread/25c7705a-4ea0-4d9c-af09-cb48a031d06c

Resources