Read from Dropbox stream into Buffer - node.js

I need to download a file from Dropbox into a buffer on my server. Due to security issues I can't download the file directly to the client. Therefore I send a request to my server, fetch the file from Dropbox, and then forward it to the client. I managed to implement this by writing the Dropbox stream to a file on my server and then sending that file to the client.
Now I need to implement the same mechanism without writing the Dropbox stream to a file on my server. I need to create a buffer, write into it, and then forward the buffer to the client.
export const downloadFileFromDropbox = async function downloadFileFromDropbox(fileName,
    folderNameOnDropbox) {
    let isSucceeded;
    const message = [];
    let downloadResult;
    let fileBuffered = "";
    // authentication
    const dropbox = dropboxV2Api.authenticate({
        token: process.env.DEV_DROPBOX_SECRET_KEY
    });
    // configuring parameters
    const params = Object.freeze({
        resource: "files/download",
        parameters: {
            path: `/${folderNameOnDropbox}/${fileName}`
        }
    });
    let dropboxPromise = new Promise(function (resolve, reject) {
        dropbox(params, function (err, result) {
            if (err) {
                reject(err);
            } else {
                resolve(result);
            }
        }).pipe(fs.createWriteStream(/* need to implement buffer here */));
    });
    await dropboxPromise.then(async function (resultObj) {
        isSucceeded = true;
        message.push("fileDownload_OK");
    }).catch(async function (err) {
        isSucceeded = false;
        message.push(err.message);
    });
    downloadResult = {
        isSucceeded,
        message,
        /* Buffer */
    };
    return downloadResult;
};

There is no way (that I found) to convert a file into a buffer without writing it to some location first.
What worked for me is:
write the file to a temporary folder
read it and convert it into a buffer
send the buffer back to the client
delete the file from the temporary location
Here's my code:
dropbox = dropboxV2Api.authenticate({ token: credentials.access_token });
dropbox(
  {
    resource: 'files/download',
    parameters: {
      path: `${req.query.folder}` + `/${req.query.filename}`,
    },
  },
  (err, result, response) => {
    // download completed
    if (err) {
      return res.status(500).send(err);
    } else {
      console.log('download completed');
    }
  }
).pipe(
  fs
    .createWriteStream(`./app/tmp/` + `${req.query.filename}`)
    .on('finish', () => {
      fs.readFile(
        `./app/tmp/` + `${req.query.filename}`,
        function (err, buffer) {
          if (err) {
            res.status(500).send(err);
          } else {
            fs.unlink(`./app/tmp/` + `${req.query.filename}`, (err) => {
              if (err) {
                console.error(err);
                return;
              }
              // file removed
              res.status(200).send(buffer);
            });
          }
        }
      );
    })
);
I hope this will help you even though it's a little bit late
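For reference, here is a minimal sketch of an in-memory variant. It assumes, as the snippets above do, that the dropbox-v2-api call returns a pipeable readable stream; instead of piping it into a file, the chunks are collected and concatenated into a single Buffer. The streamToBuffer helper is illustrative, not part of the original code.
// Illustrative helper: collect a readable stream into a single Buffer.
function streamToBuffer(readable) {
    return new Promise((resolve, reject) => {
        const chunks = [];
        readable.on('data', (chunk) => chunks.push(chunk));
        readable.on('end', () => resolve(Buffer.concat(chunks)));
        readable.on('error', reject);
    });
}

// Usage sketch, assuming `dropbox` and `params` are set up as in the question:
// const downloadStream = dropbox(params, (err, result) => { /* metadata callback */ });
// const fileBuffer = await streamToBuffer(downloadStream);
// res.status(200).send(fileBuffer);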

Related

Buffer/Base64 encoding issue in Node.js and Azure Web APP

My code works perfectly locally; the downloaded file is never corrupt (I tried it dozens of times and the files always come through complete). However, when I run it on Azure (Web App), the returned file is very often corrupt (and has the wrong file size). It's extremely random: sometimes the file is returned fine and sometimes it is not.
My function that downloads the file from Azure blob storage is the following; the "setTimeout" calls are desperate attempts on my part to make it work without corruption:
storageUtils.downloadBlobFromCloud = (subDirectory, fileName) => {
    var dir = './temp';
    var tempFilePath = dir + '/' + Date.now().toString() + '.temp';
    console.log(tempFilePath)
    var absolutePath = pathLib.resolve(tempFilePath);
    if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir);
    }
    if (!fileName) {
        return null;
    }
    return new Promise((resolve, reject) => {
        blobService.getBlobToLocalFile(
            container,
            `${subDirectory}/${fileName}`,
            tempFilePath,
            async function (error, serverBlob) {
                if (!error) {
                    try {
                        let base64 = await fileStream(tempFilePath);
                        console.log("STREAMMMINNNGGGG")
                        setTimeout(() => {
                            resolve(base64);
                        }, 2000);
                        setTimeout(() => {
                            console.log('deleting file')
                            fs.unlinkSync(tempFilePath);
                        }, 10000);
                    } catch (e) {
                        console.log('error1')
                        console.log(e)
                        reject(e);
                    }
                } else {
                    console.log("fi error2")
                    console.log(error)
                    reject(error);
                }
            }
        );
    });
};
The stream function is
function fileStream(filePath) {
    return new Promise((resolve, reject) => {
        let buffers = [];
        let myStream = fs.createReadStream(filePath);
        myStream.on('data', (chunk) => { buffers.push(chunk); });
        myStream.once('end', () => {
            let buffer = Buffer.concat(buffers);
            console.log("BUFFER SIZE " + buffer.length)
            let base64 = buffer.toString('base64')
            /* fs.writeFile('./temp/buffer.txt', base64, function (err) {
                if (err) return console.log(err);
                console.log(err)
            }); */
            resolve(base64);
        });
        myStream.once('error', (err) => {
            reject(err);
        });
    });
}
The function that returns the response to the client:
let downloadFile = async (directory, fileName, attempts) => {
    attempts++;
    let file = await storageUtils.downloadBlobFromCloud(directory, fileName);
    if (attempts < 20 && !file) {
        file = await downloadFile(directory, fileName, attempts);
    }
    return file;
}

server.post('/api/download-file', async function (req, res) {
    try {
        console.log(`downloading file ${req.body.fileName} ${req.body.directory}`)
        let attempts = 0;
        let file = await downloadFile(req.body.directory, req.body.fileName, attempts);
        return res.json({ file });
    } catch (error) {
        console.log(error);
        return res.json(error);
    }
});
You may notice the commented-out "buffer.txt" write; I've checked the base64 in that file and it is indeed incomplete (e.g. expecting a 1.5MB file but getting 1MB) whenever the file is returned corrupt to the client.
I am out of ideas. What am I doing wrong? And why does it work perfectly locally but not on the cloud hosting?
NOTE: I have even done a side-by-side comparison between the base64 of a correctly downloaded file and a corrupt one using a text-comparison tool. The two base64 blocks are identical for a large portion while scrolling down, then... the corrupt one goes blank, while the non-corrupt base64 continues. It is as if something is "cutting" the base64 encoding.
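Not a diagnosis of the Azure corruption itself, but a side note: the setTimeout calls can be replaced with plain sequencing, so the promise resolves only after the file has been fully read, and the temp file is deleted immediately afterwards. A sketch based on the code above (same blobService, fileStream and tempFilePath, inside the same Promise executor):
blobService.getBlobToLocalFile(container, `${subDirectory}/${fileName}`, tempFilePath,
    async function (error, serverBlob) {
        if (error) {
            return reject(error);
        }
        try {
            const base64 = await fileStream(tempFilePath);
            fs.unlinkSync(tempFilePath); // safe: the content is already in memory at this point
            resolve(base64);
        } catch (e) {
            reject(e);
        }
    }
);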

How to make express download link with GridFsBucket?

As the title says, how do you make a direct download link for a file from MongoDB (GridFSBucket) using express?
The file should be served from memory, as I don't want to save it temporarily on the server.
I have this method:
async function downloadFileFromDB(fileId) {
    var gridfsbucket = new mongoose.mongo.GridFSBucket(mongoose.connection.db, {
        chunkSizeBytes: 1024,
        bucketName: 'filesBucket'
    });
    try {
        const stream = gridfsbucket.openDownloadStream(fileId)
        const fileBuffer = Buffer.from(stream)
        return fileBuffer
    } catch (err) {
        stream.on('error', () => {
            console.log("Some error occurred in download:" + error);
        })
        console.log(err);
    }
}
And this route:
router.get('/download-file', async (req, res) => {
    const fileId = req.query.fileId
    const ObjectFileId = new ObjectId(fileId)
    const fileBuffer = await fileFacade.downloadFileFromDB(ObjectFileId)
    res.download(fileBuffer)
})
But res.download wants a path, not a buffer. Also, I'm not sure I can make a buffer directly from the openDownloadStream method.
Can anyone help?
I believe you need to write the data to your res object. I accomplished this like:
const readStream = gridfs.openDownloadStreamByName(filename);
readStream.on("data", (chunk) => {
    res.write(chunk);
});
readStream.on("end", () => {
    res.status(200).end();
    mongoClient.close();
});
readStream.on("error", (err) => {
    console.log(err);
    res.status(500).send(err);
});
So, you may just have to do:
res.write(fileBuffer);
res.end();
// Instead of doing:
// res.download(fileBuffer);
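If you want to keep the buffer-returning shape of downloadFileFromDB from the question, one option (a sketch, not the original poster's code) is to collect the GridFS download stream into a Buffer, since Buffer.from(stream) does not actually read a stream:
// Sketch: collect the GridFS download stream into a single Buffer.
function downloadFileFromDB(gridfsbucket, fileId) {
    return new Promise((resolve, reject) => {
        const chunks = [];
        gridfsbucket.openDownloadStream(fileId)
            .on('data', (chunk) => chunks.push(chunk))
            .on('error', reject)
            .on('end', () => resolve(Buffer.concat(chunks)));
    });
}

// In the route, send the buffer instead of res.download (which expects a file path);
// the filename below is just a placeholder:
// const fileBuffer = await downloadFileFromDB(gridfsbucket, ObjectFileId);
// res.set('Content-Disposition', 'attachment; filename="myfile"');
// res.send(fileBuffer);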

download large files and continue uploading to minio with nodejs, KILLED MY SERVICE?

I have an API built with NodeJS. In that API there is a process where I download a large file using the request-promise module, turn it into a new buffer, and upload it to minio. The problem is that my API always crashes if the file is above 80-100MB, and the NodeJS process gets killed on the server. How do I handle this?
This function is to download the file and convert it into a buffer :
const convertLink = async link => {
    const options = {
        uri: link,
        encoding: null,
        headers: {
            'Content-type': 'application/octet-stream'
        }
    };
    const res = rp.get(options)
        .then((body) => {
            console.log(body)
            const a = new Buffer.from(body);
            return a;
        })
        .catch(err => {
            console.log(err)
            return err;
        });
    return res;
};
This is the function for uploading files to minio using the minio SDK:
const streamUpload = async (bucketName, objectName, newBuffer) => {
    try {
        const isUploaded = await minioClient.putObject(bucketName, objectName, newBuffer);
        if (isUploaded) {
            return isUploaded;
        }
    } catch (err) {
        return err;
    }
};
I think the issue here is that you are downloading the file, keeping it entirely in memory, and then uploading it to your minioClient, which is not recommended for large files. You should download the file as a stream and then upload it as a stream too; keeping large files in memory can be the reason your node.js process is killed.
You can try the following example, in which I use the request npm library to download the file and save it as a stream to a temporary location, and then read the file from that temporary location and upload it to another URL.
Downloading the file:
const downloadFile = async (url) => {
    try {
        let tempLocation = "./temp/";
        let fileName = "myfile";
        return new Promise((resolve, reject) => {
            request
                .get(url)
                .on('response', function (response) {
                    console.log(response.statusCode) // 200
                    console.log(response.headers['content-type'])
                })
                .on('error', function (error) {
                    console.log('downloading error', error)
                    reject()
                })
                .on('end', async function () {
                    console.log("download finished")
                    resolve();
                })
                .pipe(fs.createWriteStream(tempLocation + '/' + fileName))
        });
    } catch (error) {
        console.log("error in downloadFile", error)
        throw error;
    }
}
Now you can upload the file to your minioClient as a stream. You can use fs.createReadStream(file) to read the file as stream data from that temporary location.
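A minimal sketch of that upload step, assuming the same minioClient and the temp location used in downloadFile above (the bucket and object names are placeholders). putObject in the minio SDK accepts a readable stream, so the whole file never has to sit in memory at once:
const fs = require('fs');

const streamUpload = async (bucketName, objectName, filePath) => {
    // Read the downloaded temp file as a stream and hand it straight to minio.
    const readStream = fs.createReadStream(filePath);
    return minioClient.putObject(bucketName, objectName, readStream);
};

// Usage sketch:
// await downloadFile(link);
// await streamUpload('my-bucket', 'myfile', './temp/myfile');
In principle the temp file could be skipped entirely by piping the request download straight into putObject, but the two-step version above mirrors the approach described in the answer.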

Azure: Function stops working when trying to get blob content

I am working with NodeJS and Azure Functions. I am trying to get the content of a blob (a pptx) and then work further with that pptx (unzip it with adm-zip).
However, whenever I try to get the content, the function just stops without any error and after some time it times out. I tried getting the properties of the blob first (to check that the blob exists) and that works.
Here is my function:
const storage = require('azure-storage');
const STORAGE_ACCOUNT_NAME = 'storage-account';
const ACCOUNT_ACCESS_KEY = 'storage-key';
let AdmZip = require('adm-zip');
let fs = require('file-system');
const blobService = storage.createBlobService(STORAGE_ACCOUNT_NAME, ACCOUNT_ACCESS_KEY);

module.exports = function (context, req) {
    context.log('JavaScript HTTP trigger function processed a request.');
    getBlobProperties('default-powerpoint', 'download.pptx').then((properties) => {
        context.log('Properties: ', properties);
        getBlobContent('default-powerpoint', 'download.pptx').then((content) => {
            context.log('Blob Content: ', content);
        })
    });
};
function getBlobProperties(containerName, fileName) {
    return new Promise((resolve, reject) => {
        blobService.getBlobProperties(
            containerName,
            fileName,
            function (err, properties, status) {
                if (err) {
                    reject(err);
                } else {
                    resolve(properties);
                }
            });
    })
}

function getBlobContentAsStream(containerName, fileName, res) {
    return new Promise((resolve, reject) => {
        blobService.getBlobToStream(containerName, fileName, res, function (err, results) {
            if (err) {
                reject(err);
            } else {
                resolve(JSON.stringify(results, null, 2));
            }
        });
    })
}

function getBlobContent(containerName, blobName) {
    return new Promise((resolve, reject) => {
        blobService.getBlobToText(
            containerName,
            blobName,
            function (err, blobContent, blob) {
                if (err) {
                    reject(err);
                } else {
                    resolve({
                        'content': blobContent,
                        'blob': blob
                    });
                }
            });
    })
}
As you can see I tried both getBlobToStream and getBlobToText but with the same result. The getBlobProperties works fine and I get all the information about the blob, just not the content.
Can anyone please help me get the content of the blob.
Edit:
This is the output of the properties if anyone is interested:
BlobResult {
  container: 'default-powerpoint',
  name: 'download.pptx',
  metadata: {},
  lastModified: 'Wed, 14 Aug 2019 08:28:16 GMT',
  creationTime: 'Wed, 14 Aug 2019 08:28:16 GMT',
  etag: '"something"',
  blobType: 'BlockBlob',
  contentLength: '4658',
  serverEncrypted: 'true',
  requestId: 'someID',
  contentSettings: { contentType: 'image/jpeg' },
  lease: { status: 'unlocked', state: 'available' },
  copy:
   { id: 'id123',
     status: 'success',
     source: 'sourceURL',
     progress: '4658/4658',
     bytesCopied: 4658,
     totalBytes: 4658,
     completionTime: 'Wed, 14 Aug 2019 08:28:16 GMT' } }
Here is the working code I have used in my app:
return new Promise(async r => {
    bst.getBlobToText(containername, name, (err, text) => r(err ? null : text));
})
Full SourceCode
I was able to get this to work using the following code:
const storage = require('azure-storage');
const fs = require('fs');
const STORAGE_ACCOUNT_NAME = '<account_name>';
const ACCOUNT_ACCESS_KEY = '<access_key>';
const blobService = storage.createBlobService(STORAGE_ACCOUNT_NAME, ACCOUNT_ACCESS_KEY);

function getBlobContentAsStream(containerName, fileName, res) {
    return new Promise((resolve, reject) => {
        blobService.getBlobToStream(containerName, fileName, fs.createWriteStream('task1-download.txt'), function (error, serverBlob) {
            if (!error) {
                resolve(serverBlob);
            } else {
                reject(error);
            }
        });
    })
}

module.exports = function (context, req) {
    context.log('JavaScript HTTP trigger function processed a request.');
    context.log('Starting...');
    getBlobContentAsStream('default-powerpoint', 'download.pptx').then((content) => {
        context.log('Blob Content: ', content);
        context.done();
    }, function (err) {
        context.log.error(err);
        context.done();
    });
};
And here is the trace output (screenshot omitted).
The problem that happens in your code you can actually see in the Application Insights trace, if you have connected your Functions to it. You didn't get any errors because you didn't add error handling for your then callback.
getBlobContent('default-powerpoint', 'download.pptx').then((content) => {
    context.log('Blob Content: ', content);
    context.done();
})
Use
getBlobContent('default-powerpoint', 'download.pptx').then((content) => {
    context.log('Blob Content: ', content);
    context.done();
}, function (err) {
    console.log(err);
    context.done();
});
You would then see the error in the trace, with these details:
Error: An incorrect number of bytes was read from the connection.
The connection may have been closed.
So the problem you are having with getBlobToText is that it tries to return the Buffer object as a string and then fails to validate the MD5. I read somewhere that it's possible to use the write-to-stream function to write into a buffer instead of a file, but I can't find it right now.
I would probably grab some NodeJS memory-stream library and try to output it there, as I assume you don't want to save directly to a file. But maybe you do; decide for yourself.
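As a sketch of that idea (my own assumption, not tested against the original function): getBlobToStream will write into any Writable, so a small in-memory sink yields a Buffer without touching the file system:
const { Writable } = require('stream');

function getBlobContentAsBuffer(containerName, blobName) {
    return new Promise((resolve, reject) => {
        const chunks = [];
        const sink = new Writable({
            write(chunk, _encoding, done) {
                chunks.push(chunk); // capture every chunk as it is written
                done();
            }
        });
        blobService.getBlobToStream(containerName, blobName, sink, (err) => {
            if (err) return reject(err);
            resolve(Buffer.concat(chunks)); // e.g. hand this Buffer to AdmZip
        });
    });
}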
If you do end up using the 'fs' library, remember to use the recommended safe non-blocking patterns, like this:
const fs = require('fs');
const util = require('util');
const readFileAsync = util.promisify(fs.readFile);

module.exports = async function (context) {
    let data;
    try {
        data = await readFileAsync('./hello.txt');
    } catch (err) {
        context.log.error('ERROR', err);
        // This rethrown exception will be handled by the Functions Runtime and will only fail the individual invocation
        throw err;
    }
    context.log(`Data from file: ${data}`);
}
It might be that the issue is an API change. I just checked the sample below, where the getBlobToText callback takes only two arguments:
https://github.com/Azure-Samples/storage-blobs-node-quickstart/blob/master/index.js
const downloadBlob = async (containerName, blobName) => {
    const dowloadFilePath = path.resolve('./' + blobName.replace('.txt', '.downloaded.txt'));
    return new Promise((resolve, reject) => {
        blobService.getBlobToText(containerName, blobName, (err, data) => {
            if (err) {
                reject(err);
            } else {
                resolve({ message: `Blob downloaded "${data}"`, text: data });
            }
        });
    });
};

Creating a GIF from remote stream in graphicsmagick

I am creating a GIF from remote files in node, currently by downloading each image to the file system into a tmp folder.
I want to bypass saving the images to a tmp folder and keep them in memory instead. Is this possible?
As you can see, I have a download function in my AWS class which saves to a tmp folder:
download(key) {
    return new Promise((resolve, reject) => {
        request.head(`${this.base_url}/${this.bucket}/${key}`, (err, res, body) => {
            request(`${this.base_url}/${this.bucket}/${key}`)
                .pipe(fs.createWriteStream(`tmp/${key}`)).on('close', resolve)
        })
    })
};
Once they have all downloaded, I have a createGif function in my GifService class which adds each file path as a custom argument to gm, adds a delay of 50ms, resizes, then outputs a buffer which I then upload to AWS S3.
import gm from 'gm';
...
constructor() {
    this.gm = gm()
}

generateGif(images, prefix) {
    return new Promise((resolve, reject) => {
        // for each image we want in array, we pass to gm
        images.forEach(image => {
            this.gm.in(`tmp/${image.Key}`)
        })
        // Now we create the gif with 50sec delay between images, sized to 600px x 2
        this.gm
            .delay(50)
            .resize(600, 600)
            .toBuffer('gif', async (err, buffer) => {
                if (err) reject(err)
                const params = {
                    ACL: 'public-read',
                    Bucket: config.aws_bucket,
                    ContentType: 'image/gif',
                    Key: `${prefix}/${uuid()}.gif`,
                    Body: buffer
                }
                try {
                    // upload to S3
                    const upload = await this.aws.upload(params)
                    // resolve s3 URL
                    resolve(upload)
                } catch (err) {
                    console.log('err', err)
                    reject(err)
                }
            });
    })
}
Ideally I could pass a remote file stream as a custom argument, or pass a buffer in as a custom argument, as opposed to how I am currently passing in the tmp file path:
images.forEach(image => {
    this.gm.in(`tmp/${image.Key}`)
})
I managed to make it work using only streams, by first converting the images to miff and concatenating them into a single stream. Then passing the buffer or the stream into gm again with a delay does the trick.
You will need to install the concat-stream npm package for this to work.
Sorry for the mixed ES5 code.
import gm from 'gm';
var concat = require('concat-stream');
...
constructor() {
    this.gm = gm()
}

start() {
    return getYourReadAbleStreamsSomehow().then(streams => {
        return generateGif(streams, 50);
    }).then(gifBuffer => {
        return uploadToAWS(gifBuffer, prefix);
    }).catch(err => {
        console.log(err)
    })
}

async uploadToAWS(buffer, prefix) {
    const params = {
        ACL: 'public-read',
        Bucket: config.aws_bucket,
        ContentType: 'image/gif',
        Key: `${prefix}/${uuid()}.gif`,
        Body: buffer
    }
    try {
        // upload to S3 and return the s3 URL
        return await this.aws.upload(params)
    } catch (err) {
        console.log('err', err)
        throw err
    }
}

generateGif(imageStreams, delay) {
    return new Promise((resolve, reject) => {
        var write = concat(function (buffer) {
            gm(buffer)
                .delay(delay)
                .toBuffer('gif', function (err, buffer) {
                    if (err)
                        return reject(err);
                    resolve(buffer);
                })
        })
        // Convert each image to miff and concat the streams
        var i = 0;
        var streamHandler = function () {
            gm(imageStreams[i])
                .resize('600', '600')
                .stream('miff', function (err, stdout, stderr) {
                    if (err)
                        return reject(err)
                    var lastOne = i === imageStreams.length - 1;
                    if (!lastOne)
                        stdout.once('end', streamHandler)
                    stdout.pipe(write, {
                        end: lastOne
                    });
                    i++;
                });
        }
        streamHandler();
    })
}
