Store json object to Azure blob storage - node.js

Is there any way to store a JSON object without converting it to a stream? I can upload it after converting it to a stream, but is there any way to store the object as {something}.json without converting it to a stream first?
What I do now:
const azureStorage = require('azure-storage');
const blobService = azureStorage.createBlobService(accountName, accountKey);
var Readable = require('stream').Readable;

var msg = {
  a: "something",
  b: "anotherthing"
};

var stream = new Readable();
stream.push(JSON.stringify(msg));
stream.push(null);

var option = {
  contentSettings: { contentType: 'application/json' }
};

stream.pipe(blobService.createWriteStreamToBlockBlob('container', 'something.json', option, function onResponse(error, result) {
  console.log("done");
}));
Is there a better way?

Sure, you can just send text using createBlockBlobFromText like this:
blobService.createBlockBlobFromText(
  'container',
  'something.json',
  JSON.stringify(msg),
  option,
  function onResponse(error, result) {
    console.log("done");
  });
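For newer projects, note that azure-storage has since been superseded by the @azure/storage-blob package, which can also upload a string without a stream. A minimal sketch with the v12 SDK, assuming an existing container and placeholder names (connectionString, 'container', 'something.json'):
const { BlobServiceClient } = require('@azure/storage-blob');

async function uploadJson(msg) {
  const blobServiceClient = BlobServiceClient.fromConnectionString(connectionString);
  const blockBlobClient = blobServiceClient
    .getContainerClient('container')
    .getBlockBlobClient('something.json');

  const content = JSON.stringify(msg);
  // upload() takes the body and its byte length directly; no stream required
  await blockBlobClient.upload(content, Buffer.byteLength(content), {
    blobHTTPHeaders: { blobContentType: 'application/json' }
  });
}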

Related

How can I save a file I download using fetch with fs?

I'm trying to download files from GitHub with the fetch() function and then save the fetched file stream to disk with the fs module.
When doing so, I get this error:
TypeError [ERR_INVALID_ARG_TYPE]: The "transform.writable" property must be an instance of WritableStream. Received an instance of WriteStream
My problem is that I don't know the difference between WriteStream and WritableStream, or how to convert between them.
This is the code I run:
async function downloadFile(link, filename = "download") {
  var response = await fetch(link);
  var body = await response.body;
  var filepath = "./" + filename;
  var download_write_stream = fs.createWriteStream(filepath);
  console.log(download_write_stream.writable);
  await body.pipeTo(download_write_stream);
}
Node.js: v18.7.0
You can use Readable.fromWeb to convert body, which is a ReadableStream from the web streams API, into a NodeJS Readable stream that can be used with the fs methods.
Note that readable.pipe returns the destination stream immediately, before the data has finished being written. To wait for completion, you can use the promise version of stream.finished, or add listeners for the 'finish' and 'error' events to detect success or failure.
const fs = require('fs');
const { Readable } = require('stream');
const { finished } = require('stream/promises');

async function downloadFile(link, filepath = './download') {
  const response = await fetch(link);
  const body = Readable.fromWeb(response.body);
  const download_write_stream = fs.createWriteStream(filepath);
  await finished(body.pipe(download_write_stream));
}
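If you would rather not use stream/promises, the 'finish' and 'error' events mentioned above work too; a minimal sketch of that variant:
const fs = require('fs');
const { Readable } = require('stream');

function downloadFile(link, filepath = './download') {
  return fetch(link).then(response => new Promise((resolve, reject) => {
    const body = Readable.fromWeb(response.body);
    const file = fs.createWriteStream(filepath);
    body.pipe(file);
    file.on('finish', resolve); // everything has been flushed to disk
    file.on('error', reject);   // writing to disk failed
    body.on('error', reject);   // the download itself failed
  }));
}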
Good question. Web streams are fairly new and are a different way of handling streams. The WritableStream documentation shows that we can create WritableStreams as follows:
import {
  WritableStream
} from 'node:stream/web';

const stream = new WritableStream({
  write(chunk) {
    console.log(chunk);
  }
});
Then, you could create a custom stream that writes each chunk to disk. An easy way could be:
import fs from 'node:fs';

const download_write_stream = fs.createWriteStream('./the_path');

const stream = new WritableStream({
  write(chunk) {
    download_write_stream.write(chunk);
  },
});

async function downloadFile(link, filename = 'download') {
  const response = await fetch(link);
  const body = await response.body;
  await body.pipeTo(stream);
}
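One caveat with this sketch is that the underlying fs stream is never ended. The WritableStream constructor also accepts a close() callback, so you could end the file stream once the piped ReadableStream is fully consumed, for example:
import { WritableStream } from 'node:stream/web';
import fs from 'node:fs';

const download_write_stream = fs.createWriteStream('./the_path');

const stream = new WritableStream({
  write(chunk) {
    download_write_stream.write(chunk);
  },
  close() {
    // runs after the last chunk has been delivered
    download_write_stream.end();
  },
});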

Pause Node csv-parser when uploading data

I am working with Node csv-parser and my read stream comes from stdin, which I pipe into the parser. Inside the parser's readable handler I make an async HTTP call to upload the data, and during this time I want the parser to pause reading until the async call has finished executing.
var parse = require('csv-parse');
var output = [];

// Create the parser
var parser = parse({delimiter: ',', columns: true, trim: true});

parser.on('readable', function () {
  while (record = parser.read()) {
    console.log('Still reading');
    var jsonRec = convertIpToInt(record);
    var jsonData = JSON.stringify(jsonRec);
    output.push(new Buffer(jsonData + "\n"));
    //parser.pause();
    var assertPromise = uploadBatches1(indexName, function() {
      //parser.resume();
      console.log('Returned');
    });
  }
});

function uploadBatches1(indexToAssert, cb) {
  // uploads data got from parser
}
Since csv-parse exposes a readable stream, you can pause it with parser.pause().
Likewise, when your upload is complete, you can call parser.resume(). You can also check whether you need to resume beforehand using parser.isPaused().
You can read more about readable streams and what the pause method does here:
https://nodejs.org/api/stream.html#stream_readable_pause
Based on the above, you should handle the parser.on('data') event instead of using the while (record = parser.read()) loop; pausing the stream does nothing if you are actively calling the read method yourself.
Consider restructuring your code like so, and then pipe your input (a filesystem read stream, or stdin in your case) into the parser:
var parse = require('csv-parse');
var output = [];

// Create the parser
var parser = parse({delimiter: ',', columns: true, trim: true});

parser.on('data', function (record) {
  console.log('Still reading');
  var jsonRec = convertIpToInt(record);
  var jsonData = JSON.stringify(jsonRec);
  output.push(Buffer.from(jsonData + "\n"));
  parser.pause();
  var assertPromise = uploadBatches1(indexName, function() {
    parser.resume();
    console.log('Returned');
  });
});

function uploadBatches1(indexToAssert, cb) {
  // uploads data got from parser
}
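Since the question reads from stdin rather than a file, feeding the parser could look like this (reusing the parser and output variables from the snippet above):
// the parser is a Transform stream, so stdin can be piped straight into it
process.stdin.pipe(parser);

parser.on('end', function () {
  console.log('Finished parsing', output.length, 'records');
});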

Storing string data with compressed (buffer) data together on Redis

Is there any way to store Buffer data together with string data on Redis?
Pseudo code:
// Data compression
var user = { name: "Xyz" }
var userString = JSON.stringify(user)
var userBuffer = new Buffer(userString, "utf8")
var compressed = zlib.gzip(userBuffer)
// Adding data onto Redis
var data = { id: 1, compressed: compressed }
var dataString = JSON.stringify(data)
redis.set("test", dataString)
Although it seems impossible, I want to ask: is there any way to store compressed data together with a string?
Edit:
After storing compressed data with uncompressed data in the same key, I cannot decompress the user data above.
Pseudo code:
var dataString = redis.get("test")
var data = JSON.parse(dataString)
console.log(data)
// writes { id:1, compressed: { type: Buffer, data: [31, 139, 8...] } }
var compressed = data.compressed
var user = zlib.gunzip(compressed)
// user would be undefined here
const zlib = require('zlib');
const redis = require('redis').createClient();

var user = { name: "Xyz" };
var userString = JSON.stringify(user);
var userBuffer = Buffer.from(userString, "utf8");
var compressed = zlib.gzipSync(userBuffer);

var data = { id: 1, compressed: compressed };
var dataString = JSON.stringify(data);
console.log('Compressed', dataString);

redis.set('mykey', dataString, err => {
  if (err) throw err;

  redis.get('mykey', (err, dataString) => {
    if (err) throw err;

    var data = JSON.parse(dataString);
    var buffer = Buffer.from(data.compressed);
    var uncompressed = zlib.gunzipSync(buffer);
    console.log('Uncompressed', JSON.parse(uncompressed));

    redis.end(false);
  });
});
The key is to convert the string-representation of the Buffer back to a proper instance before decompressing it (using Buffer.from()).
However, the JSON-representation of a Buffer isn't very concise, so you probably should look into an alternative way of storing the data in Redis (one where you don't have to stringify the buffer). Perhaps using a hash.
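A hedged sketch of the hash idea, assuming the ioredis client (whose Buffer-returning command variants such as hgetBuffer make reading raw bytes back easy); the key and field names are only illustrative:
const Redis = require('ioredis');
const zlib = require('zlib');
const redis = new Redis();

async function demo() {
  const user = { name: "Xyz" };
  const compressed = zlib.gzipSync(JSON.stringify(user));

  // store the plain id and the raw gzip bytes as separate hash fields
  await redis.hset('test', 'id', '1', 'compressed', compressed);

  // read the binary field back as a Buffer, not as a UTF-8 string
  const buffer = await redis.hgetBuffer('test', 'compressed');
  console.log(JSON.parse(zlib.gunzipSync(buffer))); // { name: 'Xyz' }
}

demo().finally(() => redis.quit());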

Azure Functions - NodeJS - Response Body as a Stream

I'd like to return a file from Blob Storage when you hit a given Azure Function end-point. This file is binary data.
Per the Azure Storage Blob docs, the most relevant call appears to be the following, since it's the only one that doesn't require writing to an interim file:
getBlobToStream
However this call gets the Blob and writes it to a stream.
Is there a way with Azure Functions to use a Stream as the value of res.body so that I can get the Blob Contents from storage and immediately write it to the response?
To add some code, I'm trying to get something like this to work:
'use strict';
const azure = require('azure-storage'),
      stream = require('stream');

const BLOB_CONTAINER = 'DeContainer';

module.exports = function(context) {
  var file = context.bindingData.file;
  var blobService = azure.createBlobService();
  var outputStream = new stream.Writable();

  blobService.getBlobToStream(BLOB_CONTAINER, file, outputStream, function(error, serverBlob) {
    if (error) {
      FileNotFound(context);
    } else {
      context.res = {
        status: 200,
        headers: {
        },
        isRaw: true,
        body: outputStream
      };
      context.done();
    }
  });
};

function FileNotFound(context) {
  context.res = {
    status: 404,
    headers: {
      "Content-Type": "application/json"
    },
    body: { "Message": "No esta aqui!." }
  };
  context.done();
}
Unfortunately we don't have streaming support implemented in NodeJS just yet - it's on the backlog: https://github.com/Azure/azure-webjobs-sdk-script/issues/1361
If you're not tied to NodeJS and are open to using a C# function instead, you can use the storage SDK object directly in your input bindings and stream the request output, instead of using the intermediate object approach.
While @Matt Manson's answer is definitely correct based on the way I asked my question, the following code snippet might be more useful for someone who stumbles across this question.
While I can't send the Stream to the response body directly, I can use a custom stream which captures the data into a Uint8Array, and then sends that to the response body.
NOTE: If the file is REALLY big, this will use a lot of memory.
'use strict';
const azure = require('azure-storage'),
      stream = require('stream');

const BLOB_CONTAINER = 'deContainer';

module.exports = function(context) {
  var file = context.bindingData.file;
  var blobService = azure.createBlobService();
  var outputStream = new stream.Writable();
  outputStream.contents = new Uint8Array(0); // Initialize contents.

  // Override the write to store the value to our "contents"
  outputStream._write = function (chunk, encoding, done) {
    var curChunk = new Uint8Array(chunk);
    var tmp = new Uint8Array(this.contents.byteLength + curChunk.byteLength);
    tmp.set(this.contents, 0);
    tmp.set(curChunk, this.contents.byteLength);
    this.contents = tmp;
    done();
  };

  blobService.getBlobToStream(BLOB_CONTAINER, file, outputStream, function(error, serverBlob) {
    if (error) {
      FileNotFound(context);
    } else {
      context.res = {
        status: 200,
        headers: {
        },
        isRaw: true,
        body: outputStream.contents
      };
      context.done();
    }
  });
};

function FileNotFound(context) {
  context.res = {
    status: 404,
    headers: {
      "Content-Type": "application/json"
    },
    body: { "Message": "No esta aqui!" }
  };
  context.done();
}
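With the current @azure/storage-blob SDK the manual Uint8Array bookkeeping isn't needed, because BlockBlobClient can download straight into a Buffer. A hedged sketch of the same handler (the connection string setting, container name and 404 payload are placeholders carried over from above):
const { BlobServiceClient } = require('@azure/storage-blob');

const BLOB_CONTAINER = 'deContainer';

module.exports = async function (context) {
  const file = context.bindingData.file;
  const blobClient = BlobServiceClient
    .fromConnectionString(process.env.AzureWebJobsStorage)
    .getContainerClient(BLOB_CONTAINER)
    .getBlockBlobClient(file);

  try {
    // still buffers the whole blob in memory, so the big-file caveat applies
    const contents = await blobClient.downloadToBuffer();
    context.res = { status: 200, isRaw: true, body: contents };
  } catch (error) {
    context.res = {
      status: 404,
      headers: { "Content-Type": "application/json" },
      body: { "Message": "No esta aqui!" }
    };
  }
};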
I tried @Doug's solution from the last comment above, with a few minor mods in my Azure function, and so far, after trying 20 different ideas, this is the only one that actually delivered the file to the browser! Thank you, @Doug...
const fs = require("fs");
const stream = require("stream");
...
const AzureBlob = require('#[my_private_artifact]/azure-blob-storage');
const azureStorage = new AzureBlob(params.connectionString);
//Override the write to store the value to our "contents" <-- Doug's solution
var outputStream = new stream.Writable();
outputStream.contents = new Uint8Array(0);//Initialize contents.
outputStream._write = function (chunk, encoding, done) {
var curChunk = new Uint8Array(chunk);
var tmp = new Uint8Array(this.contents.byteLength + curChunk.byteLength);
tmp.set(this.contents, 0);
tmp.set(curChunk, this.contents.byteLength);
this.contents = tmp;
done();
};
let azureSpeedResult = await azureStorage.downloadBlobToStream(params.containerName, params.objectId, outputStream);
let headers = {
"Content-Length": azureSpeedResult.size,
"Content-Type": mimeType
};
if (params.action == "download") {
headers["Content-Disposition"] = "attachment; filename=" + params.fileName;
}
context.res = {
status: 200,
headers: headers,
isRaw: true,
body: outputStream.contents
};
context.done();
...

Node js, piping pdfkit to a memory stream

I am using pdfkit on my Node server, typically creating PDF files and then uploading them to S3.
The problem is that the pdfkit examples pipe the PDF doc into a Node write stream that writes the file to disk. I followed the example and it worked correctly, but my requirement now is to pipe the PDF doc to a memory stream rather than saving it on disk (I am uploading to S3 anyway).
I've followed some Node memory stream approaches, but none of them seem to work with the PDF pipe; I could only write strings to memory streams.
So my question is: how do I pipe the pdfkit output to a memory stream (or something alike) and then read it as an object to upload to S3?
var fsStream = fs.createWriteStream(outputPath + fileName);
doc.pipe(fsStream);
An updated answer for 2020. There is no need to introduce a new memory stream because "PDFDocument instances are readable Node streams".
You can use the get-stream package to make it easy to wait for the document to finish before passing the result back to your caller.
https://www.npmjs.com/package/get-stream
const PDFDocument = require('pdfkit')
const getStream = require('get-stream')

const pdf = async () => {
  const doc = new PDFDocument()
  doc.text('Hello, World!')
  doc.end()
  return await getStream.buffer(doc)
}

// Caller could do this:
const pdfBuffer = await pdf()
const pdfBase64string = pdfBuffer.toString('base64')
You don't have to return a buffer if your needs are different. The get-stream readme offers other examples.
There's no need to use an intermediate memory stream [1] – just pipe the pdfkit output stream directly into an HTTP upload stream.
In my experience, the AWS SDK is garbage when it comes to working with streams, so I usually use request.
var upload = request({
  method: 'PUT',
  url: 'https://bucket.s3.amazonaws.com/doc.pdf',
  aws: { bucket: 'bucket', key: ..., secret: ... }
});

doc.pipe(upload);
[1] In fact, it is usually undesirable to use a memory stream because that means buffering the entire thing in RAM, which is exactly what streams are supposed to avoid!
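These days request is deprecated; with the AWS SDK v3 the Upload helper from @aws-sdk/lib-storage accepts a stream as the body and performs the multipart upload for you, so the same no-buffering approach still works. A minimal sketch (bucket, key and region are placeholders), assuming it runs inside an async function:
const { S3Client } = require('@aws-sdk/client-s3');
const { Upload } = require('@aws-sdk/lib-storage');

const upload = new Upload({
  client: new S3Client({ region: 'us-east-1' }),
  params: {
    Bucket: 'bucket',
    Key: 'doc.pdf',
    Body: doc,                 // the pdfkit document is a readable stream
    ContentType: 'application/pdf',
  },
});

doc.end();
await upload.done();           // resolves once the upload has finished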
You could try something like this, and upload it to S3 inside the end event.
var pdfkit = require('pdfkit');
var MemoryStream = require('memorystream');

var doc = new pdfkit();
var memStream = new MemoryStream(null, {
  readable: false
});

doc.pipe(memStream);

doc.on('end', function () {
  var buffer = Buffer.concat(memStream.queue);
  awsservice.putS3Object(buffer, fileName, fileType, folder).then(function () { }, reject);
});
A tweak of @bolav's answer worked for me when working with pdfmake rather than pdfkit. First you need to add memorystream to your project using npm or yarn.
const MemoryStream = require('memorystream');
const PdfPrinter = require('pdfmake');

const pdfPrinter = new PdfPrinter();
const docDef = {};
const pdfDoc = pdfPrinter.createPdfKitDocument(docDef);

const memStream = new MemoryStream(null, {readable: false});
const pdfDocStream = pdfDoc.pipe(memStream);

pdfDoc.end();

pdfDocStream.on('finish', () => {
  console.log(Buffer.concat(memStream.queue));
});
My code to return a base64 for pdfkit:
import * as PDFDocument from 'pdfkit'
import getStream from 'get-stream'

const pdf = {
  createPdf: async (text: string) => {
    const doc = new PDFDocument()
    doc.fontSize(10).text(text, 50, 50)
    doc.end()
    const data = await getStream.buffer(doc)
    let b64 = Buffer.from(data).toString('base64')
    return b64
  }
}

export default pdf
Thanks to Troy's answer, mine worked with get-stream as well. The difference was I did not convert it to base64string, but rather uploaded it to AWS S3 as a buffer.
Here is my code:
import PDFDocument from 'pdfkit'
import getStream from 'get-stream';
import { PutObjectCommand } from '@aws-sdk/client-s3';
import s3Client from 'your s3 config file';

const pdfGenerator = () => {
  const doc = new PDFDocument();
  doc.text('Hello, World!');
  doc.end();
  return doc;
}

const uploadFile = async () => {
  const pdf = pdfGenerator();
  const pdfBuffer = await getStream.buffer(pdf)
  await s3Client.send(
    new PutObjectCommand({
      Bucket: 'bucket-name',
      Key: 'filename.pdf',
      Body: pdfBuffer,
      ContentType: 'application/pdf',
    })
  );
}

uploadFile()
