How to use async functions with node-pdfkit? - node.js

I am trying to create a PDF file with PDFKit. I insert an image like this:
var PDFDocument = require('pdfkit');
var doc = new PDFDocument();
doc.image(some_image_as_buffer);
and it works as expected. But now I want the image to be trimmed, and I found GraphicsMagick for node.js. The problem is making it work with PDFKit: doc.image expects a filename or a buffer, and since I already have a buffer I want to keep working with buffers (there is no file anywhere because the buffer comes directly from the database).
The trimming works like this:
var gm = require('gm');
gm(some_image_as_buffer, 'image.png')
    .trim()
    .toBuffer(function(err, trimmed_image_buffer) {
        // trimmed_image_buffer is correct,
        // but I can't put it to the document like this:
        doc.image(trimmed_image_buffer);
        // because I don't know which page and/or position
        // the doc is currently on, due to the asynchronous
        // nature of this callback.
    });
UPDATE:
For clarification: I want to be able to use the asynchronously trimmed image in the synchronous code for PDFKit. PDFKit only works synchronously, and gm doesn't offer a synchronous interface.
UPDATE2:
var gm = require('gm');
gm(some_image_as_buffer, 'image.png')
    .trim()
    .toBuffer(function(err, trimmed_image_buffer) {
        // trimmed_image_buffer is correct,
        // but I can't put it to the document like this:
        doc.image(trimmed_image_buffer);
        // because I don't know which page and/or position
        // the doc is currently on, due to the asynchronous
        // nature of this callback.
    });
doc.text('some text');
// is not guaranteed to run after the image is inserted,
// and a couple of hundred lines more follow
After the last line in this example there are many more lines of code that add content to the PDF, and I don't want to put everything (a couple of hundred lines) in one callback just because I need one asynchronous function to manipulate the image.
Is there any way to make this manipulation synchronous?

UPDATE_2
You are basically asking to stop execution of your code until some asynchronous operation has completed. That is not possible in the general case.
In the case of the gm module, it is not possible either. The gm module spawns a new process to execute a command (in your case trim()), and the API for spawning new processes is asynchronous by its very nature.
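To illustrate the point, here is a minimal sketch (the command-line arguments are made up for illustration, not gm's actual internals): spawning a child process in Node only reports its result through events and callbacks, so there is nothing you could block on.
var spawn = require('child_process').spawn;
// spawn() returns immediately; output and exit code arrive
// later through events on the returned child process object
var child = spawn('gm', ['convert', '-trim', 'in.png', 'out.png']);
child.on('close', function(code) {
    console.log('gm exited with code ' + code);
});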
UPDATE
To make use of promises in your scenario:
var gm = require('gm'),
    Q = require('q'),
    PDFDocument = require('pdfkit'),
    doc = new PDFDocument();

function getTrimmedImage(some_image_as_buffer){
    var deferred = Q.defer();
    gm(some_image_as_buffer, 'image.png')
        .trim()
        .toBuffer(function(err, trimmed_image_buffer) {
            if(err) { deferred.reject(err); }
            else { deferred.resolve(trimmed_image_buffer); }
        });
    return deferred.promise;
}
// ...all manipulations before the trimmed image is inserted go here...
getTrimmedImage(some_image_as_buffer).then(
    function(trimmed_image_buffer){
        doc.image(trimmed_image_buffer);
        // ...all manipulations after the trimmed image is inserted go here...
    }
);
As I wrote in the comment above, a promise-based solution should work elegantly. I use the Q library here, but any other promise library will do the job as well.
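If you prefer to avoid a dependency, the same wrapper can be written with native Promises; here is a sketch under the assumption that your Node version ships with them:
function getTrimmedImage(some_image_as_buffer){
    return new Promise(function(resolve, reject){
        gm(some_image_as_buffer, 'image.png')
            .trim()
            .toBuffer(function(err, trimmed_image_buffer) {
                if(err) { reject(err); }
                else { resolve(trimmed_image_buffer); }
            });
    });
}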
One option would be to collect all resources of an asynchronous nature before starting to manipulate the pdf. Then you are guaranteed that no race conditions occur, though it may slow down the whole process. I used a toy example to have it working in the browser environment; let me know if you have any problems converting it to your use case:
function getAsyncResource(){
    var defer = Q.defer();
    setTimeout(function(){
        var result = "Some value: " + Date.now();
        console.log("Async resource resolved: " + result);
        defer.resolve(result);
    }, Math.random() * 5000);
    return defer.promise;
}

function someOperationThatNeedsAsyncResources(A, B, C){
    console.log("Have all resources: ", A, B, C);
}

var A = getAsyncResource(),
    B = getAsyncResource(),
    C = getAsyncResource();

Q.all([A,B,C]).spread(someOperationThatNeedsAsyncResources);
<script src="//cdnjs.cloudflare.com/ajax/libs/q.js/1.1.2/q.js"></script>
The other option would be to split the process into steps, like so:
function getAsyncResource(value){
    var defer = Q.defer();
    setTimeout(function(){
        var result = "Some value: " + value;
        console.log("Async resource resolved: " + result);
        defer.resolve(result);
    }, Math.random() * 5000);
    return defer.promise;
}

function nextStep(resource){
    console.log("Next step: " + resource);
}

var A = getAsyncResource("A"),
    B = getAsyncResource("B"),
    C = getAsyncResource("C");

A.then(nextStep)
    .then(function(){return B;})
    .then(nextStep)
    .then(function(){return C;})
    .then(nextStep);
<script src="//cdnjs.cloudflare.com/ajax/libs/q.js/1.1.2/q.js"></script>
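For completeness, the collect-everything-first pattern also works with native Promises, no library required; a sketch (note that Promise.all resolves with an array instead of spreading arguments like Q's .spread()):
function getAsyncResource(value){
    return new Promise(function(resolve){
        setTimeout(function(){
            resolve("Some value: " + value);
        }, Math.random() * 5000);
    });
}
Promise.all([getAsyncResource("A"), getAsyncResource("B"), getAsyncResource("C")])
    .then(function(results){
        console.log("Have all resources: ", results[0], results[1], results[2]);
    });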

Related

Having difficulties with node.js res.send() loop

I'm attempting to write a very basic scraper that loops through a few pages and outputs all the data from each url to a single json file. The url structure goes as follows:
http://url/1
http://url/2
http://url/n
Each of the urls has a table, which contains information pertaining to the ID of the url. This is the data I am attempting to retrieve and store inside a json file.
I am still extremely new to this and having a difficult time moving forward. So far, my code looks as follows:
app.get('/scrape', function(req, res){
    var json;
    for (var i = 1163; i < 1166; i++){
        url = 'https://urlgoeshere.com' + i;
        request(url, function(error, response, html){
            if(!error){
                var $ = cheerio.load(html);
                var mN, mL, iD;
                var json = { mN : "", mL : "", iD: ""};
                $('html body div#wrap h2').filter(function(){
                    var data = $(this);
                    mN = data.text();
                    json.mN = mN;
                })
                $('table.vertical-table:nth-child(7)').filter(function(){
                    var data = $(this);
                    mL = data.text();
                    json.mL = mL;
                })
                $('table.vertical-table:nth-child(8)').filter(function(){
                    var data = $(this);
                    iD = data.text();
                    json.iD = iD;
                })
            }
            fs.writeFile('output' + i + '.json', JSON.stringify(json, null, 4), function(err){
                console.log('File successfully written! - Check your project directory for the output' + i + '.json file');
            })
        });
    }
    res.send(json);
})

app.listen('8081')
console.log('Magic happens on port 8081');
exports = module.exports = app;
When I run the code as displayed above, the output within the output.json file only contains data for the last url. I presume that's because I attempt to save all the data within the same variable?
If I include res.send() inside the loop, so the data writes after each page, I receive the error that multiple headers cannot be sent.
Can someone provide some pointers as to what I'm doing wrong? Thanks in advance.
Ideal output I would like to see:
Page ID: 1
Page Name: First Page
Color: Blue
Page ID: 2
Page Name: Second Page
Color: Red
Page ID: n
Page Name: Nth Page
Color: Green
I can see a number of problems:
Your loop doesn't wait for the asynchronous operations it starts, so you call res.send() before those operations have completed.
Inappropriate use of cheerio's .filter().
Your json variable is constantly being overwritten so it only has the last data in it.
Your loop variable i would lose its value by the time you tried to use it in the fs.writeFile() statement.
Here's one way to deal with those issues:
const rp = require('request-promise');
const fsp = require('fs').promises;

app.get('/scrape', async function(req, res) {
    let data = [];
    for (let i = 1163; i < 1166; i++) {
        const url = 'https://urlgoeshere.com/' + i;
        try {
            const html = await rp(url);
            const $ = cheerio.load(html);
            const mN = $('html body div#wrap h2').first().text();
            const mL = $('table.vertical-table:nth-child(7)').first().text();
            const iD = $('table.vertical-table:nth-child(8)').first().text();
            // create object for this iteration of the loop
            const obj = {iD, mN, mL};
            // add this object to our overall array of all the data
            data.push(obj);
            // write a file specifically for this invocation of the loop
            await fsp.writeFile('output' + i + '.json', JSON.stringify(obj, null, 4));
            console.log('File successfully written! - Check your project directory for the output' + i + '.json file');
        } catch(e) {
            // stop further processing on an error
            console.log("Error scraping ", url, e);
            res.sendStatus(500);
            return;
        }
    }
    // send all the data we accumulated (in an array) as the final result
    res.send(data);
});
Things different in this code:
Switch over all variable declarations to let or const
Declare route handler as async so we can use await inside.
Use the request-promise module instead of request. It has the same features, but returns a promise instead of using a plain callback.
Use the promise-based fs module (in latest versions of node.js).
Use await in order to serialize our two asynchronous (now promise-returning) operations so the for loop will pause for them and we can have proper sequencing.
Catch errors and stop further processing and return an error status.
Accumulate an object of data for each iteration of the for loop into an array.
Change .filter() to .first().
Make the response to the request handler be a JSON array of data.
FYI, you can tweak the organization of the data in obj however you want, but the point here is that you end up with an array of objects, one for each iteration of the for loop.
EDIT Jan, 2020 - request() module in maintenance mode
FYI, the request module and its derivatives like request-promise are now in maintenance mode and will not be actively developed to add new features. You can read more about the reasoning here. There is a list of alternatives in this table with some discussion of each one. I have been using got() myself and it's built from the beginning to use promises and is simple to use.
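For example, the request in the loop above could be swapped to got() with a one-line change; a sketch (got resolves with a response object whose body property holds the HTML):
const got = require('got');
// inside the loop, instead of: const html = await rp(url);
const html = (await got(url)).body;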

asynchronous version of JSON.stringify and JSON.parse

var assert = require('assert');
var parseJSON = require('json-parse-async');

var contact = new Object();
contact.firstname = "Jesper";
contact.surname = "Aaberg";
contact.phone = ["555-0100", "555-0120"];

var contact2 = new Object();
contact2.firstname = "JESPER";
contact2.surname = "AABERG";
contact2.phone = ["555-0100", "555-0120"];

contact.toJSON = function(key) {
    var replacement = new Object();
    for (var val in this) {
        if (typeof(this[val]) === 'string')
            replacement[val] = this[val].toUpperCase();
        else
            replacement[val] = this[val];
    }
    return replacement;
};

var jsonText = JSON.stringify(contact);
contact = JSON.parse(jsonText);

console.log(contact);
console.log(contact2);
assert.deepEqual(contact, contact2, 'these two objects are the same');
What are the asynchronous equivalents of JSON.parse, JSON.stringify, and assert.deepEqual? I am trying to create a race condition and non-deterministic behavior within the code above, but I have not been able to find non-blocking, asynchronous equivalents of the functions mentioned.
node.js does not have an actual asynchronous JSON parser built-in. If you want something that will actually do the parsing outside the main node.js Javascript thread, then you would have to find a third party module that parses the JSON outside of the Javascript thread (e.g. in a native code thread or in some other process). There are some modules in NPM that advertise themselves as asynchronous such as async-json-parser or async-json-parse or json-parse-async. You would have to verify that whichever implementation you were interested in was truly an asynchronous implementation (your Javascript continues to run while the parsing happens in the background).
But, reading the detail in your question about the problem you're trying to solve, it doesn't sound like you actually need a parser that truly happens in the background. To give you your ability to test what you're trying to test, it seems to me like you just need an indeterminate finish that allows other code to run before the parsing finishes. That can be done by wrapping the synchronous JSON.parse() in a setTimeout() with a promise that has a random delay. That will give some random amount of time for other code to run (to try to test for your race conditions). That could be done like this:
JSON.parseAsyncRandom = function(str) {
    return new Promise(function(resolve, reject) {
        // use a random 0-10 second delay
        setTimeout(function() {
            try {
                resolve(JSON.parse(str));
            } catch(e) {
                reject(e);
            }
        }, Math.floor(Math.random() * 10000));
    });
};

JSON.parseAsyncRandom(str).then(function(obj) {
    // process obj here
}, function(err) {
    // handle err here
});
JSON.parseAsyncRandom(str).then(function(obj) {
// process obj here
}, function(err) {
// handle err here
});
Note: This is not true asynchronous execution. It's an asynchronous result (in that it arrives some random time later and other code will run before the result arrives), but true asynchronous execution happens in the background in parallel with other JS running and this isn't quite that. But, given your comment that you just want variable and asynchronous results for testing purposes, this should do that.
I've recently faced this problem myself, so I decided to create a library to handle JSON parsing in a really asynchronous way.
The idea behind it is to divide the parsing process into chunks, and then run each separately in the event loop so that other events (user interactions, etc) can still be evaluated within a few milliseconds, keeping the UI interactive.
If you are interested, the library it's called RAJI and you can find it here: https://github.com/federico-terzi/raji
After installing RAJI, you can then convert your synchronous JSON.parse calls into async raji.parse calls, such as:
const object = await parse(payload);
These calls won't block the UI.
You can use 'bluebird', as in this example, to convert a function call into a promise.
The code below uses JavaScript ES6.
const Promise = require('bluebird');

function stringifyPromise(jsonText) {
    return Promise.try(() => JSON.stringify(jsonText));
}

function parsePromise(str) {
    return Promise.try(() => JSON.parse(str));
}

stringifyPromise(contact)
    .then(jsonText => parsePromise(jsonText))
    .then(contact => {
        assert.deepEqual(contact, contact2, 'these two objects are the same');
    });
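On Node versions with async/await you can get the same behavior without bluebird; a sketch (like Promise.try, an async function turns a synchronous throw into a rejected promise):
async function stringifyPromise(value) {
    // a throw here (e.g. on circular references) becomes a rejected promise
    return JSON.stringify(value);
}

async function parsePromise(str) {
    // a SyntaxError here becomes a rejected promise, not a synchronous throw
    return JSON.parse(str);
}

stringifyPromise(contact)
    .then(jsonText => parsePromise(jsonText))
    .then(result => assert.deepEqual(result, contact2, 'these two objects are the same'))
    .catch(err => console.error(err));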

Nodejs, not waiting for Redis query to complete before continuing with execution

Using Node.js, I need to load three files dynamically with a require() function, fetching each file path from Cassandra. From each file I need to fetch data that is in Redis and do some validation before loading the next file from Cassandra. The issue: before the validation logic executes and provides a result, the next file starts to load in parallel. The validation result arrives after the second file has loaded, which shouldn't happen. The second file should load only after the first file's validation logic has completed, and only if the validation result is a success. How do I pause, or wait for Redis to complete the query, in node.js?
node.js
"use strict";
var express = require('express');
var cassandra = require('cassandra-driver');
var app = express();
var Promise = require('bluebird');
var redis = Promise.promisifyAll(require('redis'));
var redisClient = redis.createClient(6379, '127.0.0.1');
var client = new cassandra.Client({contactPoints: ['127.0.0.1'], keyspace: 'poc'});
client.execute("SELECT file FROM testqry1", function (err, result) {
if (!err){
if ( result.rows.length > 0 ) {
for(var i=0; i< result.rows.length; i++){
var filePath=result.rows[i].get('file');
var newdat=Promise.promisifyAll(require(filePath));
var res = newdat(redisClient);
console.log('res:::'+res);
if (res=='failed'){
return;
}
}
} else {
console.log("No results");
}
}
});
file1.js
var crypto = require('crypto');
var redisValue='';

module.exports = function(redisclient){
    redisclient.hmgetAsync("testdata", "text1").then(function(redisValue){
        console.log('value from redis::'+redisValue)
    }).then(function(){
        var hashedUserID = crypto.createHmac('sha256', 'sample')
            .update('helloworld')
            .digest('hex');
        function disp(value) {
            console.log('value::'+value);
        }
        disp(hashedUserID);
        console.log('redisValue::'+redisValue);
        if(hashedUserID =='e043e7e68058c8a4cd686db38f01771bd7a04b8bb9a658d3cb40d0be45935094'){
            redata='true';
        }else{
            redata='false';
        }
        console.log('redata::'+redata)
    })
}
file2.js & file3.js have the same content:
var result1='';

module.exports = function(redisclient){
    redisclient.hmget("testdata", "text1" , function(err, redisValue){
        console.log('redisValue2 == %s',redisValue);
        if(redisValue == 'test value'){
            result1 = "success";
        }else{
            result1="failed";
        }
    });
    return result1;
}
Output :
res:::undefined
res:::
res:::
value from redis::test data here
value::e043e7e68058c8a4cd686db38f01771bd7a04b8bb9a658d3cb40d0be45935094
redisValue::
redata::true
redisValue2 == test data here
redisValue3 == hello world test data
You say that file2/3 are "same content" but they aren't in one critical area. Per Bluebird's documentation for promisifyAll (see http://bluebirdjs.com/docs/api/promise.promisifyall.html), this feature creates an ...Async version of each core function in the Redis client. You call hmgetAsync in your first case, but you only call hmget in the others.
This is important because you're using an async pattern with a non-async code structure. In file2/3 you set result1 inside an async callback, but then return it below before the call could possibly have completed.
You have two choices:
1: You can convert file2/3/etc to a fully traditional pattern by passing in a callback in addition to the redis client:
module.exports = function(redisclient, callback){
Instead of returning result1, you would then call the callback with this value:
if(redisValue == 'test value'){
    callback(null, "success");
} else {
    callback("failed", null);
}
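The caller in your main file would then wait for each callback before moving on; a hypothetical sketch (filePath comes from the Cassandra row, as in your code):
var validate = require(filePath);
validate(redisClient, function(err, result1){
    if (err) {
        // validation failed; stop processing further files
        return;
    }
    // result1 === 'success'; safe to load and validate the next file here
});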
2: You could convert file2/3/..N to be Promise-based, in which case you do not need to promisifyAll(require(...)) them - you can simply require() them. Such a pattern might look like:
module.exports = function(redisclient){
    return redisclient.hmgetAsync("testdata", "text1");
};
This is a much simpler and cleaner option, and if you keep going with it you can see that you could probably even eliminate the require() and simply do the hmgetAsync in file1 with appropriate data returned by Cassandra. But it's hard to know without seeing your specific application needs. In any event, Promise-based patterns are generally much shorter and cleaner, but not always better - there IS a moderate performance overhead for using them. It's your call which way you go - either will work.
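To tie option 2 back into your main file, the loop itself also has to become sequential. A sketch under the assumption that file2/3 export the promise-based version above (the loose comparison mirrors the check in your file2/file3):
client.execute("SELECT file FROM testqry1", function (err, result) {
    if (err || result.rows.length === 0) { return console.log("No results"); }
    // start from a resolved promise and chain one validation per row,
    // so each file is processed only after the previous one succeeds
    result.rows.reduce(function (chain, row) {
        return chain.then(function () {
            var validate = require(row.get('file'));
            return validate(redisClient).then(function (redisValue) {
                if (redisValue != 'test value') {
                    throw new Error('validation failed for ' + row.get('file'));
                }
            });
        });
    }, Promise.resolve()).catch(function (e) {
        console.log(e.message);
    });
});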

Nodejs run promises sequentially

Dear all,
How can I run promises in Node.js sequentially? In the following example, I loop through an array of hours and, for each hour, fetch a result from the database. The issue: I am getting results, but I want them in the same sequential order as the hours.
angular.forEach(SharedVar.getCategories(), function (h) {
    t = h.split('-', 2);
    t = t[0];
    RESTApi.getAnswerdCallsByHour(t).then(function (answerdTotal) {
        $scope.answerdCallsTotalByHour.push(answerdTotal);
        var d = SharedVar.getDataS();
        d[count] = answerdTotal;
        SharedVar.setDataS(d);
        count++;
    });
});
Thanks,
var promise = Promise.resolve(); // make an empty promise in the way you do it with your promise library

angular.forEach(SharedVar.getCategories(), function (h) {
    promise.then(function() {
        return RESTApi.getAnswerdCallsByHour(t).then(function (answerdTotal) {});
    });
});
The way to do it sequentially is to perform one request and start the next request inside the promise.
I think the better approach by far is to extend your SharedVar.setDataS(d) function so that it does not depend on receiving the data sequentially, like having a SharedVar.setDataS(d, index) and using the config var in your $http.get (or whatever) function call inside your RESTApi to carry that index all the way to the promise.
If your RESTApi looks like this:
var RESTApi = {
    getAnswerdCallsByHour : function(hour) {
        var url = "bla.com/myservice?hour=" + hour;
        return $http.get(url).data;
    }
    // Some other Methods...
}
Then you need a way to pass something along to "reorder" your data when it arrives asynchronously. This could be an index you count up, or in your case maybe the hour variable:
var RESTApi = {
    getAnswerdCallsByHour : function(hour) {
        var url = "bla.com/myservice?hour=" + hour;
        var config = {};
        config.hour = hour;
        return $http.get(url, config); // Return the promise, not just data or a specific field
    }
    // Some other Methods...
}
Now when your promise is fulfilled you can access your "hour" variable like so:
var d = SharedVar.getDataS();
d[promise.config.hour] = promise.data;
SharedVar.setDataS(d);
Now you know which piece of data correlates to which request, and you do not need to receive the data in order. The last piece only works properly when hours run sequentially from 0 to 23; if that isn't the case, you need to:
var RESTApi = {
    getAnswerdCallsByHour : function(hour, index) {
        var url = "bla.com/myservice?hour=" + hour;
        var config = {};
        config.index = index;
        return $http.get(url, config);
    }
    // Some other Methods...
}
...
var d = SharedVar.getDataS();
d[promise.config.index] = promise.data;
SharedVar.setDataS(d);
Safari's answer is how I typically handle this. (Sorry, I don't have enough rep to comment yet...) You were experiencing problems with it because the example provided does not capture and use the new promise in subsequent loops. See my comments on the slightly modified version here:
var promise = Promise.resolve();

angular.forEach(SharedVar.getCategories(), function (h) {
    // declare t with var inside this callback so each queued promise
    // closes over its own hour value rather than a shared outer variable
    var t = h.split('-', 2)[0];
    // You must capture the new promise here; the next loop will wait
    // for the promise returned from getAnswerdCallsByHour to resolve.
    promise = promise.then(function() {
        // Halt downstream promises until this returned promise finishes
        return RESTApi.getAnswerdCallsByHour(t).then(function (answerdTotal) {
            $scope.answerdCallsTotalByHour.push(answerdTotal);
            var d = SharedVar.getDataS();
            d[count] = answerdTotal;
            SharedVar.setDataS(d);
            count++;
        });
    });
});
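An equivalent way to build the same chain, if you prefer to avoid reassigning the promise variable, is Array.prototype.reduce; a sketch under the same assumptions about RESTApi and $scope (abbreviated to just the push for brevity):
SharedVar.getCategories().reduce(function (chain, h) {
    var t = h.split('-', 2)[0];
    return chain.then(function () {
        return RESTApi.getAnswerdCallsByHour(t);
    }).then(function (answerdTotal) {
        $scope.answerdCallsTotalByHour.push(answerdTotal);
    });
}, Promise.resolve());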

Non-Blocking MongoDB + NodeJS

I am trying to find a document in a MongoDB collection in a NodeJS environment. Is there any way to do the following?
This is not working:
var foo = function (id) {
    // find document
    var document = database.find(id);
    // do whatever with the document
    ...
}
This way blocks execution:
var foo = function (id) {
    // find document
    var document = database.find(id);
    while (!database.find.done) {
        // wait
    }
    // do whatever with the document
    ...
}
What I want to do :
var foo = function (id) {
    // find document
    var document = database.find(id);
    // pause out of execution flow
    // continue after find is finished
    // do whatever with the document
    ...
}
I know I can use a callback but is there a simpler way of just "pausing" and then "continuing" in NodeJS/JavaScript? Sorry, I am still pretty new to web development.
This is not possible. If you are concerned about the readability of callbacks, you may consider using a language that compiles to JavaScript. LiveScript, for example, has so-called "Backcalls"; they make the code appear to pause, but compile to a callback function:
For example:
result <- mongodb.find id
console.log result
compiles to:
mongodb.find(id, function(result){
    return console.log(result);
});
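For reference, the same flow in plain JavaScript with the official MongoDB driver's callback style would look something like this; a sketch (findOne and the err-first callback are standard driver API, but db and the collection name here are hypothetical placeholders):
var foo = function (id) {
    // everything that needs the document goes inside the callback
    db.collection('documents').findOne({ _id: id }, function (err, document) {
        if (err) { return console.error(err); }
        // do whatever with the document
        console.log(document);
    });
};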
