How to convert pdf from buffer using node-imagemagick - node.js

I want to convert a PDF using ImageMagick from a buffer that I get from a URL pointing at a PDF (not a local PDF file, so I don't have to save the PDF first and then run ImageMagick). I use node-imagemagick, and the docs only show the convert() method with a file path, like below (kittens.jpg):
im.convert(['kittens.jpg', '-resize', '25x120', 'kittens-small.jpg'],
  function(err, stdout){
    if (err) throw err;
    console.log('stdout:', stdout);
  });
But if I want to use a buffer, how can I do this? Here is my code:
request.get({ url: 'url to pdf', encoding: null }, (err, resp, body) => {
  im.convert([body, '-resize', '25x120', 'kittens-small.jpg'],
    function(err, stdout){
      if (err) throw err;
      console.log('stdout:', stdout);
    });
})

You may check the source code of node-imagemagick: Link to code.
node-imagemagick is a wrapper for the convert tool.
The convert function is defined as follows:
exports.convert = function(args, timeout, callback) {
  var procopt = {encoding: 'binary'};
  if (typeof timeout === 'function') {
    callback = timeout;
    timeout = 0;
  } else if (typeof timeout !== 'number') {
    timeout = 0;
  }
  if (timeout && (timeout = parseInt(timeout)) > 0 && !isNaN(timeout))
    procopt.timeout = timeout;
  return exec2(exports.convert.path, args, procopt, callback);
}
exports.convert.path = 'convert';
It assumes you give the same arguments as on the command line, i.e. a source image path. However, convert supports input from stdin, and this is how you can feed your PDF data to the process.
In the source code there is a useful example: the definition of a resize function, which accepts binary data and feeds it into the convert function with the appropriate parameters.
var resizeCall = function(t, callback) {
  var proc = exports.convert(t.args, t.opt.timeout, callback);
  if (t.opt.srcPath.match(/-$/)) {
    if ('string' === typeof t.opt.srcData) {
      proc.stdin.setEncoding('binary');
      proc.stdin.write(t.opt.srcData, 'binary');
      proc.stdin.end();
    } else {
      proc.stdin.end(t.opt.srcData);
    }
  }
  return proc;
}
exports.resize = function(options, callback) {
  var t = exports.resizeArgs(options);
  return resizeCall(t, callback)
}
The call to convert is made with the input filename replaced by "-". The equivalent usage from the terminal would be something like:
my_process_that_outputs_pdf | convert - <conversion options here...> my_output.png
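Putting the pieces together for the question's case, here is a minimal sketch (untested): since convert() returns the spawned child process, you can pass "-" as the input filename and write the downloaded PDF buffer to the child's stdin yourself, exactly as resizeCall does above.
var im = require('imagemagick');
var request = require('request');

request.get({ url: 'url to pdf', encoding: null }, (err, resp, body) => {
  if (err) throw err;
  // "-" tells convert to read the input image from stdin
  var proc = im.convert(['-', '-resize', '25x120', 'kittens-small.jpg'],
    function(err, stdout){
      if (err) throw err;
      console.log('stdout:', stdout);
    });
  // body is a Buffer because of encoding: null, so end() stdin with it,
  // mirroring the proc.stdin.end(t.opt.srcData) branch of resizeCall
  proc.stdin.end(body);
});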

Related

Node.js function not running in order. Error: Unhandled stream error in pipe

I updated the function to create the CSV file but now I'm getting an error:
In upload function
internal/streams/legacy.js:57
      throw er; // Unhandled stream error in pipe.
      ^
Error: ENOENT: no such file or directory, open 'C:\Users\shiv\WebstormProjects\slackAPIProject\billingData\CSV\1548963844106output.csv'
var csvFilePath = '';
var JSONFilePath = '';

function sendBillingData(){
  var message = '';
  axios.get(url, {
    params: {
      token: myToken
    }
  }).then(function (response) {
    message = response.data;
    fields = billingDataFields;
    // saveFiles(message, fields, 'billingData/');
    saveFilesNew(message, fields, 'billingData/');
    var file = fs.createReadStream(__dirname + '/' + csvFilePath); // <-- make sure this path is correct
    console.log(__dirname + '/' + csvFilePath);
    uploadFile(file);
  })
  .catch(function (error) {
    console.log(error);
  });
}
The saveFilesNew function is:
function saveFilesNew(message, options, folder){
  try {
    const passedData = message;
    var relevantData = '';
    if (folder == 'accessLogs/'){
      const loginsJSON = message.logins;
      relevantData = loginsJSON;
      console.log(loginsJSON);
    }
    if (folder == 'billingData/'){
      relevantData = passedData.members;
      const profile = passedData.members[0].profile;
    }
    // Save JSON to the output folder
    var date = Date.now();
    var directoryPath = folder + 'JSON/' + date + "output";
    JSONFilePath = directoryPath + '.json';
    fs.writeFileSync(JSONFilePath, JSON.stringify(message, null, 4), function(err) {
      if (err) {
        console.log(err);
      }
    });
    // Parse JSON onto the CSV
    const json2csvParser = new Json2csvParser({ fields });
    const csv = json2csvParser.parse(relevantData);
    // console.log(csv);
    // Function to process the CSV onto the file
    var directoryPath = folder + 'CSV/' + date + "output";
    csvFilePath = directoryPath + '.csv';
    let data = [];
    let columns = {
      real_name: 'real_name',
      display_name: 'display_name',
      email: 'email',
      account_type: 'account_type'
    };
    var id = passedData.members[0].real_name;
    console.log(id);
    console.log("messageLength is" + Object.keys(message.members).length);
    for (var i = 0; i < Object.keys(message.members).length; i++) {
      console.log("value of i is" + i);
      var display_name = passedData.members[i].profile.display_name;
      var real_name = passedData.members[i].profile.real_name_normalized;
      var email = passedData.members[i].profile.email;
      var account_type = 'undefined';
      console.log("name: " + real_name);
      if (passedData.members[i].is_owner){
        account_type = 'Org Owner';
      }
      else if (passedData.members[i].is_admin){
        account_type = 'Org Admin';
      }
      else if (passedData.members[i].is_bot){
        account_type = 'Bot'
      }
      else account_type = 'User';
      data.push([real_name, display_name, email, account_type]);
    }
    console.log(data);
    stringify(data, { header: true, columns: columns }, (err, output) => {
      if (err) throw err;
      fs.writeFileSync(csvFilePath, output, function(err) {
        console.log(output);
        if (err) {
          console.log(err);
        }
        console.log('my.csv saved.');
      });
    });
  } catch (err) {
    console.error(err);
  }
}
The upload file function is:
function uploadFile(file){
  console.log("In upload function");
  const form = new FormData();
  form.append('token', botToken);
  form.append('channels', 'testing');
  form.append('file', file);
  axios.post('https://slack.com/api/files.upload', form, {
    headers: form.getHeaders()
  }).then(function (response) {
    var serverMessage = response.data;
    console.log(serverMessage);
  });
}
So I think the error is caused by Node trying to upload the file before it has been created. I feel like this has something to do with the asynchronous nature of Node.js, but I can't figure out how to fix the code. Please let me know how to correct this, and mention any improvements to the code structure/design too.
Thanks!
You don't wait for the callback provided to stringify to be executed, and that's where you create the file. (Assuming this stringify function really does accept a callback.)
Using callbacks (you could make this cleaner with promises and async/await, but let's stick to callbacks here), it should look more like:
function sendBillingData() {
  ...
  // this callback we'll use to know when the file writing is done, and to get the file path
  saveFilesNew(message, fields, 'billingData/', function(err, csvFilePathArgument) {
    // this we will execute when saveFilesNew calls it, not when saveFilesNew returns, see below
    uploadFile(fs.createReadStream(__dirname + '/' + csvFilePathArgument))
  });
}

// let's name this callback... "callback".
function saveFilesNew(message, options, folder, callback) {
  ...
  var csvFilePath = ...; // local variable only instead of your global
  ...
  stringify(data, { header: true, columns: columns }, (err, output) => {
    if (err) throw err; // or return callback(err);
    fs.writeFile(csvFilePath, output, function(err) { // NOT writeFileSync, or no callback needed
      console.log(output);
      if (err) {
        console.log(err);
        // callback(err); may be a useful approach for error-handling at a higher level
      }
      console.log('my.csv saved.'); // yes, NOW the CSV is saved, not before this executes! Hence:
      callback(null, csvFilePath); // no error, clean process, pass the file path
    });
  });
  console.log("This line is executed before stringify's callback is called!");
  return; // implicitly, yes, yet still synchronous and that's why your version crashes
}
Using callbacks that are called only when the expected events happen (a file is done writing, a buffer/string is done transforming...) allows JS to keep executing other code in the meantime. And it does keep executing code, so when you need data produced by asynchronous code, you have to tell JS to wait until it's done before running the piece that depends on it.
Also, since you can pass data when calling back (it's just a function), here I could avoid relying on a global csvFilePath. Using higher-level variables makes things monolithic: for instance, you could not move saveFilesNew to a dedicated file where you keep your toolkit of file-related functions.
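For reference, here is a minimal sketch of the same flow with promises and async/await. It assumes the stringify used above is csv-stringify, whose stringify(data, options, callback) signature can be promisified; treat it as a sketch, not a drop-in replacement:
const util = require('util');
const fs = require('fs');
const stringify = require('csv-stringify'); // assumed: same callback-style stringify as above

const stringifyAsync = util.promisify(stringify);
const writeFileAsync = util.promisify(fs.writeFile);

// Resolves with the file path only once the CSV actually exists on disk.
async function saveCsv(data, columns, csvFilePath) {
  const output = await stringifyAsync(data, { header: true, columns: columns });
  await writeFileAsync(csvFilePath, output);
  return csvFilePath;
}

// Caller: the await guarantees the file is written before the upload starts.
// const csvPath = await saveCsv(data, columns, folder + 'CSV/' + Date.now() + 'output.csv');
// uploadFile(fs.createReadStream(__dirname + '/' + csvPath));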
Finally, if your global process is like:
function aDayAtTheOffice() {
  sendBillingData();
  getCoffee();
}
then you don't need to wait for the billing data to be processed before starting to make coffee. However, if your boss told you that you could NOT get a coffee until the billing data was settled, then your process would look like:
function aDayAtTheOffice() {
  sendBillingData(function (err) {
    // if (err) let's do nothing here: you wanted a coffee anyway, right?
    getCoffee();
  });
}
(Note that callbacks having potential error as first arg and data as second arg is a convention, nothing mandatory.)
IMHO you should read about scope (the callback argument can be called at a time when the call to saveFilesNew is already done and forgotten!) and about the asynchronous nature of No... JavaScript. ;) (Sorry, probably not the best links, but they contain the meaningful keywords, and then Google is your buddy, your friend, your Big Brother.)

My function is returning a list of empty objects

I have created a function to populate a list of objects but they are not being written.
if (stats.isDirectory()) {
  objlist = [];
  // do something here.. the path was correct
  fs.readdir(path, (err, files) => {
    objlist = [];
    for (file in files){
      //console.log(files[file])
      objlist[file] = {};
      fs.stat(path + '\\' + files[file], function(err, stats){
        if (err)
          throw err;
        if (stats.isDirectory()){
          objlist[file]['file'] = 'folder'
        }
        if (stats.isFile()){
          objlist[file]['file'] = 'file'
        }
        objlist[file]['name'] = files[file]
        objlist[file]['size'] = stats.size
        //console.log(objlist)
        //console.log(stats)
      });
    }
  });
  console.log(objlist);
  return objlist;
}
However, the function returns an empty objlist. Can you please suggest what I am doing wrong?
This code will be helpful; use let file in files:
objlist = [];
// do something here.. the path was correct
fs.readdir(path, (err, files) => {
  objlist = [];
  for (let file in files){
    objlist[file] = {};
    fs.stat(path + '\\' + files[file], function(err, stats){
      if (err)
        throw err;
      if (stats.isDirectory()){
        objlist[file]['file'] = 'folder'
      }
      if (stats.isFile()){
        objlist[file]['file'] = 'file'
      }
      objlist[file]['name'] = files[file]
      objlist[file]['size'] = stats.size
      if (file == files.length - 1) {
        console.log(objlist);
      }
    });
  }
});
You must add the objects to the array with the method push, like this:
objlist = [];
for (file in files){
  obj = {};
  obj.file = file;
  // ... do all stuff with obj
  objlist.push(obj);
}
Also, your function is asynchronous inside fs.readdir, so you either have to build and populate the list inside that callback, or await the result.
Otherwise you will call the function, it will run asynchronously, execution will continue in the main function without waiting for fs.readdir's response, and the array will still be empty because your async function may or may not have executed yet.
There are two mistakes in your code:
1) You are using an array as an object.
2) The for loop is synchronous, but you are running an asynchronous task inside it. Hence, either use the synchronous version of fs or change your for loop as follows:
for (file in files) {
  // wrap the async call in an IIFE so each iteration captures its own index
  (function(f){
    var obj = {};
    fs.stat(path + '\\' + files[f], function(err, stats){
      if (err)
        throw err;
      if (stats.isDirectory()){
        obj.file = 'folder'
      }
      if (stats.isFile()){
        obj.file = 'file'
      }
      obj.name = files[f]
      obj.size = stats.size
      objlist.push(obj)
      //console.log(objlist)
      //console.log(stats)
    });
  }(file))
}
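For completeness, here is a minimal sketch of the same directory listing using fs.promises with async/await (assumes Node.js 10+); awaiting every stat sidesteps the loop/callback pitfall entirely:
const fs = require('fs');
const path = require('path');

// Each stat is awaited, so the list is fully populated before it is returned.
async function listEntries(dir) {
  const names = await fs.promises.readdir(dir);
  const objlist = [];
  for (const name of names) {
    const stats = await fs.promises.stat(path.join(dir, name));
    objlist.push({
      file: stats.isDirectory() ? 'folder' : 'file',
      name: name,
      size: stats.size
    });
  }
  return objlist;
}

// Usage: callers must await (or .then) the returned promise.
listEntries('.').then(list => console.log(list));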

createWriteStream 'close' event not being triggered

I am trying to extract images from a csv file by doing the following:
1. Parsing/streaming in a large csv file using csv-parse and the fs createReadStream method
2. Grabbing each line for processing using stream-transform
3. Extracting the image and other row data for processing using the async waterfall method
4. Downloading and writing the image to the server using request and the fs createWriteStream method
For some reason, after the data gets piped into createWriteStream, there is some event in which an async callback never gets called. I have run this same code using request only, without piping to createWriteStream, and it works. I've also run createWriteStream with a drain event, and then somehow it works? Can anyone explain this to me?
In the code below, request is trying to pipe 14,970 images, but the createWriteStream close or finish events only fire 14,895 times, with error firing 0 times. Could this be a draining issue? Could highWaterMark be exceeded and a write failure be occurring undetected?
Here is my csv line getting code:
var first = true;
var parser = parse();
var transformer = transform( (line, complete) => {
    if (!first)
      extractData(line, complete)
    else {
      first = false;
      complete(null);
    }
  },
  () => {
    console.log('Done: parseFile');
  });
fs.createReadStream(this.upload.location).pipe(parser).pipe(transformer);
The extractData function, which doesn't always invoke the required async callback:
extractData(line, complete){
  var now = new Date();
  var image = {
    createdAt: now,
    updatedAt: now
  };
  async.waterfall([
    next => { // Data Extraction
      async.forEachOf(line, (data, i, complete) => {
        if (i === 2) image.src = data;
        if (i === 3) image.importSrc = data;
        complete(null);
      }, err => {
        if (err) throw err;
        next(null);
      });
    },
    next => { // Download Image
      var file = fs.createWriteStream('public/' + image.src);
      var sendReq = request.get(image.importSrc);
      sendReq.on('response', response => {
        if (response.statusCode !== 200) {
          this.upload.report.image.errors++;
          return next(null);
        }
      });
      sendReq.on('error', err => {
        this.upload.report.image.errors++;
        next(null);
      });
      sendReq.pipe(file);
      file.on('finish', () => {
        this.upload.report.image.inserts++;
        file.close(next); // Close file and callback
      });
      file.on('error', err => {
        this.upload.report.image.errors++;
        next(null);
      });
    }
  ], err => {
    if (err) throw err;
    complete(null);
  });
}
As suggested by @mscdex, I've also tried switching out finish for his close-event replacement approach.
file.close(next); is unnecessary as the file stream is closed automatically by default. What you can do instead is to listen for the close event to know when the file descriptor for the stream has been closed. So replace the entire finish event handler with:
file.on('close', () => {
  this.upload.report.image.inserts++;
  next(null);
});
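Put together, the download step of the waterfall would then look like the sketch below. The done guard is an addition (not part of the original answer) to make sure next() cannot fire twice when both a non-200 response and a close event occur for the same file:
next => { // Download Image
  var done = false;
  var finish = err => { if (!done) { done = true; next(err || null); } };
  var file = fs.createWriteStream('public/' + image.src);
  var sendReq = request.get(image.importSrc);
  sendReq.on('response', response => {
    if (response.statusCode !== 200) {
      this.upload.report.image.errors++;
      finish(); // guard keeps the later 'close' from calling next() again
    }
  });
  sendReq.on('error', err => {
    this.upload.report.image.errors++;
    finish();
  });
  sendReq.pipe(file);
  // 'close' fires once the file descriptor is released, i.e. the data is
  // fully flushed; no manual file.close() call is needed.
  file.on('close', () => {
    this.upload.report.image.inserts++;
    finish();
  });
  file.on('error', err => {
    this.upload.report.image.errors++;
    finish();
  });
}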

Read a file one character at a time in node.js?

Is there a way to read one symbol at a time in Node.js from a file, without storing the whole file in memory?
I found an answer for lines.
I tried something like this, but it doesn't help:
const stream = fs.createReadStream("walmart.dump", {
  encoding: 'utf8',
  fd: null,
  bufferSize: 1,
});
stream.on('data', function(sym){
  console.log(sym);
});
A Readable stream has a read() method to which you can pass the length, in bytes, of every chunk to be read. For example:
var readable = fs.createReadStream("walmart.dump", {
  encoding: 'utf8',
  fd: null,
});
readable.on('readable', function() {
  var chunk;
  while (null !== (chunk = readable.read(1) /* here */)) {
    console.log(chunk); // chunk is one byte
  }
});
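As an aside, the option the question was reaching for is highWaterMark, not bufferSize. The following sketch (an assumption based on the standard fs.createReadStream options, not from either answer) should deliver one-byte chunks through the ordinary data event:
const fs = require('fs');

// highWaterMark: 1 makes the stream read one byte per chunk. Note that a
// multi-byte utf8 character cannot be split across chunks, so this is
// safest for ASCII input.
const stream = fs.createReadStream('walmart.dump', {
  encoding: 'utf8',
  highWaterMark: 1
});
stream.on('data', sym => console.log(sym));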
Here's a lower-level way to do it, using fs.read(fd, buffer, offset, length, position, callback):
const fs = require('fs');

// open file for reading, returns file descriptor
const fd = fs.openSync('your-file.txt', 'r');

function readOneCharFromFile(position, cb){
  // only need to store one byte (one character);
  // Buffer.alloc replaces the deprecated new Buffer()
  const b = Buffer.alloc(1);
  fs.read(fd, b, 0, 1, position, function(err, bytesRead, buffer){
    console.log('data => ', String(buffer));
    cb(err, buffer);
  });
}
You will have to increment the position as you read the file, but it will work.
Here's a quick example of how to read a whole file, character by character.
Just for fun, I wrote this complete script to do it; just pass in a different file path and it should work:
const async = require('async');
const fs = require('fs');
const path = require('path');

function read(fd, position, cb) {
  let isByteRead = null;
  let ret = Buffer.alloc(0);
  async.whilst(
    function () {
      return isByteRead !== false;
    },
    function (cb) {
      readOneCharFromFile(fd, position++, function (err, bytesRead, buffer) {
        if (err) {
          return cb(err);
        }
        isByteRead = !!bytesRead;
        if (isByteRead) {
          ret = Buffer.concat([ret, buffer]);
        }
        cb(null);
      });
    },
    function (err) {
      cb(err, ret);
    }
  );
}

function readOneCharFromFile(fd, position, cb) {
  // only need to store one byte (one character)
  const b = Buffer.alloc(1);
  fs.read(fd, b, 0, 1, position, cb);
}

/// use your own file here
const file = path.resolve(__dirname + '/fixtures/abc.txt');
const fd = fs.openSync(file, 'r');

// start reading at position 0, position will be incremented
read(fd, 0, function (err, data) {
  if (err) {
    console.error(err.stack || err);
  }
  else {
    console.log('data => ', String(data));
  }
  fs.closeSync(fd);
});
As you can see, we increment the position integer every time we read the file. Hopefully the OS keeps the file in memory as we go. Using async.whilst() is OK, but I think for a more functional style it's better not to keep the state at the top of the function (ret and isByteRead). I will leave it as an exercise for the reader to implement this without those stateful variables.
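One possible take on that exercise (a sketch, reusing the readOneCharFromFile helper above, not the author's own solution) is to thread the accumulator through a recursive function instead of mutating shared variables:
// Recursion replaces the ret/isByteRead state: the accumulator is passed
// along rather than mutated. There is no stack growth because each
// recursive call happens inside an asynchronous callback.
function readAll(fd, position, acc, cb) {
  readOneCharFromFile(fd, position, function (err, bytesRead, buffer) {
    if (err) return cb(err);
    if (!bytesRead) return cb(null, acc); // EOF reached
    readAll(fd, position + 1, Buffer.concat([acc, buffer]), cb);
  });
}

// Usage, with the same fd as above:
// readAll(fd, 0, Buffer.alloc(0), (err, data) => console.log(String(data)));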

node.js asynchronous issue?

Basically I want to loop through the existing arrtickers to get each symbol, then read the content at each symbol's URL and save it to a local directory.
In PHP, it prints each ticker and each symbol step by step.
But in Node, the sequence is messed up: it prints all the url_optionable values first, then sometimes prints console.log('path: ' + file) and sometimes console.log("The file was saved!").
Every time it runs through the fs.writeFile function, the sym value is not detected, so the saved file shows up as msn-.html.
for (var v = 0; v < arrtickers.length; v++)
{
  var arrticker = arrtickers[v].split('##');
  var sym = $.trim(arrticker[1]);
  url_optionable = "http://sample.com/ns/?symbol=" + sym;
  console.log('url_optionable: ' + url_optionable);
  request({ uri: url_optionable }, function (error, response, body) {
    if (error && response.statusCode !== 200) {
      console.log('Error contacting ' + url_optionable)
    }
    jsdom.env({
      html: body,
      scripts: [
        jqlib
      ]
    }, function (err, window) {
      var $ = window.jQuery;
      var data = $('body').html();
      var file = "msn-" + sym + ".html";
      console.log('path: ' + file);
      fs.writeFile(file, data, function(err) {
        if (err) {
          console.log(err);
        }
        else
        {
          console.log("The file was saved!");
        }
      });
    });
  });
}
ZeissS is right. Basically, you can't use variables declared inside the for loop in a callback passed to an asynchronous call, because they will all be set to the last value in the loop. So in your code, url_optionable and sym will correspond to arrtickers[arrtickers.length - 1].
Either use (as ZeissS suggests):
arrtickers.forEach(function(arrticker) {
  // Get symbol and url, make request etc
});
Or declare a function which takes sym and does the request, and call that in your loop:
function getSymbol(symbol) {
  // Request url and read DOM
}

for (var v = 0; v < arrtickers.length; v++) {
  var arrticker = arrtickers[v].split('##');
  var sym = $.trim(arrticker[1]);
  getSymbol(sym);
}
Personally, I would opt for the forEach solution.
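For illustration, here is a minimal sketch (not from the original answers) of the forEach approach applied to the question's code; each iteration closes over its own sym and url_optionable, so the asynchronous callbacks see the right values:
arrtickers.forEach(function (arrticker) {
  var sym = $.trim(arrticker.split('##')[1]);
  var url_optionable = 'http://sample.com/ns/?symbol=' + sym;
  console.log('url_optionable: ' + url_optionable);
  request({ uri: url_optionable }, function (error, response, body) {
    // note: || rather than && here, so a transport error is also caught
    if (error || response.statusCode !== 200) {
      console.log('Error contacting ' + url_optionable);
      return;
    }
    jsdom.env({ html: body, scripts: [jqlib] }, function (err, window) {
      var data = window.jQuery('body').html();
      var file = 'msn-' + sym + '.html'; // sym is captured per iteration now
      fs.writeFile(file, data, function (err) {
        if (err) console.log(err);
        else console.log('The file was saved!');
      });
    });
  });
});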
