Loading a texture in THREE.js using Node

I am trying to build a Messenger bot which does some image processing in 3D and returns a brand new image. I use THREE.CanvasRenderer and my app is hosted on Heroku.
When a user POSTs an attachment to my webhook, I want to take the URL of the newly created image and insert it into my 3D scene.
This is my code (using the node-canvas library):
const addTexture = (imageUrl) => {
  request({
    uri: imageUrl,
    method: 'GET'
  }, (err, res, body) => {
    let image = new Canvas.Image();
    image.src = body;
    mesh.material.map = new THREE.Texture(image);
    mesh.material.needsUpdate = true;
  });
}
The callback gets run and I can actually console.log() the image's contents, but nothing shows up in the scene; the plane I am supposed to render onto just turns black, without any errors... What am I missing here?
I also tried several other ways, without any success...
I tried using THREE.TextureLoader and patching it with jsdom (mocking document and window) and node-xmlhttprequest, but then I get an error with my load event (event.target is not defined...), just like in this example. The rough setup I attempted is sketched below.
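This is roughly what that attempt looked like (a sketch from memory, not a working solution; it assumes the jsdom and xmlhttprequest packages, and imageUrl and mesh come from the surrounding code):
// Globals patched so THREE.TextureLoader can run outside the browser (sketch only).
const { JSDOM } = require('jsdom');
const { XMLHttpRequest } = require('xmlhttprequest');

const { window } = new JSDOM('<!DOCTYPE html>');
global.window = window;
global.document = window.document;
global.XMLHttpRequest = XMLHttpRequest;

const loader = new THREE.TextureLoader();
loader.load(imageUrl, (texture) => {
  // This is where it failed for me: the load handler errored with event.target undefined.
  mesh.material.map = texture;
  mesh.material.needsUpdate = true;
});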
How should one approach this problem? I have a URL generated by Facebook; I want to download the image from it and place it in my scene.

Okay, I figured it out after half a day and am posting it for future generations. Here is the code that did the trick for me:
const addTexture = (imageUrl) => {
  request.get(imageUrl, (err, res, data) => {
    if (!err && res.statusCode == 200) {
      data = "data:" + res.headers["content-type"] + ";base64," + Buffer.from(data).toString('base64');
      let image = new Canvas.Image();
      image.onload = () => {
        mesh.material.map = new THREE.Texture(image);
        mesh.material.map.needsUpdate = true;
      }
      image.src = data;
    }
  });
}
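One extra note for anyone reusing this: by default, request decodes the response body into a string, so for binary image data it can be safer to ask for a raw Buffer with encoding: null and base64-encode that directly. A variant along those lines (a sketch, untested; request, Canvas, THREE and mesh are assumed to be set up as above):
// Sketch: fetch the image as a raw Buffer (encoding: null) and build the data URI from it.
const addTexture = (imageUrl) => {
  request.get({ url: imageUrl, encoding: null }, (err, res, body) => {
    if (err || res.statusCode !== 200) return;
    const image = new Canvas.Image();
    image.onload = () => {
      mesh.material.map = new THREE.Texture(image);
      mesh.material.map.needsUpdate = true;
    };
    // body is a Buffer here, so no lossy string round-trip is involved.
    image.src = 'data:' + res.headers['content-type'] + ';base64,' + body.toString('base64');
  });
};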
Doing it this way, I still got an error: document is not defined, because it seems that under the hood three.js writes the texture to a separate canvas.
So the quick and ugly fix is to do what I did:
document.createElement = (el) => {
  if (el === 'canvas') {
    return new Canvas()
  }
}
I really hope this helps somebody out there, because besides all the hurdles, rendering WebGL on the server is pure awesomeness.

Related

Is it possible to extract the number of images (and whether they have alt text), tables, interactive elements, and the page count from a PDF using PDF.js?

I want to build a simple tool that can evaluate any given PDF and report how many pages it has, whether it contains images and if so how many (and whether they have alt text), and give me a total count of tables, interactive elements, etc. I am getting the PDFs via user upload, so I am having to deal with buffers. Is this task possible using PDF.js or something else? I have taken a look at pdf-parser, and it works well for getting the page count, but not for more detail such as how many images there are or whether they have alt text. I also tried PDF2JSON, but its parseBuffer method was not working. I haven't been able to find much in the docs or on Google about this.
Thank you all!
Update:
If the PDF has some tags, I am able to get those tags from the PDF's structure with the following code (they seem to be positioned in ops.argsArray[i][0]); however, I am not successfully locating the alt text. Any advice?
let doc = await pdfJS.getDocument({ data }).promise;
console.log("here is the document");
console.log(doc);
console.log("here is doc info");
console.log(doc._pdfInfo);
console.log("here is meta data");
let metaData = await doc.getMetadata();
console.log(metaData);
pageCount = doc.numPages;
Array.from({ length: doc.numPages }, async (v, i) => {
  pages = await doc.getPage(i + 1);
  console.log("here is page");
  console.log(pages);
  const ops = await pages.getOperatorList();
  console.log("here is ops");
  console.log(ops);
  console.log("here is the function array");
  console.table(ops.fnArray);
  console.log("here is args array");
  console.table(ops.argsArray);
  for (let i = 0; i < ops.fnArray.length; i++) {
    if (Array.isArray(ops.argsArray[i])) {
      console.log("this is data from ops");
      console.log(ops.argsArray[i]);
      console.log("this is info from ops.argsArray[i][0]");
      console.log(ops.argsArray[i][0]);
      console.log("this is data from ops.argsArray[i][0][0]");
      console.log(ops.argsArray[i][0][0]);
      if (typeof ops.argsArray[i][0] === "string") {
        stringArray.push(ops.argsArray[i][0]);
      }
      // pdfObjects.push(ops.argsArray[i][0]);
    } else if (ops.argsArray[i]) {
      // console.log("this is null");
    }
  }
  // console.log(stringArray);
});
return stringArray;
}
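For future reference, one rough direction I'm considering is sketched below (untested; it assumes a tagged PDF, that pdfJS is the pdfjs-dist module and the build in use exposes page.getStructTree(), and inspectPdf is just a made-up helper name). Image draw calls appear in the operator list as OPS.paintImageXObject, and alt text, when present, lives on nodes of the structure tree.
// Sketch: count drawn images per page via the operator list, and collect alt
// text from the structure tree (tagged PDFs only). The exact import path for
// pdfjs-dist varies by version.
const pdfJS = require("pdfjs-dist/legacy/build/pdf.js");

async function inspectPdf(data) {
  const doc = await pdfJS.getDocument({ data }).promise;
  let imageCount = 0;
  const altTexts = [];
  for (let pageNum = 1; pageNum <= doc.numPages; pageNum++) {
    const page = await doc.getPage(pageNum);
    // Each paintImageXObject operator corresponds to one drawn image.
    const ops = await page.getOperatorList();
    imageCount += ops.fnArray.filter(fn => fn === pdfJS.OPS.paintImageXObject).length;
    // Walk the structure tree looking for alt text; getStructTree() resolves
    // to null/undefined when the PDF has no tags.
    const tree = await page.getStructTree();
    (function walk(node) {
      if (!node) return;
      if (node.alt) altTexts.push(node.alt);
      (node.children || []).forEach(walk);
    })(tree);
  }
  return { pageCount: doc.numPages, imageCount, altTexts };
}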

Handling Async Functions and Routing in Formidable / Extracting text in PDFReader

I'm creating an application where users upload a PDF and the app extracts its text into JSON format. I am able to access the text, but I can't hold the response until the PDF extraction is complete. I'm unfamiliar with Formidable, and I may be missing something entirely.
I am using Formidable for uploading and PDFReader for text extraction. The front end and back end are on separate servers, and the app is only intended for local use, so that shouldn't be an issue. I'm able to console.log the text perfectly. I would like to work with the text in JSON format in some way, ideally appending it to the response sent back to the front end, but I can't seem to hold the response until the text is ready.
const IncomingForm = require("formidable").IncomingForm;
const { PdfReader } = require('pdfreader');
const test = new PdfReader(this, 1);
module.exports = function upload(req, res) {
  let str = ''
  let form = new IncomingForm();
  form.parse(req, () => {
    console.log('parse')
  });
  form.on("file", (field, file) => {
    test.parseFileItems(file.path, (err, item) => {
      if (err) {
        console.log(err)
      }
      else if (item) {
        if (item.text) {
          console.log(item.text)
          str += item.text
        }
      }
    })
  });
  form.on("end", () => {
    console.log("reached end/str: ", str)
  });
};
I've attempted a number of different ways of handling the async functions, primarily within form.on('file'). The following attempts at form.on('file') produce the same effect (the text is console.logged correctly, but only after form.on('end') is hit):
// Making the callback to form.on('file') async then traditional await
form.on("file", async (field, file) => {
  //...
  await test.parseFileItems(...)
  //...
  console.log(str) // After end of PDFReader code, shows blank

// Making cb async, then manually creating promise
form.on("file", async (field, file) => {
  //...
  let textProm = await new Promise((res, rej) => //...
I've also attempted to convert the text manually from the Buffer using fs.readFile, but this produces the same effect; I can only access the text after form.end is hit.
A few things I notice: form.on('file') is hit first, then form.parse's callback. It also seems I may be parsing the document twice (once with Formidable and once with PdfReader), but that is probably necessary.
Also, after reading through the docs and Stack Overflow, I think I'm mixing the built-in form.parse/form.on/form.end handling with manual callbacks, but I was unsure how to stick with just one, and I'm still able to access the text.
Finally, PDFReader accesses text one line at a time, so the parseFileItems callback runs for every line. I've attempted to resolve a Promise.all with the PdfReader instance, but I couldn't get it to work; a sketch of the shape I was going for is below.
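A minimal sketch of that shape (untested; it assumes parseFileItems reports the end of the file by passing a falsy item, that file.path is what Formidable provides, and that parsePdf is a made-up helper living inside the upload handler above):
// Sketch: wrap PdfReader's line-by-line callbacks in one promise that resolves
// with the full text once the reader signals the end of the file.
const parsePdf = (path) =>
  new Promise((resolve, reject) => {
    let text = '';
    new PdfReader().parseFileItems(path, (err, item) => {
      if (err) return reject(err);
      if (!item) return resolve(text); // no more items: parsing is finished
      if (item.text) text += item.text;
    });
  });

form.on("file", async (field, file) => {
  try {
    const text = await parsePdf(file.path);
    res.end(JSON.stringify({ text })); // respond only after extraction completes
  } catch (err) {
    res.statusCode = 500;
    res.end(JSON.stringify({ error: err.message }));
  }
});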
Any help would be greatly appreciated!

Mustache does not render values when deployed

Node version: v8.9.0
I'm using Mustache to render some HTML from templates as follows:
static createHtml(title, variables, template) {
  let header = fs.readFileSync(headerPath);
  let content = fs.readFileSync(contentPath);
  let footer = fs.readFileSync(footerPath);
  var headerVars = {
    title: title
  };
  console.log('createHtml: ', variables); // <- variables are as expected
  try {
    header = Mustache.render(header.toString(), headerVars);
    content = Mustache.render(content.toString(), variables);
  } catch (e) {
    console.log('Moustache error:\n', e); // <- no error is thrown
  }
  const html = header + content + footer;
  console.log('content', content); // <- content does not include filled variables when deployed
  return html;
}
When I run it on my local machine everything works perfectly; however, when it is deployed, the variables are not injected into the template. I had several assumptions and tried to figure out every possible way the environment could differ from my local one, but nothing has worked so far. It is deployed on EC2 to an Amazon Linux (AMI) instance.
Any idea how I could figure out what the difference is? According to the console logs it must be something inside Mustache.render().
It seems that the deployed instance on Amazon Linux did not really like the synchronous fs.readFileSync operations after a certain point (what that point was, and why, is still an open question), but the solution was very simple: just refactor the code to async reads, as follows:
static createHtml(title, variables, template) {
  fs.readFile(headerPath, (err, header) => {
    fs.readFile(contentPath, (err, content) => {
      fs.readFile(footerPath, (err, footer) => {
        var headerVars = { title: title };
        header = Mustache.render(header.toString(), headerVars);
        content = Mustache.render(content.toString(), variables);
        const html = header + content + footer.toString();
        return html;
      });
    });
  });
}
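One caveat with the nested-callback version: the innermost return html only exits that callback, so createHtml itself no longer hands anything back to its caller; the result has to be delivered through a callback or a promise instead. A rough promise-based variant (a sketch, not the code that was deployed; it uses Node 8's util.promisify and assumes the same headerPath, contentPath and footerPath variables):
// Sketch: async reads that actually deliver the rendered html to the caller.
const fs = require('fs');
const util = require('util');
const Mustache = require('mustache');
const readFile = util.promisify(fs.readFile);

async function createHtml(title, variables) {
  const [header, content, footer] = await Promise.all([
    readFile(headerPath, 'utf8'),
    readFile(contentPath, 'utf8'),
    readFile(footerPath, 'utf8'),
  ]);
  return Mustache.render(header, { title: title }) +
    Mustache.render(content, variables) +
    footer;
}

// Usage: createHtml('My page', vars).then(html => { /* send it in the response */ });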

Node.js: given array of URLs, determine which are valid

I am a total scrub with the node http module and having some trouble.
The ultimate goal here is to take a huge list of URLs, figure out which are valid, and then scrape those pages for certain data. So step one is figuring out whether a URL is valid, and this simple exercise is baffling me.
say we have an array allURLs:
["www.yahoo.com", "www.stackoverflow.com", "www.sdfhksdjfksjdhg.net"]
The goal is to iterate over this array, make a GET request to each URL and, if a response comes in, add the link to a list of workingURLs (for now just another array); otherwise it goes into a list of brokenURLs.
var workingURLs = [];
var brokenURLs = [];
for (var i = 0; i < allURLs.length; i++) {
  var url = allURLs[i];
  var req = http.get(url, function (res) {
    if (res) {
      workingURLs.push(?????); // How to derive URL from response?
    }
  });
  req.on('error', function (e) {
    brokenURLs.push(e.host);
  });
}
What I don't know is how to properly obtain the URL from the request/response object itself, or really how to structure this kind of async code, because again, I am a Node.js scrub :(
For most websites res.headers.location works, but there are times when the headers do not have this property, and that will cause problems for me later on. I've also tried console logging the response object itself, but that was a messy and fruitless endeavor.
I have tried pushing the url variable to workingURLs, but by the time any response comes back to trigger the push, the for loop is already over and url is forever pointing to the final element of the allURLs array.
Thanks to anyone who can help
You need to close over the url value to keep access to it and to protect it from being changed on the next loop iteration.
For example:
(function(url){
  // use url here
})(allURLs[i]);
The simplest solution for this is to use forEach instead of a for loop.
allURLs.forEach(function(url){
  //....
});
A promisified solution also lets you know the moment all the work is done:
var http = require('http');
var allURLs = [
  "http://www.yahoo.com/",
  "http://www.stackoverflow.com/",
  "http://www.sdfhksdjfksjdhg.net/"
];
var workingURLs = [];
var brokenURLs = [];
var promises = allURLs.map(url => validateUrl(url)
  .then(res => (res ? workingURLs : brokenURLs).push(url)));
Promise.all(promises).then(() => {
  console.log(workingURLs, brokenURLs);
});
// ----
function validateUrl(url) {
  return new Promise((ok, fail) => {
    http.get(url, res => ok(res.statusCode == 200))
      .on('error', e => ok(false));
  });
}
// Prevent Node.js from exiting; not needed if a server is listening.
var t = setTimeout(() => { console.log('Time is over'); }, 1000).ref();
You can use something like this (Not tested):
const arr = ["", "/a", "", ""];
Promise.all(arr.map(fetch))
  .then(responses => responses.filter(res => res.ok).map(res => res.url))
  .then(workingUrls => {
    console.log(workingUrls);
    console.log(arr.filter(url => workingUrls.indexOf(url) == -1));
  });
EDITED
Working fiddle (note that you can't make requests to another site from the browser because of cross-domain restrictions).
UPDATED with #vp_arth suggestions
const arr = ["/", "/a", "/", "/"];
let working = [], notWorking = [],
  find = url => fetch(url)
    .then(res => res.ok ?
      working.push(res.url) && res : notWorking.push(res.url) && res);
Promise.all(arr.map(find))
  .then(responses => {
    console.log('working', working, 'notWorking', notWorking);
    /* Do whatever with the responses if needed */
  });
Fiddle

Node.js thumbnailer using Imagemagick: nondeterministic corruption

I have a Node.js server which dynamically generates and serves small (200x200) thumbnails from images (640x640) in a database (mongodb). I'm using the node-imagemagick module for thumbnailing.
My code works roughly 95% of the time; about 1 in 20 (or fewer) thumbnailed images are corrupt on the client (iOS), which reports:
JPEG Corrupt JPEG data: premature end of data segment
For the corrupt images, the client displays the top 50% - 75% of the image, and the remainder is truncated.
The behavior is nondeterministic, and the specific images that are corrupt change on a per-request basis.
I'm using the following code to resize the image and output the thumbnail:
im.resize({
  srcData: image.imageData.buffer,
  width: opt_width,
}, function(err, stdout) {
  var responseHeaders = {};
  responseHeaders['content-type'] = 'image/jpeg';
  responseHeaders['content-length'] = stdout.length;
  debug('Writing ', stdout.length, ' bytes.');
  response.writeHead(200, responseHeaders);
  response.write(stdout, 'binary');
  response.end();
});
What could be wrong, here?
Notes:
The problem is not an incorrect content-length header. When I omit the header, the result is the same.
When I do not resize the image, the full-size image always seems to be fine.
In researching this I found this and this Stack Overflow question, both of which solved the problem by increasing the buffer size. In my case the images are very small, so this seems unlikely to be responsible.
I was originally assigning stdout to a new Buffer(stdout, 'binary') and writing that. Removing it ('binary' will be deprecated) made no difference.
The problem seems to have been due to a slightly older version of node-imagemagick (0.1.2); upgrading to 0.1.3 was the solution.
In case this is helpful to anyone, here's the code I used to make Node.js queue up and handle client requests one at a time.
// Set up your server like normal.
http.createServer(handleRequest);
// ...
var requestQueue = [];
var isHandlingRequest = false; // True while a request is in progress; blocks the queue.
// If you have any endpoints that don't always call response.end(), add them here.
var urlsToHandleConcurrently = {
  '/someCometStyleThingy': true
};
function handleRequest(req, res) {
  if (req.url in urlsToHandleConcurrently) {
    handleQueuedRequest(req, res);
    return;
  }
  requestQueue.push([req, res]); // Enqueue new requests.
  processRequestQueue(); // Check if a request in the queue can be handled.
}
function processRequestQueue() {
  // Continue if no requests are being processed and the queue is not empty.
  if (isHandlingRequest) return;
  if (requestQueue.length == 0) return;
  var op = requestQueue.shift();
  var req = op[0], res = op[1];
  // Wrap .end() on the http.ServerResponse instance to
  // unblock and process the next queued item.
  res.oldEnd = res.end;
  res.end = function(data) {
    res.oldEnd(data);
    isHandlingRequest = false;
    processRequestQueue();
  };
  // Start handling the request, while blocking the queue until res.end() is called.
  isHandlingRequest = true;
  handleQueuedRequest(req, res);
}
function handleQueuedRequest(req, res) {
  // Your regular request handling code here...
}
