Can I force SSR for a Nuxt page? - node.js

In a Nuxt app I need to render a page with a lot of data displayed on a Google map, obtained from a 100 MB .jsonl file. I'm using fs.createReadStream inside asyncData() to parse the data and feed it to the Vue component. Since fs is a server-side-only module, my app errors when it attempts to render that page client-side.
I would like this specific page to be rendered exclusively with SSR so I can use fs in the Vue component.
I thought of using a custom Express middleware to process the data, but that still results in downloading dozens of MB to the client, which is unacceptable. You can see how I request it with Axios in my example.
async asyncData({ $axios }) {
  const fs = require('fs');
  if (process.server) {
    console.log("Server");
    async function readData() {
      const DelimiterStream = require('delimiter-stream');
      const StringDecoder = require('string_decoder').StringDecoder;
      const decoder = new StringDecoder('utf8');
      let linestream = new DelimiterStream();
      let input = fs.createReadStream('/Users/Chibi/WebstormProjects/OPI/OPIExamen/static/stream.jsonl');
      return new Promise((resolve, reject) => {
        console.log("LETS GO");
        let data = [];
        linestream.on('data', (chunk) => {
          let parsed = JSON.parse(chunk);
          if (parsed.coordinates)
            data.push({
              coordinates: parsed.coordinates.coordinates,
              country: parsed.place && parsed.place.country_code
            });
        });
        linestream.on('end', () => {
          return resolve(data);
        });
        input.pipe(linestream);
      });
    }
    const items = await readData();
    return { items };
  } else {
    console.log("CLIENT");
    // asyncData has no `this`; use the $axios destructured from the context, and await the request
    const items = await $axios.$get('http://localhost:3000/api/stream');
    return { items };
  }
}
Even when the page renders correctly, Nuxt shows an error overlay complaining about the issue.
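For reference, here is a rough sketch of what the /api/stream endpoint requested in the else branch could look like as Express-style server middleware. The file path and the coordinates/country fields are copied from the snippet above; the route wiring itself is an assumption about the setup:
// Hypothetical Express handler behind http://localhost:3000/api/stream (route wiring assumed).
// It parses the .jsonl file on the server and only returns the reduced coordinate/country pairs.
const express = require('express');
const fs = require('fs');
const DelimiterStream = require('delimiter-stream');

const app = express();

app.get('/api/stream', (req, res) => {
  const data = [];
  const linestream = new DelimiterStream();

  linestream.on('data', (chunk) => {
    const parsed = JSON.parse(chunk);
    if (parsed.coordinates) {
      data.push({
        coordinates: parsed.coordinates.coordinates,
        country: parsed.place && parsed.place.country_code,
      });
    }
  });
  linestream.on('end', () => res.json(data)); // send the reduced dataset once the stream is done

  fs.createReadStream('/Users/Chibi/WebstormProjects/OPI/OPIExamen/static/stream.jsonl')
    .pipe(linestream);
});

module.exports = app;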

Related

fs.writeFile crashes node app after writing first json file

I'm trying to crawl several web pages to check for broken links and write the results to JSON files; however, after the first file is completed the app crashes with no error popping up...
I'm using Puppeteer to crawl, Bluebird to run each link concurrently, and fs to write the files.
WHAT I'VE TRIED:
switching the file type to '.txt' or '.php'; this works, but I would need another loop outside the current workflow to convert the files from '.txt' to '.json'. Renaming the file right after writing to it also causes the app to crash.
using try/catch statements around fs.writeFile, but it never throws an error (see the sketch right after this list)
running the entire app outside of Express; this worked at some point, but I'm trying to use it within the framework
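As an aside, the callback form of fs.writeFile never throws synchronously, so a surrounding try/catch cannot catch write errors; they only arrive in the callback. A minimal sketch using the promise API instead, where a try/catch does work (the helper name is made up for illustration):
const fs = require('fs').promises;

// Hypothetical helper: with fs.promises, write errors reject and land in the catch block.
async function writeJsonFile(fileName, data) {
  try {
    await fs.writeFile(`./tmp/${fileName}.json`, JSON.stringify(data));
  } catch (err) {
    console.error(`Failed to write ${fileName}.json:`, err);
  }
}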
const express = require('express');
const router = express.Router();
const puppeteer = require('puppeteer');
const bluebird = require("bluebird");
const fs = require('fs');

router.get('/', function(req, res, next) {
  (async () => {
    // Our (multiple) URLs.
    const urls = ['https://www.testing.com/allergy-test/', 'https://www.testing.com/genetic-testing/'];

    const withBrowser = async (fn) => {
      const browser = await puppeteer.launch();
      try {
        return await fn(browser);
      } finally {
        await browser.close();
      }
    };

    const withPage = (browser) => async (fn) => {
      const page = await browser.newPage();
      // Turn the request interceptor on.
      await page.setRequestInterception(true);
      // Ignore all the asset requests, just get the document.
      page.on('request', request => {
        if (request.resourceType() === 'document') {
          request.continue();
        } else {
          request.abort();
        }
      });
      try {
        return await fn(page);
      } finally {
        await page.close();
      }
    };

    const results = await withBrowser(async (browser) => {
      return bluebird.map(urls, async (url) => {
        return withPage(browser)(async (page) => {
          await page.goto(url, {
            waitUntil: 'domcontentloaded',
            timeout: 0 // Removes timeout.
          });
          // Search for the urls we want to "crawl".
          const hrefs = await page.$$eval('a[href^="https://www.testing.com/"]', as => as.map(a => a.href));
          // Predefine our arrays.
          const links = [];
          const redirects = [];
          // Loop through each /goto/ url on the page.
          for (const href of hrefs) {
            const response = await page.goto(href, {
              waitUntil: 'domcontentloaded',
              timeout: 0 // Removes timeout.
            });
            const chain = response.request().redirectChain();
            const link = {
              'source_url': href,
              'status': response.status(),
              'final_url': response.url(),
              'redirect_count': chain.length,
            };
            // Loop through the redirect chain for each href.
            for (const ch of chain) {
              redirects.push({
                status: ch.response().status(),
                url: ch.url(),
              });
            }
            // Push all info about the target link into links.
            links.push(link);
          }
          // JSONify the data.
          const linksJson = JSON.stringify(links);
          let fileName = url.replace('https://www.testing.com/', '');
          fileName = fileName.replace(/[^a-zA-Z0-9\-]/g, '');
          // Write the data to a file in the /tmp directory.
          fs.writeFile(`./tmp/${fileName}.json`, linksJson, (err) => {
            if (err) {
              return console.log(err);
            }
          });
        });
      }, { concurrency: 4 }); // How many pages to run at a time.
    });
  })();
});

module.exports = router;
UPDATE:
So there is nothing wrong with my code... I realized nodemon was stopping the process after each file was saved. Since nodemon detects a "file change", it kept restarting my server after the first item.
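Since nodemon restarts on any change to a watched file, one way to keep writing into ./tmp without triggering restarts is to tell nodemon to ignore that directory, for example with a nodemon.json next to your package.json (the path matches the code above; the rest of the config is assumed):
{
  "ignore": ["tmp/*"]
}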

Two rendered empty PDFs are not identical

I'm using TypeScript + Node.js + the pdfkit library to create PDFs and verify that they're consistent.
However, even when creating the most basic PDF, the consistency check already fails. Here's my test.
import { readFileSync, createWriteStream } from "fs";
const PDFDocument = require('pdfkit');
const assert = require('assert').strict;

const fileName = '/tmp/tmp.pdf';

async function makeSimplePDF() {
  return new Promise(resolve => {
    const stream = createWriteStream(fileName);
    const doc = new PDFDocument();
    doc.pipe(stream);
    doc.end();
    stream.on('finish', resolve);
  });
}

describe('test that pdfs are consistent', () => {
  it('simple pdf test.', async () => {
    await makeSimplePDF();
    const data: Buffer = readFileSync(fileName);
    await makeSimplePDF(); // make the PDF again
    const data2: Buffer = readFileSync(fileName);
    assert.deepStrictEqual(data, data2); // fails!
  });
});
Most of the values in the two Buffers are identical but a few of them are not. What's happening here?
I believe the bytes differ slightly because the creation time is factored into the Buffer somehow. When I used mockdate (https://www.npmjs.com/package/mockdate) to fix 'now', I ended up getting consistent Buffers.
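For completeness, a minimal sketch of that workaround, assuming the makeSimplePDF and fileName from the test above and the mockdate package linked above:
const { readFileSync } = require('fs');
const MockDate = require('mockdate');
const assert = require('assert').strict;

it('simple pdf test with a frozen clock.', async () => {
  MockDate.set('2020-01-01T00:00:00Z'); // pin "now" so the creation date pdfkit embeds is stable
  try {
    await makeSimplePDF();
    const data = readFileSync(fileName);
    await makeSimplePDF();
    const data2 = readFileSync(fileName);
    assert.deepStrictEqual(data, data2); // should now pass
  } finally {
    MockDate.reset(); // always restore the real Date
  }
});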

Realtime scrape a chat using Nodejs

What I want to do is build a scraping application in Node.js that monitors a chat in real time and stores certain messages in a database.
Specifically, I want to capture data from the chat of streaming platforms and extract useful information that helps the people running the stream.
But I don't know how to start doing this with Node.js. What I have managed to do so far is capture the message data; however, I can't monitor new messages in real time.
Any help in this regard?
What I did so far:
server.js
var express = require('express');
var fs = require('fs');
var request = require('request');
var puppeteer = require('puppeteer');
var app = express();

app.get('/', function(req, res) {
  const url = 'https://www.nimo.tv/live/6035521326';
  (async () => {
    const browser = await puppeteer.launch();
    const page = await browser.newPage();
    await page.goto(url);
    await page.waitForSelector('.msg-nickname');
    const messages = await page.evaluate(() => {
      return Array.from(document.querySelectorAll('.msg-nickname'))
        .map(item => item.innerText);
    });
    console.log(messages);
  })();
  res.send('Check your console!');
});

app.listen('8081');
console.log('Magic happens on port 8081');
exports = module.exports = app;
With this I get the nicknames of the users' messages and put them in an array. I want my application to keep running and receive new nicknames automatically as they are typed into the chat.
Any help with this challenge?
Maybe I'm going to need to use WebSocket.
If possible you should use the API the chat is using. Try to open the network tab inside the Chrome developer tools and figure out which network requests are happening.
If that is not possible, you can use a MutationObserver to monitor DOM changes. Expose a function via page.exposeFunction and then listen to relevant changes. You can then insert the obtained data into a database.
Here is some example code to get you started:
const puppeteer = require('puppeteer');
const { Client } = require('pg');

(async () => {
  const client = new Client(/* ... */);
  await client.connect(); // connect to the database

  const browser = await puppeteer.launch({ headless: false });
  const [page] = await browser.pages();

  // call a handler when a mutation happens
  async function mutationListener(addedText) {
    console.log(`Added text: ${addedText}`);
    // insert the data into the database
    await client.query('INSERT INTO users(text) VALUES($1)', [addedText]);
  }
  await page.exposeFunction('mutationListener', mutationListener);

  await page.goto('http://...');
  await page.waitForSelector('.msg-nickname');
  await page.evaluate(() => {
    // wait for any mutations inside a specific element (e.g. the chatbox)
    const observerTarget = document.querySelector('ELEMENT-TO-MONITOR');
    const mutationObserver = new MutationObserver((mutationsList) => {
      // handle the change by checking which elements were added and which were removed
      for (const mutation of mutationsList) {
        const { removedNodes, addedNodes } = mutation;
        // example: pass the innerText of the first added element to our mutationListener
        mutationListener(addedNodes[0].innerText);
      }
    });
    mutationObserver.observe( // start the observer
      observerTarget,
      { childList: true }, // wait for child nodes to be added/removed
    );
  });
})();

How to crawl javascript (vuejs, reactjs) web site with nodejs

I was trying to crawl a Vue.js front-end web site, but when I crawl it the content doesn't load into cheerio; all I get is a blank web page. My code is as follows:
const request = require('request');

getSiteContentAsJs = (url) => {
  return new Promise((resolve, reject) => {
    let j = request.jar();
    request.get({ url: url, jar: j }, function(err, response, body) {
      if (err)
        return resolve({ body: null, jar: j, error: err });
      return resolve({ body: body, jar: j, error: null });
    });
  });
};
I got my content as follows
const { body, jar, error } = await getSiteContentAsJs(url);
// I passed body to cheerio to get the js object out of the web content
const $ = cheerio.load(body);
But nothing was rendered, just a blank web page with no content in it.
I found that cheerio doesn't run JavaScript. Since this web site is based on a Vue front end, I needed a headless browser that actually runs the JS and renders the output.
So instead of using request, I used phantom to render the JS pages.
const phantom = require('phantom');
const cheerio = require('cheerio');

loadJsSite = async (url) => {
  const instance = await phantom.create();
  const page = await instance.createPage();
  await page.on('onResourceRequested', function(requestData) {
    console.info('Requesting', requestData.url);
  });

  const status = await page.open(url);
  const content = await page.property('content');
  // console.log(content);
  // let $ = cheerio.load(content);

  await instance.exit();
  return { $: cheerio.load(content), content: content };
};
Now I can get the rendered page like below:
const { $, content } = await loadJsSite(url);
// I can query like this
// get the body
$('body').html();
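Since PhantomJS is no longer actively maintained, an equivalent sketch with Puppeteer (which several other snippets on this page already use) could look like this; the function name is made up, and the URL is whatever page you want to render:
const puppeteer = require('puppeteer');
const cheerio = require('cheerio');

// Hypothetical alternative to loadJsSite: render the SPA with Puppeteer, then parse with cheerio.
const loadJsSiteWithPuppeteer = async (url) => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'networkidle0' }); // wait until the JS app has finished loading
    const content = await page.content(); // fully rendered HTML
    return { $: cheerio.load(content), content: content };
  } finally {
    await browser.close();
  }
};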

nodejs async/await nested API progress

I have an API that searches for the user-provided term, returns an array of results, then fires off async requests for each of the results and gets results for each of these second batch of requests. I'd like the API to report progress as it happens rather than just the final result. So, if I do the following request, I should get updates like so
$ curl 'http://server/?q=foobar'
searching for ${q}…
found 76… now getting images…
found 30 images… done
{
result
}
Most of the relevant code is shown below. FWIW, I am using hapi.js for my application.
let imagesOfRecords = {};

const getImages = async function (q) {
  console.log(`searching for ${q}…`);
  const uri = `http://remoteserver/?q=${q}`;
  const { res, payload } = await Wreck.get(uri);
  const result = JSON.parse(payload.toString()).hits;
  const numOfFoundRecords = result.total;
  if (result.total) {
    console.log(`found ${result.total}… now getting images…`);
    const foundRecords = result.hits.map(getBuckets);
    Promise.all(foundRecords).then(function() {
      console.log(`found ${Object.keys(imagesOfRecords).length} images… done`);
      reply(imagesOfRecords).headers = res.headers;
    }).catch(error => {
      console.log(error);
    });
  }
  else {
    console.log('nothing found');
    reply(0).headers = res.headers;
  }
};

const getBuckets = async function(record) {
  const { res, payload } = await Wreck.get(record.links.self);
  const bucket = JSON.parse(payload.toString()).links.bucket;
  await getImageFiles(bucket, record.links.self);
};

const getImageFiles = async function(uri, record) {
  const { res, payload } = await Wreck.get(uri);
  const contents = JSON.parse(payload.toString()).contents;
  imagesOfRecords[record] = contents.map(function(el) {
    return el.links.self;
  });
};
Once I can implement this, my next task would be to implement this progressive update in a web app that uses the above API.
To report results at each step of your backend requests you can use an EventEmitter, which emits an event at each progress step. You can read about events here.
Simple implementation:
const events = require('events');
const eventEmitter = new events.EventEmitter();

// your request code
Promise.all(foundRecords).then(function() {
  console.log(`found ${Object.keys(imagesOfRecords).length} images… done`);
  eventEmitter.emit('progress');
  reply(imagesOfRecords).headers = res.headers;
});

const eventReaction = (e) => {
  // do something with the event, console.log for example
};
eventEmitter.on('progress', eventReaction);
You can find more examples here and here.
To show events to the client you can use the socket.io library. I think you can find a pretty straightforward explanation of how socket.io works in its documentation.
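A minimal sketch of forwarding the 'progress' events from the EventEmitter above to browsers with socket.io; the plain HTTP server wiring and the port are assumptions, since the question uses hapi:
const http = require('http');
const events = require('events');

const eventEmitter = new events.EventEmitter();
const httpServer = http.createServer();
const io = require('socket.io')(httpServer); // attach socket.io to the HTTP server

// Forward every progress step (optionally with a payload) to all connected clients;
// in the browser: socket.on('progress', msg => ...).
eventEmitter.on('progress', (message) => {
  io.emit('progress', message);
});

httpServer.listen(8081);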
If you want to send events between servers or processes and go a little further, you can read more about ØMQ (ZeroMQ) and its Node implementation.
