Handling playlist with youtube-sr gone wrong - node.js

Okay, so I need to handle a YouTube playlist link because I need the video id for each of the videos. But, as always, something goes terribly wrong: the built-in function called getPlaylist is trying to parse the playlist details to JSON, but its filtering code isn't quite right and it tries to parse this
I only want the JSON object that is colored blue, not the gibberish in red. How can I do it?
// Excerpt from youtube-sr: scrapes playlist data out of a raw YouTube HTML page.
// `html` is the page markup; `limit` caps the number of videos (defaults to 100).
static getPlaylist(html, limit = 100) {
// Normalize limit: anything falsy, non-numeric, or <= 0 falls back to 100.
if (!limit || typeof limit !== "number") limit = 100;
if (limit <= 0) limit = 100;
const videos = [];
let parsed;
let playlistDetails;
try {
// Slice the video list out of the page between two known JSON markers,
// then re-append "}]" to close the array that the second split chopped off.
const rawJSON = `${html.split("{\"playlistVideoListRenderer\":{\"contents\":")[1].split("}],\"playlistId\"")[0]}}]`;
parsed = JSON.parse(rawJSON);
let log = html.split("{\"playlistSidebarRenderer\":")[1].split("\n")[0].slice(0, -3)
console.log(log);
// This is the part where it crashes because it tries to convert the JSON and the gibberish:
// the sidebar slice is terminated only by "\n" + slice(0, -3), which does not
// stop at the end of the JSON object, so trailing page markup leaks into JSON.parse.
playlistDetails = JSON.parse(html.split("{\"playlistSidebarRenderer\":")[1].split("\n")[0].slice(0, -3)).items;
} catch (e) {
// Any parse/shape failure makes the whole playlist lookup return null.
return null;
}
These are a few lines of code that I need to work, how can I filter out the gibberish?

I got it working thanks to #Cannicide.
I just added a split with the starting data of the gibberish
playlistDetails = JSON.parse(html.split("{\"playlistSidebarRenderer\":")[1].split("\n")[0].slice(0, -3).split("}};</script><link rel=\"alternate\" media=\"handheld\" href=\"https://m.youtube.com/playlist?list=")[0]).items;

Related

Test random URLs from spreadsheet using alasql

I have a large number of URLs within a xlsx file. What I'd like to do is randomly select some of these URLs, load them, then check that they return a status code of 200.
So I'm using the npm alasql package to do this.
At the moment, the following code successfully loads the first 5 URLs in the spreadsheet, checks that they 200, then finishes the test.
var alasql = require('alasql');
var axios = require('axios');
module.exports = {
'#tags': ['smokeTest'],
'Site map XML pages load': async (browser) => {
const result = await alasql.promise('select URL from xlsx("./testUrls.xlsx",{sheetid:"RandomUrls"})');
var xcelData = result.map(item => {
return item.URL;
});
async function siteMapUrlsTestArr(item) {
var response = await axios.get(browser.launch_url + item);
browser.verify.equal(response.status, 200);
console.log('Sitemap test URL =', (browser.launch_url + item));
}
for (let index = 0; index < xcelData.length; index++) {
if (index < 5) {
const xmlTestUrl = xcelData[index];
await siteMapUrlsTestArr(xmlTestUrl);
} else {}
}
},
'Closing the browser': function (browser) {
browser.browserEnd();
},
};
However, what I'd like to do is randomly select 5 URLs from the (large) list of URLs in the spreadsheet, rather than the first 5 URLs.
I appreciate that this will (probably) include using the Math.floor(Math.random() command, but I can't seem to get it to work no matter where I place this command.
Any help would be greatly appreciated. Thanks.
Your logic is flawed. Here's how.
You want to select 5 random URLs from a list and then perform the operation on those items, but what you're doing is getting all the items and running the operation, using a loop, on the first five.
To correct it:
//Fixed to five as you only want to test 5 URLs.
for (let index = 0; index < 5; index++) {
//Selecting a Random item from the list using Math.random();
const xmlTestUrl = xcelData[Math.floor(Math.random() * xcelData.length)];
//Performing the HTTP response operation on it.
await siteMapUrlsTestArr(xmlTestUrl);
}
The aforementioned solution will select a random item in each loop and perform the HTTP response operation on it. The items will be randomly selected using Math.random().

Replay a log file with NodeJS as if it were happening in real-time

I have a log file with about 14.000 aircraft position datapoints captured from a system called Flarm, it looks like this:
{"addr":"A","time":1531919658.578100,"dist":902.98,"alt":385,"vs":-8}
{"addr":"A","time":1531919658.987861,"dist":914.47,"alt":384,"vs":-7}
{"addr":"A","time":1531919660.217471,"dist":925.26,"alt":383,"vs":-7}
{"addr":"A","time":1531919660.623466,"dist":925.26,"alt":383,"vs":-7}
What I need to do is find a way to 'play' this file back in real-time (as if it were occurring right now, even though it's pre-recorded), and emit an event whenever a log entry 'occurs'. The file is not being added to; it's pre-recorded, and the playing back would occur at a later stage.
The reason for doing this is that I don't have access to the receiving equipment when I'm developing.
The only way I can think to do it is to set a timeout for every log entry, but that doesn't seem like the right way to do it. Also, this process would have to scale to longer recordings (this one was only an hour long).
Are there other ways of doing this?
If you want to "play them back" with the actual time difference, a setTimeout is pretty much what you have to do.
// Emits `entry`, then schedules the following entry after the recorded
// time difference between the two.
const processEntry = (entry, index) => {
  const nextIndex = index + 1;
  const nextEntry = getEntry(nextIndex);
  // No successor: stop the playback chain here.
  // NOTE(review): this also means the final entry itself is never emitted.
  if (nextEntry == null) return;
  emitEntryEvent(entry);
  const delay = nextEntry.time - entry.time;
  setTimeout(processEntry, delay, nextEntry, nextIndex);
};
processEntry(getEntry(0), 0);
This emits the current entry and then sets a timeout based on the difference until the next entry.
getEntry could either fetch lines from a prefilled array or fetch lines individually based on the index. In the latter case, only two lines of data would be in memory at the same time.
Got it working in the end! setTimeout turned out to be the answer, and combined with the input of Lucas S. this is what I ended up with:
// Replays a pre-recorded Flarm log file as if it were live: each JSON line
// is emitted as a 'data' event, spaced by the original time difference
// between consecutive log entries.
const EventEmitter = require('events');
const fs = require('fs');
const readable = fs.createReadStream("./data/2018-07-18_1509log.json", {
encoding: 'utf8',
fd: null
});
// Reads characters off the stream one at a time until a newline, then parses
// the accumulated line as JSON. Returns false when the stream is exhausted
// (read() yields null before a newline is seen).
function read_next_line() {
var chunk;
var line = '';
// While this is a thing we can do, assign chunk
while ((chunk = readable.read(1)) !== null) {
// If chunk is a newline character, return the line
if (chunk === '\n'){
return JSON.parse(line);
} else {
line += chunk;
}
}
return false;
}
var lines = [];
var nextline;
// Emits the oldest buffered entry, then re-schedules itself after the
// recorded gap to the next one. Keeps at most two entries in memory.
const processEntry = () => {
// If lines is empty, read a line
if (lines.length === 0) lines.push(read_next_line());
// Quit here if we've reached the last line
if ((nextline = read_next_line()) == false) return true;
// Else push the just read line into our array
lines.push(nextline);
// Get the time difference in milliseconds
// (entry `time` values are in seconds — see the *1000 — setTimeout wants ms)
var delay = Number(lines[1].time - lines[0].time) * 1000;
// Remove the first line
lines.shift();
// Safe to reference module.exports here: processEntry only ever runs via
// start(), which is attached after module.exports is assigned below.
module.exports.emit('data', lines[0]);
// Repeat after the calculated delay
setTimeout(processEntry, delay);
}
var ready_to_start = false;
// When the stream becomes readable, allow starting
readable.on('readable', function() {
ready_to_start = true;
});
module.exports = new EventEmitter;
// Kicks off playback; returns false if the stream isn't readable yet.
module.exports.start = function() {
if (ready_to_start) processEntry();
if (!ready_to_start) return false;
}
Assuming you want to visualize the flight logs, you can use fs watch as below, to watch the log file for changes:
// Watch a file and log every change event. Depending on the platform,
// fs.watch may or may not supply the changed filename to the listener.
fs.watch('somefile', (event, filename) => {
  console.log('event is: ' + event);
  if (!filename) {
    console.log('filename not provided');
  } else {
    console.log('filename provided: ' + filename);
  }
});
Code excerpt is from here. For more information on fs.watch() check out here
Then, for seamless update on frontend, you can setup a Websocket to your server where you watch the log file and send newly added row via that socket to frontend.
After you get the data in frontend you can visualize it there. While I haven't done any flight visualization project before, I've used D3js to visualize other stuff (sound, numerical data, metric analysis and etc.) couple of times and it did the job every time.

PDF.js - split pdf into pages and re-build multiple files

I am currently working on a Node.js project. One of the actions required is to read the text of a pdf document and then split the document into separate files.
As I have been using pdf.js for all other pdf parsing in this project, I was hoping to complete the above requirement using it as well.
Reading the PDF and its text content is relatively straightforward.
For example -
// Counts the words in a PDF document using PDF.js.
// @param {string} pdfUrl - URL/path of the PDF to load via PDFJS.getDocument.
// @returns {Promise<number>} resolves with the total word count across all pages.
function GetWords(pdfUrl){
    var pdf = PDFJS.getDocument(pdfUrl);
    return pdf.then(function(pdf) { // calculate total count for document
        var maxPages = pdf.pdfInfo.numPages;
        var countPromises = []; // one promise per page, each resolving to that page's word count
        for (var j = 1; j <= maxPages; j++) {
            var page = pdf.getPage(j);
            countPromises.push(page.then(function(page) { // add page promise
                return page.getTextContent().then(function(textContent) {
                    // BUG FIX: `txt` was previously a function-scoped `var`
                    // declared in the loop and shared by every page callback,
                    // so each page's count included all previously processed
                    // pages. Keep it local to this page instead.
                    var txt = "";
                    for (var i = 0; i < textContent.items.length; i++) {
                        // Drop characters outside the basic ASCII set before counting.
                        txt += textContent.items[i].str.replace(/[^a-zA-Z0-9:;,.?!-() ]/g, '');
                    }
                    return txt.split(" ").length; // value for page words
                });
            }));
        }
        // BUG FIX: the original left the summation commented out
        // (`//counts.forEach(...)`), so the function always resolved to 0.
        return Promise.all(countPromises).then(function (counts) {
            return counts.reduce(function (sum, c) { return sum + c; }, 0);
        });
    });
}
However, I can't seem to find any examples of building a PDF from one / or more of its pages. Ideally, I would want to use the pdf.GetPage(j) to get an array of the pages required. Then push these into a new document and save this new document to disk.
Any help would be appreciated.
I ended up using a separate library to perform the splitting. http://pdfhummus.com/. So in combination with the PDF.js i was able to get the desired result.

Using socketio-file-upload to upload multiple files

I'm using NodeJS with socket.io and socketio-file-upload to upload multiple files, and it works great! However, I'm having an issue where I'm trying to get the name attribute of the input these files come from, so I can save them into my DB.
When I upload 1 or more files, I can't seem to access the input field name or something that shows me which of the files come from which input field.
Here is my front:
// Client side: wire up socketio-file-upload so that clicking the
// "save_level_3" button submits the files from all five inputs at once.
var uploader = new SocketIOFileUpload(socket);
// The five <input type="file"> elements for the level-3 documents.
var array_files_lvl_3 = [
document.getElementById("l3_id_front"),
document.getElementById("l3_id_back"),
document.getElementById("l3_address_proof_1"),
document.getElementById("l3_address_proof_2"),
document.getElementById("l3_passport")
];
uploader.listenOnArraySubmit(document.getElementById("save_level_3"), array_files_lvl_3);
And here is my back:
// Server side: receive the uploaded files over the socket and store
// them under uploads/userL3, logging each 'saved' event.
var uploader = new siofu();
uploader.dir = "uploads/userL3";
uploader.listen(socket);
uploader.on('saved', function(evnt){
console.log(evnt);
//this "event" variable has a lot of information
//but none of it tells me the input name where it came from.
});
This is what the "evnt" variable holds:
Unfortunately the library doesn't send that information, so there is no existing configuration option you can use — it needs a code modification.
client.js:374
// socketio-file-upload client.js:374 — handler for a file input's change
// event (or a drop event); collects the selected files.
var _fileSelectCallback = function (event) {
var files = event.target.files || event.dataTransfer.files;
event.preventDefault();
// Modification: capture the originating element so its id/name can be
// attached to each file's meta in _baseFileSelectCallback below.
var source = event.target;
_baseFileSelectCallback(files, source);
client.js:343
// socketio-file-upload client.js:343 — common path for all selected files;
// `source` (the originating DOM element) is the newly added parameter.
var _baseFileSelectCallback = function (files, source) {
if (files.length === 0) return;
// Ensure existence of meta property on each file
for (var i = 0; i < files.length; i++) {
if (source) {
// Record which element the file came from; falls back to "" when
// the element has no id/name attribute.
if (!files[i].meta) files[i].meta = {
sourceElementId: source.id || "",
sourceElementName: source.name || ""
};
} else {
if (!files[i].meta) files[i].meta = {};
}
}
After these changes I am able to get the details in event.file.meta
I'm the author of socketio-file-upload.
It looks like the specific input field is not currently being recorded, but this would not be a hard feature to add. Someone opened a new issue and left a backpointer to this SO question.
A workaround would be to directly use submitFiles instead of listenOnArraySubmit. Something like this might work (untested):
// add a manual listener on your submit button
// Manually submit each input's files instead of using listenOnArraySubmit,
// tagging every file with the index of the input element it came from
// (readable later on the server via the file's meta).
document.getElementById("save_level_3").addEventListener("click", () => {
  array_files_lvl_3.forEach((element, index) => {
    const files = element.files;
    for (const file of files) {
      file.meta = { index };
    }
    uploader.submitFiles(files);
  });
});

Splitting url parameter node.js?

I am having the following url parameter
sample value actual value contains so many parameters
var data = "browserName=MSIE&cookies=Rgn=%7CCode%3DMUMBAI%7Ctext%3DMumbai%7C; NREUM=s=1376355292394&r=220970&p=2080092;cs_uuid=209712058942163; cs_si=209712058942163.1&javascriptEnabled=true";
Following function is used to get the particular parameter
//Generic Function to get particular parameter
getParameterValue : function(data, parameter) {
var value = null;
if (data.length > 0) {
var paramArray = data.split("&");
for ( var i = 0; len = paramArray.length, i < len; i++) {
var param = paramArray[i];
var splitter = parameter + "=";
if (param.indexOf(splitter) > -1) {
value = param.substring(param.indexOf(splitter)
+ splitter.length, param.length);
break;
}
}
}
return value;
}
Example
getParameterValue(data, "browserName");
output is MSIE //correct
Problem is
getParameterValue(data, "cookies");
Output is
Rgn=%7CCode%3DMUMBAI%7Ctext%3DMumbai%7C; NREUM=s=1376355292394
But required output is
Rgn=%7CCode%3DMUMBAI%7Ctext%3DMumbai%7C; NREUM=s=1376355292394&r=220970&p=2080092;cs_uuid=209712058942163; cs_si=209712058942163.1
To Know :
1.URL parameter is encoded(clientside) while sending to node server and decoded.
2.NREUM is not encoded, So getParameterValue method splits upto 1376355292394.
Any help to improve getParameterValue function.
Ready to explain more.
Well, getParameterValue() does seem to be parsing data correctly, but data is not encoded properly to distinguish between delimiters and value characters.
The value of cookies, for example:
cookies=Rgn=%7CCode%3DMUMBAI%7Ctext%3DMumbai%7C; NREUM=s=1376355292394&r=220970&p=2080092;cs_uuid=209712058942163; cs_si=209712058942163.1
Should itself be encoded (on top of any encoding used for the cookie values themselves).
cookies=Rgn%3D%257CCode%253DMUMBAI%257Ctext%253DMumbai%257C%3B%20NREUM%3Ds%3D1376355292394%26r%3D220970%26p%3D2080092%3Bcs_uuid%3D209712058942163%3B%20cs_si%3D209712058942163.1
And, it's rather late to try to "fix" this server-side because of the ambiguity. It'll need to be encoded client-side, which can be done with encodeURIComponent().
'cookies=' + encodeURIComponent(cookies)
If you happen to be using jQuery, you can use jQuery.param() to help ensure proper encoding.
var data = {
browserName: 'MSIE',
cookies: document.cookie,
javascriptEnabled: true
};
data = $.param(data); // browserName=MSIE&cookies=Rgn%3D%257CCode...
Or, you can use a light-weight recreation, at least for the "traditional" format.
/**
 * Serializes a flat object into an application/x-www-form-urlencoded
 * query string (the "traditional" format): each key and value is
 * percent-encoded and pairs are joined with "&".
 *
 * @param {Object} params - flat map of parameter names to values
 * @returns {string} e.g. {a: "1 2"} -> "a=1%202"
 */
function urlParam(params) {
    var res = [];
    for (var name in params) {
        // BUG FIX: skip inherited enumerable properties, which the original
        // unguarded for...in would also have serialized.
        if (Object.prototype.hasOwnProperty.call(params, name)) {
            res.push(encodeURIComponent(name) + '=' + encodeURIComponent(params[name]));
        }
    }
    return res.join('&');
}
var data = urlParam(data);
Example: http://jsfiddle.net/GQpTB/
Also, are you aware that Node.js has querystring.parse()?
// with: var qs = require('querystring');
var query = qs.parse(data);
console.log(query.cookies); // ...

Resources