I just tried to make this script:
// ==UserScript==
// #name Link Bypasser Script
// #include *http://onion.com/*
// #run-at document-start
// ==/UserScript==
var oldUrl = window.location;
var newURL = "http://localhost:8887/processor.php?link=" + oldUrl;
window.location.replace (newURL);
I "inlcude" more urls, but I don't think that's relevant to show, anyway, this script I thought would turn, for example, http://onion.com/4GUTDTA into http://localhost:8887/processor.php?link=http://onion.com/4GUTDTA, but nothing happens:
Before this I tried:
var oldUrlPath = window.location.path;
with:
+ "http://localhost:8887/processor.php?link=" + oldUrlPath
Which did modify it, however, but it only added that link before the path, which makes sense, because I used window.location.path. I also tried to use
var oldUrl = window.location.href;
which did nothing.
var oldUrl = window.location.href;
var newURL = "http://localhost:8887/processor.php?link=" + oldUrl
window.location.replace (newURL);
You should encode a string with encodeURIComponent before you append it to a URL:
var oldUrl = window.location.href;
var newURL = "http://localhost:8887/processor.php?link="
+ encodeURIComponent(oldUrl);
window.location.replace (newURL);
Related
I trying to convert pdf file to pdfa3 file by using PDFTron.
I added current url_path.
the my code below:
var input_url = './utils/';
var input_filename = 'test.pdf';
var output_filename = 'test_pdfa.pdf';
var convert = true;
var pwd = '';
var exceptions;
var max_ref_objs = 10;
var url_input = input_url + input_filename;
console.log('Converting input document: ' + input_filename);
var pdfa = await PDFNet.PDFACompliance.createFromUrl(true, url_input, '', PDFNet.PDFACompliance.Conformance.e_Level2B, exceptions, max_ref_objs);
get error:
'NetworkError(Unsupported protocol ${this._url.protocol})',
Does anyone know what the problem is,
And why doesn't it recognize the location?
I changed the code to :
here.
Now it's working!!
Using gulp 3.9.1
I am attempting to return a bunch of files and perform a task that requires a var to be passed between two pipes.
I'm using node uuid to create a v3 UUID for each file path to
ultimately end up with a uuid for each page. I'm grabbing the file path with gulp-print.
I want to store that uuid value as a var. In the next pipe Im using
gulp-inject-string to write it into the page during the build.
Help: Either I need help getting the file path inside the gulp-inject-string pipe or I need to pass the var between the two different pipes. If I globally set a var with a default value outside the src it gets passed easily to the pipe(inject).
Super simplified code below:
// test code
var gulp = require('gulp');
var print = require('gulp-print');
var inject = require('gulp-inject-string');
var reload = browserSync.reload;
const uuidv3 = require('uuid/v3');
var uuid;
gulp.task('uuid', function() {
return gulp.src('**/*.html'])
// create uuid
.pipe(print(function(filepath) {
uuid = uuidv3(filepath, uuidv3.URL);
return "compiled: " + filepath + ' uuid: ' + uuid;
}))
// need to to add UUIDv3 to each page
.pipe(inject.before('</head>', '<meta name="dc.identifier" content="' + uuid + '">'))
.pipe(gulp.dest('/prod/./'))
.pipe(reload({ stream: true }));
});
It's worth noting that I need a cross platform way to get the file path starting in the root of the project and including forward slashes. The gulp(print) does this perfectly starting at the root of the project and ignoring anything upstream from that point. The format of the path is important because it's one half of the equation in creating the uuid and the uuid's must match on Mac or PC platforms.
examples:
/index.html
/dir1/file.html
/dir1/dir2/dir3/file.html
var gulp = require('gulp');
var print = require('gulp-print');
var inject = require('gulp-inject-string');
const uuidv3 = require('uuid/v3');
var tap = require('gulp-tap');
// you can declare here
var uuid;
gulp.task('pages', function() {
// or you can declare here
var uuid;
return gulp.src('**/*.html')
// bunch of stuff happens here involving templating/minifying
// create uuid
.pipe(print(function(filepath) {
// then set it here and use it further below
// it will be available
uuid = uuidv3(filepath, uuidv3.URL);
return "compiled: " + filepath + ' uuid: ' + uuid;
}))
// need to to add UUIDv3 to each page
//.pipe(inject.before('</head>', '<meta name="dc.identifier" content="' + uuid + '">\n'))
.pipe(tap(function(file, t) {
return t.through(inject.before('</head>', '<meta name="dc.identifier" content="' + uuid + '">\n');
})
.pipe(gulp.dest('/prod/./'))
.pipe(reload({stream:true}));
});
You are just creating a variable at a higher scope that you can set and refer to later. If you need a bunch of them create an array with filepath as an index. But I would try it first as just a simple value.
I solved the problem. It was an amateur mistake. I returned the statement where the var was set so the var was essentially killed. Updated code that allows the var to pass through the pipes.
var gulp = require('gulp');
var print = require('gulp-print');
var replace = require('gulp-replace');
const uuidv3 = require('uuid/v3');
var uuid;
gulp.task('build', function() {
return gulp.src('**/*.html')
// get a cross-platform filepath and create a uuid
.pipe(print(function(filepath) {
uuid = uuidv3(filepath, uuidv3.URL);
}))
// inject uuid
.pipe(replace('dc.identifier" content=""', function() {
return 'dc.identifier" content="' + uuid + '"';
}))
.pipe(gulp.dest('/prod/./'));
});
The var uuid passes through the pipes just fine now. This code creates a UUID based on a cross-platform file path and injects it into an empty dc.identifier meta tag.
I have a for loop in my nodejs code
const saveDocument = co.wrap(function *(documentData, user, locale) {
var now = moment();
var creationDateLongString = now.format("YYYYMMDDHHmmss");
var creationDateShortString = now.format("YYYYMMDD");
var outputChildFolder = documentData.code + '_' + creationDateLongString + '_' + documentCounter;
var outputFolder = config.files.incomingDocumentsDir + '/' + outputChildFolder;
++documentCounter;
yield fs.mkdir(outputFolder)
var xmlFileName = documentData.code + "-" + creationDateLongString + ".xml";
var pdfFileName = documentData.code + "-" + creationDateLongString + ".pdf";
const pages = [];
for(var index=0;index < documentData.pages.length; ++index) {
const page = documentData.pages[index];
var data = new Buffer(page, "base64");
var dataEncoding = imageType(data).mime === "image/png" ? "png" : "jpg";
var fileName = "page" + index + "." + dataEncoding;
var targetFilePath = outputFolder + "/" + fileName
yield fs.writeFile(targetFilePath,data);
pages.push(fileName);
}
...
}
What I don't understand is why in the above code page only gets assigned once, on the first iteration, and holds that same value during the other iterations. So if I have 5 pages I end up 5 times with the data of the first page in that variable.
I am running node 4 without any special arguments or postprocessor. Simply npm run ... which maps to a node src/main/myApp.js in my package.json
I am probably missing something simple here but I've never seen this before when doing client side ES6 code. The big difference of course being that the client side code goes through Babel + Webpack and the server side code is ran directly through node.
Small addendum: if you are wondering why the "old school" for syntax and not something along the lines of pages.forEach(...., it's because this is existing code where I just did a few minor modifications.
This will work as you are expecting in strict mode. Try adding...
"use strict";
You will only see this behavior in environments (like Node) that actually respect and enforce the keyword. Babel simply converts all let and const to var right now to provide ES5 compatibility. To demonstrate, take a look at this Babel example. You can see in the output that const has been changed to var
So I tested my scraping on a static HTML file before adding it to my Node app.
The problem is that it's not returning all the rows.
On the site:
$('#sport tr').length
//Returns 13
In Cheerio:
$('#sport tr').length
//Returns 2
I'm stumped, here is the code I'm using. I've contained the URL as proof, so you can visit it yourself if you wish.
I'm suspecting it's something to do with var $ = cheerio.load(html); however I'm not experienced in Cheerio to say outright that's the problem.
var express = require('express');
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
var app = express();
app.get('/scrape', function(req, res){
var url = 'http://www.olbg.com/football.php';
var json = [];
request(url, function(error, response, html){
if(!error){
var $ = cheerio.load(html);
console.log($('#sport tr').length);
var headers = [];
$('#sport tr th').each(function(i, th) {
var text = $(th).text();
if (text.trim() !== "") {
headers[i] = text.replace(/[\t\n\r\s]/mgi, '');
}
});
$('#sport tr').each(function(i, tr) {
// skip if header
if (!$(tr).is('th')) {
var temp = {};
temp["Event"] = $(tr).find('td').eq(0).text().trim();
temp["TopSelection"] = $(tr).find('td').eq(1).text().trim();
temp["BookieOdds"] = $(tr).find('td').eq(2).text().trim();
temp["OLBGRating"] = $(tr).find('td').eq(3).find('img').length;
if (temp["Event"] !== "" || temp["TopSelection"] !== ""){
json.push(temp);
}
}
});
}
// To write to the system we will use the built in 'fs' library.
// In this example we will pass 3 parameters to the writeFile function
// Parameter 1 : output.json - this is what the created filename will be called
// Parameter 2 : JSON.stringify(json, null, 4) - the data to write, here we do an extra step by calling JSON.stringify to make our JSON easier to read
// Parameter 3 : callback function - a callback function to let us know the status of our function
fs.writeFile('output.json', JSON.stringify(json), function(err){
console.log('File successfully written!');
})
// Finally, we'll just send out a message to the browser reminding you that this app does not have a UI.
res.send(json);
});
});
app.listen("8081");
console.log("Magic happens on port 8081");
exports = module.exports = app;
The reason that you're not getting the expected result is because the (table) html on that page is mangled. If you look at the second <td> in the second <tr> of the table#sport, you'll see an "extra" </td>. This causes the <td> that the table#sport is inside to close (and an implicit closing of table#sport) on some parsers because that is the closest open <td>. So that is why the parser reports only 2 <tr>s instead of 13. The other <tr>s you're expecting are now outside of table#sport.
Probably your best bet is to pass the html through an HTML tidying program/script (e.g. this one with the clean option enabled) first before passing it to cheerio. After that, your selector should return the elements you're probably expecting.
I'm using Node v0.10.11 on Ubuntu 12.04. I can't figure out what I'm missing to make streams of URLs work with the request module.
This program is trying to go to a mailing list site, find the download links for each month, then download the pages for each month.
Mikael's readme says "The first argument can be either an url or an options object. The only required option is URI, all others are optional.
uri || url - fully qualified uri or a parsed url object from url.parse()"
If I call url.parse(www.targeturl.com), I get
Error: options.uri is a required argument
If I don't use url.parse, I get
Error: Invalid URI "www.freelists.org/archive/si-list/06-2013"
(this link works perfectly fine in my browsers)
I've cut the code down to 42 lines. Any advice welcome
var request = require('request'),
url = require('url'),
stream = require('stream'),
cheerio = require('cheerio'), // a reduced jQuery style DOM library
Transform = require('stream').Transform
var DomStripStream = function(target) {
this.target = target;
stream.Transform.call(this,{objectMode: true});
}
DomStripStream.prototype = Object.create(
Transform.prototype, {constructor: {value: DomStripStream}}
)
DomStripStream.prototype.write = function () {
this._transform.apply(this, arguments);
};
DomStripStream.prototype.end = function () {
this._transform.apply(this, arguments);
this.emit("end");
};
DomStripStream.prototype._transform = function(chunk, encoding, callback) {
chunk = chunk ? chunk.toString() : "";
$ = cheerio.load(chunk);
domLinks = $(this.target);
$(domLinks).each(function (i, link) {
currLink = 'www.freelists.org' + $(link).attr('href')
// currLink = url.parse(currLink)
request(currLink, function (error, response, body) {
console.log(error);
})
});
}
var fs = require("fs"),
output = fs.createWriteStream("out.txt"),
mainPage = new DomStripStream('td a')
request('http://www.freelists.org/archive/si-list').
pipe(mainPage).
pipe(output);
add http:// or https:// in the url