puppeteer - scrape all a->innerText - node.js

I have this part of a webpage, from which I want to scrape the href or innerText:
<span class="hash-tag text-truncate"><<test that i want to scrape>></span>
This is my code:
// NOTE(review): page.$$ appears to accept only a selector and to resolve to
// element handles (the logged result below is handle internals), so the
// callback passed here is presumably never invoked - confirm against the
// Puppeteer API documentation.
const nodeChildren = await page.$$('.hash-tag', (uiElement) => {
// The mapped array is not returned from this block-bodied arrow function.
uiElement.map((option) => option.innerText)
});
console.log(nodeChildren);
result is:
_page: Page {
eventsMap: Map(0) {},
emitter: [Object],
_closed: false,
_timeoutSettings: [TimeoutSettings],
_pageBindings: Map(0) {},
_javascriptEnabled: true,
_workers: Map(0) {},
_fileChooserInterceptors: Set(0) {},
_userDragInterceptionEnabled: false,
_handlerMap: [WeakMap],
_client: [CDPSession],
How can I do it?

try:
// Collect the text and href of every link found inside a .hash-tag element.
const textAndHrefs = await page.$$eval(".hash-tag a", (anchors) => {
  return anchors.map((anchor) => {
    const { innerText, href } = anchor;
    return { text: innerText, href };
  });
});

Try textContent instead of innerText, because innerText is buggy in Puppeteer:
// page.$$ only resolves to element handles and takes no callback, so the
// mapping has to run inside the page via $$eval, and its result must be
// returned for it to reach Node.
const nodeChildren = await page.$$eval('.hash-tag', (elements) =>
  elements.map((element) => element.textContent)
);
console.log(nodeChildren);

Related

Can't get values for a table located inside a frame through Puppeteer

Below is the error I get when I try to apply the 'table[0].$$eval' method (see code snippet below):
Failed to execute 'querySelectorAll' on 'Element': '# 297d0e3 > table > tbody > tr:nth-child(1)' is not a valid selector
const puppeteer = require('puppeteer')
/**
 * Signs in to the dealer site, follows the dashboard link, and logs the
 * element handle(s) of the table inside the Klipfolio iframe.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 * @throws {Error} If an expected element is missing or a Puppeteer call fails.
 */
const scrape = async () => {
  // Fixed pause used between steps while the page settles.
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: null,
    args: [
      '--disable-web-security',
      '--disable-features=IsolateOrigins,site-per-process',
    ],
  });

  try {
    const page = await browser.newPage();
    await page.goto('https://dealers.carwow.co.uk/dealers/sign_in');
    await page.type('#dealer_email', 'email');
    await page.type('#dealer_password', 'password');
    await page.click('#new_dealer > p > input');
    await pause(5000);

    // XPath tests attributes with "@id"; "#id" is CSS syntax and is not valid XPath.
    const [dashboardLink] = await page.$x('//*[@id="dealer-dashboard"]/div[3]/div/div/a');
    if (!dashboardLink) {
      throw new Error('Dashboard link not found');
    }
    await dashboardLink.click();
    await pause(5000);

    const [frameHandle] = await page.$x('//*[@id="klipfolio-iframe"]');
    if (!frameHandle) {
      throw new Error('Klipfolio iframe not found');
    }
    await pause(5000);

    const frame = await frameHandle.contentFrame();
    await frame.waitForXPath('//*[@id="0297d0e3"]/table');
    const table = await frame.$x('//*[@id="0297d0e3"]/table');
    console.log(table);
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
};
The above function returns an array containing an ElementHandle (below) rather than an element.
[
ElementHandle {
_disposed: false,
_context: ExecutionContext {
_client: [CDPSession],
_world: [DOMWorld],
_contextId: 17,
_contextName: ''
},
_client: CDPSession {
eventsMap: [Map],
emitter: [Object],
_callbacks: Map(0) {},
_connection: [Connection],
_targetType: 'page',
_sessionId: '326BCCF50B6BBE8CA175CB21AB46C382'
},
_remoteObject: {
type: 'object',
subtype: 'node',
className: 'HTMLTableElement',
description: 'table.layout-grid',
objectId: '3652992625290954585.17.4'
},
_page: Page {
eventsMap: Map(0) {},
emitter: [Object],
_closed: false,
_timeoutSettings: [TimeoutSettings],
_pageBindings: Map(0) {},
_javascriptEnabled: true,
_workers: Map(0) {},
_fileChooserInterceptors: Set(0) {},
_userDragInterceptionEnabled: false,
_client: [CDPSession],
_target: [Target],
_keyboard: [Keyboard],
_mouse: [Mouse],
_touchscreen: [Touchscreen],
_accessibility: [Accessibility],
_frameManager: [FrameManager],
_emulationManager: [EmulationManager],
_tracing: [Tracing],
_coverage: [Coverage],
_screenshotTaskQueue: [ScreenshotTaskQueue],
_viewport: null
},
_frameManager: FrameManager {
eventsMap: [Map],
emitter: [Object],
_frames: [Map],
_contextIdToContext: [Map],
_isolatedWorlds: [Set],
_client: [CDPSession],
_page: [Page],
_networkManager: [NetworkManager],
_timeoutSettings: [TimeoutSettings],
_mainFrame: [Frame]
}
}
]
I have tried iterating over the array and then applying the method (see below) to extract data from a table (see picture).
What exactly is an Element handle and how do I solve this issue?
// Runs in the page: for each row matched by the selector, collects the
// innerText of its <td> cells, producing an array of string arrays.
// NOTE: inside a JavaScript string literal the "\30" sequence is consumed as
// an escape by the JS parser, so the CSS escape never reaches
// querySelectorAll - the reported error shows the selector arriving as
// '# 297d0e3 > ...'. The backslash would have to be doubled for the CSS
// escape to survive.
table[0].$$eval('#\30 297d0e3 > table > tbody > tr:nth-child(1)', rows => {
return Array.from(rows, row => {
const columns = row.querySelectorAll('td');
return Array.from(columns, column => column.innerText);
});
});
If you need a selector with a number in its id, try this workaround:
table[0].$$eval('[id="the_number"] > table > tbody > tr:nth-child(1)', rows => {

Converting audio with sox in nodejs

I have made sure that all that sox needs to function is installed, and I've also installed the sox package for node. Yet, this does not seem to work.
/**
 * Transcodes an audio file to MP3 using sox.
 *
 * @param {string} file - Path of the source audio file.
 * @param {string} destination - Path the transcoded file is written to.
 * @param {number} quality - Multiplied by 1024 to form the `bitRate` option.
 * @returns {Promise<string>} Resolves with 'mp3 transcoded.' when the job
 *   emits 'end'; rejects with 'Could not transcode mp3.' when it emits 'error'.
 */
function encode(file, destination, quality) {
  return new Promise((resolve, reject) => {
    console.log(destination);
    const job = sox.transcode(file, destination, {
      sampleRate: 44100,
      format: 'mp3',
      bitRate: quality * 1024,
    });
    job.on('src', (info) => {
      console.log(info);
    });
    job.on('progress', (amountDone, amountTotal) => {
      console.log("progress", amountDone, amountTotal);
    });
    job.on('error', (err) => {
      // Surface the underlying failure instead of dropping it; the rejection
      // value stays the string existing callers already receive.
      console.error(err);
      reject('Could not transcode mp3.');
    });
    job.on('end', () => {
      resolve('mp3 transcoded.');
    });
    console.log(job);
    // The job does nothing until it is started; without this call no event
    // ever fires and the promise never settles.
    job.start();
  });
}
The console.log at the end logs:
Transcode {
domain: null,
_events:
{ src: [Function],
progress: [Function],
error: [Function],
end: [Function] },
_eventsCount: 4,
_maxListeners: undefined,
inputFile: 'C:\\Users\\User\\Documents\\App\\Media\\media\\uploads\\audio/2016/269/1/0//10c746ef62374c6ab1f2ecfc36705618/original.mp3',
outputFile: 'C:\\Users\\User\\Documents\\App\\Media\\media\\uploads\\audio/2016/269/1/0//10c746ef62374c6ab1f2ecfc36705618/128.mp3',
options:
{ sampleRate: 44100,
format: 'mp3',
bitRate: 196608,
channelCount: 2,
compressionQuality: 5 } }
There are no errors, so I wonder if something here is failing silently. Identifying a file works just fine:
// Check that the uploaded original really is an MP3.
const original = `${path}original.mp3`;
sox.identify(original, (err, info) => {
  if (err) {
    console.log(err);
    throw err;
  }
  if (info.format !== 'mp3') {
    // Throw an Error object rather than a bare string so the failure carries
    // a stack trace.
    throw new Error('File must be mp3!');
  }
  console.log(info); // {format: 'mp3', ..}
});
The above code works just fine, but sox.transcode does not.
The example code mentions a job.start() that (presumably) starts the transcoding job. I don't see that being called in your code.

How to get base64 of image in React Native

I'm using react native image picker and image resizer to pick and then resize an image. How do I get the base64 of the image once it has been resized?
ImagePickerManager.showImagePicker(imagepicker_options, (response) => {
ImageResizer.createResizedImage(response.uri, 550, null, 'JPEG', 100).then((resizedImageUri) => {
//get base64 of image
});
});
https://libraries.io/npm/react-native-asset-library-to-base64
// Pick an image, resize it, then read the resized file back as base64.
ImagePickerManager.showImagePicker(imagepicker_options, (response) => {
  ImageResizer.createResizedImage(response.uri, 550, null, 'JPEG', 100)
    .then((resizedImageUri) => {
      // get base64 of image; the resized URI is a link to asset-library://
      // Pass the URI the resizer produced - no `uri` variable exists in this scope.
      ReadImageData.readImage(resizedImageUri, (imageBase64) => {
        console.log(imageBase64);
      });
    })
    .catch((error) => {
      // Report a failed resize instead of leaving the rejection unhandled.
      console.error(error);
    });
});
you might also want to read this if you haven't https://github.com/facebook/react-native/issues/1158
Actually the imagepicker_options provides a way to resize.
You can pass maxWidth and maxHeight on options.
The code:
// Picker configuration: maxWidth is passed so the picker itself limits the
// photo width, replacing the separate resize step.
const imagepicker_options = {
  mediaType: 'photo',
  maxWidth: 550,
  storageOptions: { skipBackup: true, cameraRoll: false, path: 'images' },
};

ImagePickerManager.showImagePicker(imagepicker_options, function onPicked(result) {
  // The response's `data` field is read as the image's base64 string.
  const { data: base64 } = result;
});

How to fix encoding in x-ray (NodeJS scraping library) response?

The following script works perfectly on my NodeJS server, but occasionally it returns a response like the one below when I try to scrape some Cyrillic websites.
Script
x(url, {
name: 'title',
ogDescription: 'meta[property="og:description"]#content',
metaDescription: 'meta[name="description"]#content',
ogImage: 'meta[property="og:image"]#content',
twitterImage: 'meta[name="name="twitter:image:src""]#content',
metaImage: 'meta[name="image"]#content',
headImage: 'head img#src',
contentImage_1: '.content img#src',
contentImage_2: '.image img#src'
})
(function (err, obj) {
var firstData = {
name: [
obj.name
],
description: [
obj.metaDescription,
obj.ogDescription,
],
image: [
obj.ogImage,
obj.twitterImage,
obj.metaImage,
obj.headImage,
obj.contentImage_1,
obj.contentImage_2
]
}
Example of response with incorrect encoding
firstData { name: [ '(Rock, Pop) [15LP] [24/96] Queen - Studio Collection - 2015,
FLAC (tracks) :: RuTracker.org' ],
description:
[ 'RuTracker.org » ���������� ��� (����������� ���������) »
������� ������� (Rock, Pop) [15LP] [24/96] Queen -
Studio Collection - 2015, FLAC (tracks)',
undefined ],
image: [ undefined, undefined, undefined, undefined, undefined, undefined ] }
How do I fix this?
you can use request as x-ray's driver and iconv the body in it like this:
// Fetch pages through `request` as raw bytes and re-encode them from
// Windows-1251 to UTF-8 before x-ray parses them.
const { Iconv } = require('iconv');
const converter = new Iconv('Windows-1251', 'utf8');

// 'binary' keeps the body as a byte-per-character string so the original
// bytes can be rebuilt and converted below.
const request = require('request').defaults({ encoding: 'binary' });

/**
 * Converts a binary-string response body from Windows-1251 to a UTF-8 string.
 * Empty bodies are passed through untouched.
 */
const conv = (body) => {
  if (!body) return body;
  return converter.convert(Buffer.from(body, 'binary')).toString();
};

/** x-ray driver: downloads context.url and hands the re-encoded body to x-ray. */
const driver = (context, callback) => {
  request(context.url, (err, response, body) => {
    callback(err, err ? body : conv(body));
  });
};
x.driver(driver);

// x-ray reads an attribute with the "selector@attribute" form.
x(url, {
  name: 'title',
  ogDescription: 'meta[property="og:description"]@content',
  metaDescription: 'meta[name="description"]@content',
  ogImage: 'meta[property="og:image"]@content',
  // The attribute selector previously had a doubled name=" and never matched.
  twitterImage: 'meta[name="twitter:image:src"]@content',
  metaImage: 'meta[name="image"]@content',
  headImage: 'head img@src',
  contentImage_1: '.content img@src',
  contentImage_2: '.image img@src',
})((err, obj) => {
  if (err) {
    // Without this guard a failed request would crash on `obj.name`.
    console.error(err);
    return;
  }
  const firstData = {
    name: [obj.name],
    description: [obj.metaDescription, obj.ogDescription],
    image: [
      obj.ogImage,
      obj.twitterImage,
      obj.metaImage,
      obj.headImage,
      obj.contentImage_1,
      obj.contentImage_2,
    ],
  };
  console.log(firstData);
});

How to upload image in jQuery jTable

I successfully created an upload field in create mode in jQuery jTable as follows:
// jTable field definition that renders a custom file input for the form.
upload: {
title: 'Upload Image',
input: function (data) {
return '<input type="file" name="file">';
// NOTE: unreachable - the return above ends the function, so this
// submit-button markup is never part of the rendered field.
'<input type="submit" value="Submit" id="submitBtn" />';
},
},
I am able to successfully display the browse button and select files. I am unable to submit the form:
Error: newcourse.php' not found or unable to stat.
with the same file name in which the code is.
I am at a dead end. Where will the file be uploaded to? In the same directory?
How to do it in jTable ? Can it be done using ajax ? If so, how to proceed? jTable documentation is very poor.
I finally found a solution for uploading files with jTable.
Now with the version of jtable 2.4.0 (the most recent at the moment writing this post)
on your file.js add the following methods:
/**
 * Converts a URL-encoded query string (such as the serialized form data
 * handed to a jTable action) into a FormData object.
 *
 * @param {string} url - "key=value&key2=value2" pairs; "+" decodes to a space.
 * @returns {FormData} One entry per pair, with names and values decoded.
 */
function getVars(url)
{
    const formData = new FormData();
    // URLSearchParams is the standard form-urlencoded parser: it splits each
    // pair on the first "=" only, decodes both names and values, and accepts
    // empty input and pairs that have no "=".
    const query = url == null ? '' : String(url);
    for (const [name, value] of new URLSearchParams(query)) {
        formData.append(name, value);
    }
    return formData;
}
// Most recently selected files from the #input-image field.
let files;

// Grab the files and set them to our variable
function prepareUpload(event)
{
    files = event.target.files;
}

// Delegated binding on document, so the handler also fires for an
// #input-image element that is added to the page after this line runs.
// .on() replaces the deprecated .delegate(); note the (events, selector) order.
$(document).on('change', '#input-image', prepareUpload);
/**
 * Sends a jTable record, plus the selected image if there is one, as
 * multipart form data and reloads the table on success.
 *
 * @param {string} postData - Serialized form data supplied by jTable.
 * @returns {object} jQuery Deferred resolved with the server's JSON response,
 *   or rejected when the request fails.
 */
function submitCatalogForm(postData) {
    const formData = getVars(postData);
    const imageInput = $('#input-image').get(0);
    // Attach the file only when the input exists and a file was chosen.
    if (imageInput && imageInput.value !== "") {
        formData.append("userfile", imageInput.files[0]);
    }
    return $.Deferred(function ($dfd) {
        $.ajax({
            url: 'backoffice/catalogs/update',
            type: 'POST',
            dataType: 'json',
            data: formData,
            processData: false, // Don't let jQuery serialize the FormData
            contentType: false, // Don't let jQuery set a Content-Type header for the FormData body
            success: function (data) {
                $dfd.resolve(data);
                $('#table-container').jtable('load');
            },
            error: function () {
                $dfd.reject();
            }
        });
    });
}

//The actions for your table:
$('#table-container').jtable({
    actions: {
        listAction: 'backoffice/catalogs/display',
        deleteAction: 'backoffice/catalogs/delete',
        // NOTE(review): create posts to the same ".../update" URL as update,
        // as before - confirm this is the intended endpoint.
        createAction: submitCatalogForm,
        updateAction: submitCatalogForm
    },
    // Now for the fields (a sibling of `actions`, not nested inside it):
    fields: {
        id_catalog: {
            key: true,
            create: false,
            edit: false,
            list: false
        },
        thumb_url: {
            title: 'Image',
            type: 'file',
            create: false,
            edit: true,
            list: true,
            display: function (data) {
                return '<img src="' + data.record.thumb_url + '" width="150" height="207" class="preview">';
            },
            input: function (data) {
                return '<img src="' + data.record.thumb_url + '" width="150" height="207" class="preview">';
            },
            width: "150",
            listClass: "class-row-center"
        },
        image: {
            title: 'Select File',
            list: false,
            create: true,
            input: function (data) {
                // Returned directly instead of via an undeclared (global) `html` variable.
                return '<input type ="file" id="input-image" name="userfile" accept="image/*" />';
            }
        }
    }
});
Now, you are able to process the files on your server side.
That's all folks.
Hello shels
I lost a lot of time searching for a solution to this issue. I read many articles and found that this solution works fine for me:
actions: {
 listAction: 'modules_data.php?action=list',
deleteAction: 'modules_data.php?action=delete',
updateAction: 'modules_data.php?action=update',
createAction: 'modules_data.php?action=create'
},
...
image_file: {
title: 'Album cover',
list: true,
create: true,
edit: true,
input: function(data) {
return '<form target="iframeTarget" class="formUploadFile" action="file_upload.php" method="post" enctype="multipart/form-data"> <input type="file" onchange="this.form.submit()" name="myFile"/> </form> <iframe class="upload-iframe" style="display: none;" src="#" name="iframeTarget"></iframe>';
}
},
image: {
title: 'Album cover',
list: true,
create: true,
edit: true,
input: function(data) {
html = '<input type ="hidden" id="image" name="image" />';
return html;
}
},
....
How to capture submit response in a form created dynamically?
So you can capture submit event using:
...
formSubmitting: function(event, data) {
filename = $('input[type=file]').val().split('\\').pop();
($("#" + data.form.attr("id")).find('input[name="image"]').val(filename));
},
And save data to the server side script.
I think, it's a good solution and hope will be helpful.
Best Regards
Kameliya
console.log does not show the contents of a FormData object. Use this instead:
// Iterate the FormData and log each entry; a block-scoped binding keeps the
// loop variable from leaking into the enclosing scope.
for (const entry of formData) {
    console.log(entry);
}

Resources