Can't get values for a table located inside a frame through Puppeteer - node.js

Below is the error I get when I try to apply the 'table[0].$$eval' method (see code snipped below) :
Failed to execute 'querySelectorAll' on 'Element': '# 297d0e3 > table > tbody > tr:nth-child(1)' is not a valid selector
const puppeteer = require('puppeteer')
const scrape = async () => {
const browser = await puppeteer.launch({headless:false,defaultViewport: null,args: [
'--disable-web-security',
'--disable-features=IsolateOrigins,site-per-process'
]});
const page = await browser.newPage();
await page.goto('https://dealers.carwow.co.uk/dealers/sign_in')
await page.type('#dealer_email', 'email')
await page.type('#dealer_password', 'password')
await page.click('#new_dealer > p > input')
await new Promise(resolve => setTimeout(resolve, 5000));
let xpathArray = await page.$x('//*[#id="dealer-dashboard"]/div[3]/div/div/a')
await xpathArray[0].click()
await new Promise(resolve => setTimeout(resolve, 5000));
const frameHandle = await page.$x('//*[#id="klipfolio-iframe"]');
await new Promise(resolve => setTimeout(resolve, 5000));
const frame = await frameHandle[0].contentFrame();
await frame.waitForXPath('//*[#id="0297d0e3"]/table');
const table = await frame.$x('//*[#id="0297d0e3"]/table');
console.log(table)
browser.close()
};
The above function returns an array containing a ElementHandle (below) rather than an element.
[
ElementHandle {
_disposed: false,
_context: ExecutionContext {
_client: [CDPSession],
_world: [DOMWorld],
_contextId: 17,
_contextName: ''
},
_client: CDPSession {
eventsMap: [Map],
emitter: [Object],
_callbacks: Map(0) {},
_connection: [Connection],
_targetType: 'page',
_sessionId: '326BCCF50B6BBE8CA175CB21AB46C382'
},
_remoteObject: {
type: 'object',
subtype: 'node',
className: 'HTMLTableElement',
description: 'table.layout-grid',
objectId: '3652992625290954585.17.4'
},
_page: Page {
eventsMap: Map(0) {},
emitter: [Object],
_closed: false,
_timeoutSettings: [TimeoutSettings],
_pageBindings: Map(0) {},
_javascriptEnabled: true,
_workers: Map(0) {},
_fileChooserInterceptors: Set(0) {},
_userDragInterceptionEnabled: false,
_client: [CDPSession],
_target: [Target],
_keyboard: [Keyboard],
_mouse: [Mouse],
_touchscreen: [Touchscreen],
_accessibility: [Accessibility],
_frameManager: [FrameManager],
_emulationManager: [EmulationManager],
_tracing: [Tracing],
_coverage: [Coverage],
_screenshotTaskQueue: [ScreenshotTaskQueue],
_viewport: null
},
_frameManager: FrameManager {
eventsMap: [Map],
emitter: [Object],
_frames: [Map],
_contextIdToContext: [Map],
_isolatedWorlds: [Set],
_client: [CDPSession],
_page: [Page],
_networkManager: [NetworkManager],
_timeoutSettings: [TimeoutSettings],
_mainFrame: [Frame]
}
}
]
I have tried iterating over the array then doing applying the method (see below) to extract data from a table (see picture)
What exactly is an Element handle and how do I solve this issue?
table[0].$$eval('#\30 297d0e3 > table > tbody > tr:nth-child(1)', rows => {
return Array.from(rows, row => {
const columns = row.querySelectorAll('td');
return Array.from(columns, column => column.innerText);
});
});

If you need a selector with a number in its id, try this workaround:
table[0].$$eval('[id="the_number"] > table > tbody > tr:nth-child(1)', rows => {

Related

A non-serializable value was detected in the state: redux toolkit

I am trying to implement Redux toolkit store in a react native application. It is my first time. I am using axios to make requests to the API.
I am getting this error on a post request:
A non-serializable value was detected in the state, in the path: `user`. Value: {"_A": null,
"_x": 0, "_y": 0, "_z": null}
Take a look at the reducer(s) handling this action type: userreg/fulfilled.
Here are my codes:
const initialState={
token: '',
session: '',
loading: false,
}
export const logInUser = createAsyncThunk('userreg', async(name, thunkAPI)=>{
const {user_username, user_password} = name
//const m = JSON.stringify(name)
const res = await axios.post(`${BASE_URL}/login`,
{
user_username, user_password
}
)
//console.log(res.data)
return res.data
})
const userSlice = createSlice({
name: 'user',
initialState,
reducers:{
addSession:(state, action)=>{
state.session = AsyncStorage.getItem('sessionId')
},
addToken:(state, action)=>{
state.token = AsyncStorage.getItem('token')
}
},
extraReducers: (builders)=>{
builders.addCase(logInUser.pending, (state, action)=>{
state.loading = true
})
builders.addCase(logInUser.fulfilled, async(state, {payload})=>{
console.log(payload, 'slode ier')
state.loading = false
state.token = payload.userToken;
state.session = payload.session;
await AsyncStorage.setItem('sessionId', payload.session);
await AsyncStorage.setItem('token', payload.userToken)
})
}
})
export default userSlice.reducer;
const handleUserLog = async ()=>{
const logDetails = {
user_username: userLogin.user_username,
user_password: userLogin.user_password,
}
dispatch(logInUser(logDetails))
}
When I implemented the same code with fetch instead of axios to make the same request to the API, it didn't give this error.

Adding additional spec files to an angular project, not loading/defining correctly?

Caveat: I am not the author of this project. Whoever originally wrote this is no longer with the organization and I am seemingly the most knowledgeable on this topic at this point.
I know a little about javascript and unit tests, so I successfully added one .spec.js file. I tried adding a second one for another module, reusing a lot of the spec setup, and it immediately broke.
Project resources:
Nodejs 12.16.1
jasmine-node-karma: "^1.6.1"
karma: "^6.3.12"
Contents of ./karma.conf.js:
module.exports = function(config) {
config.set({
basePath: './public',
frameworks: ['jasmine', 'jquery-3.2.1'],
files: [
"../node_modules/angular/angular.js",
"../node_modules/angular-mocks/angular-mocks.js",
"../node_modules/bootstrap/dist/js/bootstrap.js",
"../public/**/*.js",
],
exclude: [
],
preprocessors: {
},
client: {
captureConsole: true
},
browserConsoleLogOptions: {
terminal: true,
level: ""
},
reporters: ['progress'],
port: 9876,
colors: true,
logLevel: config.LOG_INFO,
autoWatch: true,
browsers: ['FirefoxHeadless', 'ChromeHeadlessNoSandbox', 'PhantomJS'],
customLaunchers: {
ChromeHeadlessNoSandbox: {
base: 'ChromeHeadless',
flags: ['--no-sandbox']
},
FirefoxHeadless: {
base: 'Firefox',
flags: ['-headless'],
}
},
singleRun: false,
concurrency: Infinity
})
}
Originally I added ./public/controllers.spec.js to match the existing ./public/controllers.js. These unit tests pass and continue to do so.
Yesterday I added ./public/backups/backupcontrollers.spec.js to match ./public/backups/backupcontrollers.js.
Contents of ./public/backups/backupcontrollers.js:
/**
* Angular controller.
*/
'use strict'
const backupApp = angular.module('backup', [])
const backupTypePath = 'elasticsearch'
backupApp.controller('BackupFormController', ['$scope', '$filter', '$http', function ($scope, $filter, $http) {
console.log('Started BackupFormController')
$scope.itemInstances = []
$scope.fetchStatus = 'Ready!'
$scope.processSelection = function (item, backupType = backupTypePath) {
$scope.currentItem = item.metadata.name
$scope.getBackup(backupType)
console.log('currentItem after selecting from dropdown: ' + $scope.currentItem)
}
$scope.init = function (backupType = backupTypePath) {
$scope.refreshItemInstances(backupType)
console.log('currentItem after loading page for first time: ' + $scope.currentItem)
}
$scope.getBackup = function (backupType = backupTypePath) {
const path = `/v1/backup/${backupType}`
$scope.fetchStatus = `Fetching Backups for Item ${$scope.currentItem}...`
console.log(`Fetching backups for item from ${path}`)
$http.get('/api', { headers: { path: path, item: $scope.currentItem } })
.success(function (data, status, headers, config) {
console.log(`Got data from GET on path ${path}, HTTP status ${status}: ${JSON.stringify(data)}`)
if (typeof data === 'string' || data instanceof String) {
$scope.backups = data.split(/\r?\n/)
} else {
$scope.backups = data
}
$scope.fetchStatus = 'Ready!'
console.log('Done fetching backup list for item:' + $scope.currentItem + '!')
})
.error(function (data, status, header, config) {
console.log(data)
$scope.fetchStatus = 'Ready!'
})
}
// Refresh the list of displayed Item instances
$scope.refreshItemInstances = function (backupType = backupTypePath) {
console.log('Fetching list of all items in the system ...')
$scope.fetchStatus = 'Fetching Items ... '
$http.get('/env')
.success(function (data, status, headers, config) {
console.log(data)
for (let i = 0; i < data.length; i++) {
$scope.itemInstances.push(data[i])
}
$scope.currentItem = $scope.itemInstances[0].metadata.name
console.log('Done fetching list of all items!')
console.log('currentItem after fetching list of all items: ' + $scope.currentItem)
$scope.fetchStatus = 'Ready!'
$scope.getBackup(backupType)
})
.error(function (data, status, header, config) {
console.log(data)
$scope.fetchStatus = 'Ready!'
})
}
}])
Contents of ./public/backups/backupcontrollers.spec.js:
describe('BackupFormController', function () {
let $controller, $rootScope, $httpBackend
beforeEach(module('backup'))
const mockBackupString = 'string of backup data'
const mockBackupData = {
body: mockBackupString
}
const mockItemsUnsorted = [
{
metadata: {
name: 'prod-mock-1',
spec: 'asdf',
status: 'ok'
},
notes: []
},
{
metadata: {
name: 'dev-mock-1',
spec: 'asdf',
status: 'ok'
},
notes: []
},
{
metadata: {
name: 'integ-mock-1',
spec: 'asdf',
status: 'ok'
},
notes: []
}
]
beforeEach(inject(function ($injector) {
$rootScope = $injector.get('$rootScope')
const $controller = $injector.get('$controller')
$httpBackend = $injector.get('$httpBackend')
const mockEnv = $httpBackend.when('GET', '/env')
.respond(mockItemsUnsorted)
const mockAPI = $httpBackend.when('GET', '/api')
.respond(mockBackupString)
const createController = function () {
return $controller('BackupFormController', { '$scope': $rootScope })
}
}))
describe('$scope.getBackup', function () {
beforeEach(function () {
spyOn(console, 'log')
})
it('should GET /api and set $scope.backups', function () {
controller = createController()
console.log('Dumping fetchStatus: ', $rootScope.fetchStatus)
$rootScope.init()
$httpBackend.flush()
expect($rootScope.backups).toEqual(mockBackupString)
expect(console.log).toHaveBeenCalled()
})
})
})
It seems like this new spec isn't working correctly at all; when I run npm test I see the normal successful tests from ./public/controllers.spec.js but also:
Chrome Headless 105.0.5195.125 (Mac OS 10.15.7) BackupFormController $scope.getBackup should GET /api and set $scope.backups FAILED
ReferenceError: createController is not defined
at UserContext.<anonymous> (backup/backupcontrollers.spec.js:51:7)
at <Jasmine>
This is the only output concerning ./public/backups/backupcontrollers.spec.js.
Has anybody run into this before? I found some posts regarding including angular-mocks, but as you can see in karma.conf.js, it's being included.

puppeteer - scrape all a->innerText

I've this part of webpage that I want to scrape href or innerText
<span class="hash-tag text-truncate"><<test that i want to scrape>></span>
This is my code:
const nodeChildren = await page.$$('.hash-tag', (uiElement) => {
uiElement.map((option) => option.innerText)
});
console.log(nodeChildren);
result is:
_page: Page {
eventsMap: Map(0) {},
emitter: [Object],
_closed: false,
_timeoutSettings: [TimeoutSettings],
_pageBindings: Map(0) {},
_javascriptEnabled: true,
_workers: Map(0) {},
_fileChooserInterceptors: Set(0) {},
_userDragInterceptionEnabled: false,
_handlerMap: [WeakMap],
_client: [CDPSession],
How I can it?
try:
const textAndHrefs = await page.$$eval(".hash-tag a", els =>
els.map(el => ({text: el.innerText, href: el.href})))
try textContent instead of innerText because it's buggy in Puppeteer.
const nodeChildren = await page.$$('.hash-tag', (uiElement) => {
uiElement.map((option) => option.textContent)
});
console.log(nodeChildren);

Return a node js response inside session.withTransaction

I am using session.withTransaction() to execute multiple updates in the mongo db. Please note that promiseArray has multiple Stock.update statements to update stock quantities.
await session.withTransaction(
async () => {
promiseResults = await Promise.all(promiseArray);
for (const result of promiseResults) {
recordCounter++;
if (result.nModified === 1) {
stockItemsNoUpdate.push(goodReturnSummary[recordCounter]);
}
}
if (stockItemsNoUpdate.length > 0) {
return res.status(200).send(response);
}
existingGoodReturnSummary = GoodReturn.build({
_id: sheetId,
goodReturnSummary,
agency,
createdBy,
});
await existingGoodReturnSummary.save({ session: session });
existingGoodReturnSummary = await GoodReturn.calculateTotalGoodReturnAmount(
existingGoodReturnSummary,
session
);
},
{
readPreference: 'primary',
readConcern: { level: 'local' },
writeConcern: { w: 'majority' },
}
);
If stockItemsNoUpdate.length > 0 I need to abort this transaction and send the response. done by below code segment.
if (stockItemsNoUpdate.length > 0) {
return res.status(200).send(response);
}
But I cannot do this because of the below error
Any idea on how to resolve this ??
Cheers
See Nodejs mongodb's Transaction API `withTransaction` always return null and https://jira.mongodb.org/browse/NODE-2014.
https://jira.mongodb.org/browse/NODE-2014?focusedCommentId=2420255&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-2420255 provides a workaround.

Reduce period for web scraping job with puppeter node.js

I have made a job script for web scraping periodically a page and save some information in a MongoDB database. I have tried to get as much performance as i can, and for now i'm able to execute the script each 10s. However, i would like to reduce it even more, with a period between 1-10 seconds if possible. The problem is that when i reduce it, my code throws the following warning and some executions get stacked unresolved:
(node:9472) MaxListenersExceededWarning: Possible EventEmitter memory leak detected. 11 exit listeners added. Use emitter.setMaxListeners() to increase limit
Is there a way to improve the code?
const $ = require('cheerio');
const MarketModel = require('./models/marketModel');
const mongoose = require('mongoose');
const puppeteer = require('puppeteer');
var schedule = require('node-schedule');
const {
Cluster
} = require('puppeteer-cluster');
//Connection to DataBase:
mongoose.connect('mongodb://localhost:27017/Tradheo', {
useNewUrlParser: true
});
mongoose.connection.on('error', error => console.log(error));
mongoose.Promise = global.Promise;
getMarketData = async () => {
console.log("Web scraping to get market data...")
let markets = []
let marketSpain = {
country: 'Spain',
name: 'IBEX 35',
companies: []
}
let marketGermany = {
country: 'Germany',
name: 'DAX',
companies: []
}
const cluster = await Cluster.launch({
concurrency: Cluster.CONCURRENCY_PAGE,
maxConcurrency: 2,
});
await cluster.task(async ({
page,
data: url
}) => {
await page.goto({
waitUntil: 'domcontentloaded'
});
await page.setRequestInterception(true);
page.on('request', request => {
if (request.resourceType() === 'document') {
request.continue();
} else {
request.abort();
}
});
const html = await page.content();
if (url === 'https://uk.investing.com/equities/spain') {
console.log('Spain data page content loaded');
$("table[class='genTbl closedTbl crossRatesTbl elpTbl elp30'] > tbody > tr", html).each((i, elem) => {
marketSpain.companies.push({
name: $("td[class='bold left noWrap elp plusIconTd'] > a", html).eq(i).html(),
last: $("td", elem).eq(2).text(),
high: $("td", elem).eq(3).text(),
low: $("td", elem).eq(4).text(),
change: $("td", elem).eq(5).text(),
changePerCent: $("td", elem).eq(6).text(),
volume: $("td", elem).eq(7).text(),
time: $("td", elem).eq(8).text(),
purchase: false,
sale: false
});
});
markets.push(marketSpain);
} else {
console.log('Germany data page content loaded');
$("table[class='genTbl closedTbl crossRatesTbl elpTbl elp30'] > tbody > tr", html).each((i, elem) => {
marketGermany.companies.push({
name: $("td[class='bold left noWrap elp plusIconTd'] > a", html).eq(i).html(),
last: $("td", elem).eq(2).text(),
high: $("td", elem).eq(3).text(),
low: $("td", elem).eq(4).text(),
change: $("td", elem).eq(5).text(),
changePerCent: $("td", elem).eq(6).text(),
volume: $("td", elem).eq(7).text(),
time: $("td", elem).eq(8).text(),
purchase: false,
sale: false
});
});
markets.push(marketGermany);
}
if (markets.length === 2) {
MarketModel.create({
markets,
}, (err) => {
if (err) return handleError(err);
})
console.log("Done!")
}
});
cluster.queue(url1);
cluster.queue(url2);
await cluster.idle();
await cluster.close();
}
var j = schedule.scheduleJob('*/10 * 8-17 * * 1-5', function () {
const now = new Date();
//Checks that time is between 8:30 - 17:35 (schedule of the stock exchange)
if (now.getHours() >= 8 && !(now.getHours() == 8 && now.getMinutes() < 30) && now.getHours() <= 17 && !(now.getHours() == 17 && now.getMinutes() > 35)) {
getMarketData();
}
});
UPDATE: I have added some improvements like setting waitUntil property to 'domcontentloaded' and request interception to avoid waiting for images, and any kind of resources apart from html content, to be loaded. However, seems to be insufficient to achieve the goal.

Resources