I'm working on an extension that is supposed to extract information from the DOM based specific classes/tags,etc, then allow the user to save the information as a CSV file.
I'm getting stuck on a couple of places and haven't been able to find answers to questions similar enough.
Where I am tripped up at is:
1) Making sure that the page has completely loaded so the chrome.tabs.query doesn't return null a couple of times before the promise actually succeeds and allows the blocksF to successfully inject. I have tried placing it within a settimeout function but the chrome api doesn't seem to work within such the function.
2) Saving the extracted information so when the user moves onto a new page, the information is still there. I'm not sure if I should use the chrome.storage api call or simply save the information as an array and keep passing it through. It's just text, so I don't believe that it should take up too much space.
Then main function of the background.js is below.
let mainfunc = chrome.tabs.onUpdated.addListener(
async(id, tab) => {
if (buttonOn == true) {
let actTab = await chrome.tabs.query({
active: true,
currentWindow: true,
status: "complete"
}).catch(console.log(console.error()));
if (!actTab) {
console.log("Could not get URL. Turn extension off and on again.");
} else {
console.log("Tab information recieved.")
};
console.log(actTab);
let blocksF = chrome.scripting.executeScript({
target: { tabId: actTab[0]['id'] },
func: createBlocks
})
.catch(console.error)
if (!blocksF) {
console.log("Something went wrong.")
} else {
console.log("Buttons have been created.")
};
/*
Adds listeners and should return value of the works array if the user chose to get the information
*/
let listenersF = chrome.scripting.executeScript({
target: { tabId: actTab[0]['id'] },
func: loadListeners
})
.catch(console.error)
if (!listenersF) {
console.log("Listeners failed to load.")
} else {
console.log("Listeners loaded successfully.")
};
console.log(listenersF)
};
});
Information from the DOM is extracted through an event listener on a div/button that is added. The event listener is added within the loadListeners function.
let workArr = document.getElementById("getInfo").addEventListener("click", () => {
let domAr = Array.from(
document.querySelectorAll(<class 1>, <class 2>),
el => {
return el.textContent
}
);
let newAr = []
for (let i = 0; i < domAr.length; i++) {
if (i % 2 == 0) {
newAr.push([domAr[i], domAr[i + 1]])
}
}
newAr.forEach((work, i) => {
let table = document.getElementById('extTable');
let row = document.createElement("tr");
row.appendChild(document.createElement("td")).textContent = work[0];
row.appendChild(document.createElement("td")).textContent = work[1];
table.appendChild(row);
});
return newAr
I've been stuck on this for a couple of weeks now. Any help would be appreciated. Thank you!
There are several issues.
chrome methods return a Promise in MV3 so you need to await it or chain on it via then.
tabs.onUpdated listener's parameters are different. The second one is a change info which you can check for status instead of polling the active tab, moreover the update may happen while the tab is inactive.
catch(console.log(console.error())) doesn't do anything useful because it immediately calls these two functions so it's equivalent to catch(undefined)
Using return newArr inside a DOM event listener doesn't do anything useful because the caller of this listener is the internal DOM event dispatcher which doesn't use the returned value. Instead, your injected func should return a Promise and call resolve inside the listener when done. This requires Chrome 98 which added support for resolving Promise returned by the injected function.
chrome.tabs.onUpdated.addListener(onTabUpdated);
async function onTabUpdated(tabId, info, tab) {
if (info.status === 'complete' &&
/^https?:\/\/(www\.)?example\.com\//.test(tab.url) &&
await exec(tabId, createBlocks)) {
const [{result}] = await exec(tabId, loadListeners);
console.log(result);
// here you can save it in chrome.storage if necessary
}
}
function exec(tabId, func) {
// console.error returns `undefined` so we don't need try/catch,
// because executeScript is always an array of objects on success
return chrome.scripting.executeScript({target: {tabId}, func})
.catch(console.error);
}
function loadListeners() {
return new Promise(resolve => {
document.getElementById('getInfo').addEventListener('click', () => {
const result = [];
// ...add items to result
resolve(result);
});
});
}
Related
I am trying to get the selected element to the sidebar pane in my chrome extension.
It's working fine if the page has no frames when the element is in the frame, it's not working.
As per the document I have to pass the frameURL, but how do I get the frame or Iframe URL?
Thank you.
Note: This issue is duplicate that was opened in 3 years ago, but still no solution there, so re-opening it again.
In devtools.js
chrome.devtools.panels.elements.createSidebarPane(name, (panel) => {
// listen for the elements changes
function updatePanel() {
chrome.devtools.inspectedWindow.eval("parseDOM($0)", {
frameURL: // how to pass dynamic
useContentScriptContext: true
}, (result, exceptipon) => {
if (result) {
console.log(result)
}
if (exceptipon) {
console.log(exceptipon)
}
});
}
chrome.devtools.panels.elements.onSelectionChanged.addListener(updatePanel);
});
I ran into this as well. I ended up needing to add a content_script on each page/iframe and a background page to help pass messages between devtools and content scripts.
The key bit is that in the devtools page, we should ask the content_scripts to send back what their current url is. For every content script that was registered, we can then call chrome.devtools.inspectedWindow.eval("setSelectedElement($0)", { useContentScriptContext: true, frameURL: msg.iframe } );
Or in full:
chrome.devtools.panels.elements.createSidebarPane( "example", function( sidebar ) {
const port = chrome.extension.connect({ name: "example-name" });
// announce to content scripts that they should message back with their frame urls
port.postMessage( 'SIDEBAR_INIT' );
port.onMessage.addListener(function ( msg) {
if ( msg.iframe ) {
// register with the correct frame url
chrome.devtools.panels.elements.onSelectionChanged.addListener(
() => {
chrome.devtools.inspectedWindow.eval("setSelectedElement($0)", { useContentScriptContext: true, frameURL: msg.iframe } );
}
);
} else {
// otherwise assume other messages from content scripts should update the sidebar
sidebar.setObject( msg );
}
} );
}
);
Then in the content_script, we should only process the event if we notice that the last selected element ($0) is different, since each frame on the page will also handle this.
let lastElement;
function setSelectedElement( element ) {
// if the selected element is the same, let handlers in other iframe contexts handle it instead.
if ( element !== lastElement ) {
lastElement = element;
// Pass back the object we'd like to set on the sidebar
chrome.extension.sendMessage( nextSidebarObject( element ) );
}
}
There's a bit of setup, including manifest changes, so see this PR for a full example:
https://github.com/gwwar/z-context/pull/21
You can found url of the frame this way:
document.querySelectorAll('iframe')[0].src
Assuming there is at lease one iframe.
Please note, you cannot use useContentScriptContext: true, as it will make the script execute as a context page (per documentation) and it will be in a separate sandboxed environment.
I had a slightly different problem, but it might be helpful for your case too, I was dynamically inserting an iframe to a page, and then tried to eval a script in it. Here the code that worked:
let win = chrome.devtools.inspectedWindow
let code = `
(function () {
let doc = window.document
let insertFrm = doc.createElement('IFRAME')
insertFrm.src = 'about:runner'
body.appendChild(insertFrm)
})()`
win.eval(code, function (result, error) {
if (error) {
console.log('Eror in insertFrame(), result:', result)
console.error(error)
} else {
let code = `
(function () {
let doc = window.document
let sc = doc.createElement('script')
sc.src = '${chrome.runtime.getURL('views/index.js')}'
doc.head.appendChild(sc)
})()`
win.eval(code, { frameURL: 'about:bela-runner' }, function (result, error) {
if (error) {
console.log('Eror in insertFrame(), result:', result)
console.error(error)
}
})
}
})
I'm working on a proxy that caches files and I'm trying to add some logic that prevents multiple clients from downloading the same files before the proxy has a chance to cache them.
Basically, the logic I'm trying to implement is the following:
Client 1 requests a file. The proxy checks if the file is cached. If it's not, it requests it from the server, caches it, then sends it to the client.
Client 2 requests the same file after client 1 requested it, but before the proxy has a chance to cache it. So the proxy will tell client 2 to wait a few seconds because there is already a download in progress.
A better approach would probably be to give client 2 a "try again later" message, but let's just say that's currently not an option.
I'm using Nodejs with the anyproxy library. According to the documentation, delayed responses are possible by using promises.
However, I don't really see a way to achieve what I want using Promises. From what I can tell, I could do something like this:
module.exports = {
*beforeSendRequest(requestDetail) {
if(thereIsADownloadInProgressFor(requestDetail.url)) {
return new Promise((resolve, reject) => {
setTimeout(() => { // delay
resolve({ response: responseDetail.response });
}, 10000);
});
}
}
};
But that would mean simply waiting for a maximum amount of time and hoping the download finishes by then.
And I don't want that.
I would prefer to be able to do something like this (but with Promises, somehow):
module.exports = {
*beforeSendRequest(requestDetail) {
if(thereIsADownloadInProgressFor(requestDetail.url)) {
var i = 0;
for(i = 0 ; i < 10 ; i++) {
JustSleep(1000);
if(!thereIsADownloadInProgressFor(requestDetail.url))
return { response: responseDetail.response };
}
}
}
};
Is there any way I can achieve this with Promises in Nodejs?
Thanks!
You can use a Map to cache your file downloads.
The mapping in Map would be url -> Promise { file }
// Map { url => Promise { file } }
const cache = new Map()
const thereIsADownloadInProgressFor = url => cache.has(url)
const getCachedFilePromise = url => cache.get(url)
const downloadFile = async url => {/* download file code here */}
const setAndReturnCachedFilePromise = url => {
const filePromise = downloadFile(url)
cache.set(url, filePromise)
return filePromise
}
module.exports = {
beforeSendRequest(requestDetail) {
if(thereIsADownloadInProgressFor(requestDetail.url)) {
return getCachedFilePromise(requestDetail.url).then(file => ({ response: file }))
} else {
return setAndReturnCachedFilePromise(requestDetail.url).then(file => ({ response: file }))
}
}
};
You don't need to send a try again response, simply serve the same data to both requests. All you need to do is store the requests somewhere in the caching system and trigger all of them when the fetching is done.
Here's a cache implementation that does only a single fetch for multiple requests. No delays and no try-laters:
export class class Cache {
constructor() {
this.resultCache = {}; // this object is the cache storage
}
async get(key, cachedFunction) {
let cached = this.resultCache[key];
if (cached === undefined) { // No cache so fetch data
this.resultCache[key] = {
pending: [] // This is the magic, store further
// requests in this pending array.
// This way pending requests are directly
// linked to this cache data
}
try {
let result = await cachedFunction(); // Wait for result
// Once we get result we need to resolve all pending
// promises. Loop through the pending array and
// resolve them. See code below for how we store pending
// requests.. it will make sense:
this.resultCache[key].pending
.forEach(waiter => waiter.resolve(result));
// Store the result of the cache so later we don't
// have to fetch it again:
this.resultCache[key] = {
data: result
}
// Return result to original promise:
return result;
// Note: yes, this means pending promises will get triggered
// before the original promise is resolved but normally
// this does not matter. You will need to modify the
// logic if you want promises to resolve in original order
}
catch (err) { // Error when fetching result
// We still need to trigger all pending promises to tell
// them about the error. Only we reject them instead of
// resolving them:
if (this.resultCache[key]) {
this.resultCache[key].pending
.forEach((waiter: any) => waiter.reject(err));
}
throw err;
}
}
else if (cached.data === undefined && cached.pending !== undefined) {
// Here's the condition where there was a previous request for
// the same data. Instead of fetching the data again we store
// this request in the existing pending array.
let wait = new Promise((resolve, reject) => {
// This is the "waiter" object above. It is basically
// It is basically the resolve and reject functions
// of this promise:
cached.pending.push({
resolve: resolve,
reject: reject
});
});
return await wait; // await response form original request.
// The code above will cause this to return.
}
else {
// Return cached data as normal
return cached.data;
}
}
}
The code may look a bit complicated but it is actually quite simple. First we need a way to store the cached data. Normally I'd just use a regular object for this:
{ key : result }
Where the cached data is stored in the result. But we also need to store additional metadata such as pending requests for the same result. So we need to modify our cache storage:
{ key : {
data: result,
pending: [ array of requests ]
}
}
All this is invisible and transparent to code using this Cache class.
Usage:
const cache = new Cache();
// Illustrated with w3c fetch API but you may use anything:
cache.get( URL , () => fetch(URL) )
Note that wrapping the fetch in an anonymous function is important because we want the Cache.get() function to conditionally call the fetch to avoid multiple fetch being called. It also gives the Cache class flexibility to handle any kind of asynchronous operation.
Here's another example for caching a setTimeout. It's not very useful but it illustrates the flexibility of the API:
cache.get( 'example' , () => {
return new Promise((resolve, reject) => {
setTimeout(resolve, 1000);
});
});
Note that the Cache class above does not have any invalidations or expiry logic for the sake of clarity but it's fairly easy to add them. For example if you want the cache to expire after some time you can just store the timestamp along with the other cache data:
{ key : {
data: result,
timestamp: timestamp,
pending: [ array of requests ]
}
}
Then in the "no-cache" logic simply detect the expiry time:
if (cached === undefined || (cached.timestamp + timeout) < now) ...
I am trying to:
Poll a public API every 5 seconds
Store the resulting JSON in a variable
Store the next query to this same API in a second variable
Compare the first variable to the second
Print the second variable if it is different from the first
Else: Print the phrase: 'The objects are the same' if they haven't changed
Unfortunately, the comparison part appears to fail. I am realizing that this implementation is probably lacking the appropriate variable scoping but I can't put my finger on it. Any advice would be highly appreciated.
data: {
chatters: {
viewers: {
},
},
},
};
//prints out pretty JSON
function prettyJSON(obj) {
console.log(JSON.stringify(obj, null, 2));
}
// Gets Users from Twitch API endpoint via axios request
const getUsers = async () => {
try {
return await axios.get("http://tmi.twitch.tv/group/user/sixteenbitninja/chatters");
} catch (error) {
console.error(error);
}
};
//Intended to display
const displayViewers = async (previousResponse) => {
const usersInChannel = await getUsers();
if (usersInChannel.data.chatters.viewers === previousResponse){
console.log("The objects are the same");
} else {
if (usersInChannel.data.chatters) {
prettyJSON(usersInChannel.data.chatters.viewers);
const previousResponse = usersInChannel.data.chatters.viewers;
console.log(previousResponse);
intervalFunction(previousResponse);
}
}
};
// polls display function every 5 seconds
const interval = setInterval(function () {
// Calls Display Function
displayViewers()
}, 5000);```
The issue is that you are using equality operator === on objects. two objects are equal if they have the same reference. While you want to know if they are identical. Check this:
console.log({} === {})
For your usecase you might want to store stringified version of the previousResponse and compare it with stringified version of the new object (usersInChannel.data.chatters.viewers) like:
console.log(JSON.stringify({}) === JSON.stringify({}))
Note: There can be issues with this approach too, if the order of property changes in the response. In which case, you'd have to check individual properties within the response objects.
May be you can use npm packages like following
https://www.npmjs.com/package/#radarlabs/api-diff
I'm writing a Windows Node.js server app (using ES6 btw).
The first thing I want to do - in the top-level code - is sit in a while loop, calling an async function which searches for a particular registry key/value. This function is 'proven' - it returns the value data if found, or else throws:
async GetRegValue(): Promise<string> { ... }
I need to sit in a while loop until the registry item exists, and then grab the value data. (With a delay between retries).
I think I know how to wait for an async call to complete (one way or the other) before progressing with the rest of the start-up, but I can't figure out how to sit in a loop waiting for it to succeed.
Any advice please on how to achieve this?
(I'm fairly new to typescript, and still struggling to get my head round all async/await scenarios!)
Thanks
EDIT
Thanks guys. I know I was 'vague' about my code - I didn't want to put my real/psuedo code attempts, since they have all probably overlooked the points you can hopefully help me understand.
So I just kept it as a textual description... I'll try though:
async GetRegValue(): Promise<string> {
const val: RegistryItem = await this.GetKeyValue(this.KEY_SW, this.VAL_CONN);
return val.value
}
private async GetKeyValue(key: string, name: string): Promise<RegistryItem> {
return await new Promise((resolve, reject) => {
new this.Registry({
hive: this.Hive, key
}).get(name, (err, items) => {
if (err) {
reject(new Error('Registry get failed'));
}
else {
resolve( items );
}
});
})
.catch(err => { throw err });
}
So I want to do something like:
let keyObtained = false
let val
while (keyObtained == false)
{
// Call GetRegValue until val returned, in which case break from loop
// If exception then pause (e.g. ~100ms), then loop again
}
}
// Don't execute here till while loop has exited
// Then use 'val' for the subsequent statements
As I say, GetRegValue() works fine in other places I use it, but here I'm trying to pause further execution (and retry) until it does come back with a value
You can probably just use recursion. Here is an example on how you can keep calling the GetRegValue function until is resolves using the retryReg function below.
If the catch case is hit, it will just call GetRegValue over and over until it resolves successfully.
you should add a counter in the catch() where if you tried x amount of times you give up.
Keep in mind I mocked the whole GetRegValue function, but given what you stated this would still work for you.
let test = 0;
function GetRegValue() {
return new Promise((resolve, reject) => {
setTimeout(function() {
test++;
if (test === 4) {
return resolve({
reg: "reg value"
});
}
reject({
msg: "not ready"
});
}, 1000);
});
}
function retryReg() {
GetRegValue()
.then(registryObj => {
console.log(`got registry obj: ${JSON.stringify(registryObj)}`)
})
.catch(fail => {
console.log(`registry object is not ready: ${JSON.stringify(fail)}`);
retryReg();
});
}
retryReg();
I don't see why you need this line:
.catch(err => { throw err });
The loop condition of while isn't much use in this case, as you don't really need a state variable or expression to determine if the loop should continue:
let val;
while (true)
{
try {
val = await GetRegValue(/* args */);
break;
} catch (x) {
console.log(x); // or something better
}
await delay(100);
}
If the assignment to val succeeds, we make it to the break; statement and so we leave the loop successfully. Otherwise we jump to the catch block and log the error, wait 100 ms and try again.
It might be better to use a for loop and so set a sensible limit on how many times to retry.
Note that delay is available in an npm package of the same name. It's roughly the same as:
await new Promise(res => setTimeout(res, 100));
When I trigger a .click() event in a non-headless mode in puppeteer, nothing happens, not even an error.. "non-headless mode so i could visually monitor what is being clicked"
const scraper = {
test: async () => {
let browser, page;
try {
browser = await puppeteer.launch({
headless: false,
args: ["--no-sandbox", "--disable-setuid-sandbox"]
});
page = await browser.newPage();
} catch (err) {
console.log(err);
}
try {
await page.goto("https://www.betking.com/sports/s/eventOdds/1-840-841-0-0,1-1107-1108-0-0,1-835-3775-0-0,", {
waitUntil: "domcontentloaded"
});
console.log("scraping, wait...");
} catch (err) {
console.log(err);
}
console.log("waiting....");
try {
await page.waitFor('.eventsWrapper');
} catch (err) {
console.log(err, err.response);
}
try {
let oddsListData = await page.evaluate(async () => {
let regionAreaContainer = document.querySelectorAll('.areaContainer.region .regionGroup > .regionAreas > div:first-child > .area:nth-child(5)');
regionAreaContainer = Array.prototype.slice.call(regionAreaContainer);
let t = []; //Used to monitor the element being clicked
regionAreaContainer.forEach(async (region) => {
let dat = await region.querySelector('div');
dat.innerHTML === "GG/NG" ? t.push(dat.innerHTML) : false; //Used to confirm that the right element is being clicked
dat.innerHTML === "GG/NG" ? dat.click() : false;
})
return t;
})
console.log(oddsListData);
} catch (err) {
console.log(err);
}
}
}
I expect it to click the specified button and load in some dynamic data on the page.
In Chrome's console, I get the error
Transition Rejection($id: 1 type: 2, message: The transition has been superseded by a different transition, detail: Transition#3( 'sportsMultipleEvents'{"eventMarketIds":"1-840-841-0-0,1-1107-1108-0-0,1-835-3775-0-0,"} -> 'sportsMultipleEvents'{"eventMarketIds":"1-840-841-0-0,1-1107-1108-0-0,1-835-3775-535-14,"} ))
Problem
Behaving non-human-like by executing code like element.click() (inside the page context) or element.value = '..' (see this answer for a similar problem) seems to be problematic for Angular applications. You want to try to behave more human-like by using puppeteer functions like page.click() as they simulate a "real" mouse click instead of just triggering the element's click event.
In addition the page seems to rebuild parts of the page whenever one of the items is clicked. Therefore, you need to execute the selector again after each click.
Code sample
To behave more human-like and requery the elements after each click you can change the latter part of your code to something like this:
let list = await page.$x("//div[div/text() = 'GG/NG']");
for (let i = 0; i < list.length; i++) {
await list[i].click();
// give the page some time and then query the selectors again
await page.waitFor(500);
list = await page.$x("//div[div/text() = 'GG/NG']");
}
This code uses an XPath expression to query the div elements which contain another div element with the given text. After that, a click is simulated on the element and then the contents of the page are queried another time to respect the change of the DOM elements.
Here might be a less confusing way to click those:
for(var div of document.querySelectorAll('div')){
if(div.innerHTML === 'GG/NG') div.click()
}