I would like to extract the hyperlink from a webpage by using queryselector all, but there are no results coming out.
Below is my code.
Sub ScrapLink()
Application.ScreenUpdating = False
Dim IE As New InternetExplorer, html As HTMLDocument
Dim x As Long
Application.ScreenUpdating = False
With IE
IE.Visible = True
IE.Navigate "http://www.bursamalaysia.com/market/listed-companies/company-announcements/5978065"
While .Busy Or .ReadyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 1)
DoEvents
With .Document.getElementById("bm_ann_detail_iframe").contentDocument
Dim links As Object, i As Long
Set links = .Document.querySelectorAll("p.att_download_pdf[href^='/FileAccess/apbursaweb/']")
For i = 1 To links.Length
With ThisWorkbook.Worksheets("Sheet1")
Range("A" & Rows.Count).End(xlUp).Offset(1).Value = links.Item(i - 1)
End With
Next i
.Quit
End With
End With
End Sub
You could just avoid the initial page and use the URL direct from the frame. This would be my preference unless you don't know, for some reason, this URL.
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, nodeList As Object, i As Long
With IE
.Visible = True
.navigate2 "http://disclosure.bursamalaysia.com/FileAccess/viewHtml?e=2906127"
While .Busy Or .readyState < 4: DoEvents: Wend
Set nodeList = .document.querySelectorAll(".att_download_pdf [href^='/FileAccess/apbursaweb/download']")
For i = 0 To nodeList.Length - 1
Debug.Print nodeList.item(i).href
Next
.Quit
End With
End Sub
Or you can jump right on over to the iframe src after page load:
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, nodeList As Object, i As Long
With IE
.Visible = True
.Navigate2 "http://www.bursamalaysia.com/market/listed-companies/company-announcements/5978065"
While .Busy Or .readyState < 4: DoEvents: Wend
.Navigate2 .document.querySelector("iframe").src
While .Busy Or .readyState < 4: DoEvents: Wend
Set nodeList = .document.querySelectorAll(".att_download_pdf [href^='/FileAccess/apbursaweb/download']")
For i = 0 To nodeList.Length - 1
Debug.Print nodeList.item(i).href
Next
.Quit
End With
End Sub
Try the following. It should fetch you the links you wish to grab:
Sub ScrapLink()
Dim IE As New InternetExplorer, Html As HTMLDocument
Dim frame As Object, i As Long
With IE
.Visible = True
.navigate "http://www.bursamalaysia.com/market/listed-companies/company-announcements/5978065"
While .Busy Or .readyState < 4: DoEvents: Wend
Set Html = .document
End With
Application.Wait Now + TimeValue("00:00:03") 'This delay may vary in your case
Set frame = Html.getElementById("bm_ann_detail_iframe").contentWindow.document
With frame.querySelectorAll("p.att_download_pdf a")
For i = 0 To .Length - 1
Cells(i + 1, 1) = .item(i).getAttribute("href")
Next i
End With
End Sub
If you wish to kick out the delay then try changing the portion below with the above one:
Do: Set frame = Html.getElementById("bm_ann_detail_iframe"): DoEvents: Loop While frame Is Nothing
With frame.contentWindow.document.querySelectorAll("p.att_download_pdf a")
For i = 0 To .Length - 1
Cells(i + 1, 1) = .item(i).getAttribute("href")
Next i
End With
Related
Url changes dynamically
For Example
https://www.ziprecruiter.com/candidate/search?days=5&search=nav&location=USA&page=2
https://www.ziprecruiter.com/candidate/search?days=5&search=nav&location=USA&page=3
into same excel sheet
Sub GetJobTitles()
Const Url$ = "https://www.ziprecruiter.com/candidate/search?days=5&search=nav&location=USA"
Dim post As Object, R&
With CreateObject("InternetExplorer.Application")
.Visible = True
.navigate Url
While .Busy Or .readyState < 4: DoEvents: Wend
For Each post In .document.getElementsByTagName("article")
R = R + 1: Cells(R, 1) = post.getElementsByClassName("just_job_title")(0).innerText
Cells(R, 2) = post.getElementsByClassName("name")(0).innerText
Cells(R, 3) = post.getElementsByClassName("location")(0).innerText
Next post
.Quit
End With
End Sub
The following should work. Give it a shot:
Sub GetJobInfo()
Const URL$ = "https://www.ziprecruiter.com/candidate/search?days=5&search=nav&location=USA&page="
Dim IE As Object: Set IE = CreateObject("InternetExplorer.Application")
Dim post As Object, elem$, R&, I&: I = 1
Do
With IE
.Visible = True
.navigate URL & I
While .Busy Or .readyState < 4: DoEvents: Wend
On Error Resume Next
elem = .document.getElementsByTagName("article")(0).innerText
On Error GoTo 0
If elem = "" Then Exit Do
For Each post In .document.getElementsByTagName("article")
R = R + 1: Cells(R, 1) = post.getElementsByClassName("just_job_title")(0).innerText
Cells(R, 2) = post.getElementsByClassName("name")(0).innerText
Cells(R, 3) = post.getElementsByClassName("location")(0).innerText
Next post
End With
I = I + 1
elem = ""
Application.Wait Now + TimeValue("00:00:05")
Loop
IE.Quit
End Sub
I tried the code below. Most of the execution time was stuck in on error goto 0 and no success in further steps .fireevent ("onchange"). Is there some way I can optimize the process better?
Public Sub makeselections()
Dim ie As New InternetExplorer, var As String, ele As Object
var = ThisWorkbook.Worksheets("Sheet2").Cells(1, 1).value
With ie
.Visible = True
.Navigate2 "https://www.marketwatch.com/investing/stock/" & var & "/financials"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("#autocomplete_input").value = var
.querySelector("#investing_ac_button").Click
End With
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
Do
On Error Resume Next
Set ele = .querySelector("[value^='/investing/stock/" & LCase(var) & "/financials/Income/quarter']")
On Error GoTo 0
Loop While ele Is Nothing
.querySelector("[value^='/investing/stock/" & LCase(var) & "/financials/Income/quarter']").Selected = True
.querySelector(".financials select").FireEvent "onchange"
End With
End With
End Sub
It is presumably stuck as ele remains Nothing i.e. ticker is not found or at least that href value isn't found. Use a timed loop to allow for exit
Option Explicit
Public Sub MakeSelections()
Dim ie As New InternetExplorer, var As String, ele As Object, t As Date
Const MAX_WAIT_SEC As Long = 10
var = ThisWorkbook.Worksheets("Sheet2").Cells(1, 1).Value
With ie
.Visible = True
.Navigate2 "https://www.marketwatch.com/investing/stock/" & var & "/financials"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("#autocomplete_input").Value = var
.querySelector("#investing_ac_button").Click
End With
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
t = Timer
Do
On Error Resume Next
Set ele = .querySelector("[value$='quarter']")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While ele Is Nothing
If ele Is Nothing Then Exit Sub
ele.Selected = True
.querySelector(".financials select").FireEvent "onchange"
End With
End With
End Sub
For some reason I am not able to access elements inside iframe.
I have tried:
ie.Document.getElementById("sisaltosivu").contentDocument.getElementById("uusihaku")(0).Click
Here is my code:
Sub ChechAutomate()
Dim ie As New InternetExplorer, url As String, ws As Worksheet
Set ws = ThisWorkbook.Sheets("Other Data")
url = "https://www.asiakastieto.fi/data/atdb"
With ie
.Visible = True
.Navigate2 url
While .Busy Or .ReadyState < 4: DoEvents: Wend
With .Document
If .querySelectorAll("#btnb").Length > 0 Then
Else
.querySelector("[name=user1]").Value = "x"
.querySelector("[name=user2]").Value = "x"
.querySelector("[name=passwd]").Value = "x"
.querySelector("[class=btnb]").Click
Application.Wait (Now + TimeValue("00:00:01"))
ie.Navigate2 "https://www.asiakastieto.fi/sopimusasiakas/kotimaa?lang=FI"
Application.Wait (Now + TimeValue("00:00:01"))
ie.Document.getElementById("sisaltosivu").contentDocument.getElementById("uusihaku")(0).Click
End If
End With
End With
End Sub
You are no longer outside the iframe when you navigate to the src of the iframe. This line:
ie.Navigate2 "https://www.asiakastieto.fi/sopimusasiakas/kotimaa?lang=FI"
should mean you are now in the document referenced within the iframe and should access direct with:
.document.getElementById("uusihaku")(0).Click
I've created a vba script to parse some links out of a table from a webpage. The ID I've used within my script to reach the table is dynamic and I like to stick to this way. The script is doing fine at this moment if I go for my first approach. But, I do not want to use this same line Html.querySelectorAll("#DataTables_Table_0 tbody tr") twice so I rectified my first script to be like the second one.
But the second approach spits out this error Invalid use of Null pointing at this line For i = 0 To elem.Length - 1 whereas my first script is still using the length to get the content.
My current approach: (working one)
Sub GetCardLinks()
Const timeLimit& = 10
Const baseUrl = "https://www.psacard.com"
Dim IE As New InternetExplorer, Html As HTMLDocument
Dim R&, T As Double, elem As Object
With IE
.Visible = True
.navigate "https://www.psacard.com/psasetregistry/baseball/company-sets/16"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set Html = .document
T = Timer
Do
Set elem = Html.querySelectorAll("#DataTables_Table_0 tbody tr")
If Timer - T > timeLimit Then Exit Do
DoEvents
Loop While elem.Length = 0
With Html.querySelectorAll("#DataTables_Table_0 tbody tr")
For i = 0 To .Length - 1
R = R + 1: Sheets(1).Cells(R, 1) = .Item(i).querySelector("td a[href^='/psasetregistry/baseball/company-sets/']").getAttribute("href")
Next i
End With
.Quit
End With
End Sub
My second approach: (I do not want to use this Html.querySelectorAll("#DataTables_Table_0 tbody tr") twice)
Sub GetCardLinks()
Const baseUrl = "https://www.psacard.com"
Const timeLimit& = 10
Dim IE As New InternetExplorer, Html As HTMLDocument
Dim R&, T As Date, elem As Object
With IE
.Visible = False
.navigate "https://www.psacard.com/psasetregistry/baseball/company-sets/16"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set Html = .document
T = Timer
Do
Set elem = Html.querySelectorAll("#DataTables_Table_0 tbody tr")
If Timer - T > timeLimit Then Exit Do
DoEvents
Loop While elem.Length = 0
For i = 0 To elem.Length - 1
R = R + 1: ThisWorkbook.Sheets(1).Cells(R, 1) = elem.Item(i).querySelector("td a[href^='/psasetregistry/baseball/company-sets/']").getAttribute("href")
Next i
.Quit
End With
End Sub
What possible change should I bring about to make my second approach work?
Don't set to html variable. Work off the .document
Option Explicit
Sub GetCardLinks()
Const baseUrl = "https://www.psacard.com"
Const timeLimit& = 10
Dim IE As New InternetExplorer, Html As HTMLDocument
Dim R&, T As Date, elem As Object, i As Long
With IE
.Visible = True
.navigate "https://www.psacard.com/psasetregistry/baseball/company-sets/16"
While .Busy = True Or .readyState < 4: DoEvents: Wend
T = Timer
Do
Set elem = .document.querySelectorAll("#DataTables_Table_0 tbody tr")
If Timer - T > timeLimit Then Exit Do
DoEvents
Loop While elem.Length = 0
For i = 0 To elem.Length - 1
R = R + 1: ThisWorkbook.Sheets(1).Cells(R, 1) = elem.item(i).querySelector("td a[href^='/psasetregistry/baseball/company-sets/']").getAttribute("href")
Next i
.Quit
End With
End Sub
I have the following in order to get data from webpage and I arrange them per column in a worksheet. I take one of the data which is a URL and after I put them in cells I want to navigate again to that page and get my last info.
Option Explicit
Public Sub GetInfo()
Dim ie As InternetExplorer: Set ie = New InternetExplorer
Dim i As Long
Const MAX_WAIT_SEC As Long = 20
With ie
.Visible = True
.Navigate2 "https://www.skroutz.gr/s/8988836/Mattel-Hot-Wheels-%CE%91%CF%85%CF%84%CE%BF%CE%BA%CE%B9%CE%BD%CE%B7%CF%84%CE%AC%CE%BA%CE%B9%CE%B1-%CE%A3%CE%B5%CF%84-%CF%84%CF%89%CE%BD-10.html"
While .Busy Or .ReadyState < 4: DoEvents: Wend
Dim finalPrices As Object, sellers As Object, availability As Object
Dim products As Object, t As Date
Set products = .Document.querySelectorAll(".card.js-product-card")
t = Timer
Do
DoEvents
ie.Document.parentWindow.execScript "window.scrollBy(0, window.innerHeight);", "javascript"
Set finalPrices = .Document.querySelectorAll(".card.js-product-card span.final-price")
Application.Wait Now + TimeSerial(0, 0, 1)
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop Until finalPrices.Length = products.Length
Set sellers = .Document.querySelectorAll(".card.js-product-card .shop.cf a[title]")
Set availability = .Document.querySelectorAll(".card.js-product-card span.availability")
With ThisWorkbook.Worksheets("TESTINGS")
For i = 0 To 5
If availability.Item(i).innerText = "Άμεσα Διαθέσιμο σε 1 έως 3 ημέρες" Then
.Cells(2, i + 4) = sellers.Item(i)
.Cells(3, i + 4) = finalPrices.Item(i).innerText
.Cells(4, i + 4) = availability.Item(i).innerText
End If
Next
.Columns("D:I").AutoFit
ie.Quit
'Do While ie.Busy Or Not ie.ReadyState = READYSTATE_COMPLETE
'DoEvents
'Loop
'Dim place As Object, mylink As String
'For i = 0 To 5
' ie.Visible = True
' mylink = .Cells(2, i + 4).Value
' If mylink <> "" Then
' ie.Navigate2 mylink
' Set place = ie.Document.querySelector(".shop-stores.cf")
' .Cells(5, i + 4) = place.innerText
' End If
'Next
End With
End With
End Sub
If I add the following which checks the cell with a URL, and if it has value inside then opens the URL gets the value and finish then I get Automation Error
Do While ie.Busy Or Not ie.ReadyState = READYSTATE_COMPLETE
DoEvents
Loop
Dim place As Object, mylink As String
For i = 0 To 5
ie.Visible = True
mylink = .Cells(2, i + 4).Value
If mylink <> "" Then
ie.Navigate2 mylink
Set place = ie.Document.querySelector(".shop-stores.cf")
.Cells(5, i + 4) = place.innerText
End If
Next
Then I get an Automation error on the following line
Set place = ie.Document.querySelector(".shop-stores.cf")
Can't go from one page to another?
Do I have to create a different sub and call it from GetInfo() sub?
You need a proper wait after each navigate2 and make your ie object visible outside of all loops e.g. structure. You could add in a timed loop for setting of place variable. I have added a safeguard test of If Not Is Nothing. Have your .Quit at the end after you have finished with the ie object.
Option Explicit
Public Sub GetInfo()
Dim ie As New InternetExplorer, i As Long
Const MAX_WAIT_SEC As Long = 20
With ie
.Visible = True
.Navigate2 "https://www.skroutz.gr/s/8988836/Mattel-Hot-Wheels-%CE%91%CF%85%CF%84%CE%BF%CE%BA%CE%B9%CE%BD%CE%B7%CF%84%CE%AC%CE%BA%CE%B9%CE%B1-%CE%A3%CE%B5%CF%84-%CF%84%CF%89%CE%BD-10.html"
While .Busy Or .readyState < 4: DoEvents: Wend
'code with first link
Dim place As Object, mylink As String
For i = 0 To 5
mylink = ActiveSheet.Cells(2, i + 4).Value
If mylink <> vbNullString Then
.Navigate2 mylink
While .Busy Or .readyState < 4: DoEvents: Wend
On Error Resume Next
Set place = .document.querySelector(".shop-stores.cf")
On Error GoTo 0
If Not place Is Nothing Then
ActiveSheet.Cells(5, i + 4) = place.innerText
Set place = Nothing
End If
End If
Next
.Quit
End With
End Sub