Cannot navigate from an already loaded webpage to another with vba - excel

I have the following in order to get data from webpage and I arrange them per column in a worksheet. I take one of the data which is a URL and after I put them in cells I want to navigate again to that page and get my last info.
Option Explicit
Public Sub GetInfo()
Dim ie As InternetExplorer: Set ie = New InternetExplorer
Dim i As Long
Const MAX_WAIT_SEC As Long = 20
With ie
.Visible = True
.Navigate2 "https://www.skroutz.gr/s/8988836/Mattel-Hot-Wheels-%CE%91%CF%85%CF%84%CE%BF%CE%BA%CE%B9%CE%BD%CE%B7%CF%84%CE%AC%CE%BA%CE%B9%CE%B1-%CE%A3%CE%B5%CF%84-%CF%84%CF%89%CE%BD-10.html"
While .Busy Or .ReadyState < 4: DoEvents: Wend
Dim finalPrices As Object, sellers As Object, availability As Object
Dim products As Object, t As Date
Set products = .Document.querySelectorAll(".card.js-product-card")
t = Timer
Do
DoEvents
ie.Document.parentWindow.execScript "window.scrollBy(0, window.innerHeight);", "javascript"
Set finalPrices = .Document.querySelectorAll(".card.js-product-card span.final-price")
Application.Wait Now + TimeSerial(0, 0, 1)
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop Until finalPrices.Length = products.Length
Set sellers = .Document.querySelectorAll(".card.js-product-card .shop.cf a[title]")
Set availability = .Document.querySelectorAll(".card.js-product-card span.availability")
With ThisWorkbook.Worksheets("TESTINGS")
For i = 0 To 5
If availability.Item(i).innerText = "Άμεσα Διαθέσιμο σε 1 έως 3 ημέρες" Then
.Cells(2, i + 4) = sellers.Item(i)
.Cells(3, i + 4) = finalPrices.Item(i).innerText
.Cells(4, i + 4) = availability.Item(i).innerText
End If
Next
.Columns("D:I").AutoFit
ie.Quit
'Do While ie.Busy Or Not ie.ReadyState = READYSTATE_COMPLETE
'DoEvents
'Loop
'Dim place As Object, mylink As String
'For i = 0 To 5
' ie.Visible = True
' mylink = .Cells(2, i + 4).Value
' If mylink <> "" Then
' ie.Navigate2 mylink
' Set place = ie.Document.querySelector(".shop-stores.cf")
' .Cells(5, i + 4) = place.innerText
' End If
'Next
End With
End With
End Sub
If I add the following which checks the cell with a URL, and if it has value inside then opens the URL gets the value and finish then I get Automation Error
Do While ie.Busy Or Not ie.ReadyState = READYSTATE_COMPLETE
DoEvents
Loop
Dim place As Object, mylink As String
For i = 0 To 5
ie.Visible = True
mylink = .Cells(2, i + 4).Value
If mylink <> "" Then
ie.Navigate2 mylink
Set place = ie.Document.querySelector(".shop-stores.cf")
.Cells(5, i + 4) = place.innerText
End If
Next
Then I get an Automation error on the following line
Set place = ie.Document.querySelector(".shop-stores.cf")
Can't go from one page to another?
Do I have to create a different sub and call it from GetInfo() sub?

You need a proper wait after each navigate2 and make your ie object visible outside of all loops e.g. structure. You could add in a timed loop for setting of place variable. I have added a safeguard test of If Not Is Nothing. Have your .Quit at the end after you have finished with the ie object.
Option Explicit
Public Sub GetInfo()
Dim ie As New InternetExplorer, i As Long
Const MAX_WAIT_SEC As Long = 20
With ie
.Visible = True
.Navigate2 "https://www.skroutz.gr/s/8988836/Mattel-Hot-Wheels-%CE%91%CF%85%CF%84%CE%BF%CE%BA%CE%B9%CE%BD%CE%B7%CF%84%CE%AC%CE%BA%CE%B9%CE%B1-%CE%A3%CE%B5%CF%84-%CF%84%CF%89%CE%BD-10.html"
While .Busy Or .readyState < 4: DoEvents: Wend
'code with first link
Dim place As Object, mylink As String
For i = 0 To 5
mylink = ActiveSheet.Cells(2, i + 4).Value
If mylink <> vbNullString Then
.Navigate2 mylink
While .Busy Or .readyState < 4: DoEvents: Wend
On Error Resume Next
Set place = .document.querySelector(".shop-stores.cf")
On Error GoTo 0
If Not place Is Nothing Then
ActiveSheet.Cells(5, i + 4) = place.innerText
Set place = Nothing
End If
End If
Next
.Quit
End With
End Sub

Related

Url changes dynamically and need to extract the data into same excel sheet.using VBA

Url changes dynamically
For Example
https://www.ziprecruiter.com/candidate/search?days=5&search=nav&location=USA&page=2
https://www.ziprecruiter.com/candidate/search?days=5&search=nav&location=USA&page=3
into same excel sheet
Sub GetJobTitles()
Const Url$ = "https://www.ziprecruiter.com/candidate/search?days=5&search=nav&location=USA"
Dim post As Object, R&
With CreateObject("InternetExplorer.Application")
.Visible = True
.navigate Url
While .Busy Or .readyState < 4: DoEvents: Wend
For Each post In .document.getElementsByTagName("article")
R = R + 1: Cells(R, 1) = post.getElementsByClassName("just_job_title")(0).innerText
Cells(R, 2) = post.getElementsByClassName("name")(0).innerText
Cells(R, 3) = post.getElementsByClassName("location")(0).innerText
Next post
.Quit
End With
End Sub
The following should work. Give it a shot:
Sub GetJobInfo()
Const URL$ = "https://www.ziprecruiter.com/candidate/search?days=5&search=nav&location=USA&page="
Dim IE As Object: Set IE = CreateObject("InternetExplorer.Application")
Dim post As Object, elem$, R&, I&: I = 1
Do
With IE
.Visible = True
.navigate URL & I
While .Busy Or .readyState < 4: DoEvents: Wend
On Error Resume Next
elem = .document.getElementsByTagName("article")(0).innerText
On Error GoTo 0
If elem = "" Then Exit Do
For Each post In .document.getElementsByTagName("article")
R = R + 1: Cells(R, 1) = post.getElementsByClassName("just_job_title")(0).innerText
Cells(R, 2) = post.getElementsByClassName("name")(0).innerText
Cells(R, 3) = post.getElementsByClassName("location")(0).innerText
Next post
End With
I = I + 1
elem = ""
Application.Wait Now + TimeValue("00:00:05")
Loop
IE.Quit
End Sub

How to Improve Data Scraping Using VBA?

I have below code, which fetch data from a Intranet. But its taking more time to fetch data.can someone help me to modify the code to increase the performance.
Thanks In Advance
Note-I haven't posted URL as it is clients website. Sorry about that.
Sub FetchData()
Dim IE As Object
Dim Doc As HTMLDocument
Dim myStr As String
On Error Resume Next
Set IE = CreateObject("InternetExplorer.Application") 'SetBrowser
IE.Visible = False
IE.navigate "URL" 'Open website
Do While IE.Busy Or IE.ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
Set Doc = IE.Document
Doc.getElementById("tbxUserID").Value = InputBox("Please Enter Your ID")
Doc.getElementById("txtPassword").Value = InputBox("Please Enter Your
Password")
Doc.getElementById("BtnLogin").Click
Do While IE.Busy Or IE.ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
IE.navigate "URL"
Do While IE.Busy Or IE.ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
Dim LastRow As Long
Set wks = ActiveSheet
LastRow = wks.Cells(wks.Rows.Count, "A").End(xlUp).Row
Set rowNo = wks.Range("A1:A" & LastRow)
For rowNo = 2 To LastRow
Doc.getElementById("txtField1").Value =
ThisWorkbook.Sheets("Sheet1").Range("A" & rowNo).Value
Doc.getElementById("CtrlQuickSearch1_imgBtnSumbit").Click
Do While IE.Busy Or IE.ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
strVal1 = Doc.querySelectorAll("span")(33).innerText
ThisWorkbook.Sheets("Sheet1").Range("B" & rowNo).Value = strVal1
strVal2 = Doc.querySelectorAll("span")(35).innerText
ThisWorkbook.Sheets("Sheet1").Range("C" & rowNo).Value = strVal2
Next
End Sub
Can't guarantee this will run. Points to note:
Use of Worksheets collection
Use of Option Explicit - this means you then have to use the right datatype throughout. Currently you have undeclared variables and, for example, rowNo is used as a Long and as a range.
Removal of On Error Resume Next
Putting all worksheets into variables
Placement of values into array and looping array to get id values. Looping sheet is expensive
Use of early binding and adding class to InternetExplorer
Assumption that after login a new url present and that you need to navigate back to that before each new loop value
Removal of hungarian notation
Ids are fastest selector method so no improvement there
With your css type selectors, e.g. .document.querySelectorAll("span")(33), you might seek whether there is a single node short selector that can be used, rather than using nodeList
VBA:
Option Explicit
Public Sub FetchData()
Dim ie As Object, ie As InternetExplorer
Dim lastRow As Long, wks As Worksheet, i As Long, ws As Worksheet
Set ie = New SHDocVw.InternetExplorer 'SetBrowser
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set wks = ActiveSheet '<==use explicit sheet name if possible
lastRow = wks.Cells(wks.rows.Count, "A").End(xlUp).Row
loopvalues = Application.Transpose(wks.Range("A2:A" & lastRow).Value)
With ie
.Visible = False
.Navigate2 "URL" 'Open website
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("tbxUserID").Value = InputBox("Please Enter Your ID")
.document.getElementById("txtPassword").Value = InputBox("Please Enter Your Password")
.document.getElementById("BtnLogin").Click
While .Busy Or ie.readyState < 4: DoEvents: Wend
Dim newURL As String, val1 As String, val2 As String
newURL = .document.URL
For i = LBound(loopvalues) To UBound(loopvalues)
.document.getElementById("txtField1").Value = loopvalues(i)
.document.getElementById("CtrlQuickSearch1_imgBtnSumbit").Click
While .Busy Or .readyState < 4: DoEvents: Wend
val1 = .document.querySelectorAll("span")(33).innerText
ws.Range("B" & i).Value = val1
val2 = .document.querySelectorAll("span")(35).innerText
ws.Range("C" & i).Value = val2
.Navigate2 newURL
While .Busy Or ie.readyState < 4: DoEvents: Wend
Next
.Quit
End With
End Sub

Extract hyperlink using CSS selector query

I would like to extract the hyperlink from a webpage by using queryselector all, but there are no results coming out.
Below is my code.
Sub ScrapLink()
Application.ScreenUpdating = False
Dim IE As New InternetExplorer, html As HTMLDocument
Dim x As Long
Application.ScreenUpdating = False
With IE
IE.Visible = True
IE.Navigate "http://www.bursamalaysia.com/market/listed-companies/company-announcements/5978065"
While .Busy Or .ReadyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 1)
DoEvents
With .Document.getElementById("bm_ann_detail_iframe").contentDocument
Dim links As Object, i As Long
Set links = .Document.querySelectorAll("p.att_download_pdf[href^='/FileAccess/apbursaweb/']")
For i = 1 To links.Length
With ThisWorkbook.Worksheets("Sheet1")
Range("A" & Rows.Count).End(xlUp).Offset(1).Value = links.Item(i - 1)
End With
Next i
.Quit
End With
End With
End Sub
You could just avoid the initial page and use the URL direct from the frame. This would be my preference unless you don't know, for some reason, this URL.
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, nodeList As Object, i As Long
With IE
.Visible = True
.navigate2 "http://disclosure.bursamalaysia.com/FileAccess/viewHtml?e=2906127"
While .Busy Or .readyState < 4: DoEvents: Wend
Set nodeList = .document.querySelectorAll(".att_download_pdf [href^='/FileAccess/apbursaweb/download']")
For i = 0 To nodeList.Length - 1
Debug.Print nodeList.item(i).href
Next
.Quit
End With
End Sub
Or you can jump right on over to the iframe src after page load:
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, nodeList As Object, i As Long
With IE
.Visible = True
.Navigate2 "http://www.bursamalaysia.com/market/listed-companies/company-announcements/5978065"
While .Busy Or .readyState < 4: DoEvents: Wend
.Navigate2 .document.querySelector("iframe").src
While .Busy Or .readyState < 4: DoEvents: Wend
Set nodeList = .document.querySelectorAll(".att_download_pdf [href^='/FileAccess/apbursaweb/download']")
For i = 0 To nodeList.Length - 1
Debug.Print nodeList.item(i).href
Next
.Quit
End With
End Sub
Try the following. It should fetch you the links you wish to grab:
Sub ScrapLink()
Dim IE As New InternetExplorer, Html As HTMLDocument
Dim frame As Object, i As Long
With IE
.Visible = True
.navigate "http://www.bursamalaysia.com/market/listed-companies/company-announcements/5978065"
While .Busy Or .readyState < 4: DoEvents: Wend
Set Html = .document
End With
Application.Wait Now + TimeValue("00:00:03") 'This delay may vary in your case
Set frame = Html.getElementById("bm_ann_detail_iframe").contentWindow.document
With frame.querySelectorAll("p.att_download_pdf a")
For i = 0 To .Length - 1
Cells(i + 1, 1) = .item(i).getAttribute("href")
Next i
End With
End Sub
If you wish to kick out the delay then try changing the portion below with the above one:
Do: Set frame = Html.getElementById("bm_ann_detail_iframe"): DoEvents: Loop While frame Is Nothing
With frame.contentWindow.document.querySelectorAll("p.att_download_pdf a")
For i = 0 To .Length - 1
Cells(i + 1, 1) = .item(i).getAttribute("href")
Next i
End With

Retrieving a URL from Internet Explorer with VBA

I have written some VBA code in Excel to retrieve the latitude and longitude from a Google Maps URL and paste it into a cell in my worksheet. My problem is in retrieving the URL from internet explorer. Below I have two examples of my code, one macro returns an about:blank as though the object doesn't have the LocationURL property, and the other example seems like it is saving all of my previous searches, so it cycles through all of my previous searches and pastes the very first searches' URL. Example 2 uses a shell suggestion that I found online to reassign the properties to the oIE object. I can get both to slightly work, but neither will do exactly what I need from the macro.
Cell(8,8) is a hyperlink to google maps where I'm searching an address, and Cell(8,9) is where I want to paste the URL after google maps has redirected and has the latitude and longitude in the URL.
Example 1:
Sub CommandButton1_Click()
Dim ie As Object
Dim Doc As HTMLDocument
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
ie.Navigate "http://www.google.com/maps?q=" & Range("I7").Value
Do
DoEvents
Loop Until ie.ReadyState = 4
Set Doc = ie.Document
Cells(8, 9).Value = ie.LocationName
End Sub
Example 2:
Sub Macro()
Dim oIE, oShell, objShellWindows, strPath, X
strPath = Cells(8, 8)
Set oIE = CreateObject("InternetExplorer.Application")
'This is to resolve oIE.navigate "about:blank" issue
oIE.Top = 0
oIE.Left = 0
oIE.Width = 500
oIE.Height = 500
oIE.Navigate strPath
Do While oIE.Busy And oIE.ReadyState < 2
DoEvents
Loop
'Reassigning oIE.LocationName & vbCrLf & oIE.LocationURL values after redirect in IE
Set oShell = CreateObject("WScript.Shell")
Set objShellWindows = CreateObject("Shell.Application").Windows
For X = objShellWindows.Count - 1 To 0 Step -1
Set oIE = objShellWindows.Item(X)
If Not oIE Is Nothing Then
If StrComp(oIE.LocationURL, strPath, 1) = 0 Then
Do While oIE.Busy And oIE.ReadyState < 2
DoEvents
Loop
oIE.Visible = 2
Exit For
End If
End If
Cells(8, 9).Value = oIE.LocationURL
Set oIE = Nothing
Next
Set objShellWindows = Nothing
Set oIE = Nothing
End Sub
Thanks,
Andrew
Is this as simple as looping until the document.URL changes? In my timed loop I wait for the string safe=vss in the original page load to disappear.
Option Explicit
Public Sub GetNewURL()
Dim IE As New InternetExplorer, newURL As String, t As Date
Const MAX_WAIT_SEC As Long = 5
With IE
.Visible = True
.navigate2 "http://www.google.com/maps?q=" & "glasgow" '<==Range("I7").Value
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
DoEvents
newURL = .document.URL
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While InStr(newURL, "safe=vss") > 0
Debug.Print newURL
End With
End Sub

Run-time error 438 Object doesn't support this property or method

I'm dealing with a problem that's been dealt this before, but not in this situation.
I'm pulling addresses from the USPS website using VBA. When I place in my cell "ele.innertext" I get all of the innertext within the class, but VBA won't let me isolate the innertext to the item level - ele.item(1).innertext, for example, give me the above error. Do you know why?
My browser is IE11.
Relevant HTML:
<div id="zipByAddressDiv" class="industry-detail">Loading...</div>
<!-- start Handlebars template -->
<script id="zipByAddressTemplate" type="text/x-handlebars-template">
<ul class="list-group industry-detail">
{{#each addressList}}
<li class="list-group-item paginate">
<div class="zipcode-result-address">
<p>{{companyName}}</p>
<p>{{addressLine1}}</p>
<p>{{city}} {{state}} <strong>{{zip5}}-{{zip4}}</strong></p>
VBA:
Sub USPS()
Dim eRow As Long
Dim ele As Object
Dim objie As Object
Dim wscript As Object
Dim test As String
Dim testarray() As String
'Dim goods As Object
Dim r As Integer
Dim x As Long: x = 0
Dim vFacility As Variant
Dim y As Variant
'Dim IE As New InternetExplorer
Sheets("Address").Select
eRow = Sheet1.Cells(Rows.Count, 1).End(xlUp).Offset(1, 0).Row
Set objie = CreateObject("InternetExplorer.Application")
For r = 4 To 8
myaddress = Cells(r, 5).Value
mycity = Cells(r, 7).Value
mystate = Cells(r, 8).Value
myzipcode = Cells(r, 9).Value
'myaddress = Range("a2").Value
'mycity = Range("c2").Value
'mystate = Range("d2").Value
'myzipcode = Range("e2").Value
With objie
.Visible = True
.navigate "https://tools.usps.com/go/ZipLookupAction!input.action"
Do While .Busy
DoEvents
Loop
Set what = .document.getElementsByName("tAddress")
what.Item(0).Value = myaddress
Set zipcode = .document.getElementsByName("tCity")
zipcode.Item(0).Value = mycity
Set zipcode1 = .document.getElementsByName("tState")
zipcode1.Item(0).Value = mystate
Set zipcode2 = .document.getElementsByName("tZip-byaddress")
zipcode2.Item(0).Value = myzipcode
.document.getElementByID("zip-by-address").Click
Do While .Busy
DoEvents
Loop
For Each ele In .document.all
Select Case ele.className
Case "industry-detail"
test = ele.innertext
testarray = Split(test, vbCrLf)
Worksheets("Address").Cells(r, 11).Value = testarray(4)
'Debug.Print test
'Debug.Print "and"
'Debug.Print testarray(4)
End Select
Next ele
End With
Next r
Set objie = Nothing
Set ele = Nothing
Set IE = Nothing
'IE.Quit
End Sub
What I think you are trying to do is input address details and retrieve the found zipcode. This method uses CSS selectors to target the page styling and I start immediately with the address search URL. I use id selectors where possible (which is the same as saying .document.getElementById("yourID"), denoted by # as these are the quickest retrieval methods. When it comes to choosing state, which is a dropdown, I select the appropriate option. You could concantenate the search state 2 letter abbreviation into the option string e.g.
Dim state As String
state = "NY"
.querySelector("option[value=" & state & "]").Selected = True
There is a loop to ensure the target element is present in new search results page. I use another CSS selector of #zipByAddressDiv strong to target just the zipcode, which is in bold, in the results. The bold is set by the strong tag.
strong tag holding zipcode in result:
CSS query:
The above CSS selector is target by id using #zipByAddressDiv and then, rather than splitting into an array to get the value you want, it uses a descendant selector to target the strong tag element holding the required value.
VBA:
Option Explicit
Public Sub AddressSearch()
Dim IE As New InternetExplorer, t As Date, ele As Object
Const MAX_WAIT_SEC As Long = 5
With IE
.Visible = True
.navigate "https://tools.usps.com/zip-code-lookup.htm?byaddress"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("#tAddress").Value = "1 Main Street"
.querySelector("#tCity").Value = "New York"
.querySelector("option[value=NY]").Selected = True
' .querySelector("#tZip-byaddress").Value = 10045
.querySelector("#zip-by-address").Click
End With
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
DoEvents
On Error Resume Next
Set ele = .document.querySelector("#zipByAddressDiv strong")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While ele Is Nothing
Debug.Print ele.innerText
.Quit
End With
End Sub
Here is what that looks like in a loop:
Option Explicit
Public Sub AddressSearch()
Dim IE As New InternetExplorer, t As Date, ele As Object, i As Long
Dim ws As Worksheet: Set ws = ThisWorkbook.Worksheets("Address")
Const MAX_WAIT_SEC As Long = 5
With IE
.Visible = True
For i = 4 To 8
.navigate "https://tools.usps.com/zip-code-lookup.htm?byaddress"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("#tAddress").Value = ws.Cells(i, 5).Value
.querySelector("#tCity").Value = ws.Cells(i, 7).Value
.querySelector("option[value=" & ws.Cells(i, 8).Value & "]").Selected = True
' .querySelector("#tZip-byaddress").Value = 10045
.querySelector("#zip-by-address").Click
End With
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
DoEvents
On Error Resume Next
Set ele = .document.querySelector("#zipByAddressDiv strong")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While ele Is Nothing
ws.Cells(i, 11) = ele.innerText
Set ele = Nothing
Next
.Quit
End With
End Sub

Resources