Use Excel VBA to Extract Data From a Webpage - excel

I'm trying to use Excel VBA to extract some data from a webpage (https://www.churchofjesuschrist.org/maps/meetinghouses/lang=eng&q=1148+W+100+N). The code I'm using will open Internet Explorer, navigate to the website, and it will extract the top most result. But I can't seem to figure out how to extract the rest of the results (i.e. ward, language, contact name, contact #). Thoughts?
Sub MeethinghouseLocator()
Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE.navigate Sheets("Sheet1").Range("A1").Value
IE.Visible = True
While IE.Busy
DoEvents
Wend
Application.Wait (Now + TimeValue("0:00:01"))
IE.document.querySelector("button.search-input__execute.button--primary").Click
Dim Doc As HTMLDocument
Set Doc = IE.document
Application.Wait (Now + TimeValue("0:00:01"))
'WardName
Dim aaaaFONT As String
aaaaFONT = Trim(Doc.getElementsByClassName("location-header__name ng-binding")(0).innerText)
Sheets("Sheet1").Range("D6").Value = aaaaFONT
Application.Wait (Now + TimeValue("0:00:01"))
'Language
Dim aaabFONT As String
aaabFONT = Trim(Doc.getElementsByClassName("location-header__language ng-binding ng-scope")(0).innerText)
Sheets("Sheet1").Range("E6").Value = aaabFONT
'Click 1st Link
IE.document.getElementsByClassName("location-header__name ng-binding")(0).Click
Application.Wait (Now + TimeValue("0:00:01"))
'Contact Name
Dim aaacFONT As String
aaacFONT = Trim(Doc.getElementsByClassName("maps-card__group maps-card__group--inline ng-scope")(2).innerText)
Sheets("Sheet1").Range("H6").Value = aaacFONT
'Contact Name Function
Range("F6").Select
ActiveCell.FormulaR1C1 = _
"=LEFT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),FIND(RIGHT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),LEN(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-FIND(CHAR(10),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-1)"
'Contact Phone Number
Dim aaadFONT As String
aaadFONT = Trim(Doc.getElementsByClassName("phone ng-binding")(0).innerText)
Sheets("Sheet1").Range("G6").Value = aaadFONT
IE.Quit
End Sub

Most of your code works actually so I'm not sure what issue are you facing but you didn't account for the loading after clicking each link so I have added While loop to check for its Ready and ReadyState property before continuing.
EDIT: The code now loops through all the wards listed in the result, the idea is to keep the first IE at the result page and pass the URL of the ward and the input row to sub ExtractWard where it will open another IE, navigate to the given URL and extract the ward details.
Sub MeethinghouseLocator()
Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE.navigate Sheets("Sheet1").Range("A1").Value
IE.Visible = True
While IE.Busy Or IE.readyState <> 4
DoEvents
Wend
IE.document.querySelector("button.search-input__execute.button--primary").Click
While IE.Busy Or IE.readyState <> 4
DoEvents
Wend
Dim Doc As HTMLDocument
Set Doc = IE.document
Application.Wait (Now + TimeValue("0:00:01"))
Dim wardContent As Object
Set wardContent = Doc.getElementsByClassName("maps-card__content")(2)
Dim wardCollection As Object
Set wardCollection = wardContent.getElementsByClassName("location-header")
Dim rowNum As Long
rowNum = 6
Dim i As Long
For i = 0 To wardCollection.Length - 1
With wardCollection(i)
'WardName
Dim aaaaFONT As String
aaaaFONT = Trim(.getElementsByClassName("location-header__name ng-binding")(0).innerText)
Sheets("Sheet1").Cells(rowNum, "D").Value = aaaaFONT
'Language
Dim aaabFONT As String
aaabFONT = Trim(.getElementsByClassName("location-header__language ng-binding ng-scope")(0).innerText)
Sheets("Sheet1").Cells(rowNum, "E").Value = aaabFONT
Dim wardURL As String
wardURL = .getElementsByClassName("location-header__name ng-binding")(0).href
ExtractWard wardURL, rowNum
End With
rowNum = rowNum + 1
Next i
Set Doc = Nothing
IE.Quit
Set IE = Nothing
End Sub
Private Sub ExtractWard(argURL As String, argRow As Long)
Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE.navigate argURL
IE.Visible = True
While IE.Busy Or IE.readyState <> 4
DoEvents
Wend
Dim Doc As HTMLDocument
Set Doc = IE.document
'Contact Name
Dim aaacFONT As String
aaacFONT = Trim(Doc.getElementsByClassName("maps-card__group maps-card__group--inline ng-scope")(2).innerText)
Sheets("Sheet1").Cells(argRow, "H").Value = aaacFONT
'Contact Name Function
Sheets("Sheet1").Cells(argRow, "F").FormulaR1C1 = _
"=LEFT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),FIND(RIGHT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),LEN(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-FIND(CHAR(10),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-1)"
'Contact Phone Number
Dim aaadFONT As String
aaadFONT = Trim(Doc.getElementsByClassName("phone ng-binding")(0).innerText)
Sheets("Sheet1").Cells(argRow, "G").Value = aaadFONT
Set Doc = Nothing
IE.Quit
Set IE = Nothing
End Sub

Related

How to click one href that changes every login?

I need to do a click event on this href link
<li>Report</li>
The PID and KEY change everytime, so I cant use .navigate .. Is there any code to do like a partial match for looking for "*" & "TerminalReport" & "*" and click?
The code is to long to post here, it is telling me to add more details so I will post the code from the begining to the part I'm getting stuck unles I use .navigate
Private Sub CommandButton2_Click()
Dim IE As Object
Dim Dc_User As String
Dim Dc_Pass As String
Dim Dc_URL As String
Dim txtNam As Object, txtPwd As Object
Dc_User = "LOGIN"
Dc_Pass = "PASSWORD"
Set IE = CreateObject("InternetExplorer.Application")
With IE
.Visible = True
.navigate "https://www.solarmanpv.com/portal/LoginPage.aspx"
While IE.readyState <> 4
DoEvents
Wend
IE.document.getElementById("uNam").Value = Dc_User
IE.document.getElementById("uPwd").Value = Dc_Pass
IE.document.getElementById("Loginning").Click
Application.Wait Now + #12:00:05 AM#
'THIS IS WHERE I NEED HELP
.navigate "https://www.solarmanpv.com/portal/Terminal/TerminalReport.aspx?pid=20635&key=78775J8"
End With
End Sub
Something like this:
Private Sub CommandButton2_Click()
Dim IE As Object
Dim Dc_User As String
Dim Dc_Pass As String
Dim Dc_URL As String
Dim txtNam As Object, txtPwd As Object
Dim links As Object, link As Object
Dc_User = "LOGIN"
Dc_Pass = "PASSWORD"
Set IE = CreateObject("InternetExplorer.Application")
With IE
.Visible = True
.navigate "https://www.solarmanpv.com/portal/LoginPage.aspx"
WaitFor IE
IE.document.getElementById("uNam").Value = Dc_User
IE.document.getElementById("uPwd").Value = Dc_Pass
IE.document.getElementById("Loginning").Click
Application.Wait Now + TimeSerial(0, 0, 5) '5 secs
Set links = .document.getElementById("ct100_cur_item_list").getElementsByTagName("A")
For Each link In links
Debug.Print link.href
If link.href Like "*TerminalReport*" Then
link.Click
Exit For
End If
Next link
End With
End Sub
Sub WaitFor(IE As Object)
While IE.readyState <> 4
DoEvents
Wend
End Sub

VBA Get webpage Link and Download File to

Is a long time ago to programming VBA. I want to fetch in a web Table all Linked PDF Files in coll um (x). First I have created the logins and navigate to the site to fetch file.
Site table:
Sub goToShopWH()
Dim ieApp As InternetExplorer
Dim ieDoc As Object
Dim clip As Object
'Table Grapping
Dim ieTable As Object
'create a new instance of ie
'Set ieApp = New InternetExplorer
Set ieApp = CreateObject("InternetExplorer.Application")
'you don’t need this, but it’s good for debugging
ieApp.Visible = True
'assume we’re not logged in and just go directly to the login page
ieApp.Navigate "https://website.com/ishop/Home.html"
Do While ieApp.Busy: DoEvents: Loop
Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents:
Loop
' Login to site
Set ieDoc = ieApp.Document
'fill in the login form – View Source from your browser to get the control names
With ieDoc '.forms("loginForm_0")
.getElementById("loginField").Value = "username"
.getElementById("password").Value = "Password"
.getElementById("loginSubmit").Click
Do While ieApp.Busy: DoEvents: Loop
Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop
'now that we’re in, go to the page we want
'Switsh to search form
ieApp.Navigate "https://website.com/ishop/account/MyAccount,$comp$account$AccountNavigation.orderHistory.sdirect?sp=Saccount%2FOrderHistory"
Do While ieApp.Busy: DoEvents: Loop
Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop
' fillout form
Set ieDoc = ieApp.Document
With ieDoc '.forms("Form_0")
.getElementById("PropertySelection_2").Value = "7"
.getElementById("commtxt").Value = "190055" 'Projekt Nummer oder Text.
.getElementById("Submit_0").Click
End With
Do
DoEvents
Loop Until ieApp.ReadyState = 4
Application.Wait Now + TimeSerial(0, 0, 5)
With ieDoc '.forms("Form_1")
.getElementById("PropertySelection").Value = "3"
.getElementById("PropertySelection").FireEvent ("onchange")
End With
Do
DoEvents
Loop Until ieApp.ReadyState = 4
Application.Wait Now + TimeSerial(0, 0, 5)
End With
Set webpage = ieApp.Document
Set table_data = webpage.getElementsByTagName("tr")
End Sub
Please help my to solve this to get I want import Table to sheet2 and download all PDF in ("Table"),("tbody")(tr)(1 to X),(td)(10),(href).click
I think you are after something like this.
Sub webpage()
Dim internet As Object
Dim internetdata As Object
Dim div_result As Object
Dim header_links As Object
Dim link As Object
Dim URL As String
Set internet = CreateObject("InternetExplorer.Application")
internet.Visible = True
URL = "https://www.google.co.in/search?q=how+to+program+in+vba"
internet.Navigate URL
Do Until internet.ReadyState >= 4
DoEvents
Loop
Application.Wait Now + TimeSerial(0, 0, 5)
Set internetdata = internet.Document
Set div_result = internetdata.getelementbyid("res")
Set header_links = div_result.getelementsbytagname("h3")
For Each h In header_links
Set link = h.ChildNodes.Item(0)
Cells(Range("A" & Rows.Count).End(xlUp).Row + 1, 1) = link.href
Next
MsgBox "done"
End Sub
Or, perhaps this.
' place your URL in cell L1
Sub HREF_Web()
Dim doc As HTMLDocument
Dim output As Object
Set IE = New InternetExplorer
IE.Visible = False
IE.navigate Range("L1")
Do
'DoEvents
Loop Until IE.readyState = READYSTATE_COMPLETE
Set doc = IE.document
Set output = doc.getElementsByTagName("a")
i = 5
For Each link In output
'If link.InnerHTML = "" Then
Range("A" & i).Value2 = link
' End If
i = i + 1
Next
MsgBox "Done!"
End Sub

How to use Excel-VBA to pick up a date from a web popup calendar?

I'm working on VBA code in the last step to fill a web form. The problem is that the cell value which I passed to the date field does not match the format. Even if I try to pass string "yyyy-mm-dd".
When I click the field there is a popup calendar for picking up a date.
Below is my codes and web html and screenshot:
Error:
HTML Code:
Sub AuditBookings()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
Dim HTMLTable As MSHTML.IHTMLElement
Dim HTMLTables As MSHTML.IHTMLElementCollection
Dim i, j, n As Integer
Dim DataWs As Worksheet
Dim DataRange, CFRange As Range
Dim WebCF, WebCODate As String
IE.Visible = True
IE.navigate "https://xxx/ebkassembly/login.aspx"
Do While IE.readyState <> READYSTATE_COMPLETE Or IE.Busy
Loop
Set HTMLDoc = IE.document
If IE.LocationURL <> "https://xxx/Ebooking/Entry/Index2.aspx" Then 'check login URL
HTMLDoc.getElementById("userName").Value = ThisWorkbook.Worksheets("Menu").Range("B1").Value 'fill the username
HTMLDoc.getElementById("userPwd").Value = ThisWorkbook.Worksheets("Menu").Range("B2").Value 'fill the password
HTMLDoc.getElementById("accSubmit").Click
Do While IE.readyState <> READYSTATE_COMPLETE Or IE.Busy
Loop
Application.Wait (Now + TimeValue("0:00:10"))
IE.navigate "https://xxx/ebksettlement/HotelAudit/hotelAuditIndex.aspx"
Do While IE.readyState <> READYSTATE_COMPLETE Or IE.Busy
Loop
Application.Wait (Now + TimeValue("0:00:10"))
Else
IE.navigate "https://xxx/ebksettlement/HotelAudit/hotelAuditIndex.aspx"
Do While IE.readyState <> READYSTATE_COMPLETE Or IE.Busy
Loop
Application.Wait (Now + TimeValue("0:00:10"))
End If
Set DataWs = ThisWorkbook.Worksheets("CtripReport")
DataWs.Activate
i = 1
Set HTMLTable = HTMLDoc.getElementById("pendingtb")
n = HTMLTable.Children(0).Children.Length 'get QTY of unaudited bookings
For i = i To n - 1
WebCF = HTMLTable.Children(0).Children(i).Children(7).all(0).innerText 'get confirmation number from web
If WebCF <> "" Then
Set DataRange = DataWs.Range("K2", "K" & n) 'set search range
Set CFRange = DataRange.Find(what:=WebCF, lookat:=xlPart)'start matching
If Not CFRange Is Nothing Then
CFRange.Select
HTMLTable.Children(0).Children(i).Children(1).all(5).Click 'click check-out button
Application.Wait (Now + TimeValue("0:00:02"))
'HTMLTable.Children(0).Children(i).Children(3).all(3).Value = CFRange.Offset(0, -4).Value ' fill check-out date
HTMLTable.Children(0).Children(i).Children(3).all(3).Click
HTMLTable.Children(0).Children(i).Children(3).all(3).Value = "2019-08-27"
HTMLTable.Children(0).Children(i).Children(4).all(1).innerText = CFRange.Offset(0, -3).Value ' fill guest name
HTMLTable.Children(0).Children(i).Children(6).all(0).Value = CFRange.Offset(0, -1).Value ' fill room number
'HTMLTable.Children(0).Children(i).Children(7).all.innerText = CFRange.Value ' fill confirmation number
'HTMLTable.Children(0).Children(i).Children(9).all(1).Click ' click save button
End If
End If
Next
'MsgBox "Total " & ThisWorkbook.Worksheets("CtripReport").Range("A2", Range("A1").End(xlDown)).Count & " records are scraped"
End Sub

How to select value from drop down on web url?

I am trying to select data from the dropdown in the web URL, my all code is working fine but I am unable to select the value from the dropdown.
Sub pulldata2()
Dim tod As String, UnderLay As String
Dim IE As Object
Dim doc As HTMLDocument
'Html table
Dim Tbl As HTMLTable, Cel As HTMLTableCell, Rw As HTMLTableRow, Col As HTMLTableCol
Dim TrgRw As Long, TrgCol As Long
'Create new sheet
tod = ThisWorkbook.Sheets("URLList").Range("C2").Value
have = False
For Each sht In ThisWorkbook.Sheets
If sht.Name = tod Then
have = True
Exit For
End If
Next sht
If have = False Then
Sheets.Add After:=Sheets(Sheets.Count)
ActiveSheet.Name = tod
Else
If MsgBox("Sheet " & tod & " already exists Overwrite Data?", vbYesNo) = vbNo Then Exit Sub
End If
'Start Internetexplorer
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.navigate "https://nseindia.com/live_market/dynaContent/live_watch/option_chain/optionKeys.jsp?segmentLink=17&instrument=OPTIDX&symbol=NIFTY&date=25APR2019"
Do While IE.Busy Or IE.readyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
Set doc = IE.document
Dim ColOff As Long
'Put data to sheet and loop to next URL
For Nurl = 2 To 191
ColOff = (Nurl - 2) * 23
TrgRw = 1
UnderLay = ThisWorkbook.Sheets("URLList").Range("A" & Nurl).Value
doc.getElementById("underlyStock").Value = UnderLay
doc.parentWindow.execScript "goBtnClick('stock');", "javascript"
'now i want to select data from dropdown id=date, value= 27JUN2019
doc.querySelector("Select[name=date] option[value=27JUN2019]").Selected = True
Do While IE.Busy Or IE.readyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
Set Tbl = doc.getElementById("octable")
ThisWorkbook.Sheets(tod).Cells(TrgRw, ColOff + 1).Value = UnderLay
ThisWorkbook.Sheets(tod).Cells(TrgRw, ColOff + 1).Font.Size = 20
ThisWorkbook.Sheets(tod).Cells(TrgRw, ColOff + 1).Font.Bold = True
ThisWorkbook.Sheets(tod).Cells(TrgRw, ColOff + 1).Select
TrgRw = TrgRw + 1
For Each Rw In Tbl.Rows
TrgCol = 1
For Each Cel In Rw.Cells
ThisWorkbook.Sheets(tod).Cells(TrgRw, ColOff + TrgCol).Value = Cel.innerText
TrgCol = TrgCol + Cel.colSpan ' if Column span is > 1 multiple
Next Cel
TrgRw = TrgRw + 1
Next Rw
TrgRw = TrgRw + 1
Next Nurl
'exit the internetexplorer
IE.Quit
Set IE = Nothing
End Sub
why my code not working, I am new in VBA please help to find an error in my code.
Simply alter the url rather than use dropdown
https://nseindia.com/live_market/dynaContent/live_watch/option_chain/optionKeys.jsp?segmentLink=17&instrument=OPTIDX&symbol=NIFTY&date=27JUN2019
You can also use xhr to get the content rather than a slow browser. I use the clipboard to write out the table.
Option Explicit
Public Sub GetInfo()
Dim html As Object, hTable As Object, ws As Worksheet, clipboard As Object
Set html = New HTMLDocument
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://nseindia.com/live_market/dynaContent/live_watch/option_chain/optionKeys.jsp?segmentLink=17&instrument=OPTIDX&symbol=NIFTY&date=27JUN2019", False
.send
html.body.innerHTML = .responseText
Set hTable = html.getElementById("octable")
clipboard.SetText hTable.outerHTML
clipboard.PutInClipboard
ws.Range("A1").PasteSpecial
End With
End Sub
Alternative:
1) You could loop the tr and td within hTable above to write out the table
2) You could also use powerquery from web (via data tab Excel 2016+ , or using free powerquery add-in for 2013. You paste the url into the top of the pop up browser and press Go then select the table to import.
Changing stocks:
Stocks are part of the url query string e.g. symbol=NIFTY , so you can concatenate the new symbol into the url during a loop
"https://nseindia.com/live_market/dynaContent/live_watch/option_chain/optionKeys.jsp?segmentLink=17&instrument=OPTIDX&symbol=" & yourSymbolGoesHere & "&date=27JUN2019"
If you really want to use IE be sure to encase the value of the attribute within '' e.g. '27JUN2019'
Option Explicit
'VBE > Tools > References: Microsoft Internet Controls
Public Sub ClickButton()
Dim ie As InternetExplorer
Const URL As String = "https://nseindia.com/live_market/dynaContent/live_watch/option_chain/optionKeys.jsp?segmentLink=17&instrument=OPTIDX&symbol=NIFTY&date=25APR2019"
Set ie = New InternetExplorer
With ie
.Visible = True
.Navigate2 URL
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("[value='27JUN2019']").Selected = True
Stop
End With
End With
End Sub

scrape with excel vba changing input data before scraping

In am trying to change the data on the website for an input field and have that information refreshed on the page. I have updated the input field, but I am not sure how to refresh the page so that the inner table uses the new data from the input field
Below is my code:
Dim IE As InternetExplorer
Dim htmldoc As HTMLDocument
Dim ieURL As String
Dim sPicker As String
ieURL = "https://www.investing.com/commodities/crude-oil-historical-data"
sPicker = "10/01/2017 - 12/31/2017"
'Open InternetExplorer
Set IE = New InternetExplorer
IE.Visible = True
IE.Navigate ieURL
Set htmldoc = IE.document 'Document webpage
' wait until the page loads before doing anything
Do Until (IE.readyState = 4 And Not IE.Busy)
DoEvents ' DoEvents releases the macro and lets excel do other thing while it waits
Loop
Dim drp As HTMLFormElement
Set drp = htmldoc.getElementById("widgetFieldDateRange")
drp.innerText = sPicker 'Set the new timeframe for scraping
Dim inpt As HTMLInputElement
Set inpt = htmldoc.getElementById("picker")
inpt.Value = sPicker 'Set the new timeframe for scraping
' wait until the page loads before doing anything
Do Until (IE.readyState = 4 And Not IE.Busy)
DoEvents ' DoEvents releases the macro and lets excel do other thing while it waits
Loop
Thanks for your help
Try the below script. It should solve the issue.
Sub Web_Data()
Dim IE As New InternetExplorer, html As New HTMLDocument
Dim post As Object, elem As Object, t_data As Object
Dim trow As Object, tcel As Object
With IE
.Visible = True
.navigate "https://www.investing.com/commodities/crude-oil-historical-data"
While .readyState < 4: DoEvents: Wend
Set html = .document
End With
Application.Wait Now + TimeValue("00:00:05")
html.getElementById("widgetFieldDateRange").Click
Application.Wait Now + TimeValue("00:00:03")
Set post = html.getElementById("startDate")
post.innerText = ""
post.Focus
Application.SendKeys "10/01/2017"
Application.Wait Now + TimeValue("00:00:03")
Set elem = html.getElementById("endDate")
elem.innerText = ""
elem.Focus
Application.SendKeys "12/31/2017"
Application.Wait Now + TimeValue("00:00:03")
html.getElementById("applyBtn").Click
Application.Wait Now + TimeValue("00:00:03")
Set t_data = html.getElementById("curr_table")
For Each trow In t_data.Rows
For Each tcel In trow.Cells
y = y + 1: Cells(x + 1, y) = tcel.innerText
Next tcel
y = 0
x = x + 1
Next trow
End Sub
Reference to add to the library:
1. Microsoft Internet Controls
2. Microsoft HTML Object Library

Resources