Getting Links/URL from a webpage-Excel VBA - excel

I want to write a macro which will take the search result links in a webpage. I have written like this
Sub webpage()
Dim internet As InternetExplorer
Dim internetdata As HTMLDocument
Dim internetlink As Object
Dim internetinnerlink As Object
Set internet = CreateObject("InternetExplorer.Application")
internet.Visible = True
internet.Navigate ("URL")
Do While internet.Busy
DoEvents
Loop
Do Until internet.ReadyState = READYSTATE_COMPLETE
DoEvents
Loop
Set internetdata = internet.Document
Set internetlink = internetdata.getElementsByTagName("a")
i = 1
For Each internetinnerlink In internetlink
ActiveSheet.Cells(i, 2) = internetinnerlink.href
i = i + 1
Next internetinnerlink
End Sub
Above code takes all the links from the web page, but i need only the search result links. i have uploaded one image, if that is my webpage, i need to take only the search result links and not all the links. please help me to fix this

Try this code
Sub webpage()
Dim internet As Object
Dim internetdata As Object
Dim div_result As Object
Dim header_links As Object
Dim link As Object
Dim URL As String
Set internet = CreateObject("InternetExplorer.Application")
internet.Visible = True
URL = "https://www.google.co.in/search?q=how+to+program+in+vba"
internet.Navigate URL
Do Until internet.ReadyState >= 4
DoEvents
Loop
Application.Wait Now + TimeSerial(0, 0, 5)
Set internetdata = internet.Document
Set div_result = internetdata.getelementbyid("res")
Set header_links = div_result.getelementsbytagname("h3")
For Each h In header_links
Set link = h.ChildNodes.Item(0)
Cells(Range("A" & Rows.Count).End(xlUp).Row + 1, 1) = link.href
Next
MsgBox "done"
End Sub

Related

Scrape CDC vaccination data using VBA

I am trying to scrape the vaccination data from the below CDC website:
https://covid.cdc.gov/covid-data-tracker/#vaccinations
I have tried querySelectorAll but no luck. Can anyone help take a look? Much appreciated!
Sub useClassnames()
Dim appIE As Object
Set appIE = CreateObject("internetexplorer.application")
With appIE
.navigate "https://covid.cdc.gov/covid-data-tracker/#vaccinations"
.Visible = False
End With
Do While appIE.Busy
DoEvents
Loop
Set allRowOfData = appIE.document.getElementById("maincontent")
Debug.Print allRowOfData.innerHTML
'Set element = appIE.document.querySelectorAll(".container mt-5")
'For Each elements In element
' Debug.Print elements
'Next elements
'For Each element In allRowOfData
'Debug.Print element
'Next element
End Sub
Here you have, just change your worksheet name or number :)
Option Explicit
Const updatedCol = 1
Const dosesDistributedColVal = 2
Const peopleInicVaccColVal = 3
Sub useClassnames()
'declare worksheet variable and set headers
Dim targetWsh As Worksheet: Set targetWsh = ThisWorkbook.Sheets(1)
targetWsh.Cells(1, 1).Value = "Last Update"
targetWsh.Cells(1, 2).Value = "Doses Distributed"
targetWsh.Cells(1, 3).Value = "People Initiating Vaccination"
Dim lstRegisterRow As Long: lstRegisterRow = targetWsh.Range("A" & targetWsh.Rows.Count).End(xlUp).Row + 1
'open IE and navigate to site
Dim appIE As InternetExplorer: Set appIE = New InternetExplorer
appIE.navigate "https://covid.cdc.gov/covid-data-tracker/#vaccinations"
appIE.Visible = False
While appIE.Busy = True Or appIE.readyState < 4: DoEvents: Wend
Dim oHtmlDoc As HTMLDocument: Set oHtmlDoc = appIE.document
Dim oHtmlElementColl As IHTMLElementCollection
'Get and write last update date
Application.Wait (Now + TimeValue("0:00:02")) 'wait 2 secs to avoid error, if recieve error, add seconds as needed
Set oHtmlElementColl = oHtmlDoc.getElementsByTagName("small")
targetWsh.Cells(lstRegisterRow, updatedCol) = oHtmlElementColl(0).innerHTML
'Get and write Doses Distributed and People Initiating Vaccination
Set oHtmlElementColl = oHtmlDoc.GetElementsByClassName("card-number")
targetWsh.Cells(lstRegisterRow, dosesDistributedColVal) = oHtmlElementColl(0).innerText
targetWsh.Cells(lstRegisterRow, peopleInicVaccColVal) = oHtmlElementColl(1).innerText
appIE.Quit
End Sub

is there a way to download a row of a table or an entire table itself on a webpage using vba?

I am able to download the inner text but not the exact table
I have tried using inner text and it only gave the text of the entire row
Private Sub CommandButton1_Click()
Dim i As Integer
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
Dim HTMLInput As MSHTML.IHTMLElement
Dim HTMLAs As MSHTML.IHTMLElementCollection
Dim HTMLA As MSHTML.IHTMLElement
Dim tdocs As Object
Dim iframeDoc As MSHTML.HTMLDocument
IE.Visible = True
IE.navigate "https://portal.3gpp.org/#55931-tdocs"
Do While IE.ReadyState <> READYSTATE_COMPLETE
Loop
Set HTMLDoc = IE.Document
Set tdocs = HTMLDoc.getElementsByName("lt-55931-tdocs")
tdocs(0).Click
Set iframeDoc = HTMLDoc.frames("dnn_ctr559_View_ctl00_ctl01_ctr596_ETSIFrame_htmModule").Document
Dim img As MSHTML.IHTMLElement
Set img = iframeDoc.getElementById("btnSearch")
img.Click
Application.Wait (Now + TimeValue("0:00:59")) '30sec
End If
For i = 1 To 2000
Set img = iframeDoc.getElementById("rgTdocList_ctl00__" & i)
Range("A" & i) = img.innerText
End Sub
I am getting the row in one column but i want them to be how it is originally on the web page
You can do something like this:
'...
'...
Application.Wait (Now + TimeValue("0:00:30")) '30sec
Dim tds, x
For i = 1 To 2000
Set img = iframeDoc.getElementById("rgTdocList_ctl00__" & i)
If img Is Nothing Then Exit For 'no more rows
Set tds = img.getElementsByTagName("td") '<< get cells for this row
'write out each cell's content
For x = 0 To tds.Length - 1
Cells(i, x + 1) = tds(x).innerText
Next x
Next i

Object doesn´t support this property or method

I followed a Tutorial and have an error while executing the code i copied:
Private Sub CommandButton1_Click()
Dim internet As Object
Dim internetdata As Object
Dim div_result As Object
Dim header_links As Object
Dim link As Object
Dim URL As String
Set internet = CreateObject("InternetExplorer.Application")
internet.Visible = True
URL = "https://www.google.co.in/search?q=how+to+program+in+vba"
internet.navigate URL
Do Until internet.readyState >= 4
DoEvents
Loop
Application.Wait Now + TimeSerial(0, 0, 5)
Set internetdata = internet.document
Set div_result = internetdata.getElementById("res")
Set header_links = div_result.getElementsByTagName("h3")
For Each h In header_links
Set link = h.ChildNodes.Item(0)
Cells(Range("A" & Rows.Count).End(xlUp).Row + 1, 1) = link.href
Next
MsgBox "done"
End Sub
The error comes at
Cells(Range("A" & Rows.Count).End(xlUp).Row + 1, 1) = link.href
What ist wrong here?
EDIT:
The code should give the URLs out of a google search and write it to excel list:
The code is copied from here:
Getting Links/URL from a webpage-Excel VBA
The issue is that Google obviously changed the code of the website since the tutorial was written. This method is not very reliable as everytime Google changes the website it can easily break your code.
Try the following
Set div_result = internetdata.getElementById("res")
Set header_links = div_result.getElementsByTagName("a")
Dim h As Variant
For Each h In header_links
Cells(Range("A" & Rows.Count).End(xlUp).Row + 1, 1) = h.href
Next
If you want just the web results without the featured content and video links, you can use below. Also in this code you can control the page number you want to get the links fromby changing the variable pageNo.
Private Sub CommandButton1_Click()
Dim internet As Object
Dim internetdata As Object
Dim div_result As Object
Dim header_links As Object
Dim link As Object
Dim URL As String
Const pageNo = 10 '0 is page 1, 10 is page 2 and so on 0;10;20;30;40
Set internet = CreateObject("InternetExplorer.Application")
internet.Visible = True
URL = "https://www.google.co.in/search?q=how+to+program+in+vba"
internet.navigate URL & "&start=" & pageNo
Do Until internet.readyState >= 4
DoEvents
Loop
Application.Wait Now + TimeSerial(0, 0, 5)
Set internetdata = internet.document
Set div_result = internetdata.getElementById("res")
Set header_links = div_result.getelementsbytagname("h2")
Dim h As Variant
For Each div In header_links
If div.innertext = "Web results" Then
Set Links = div.ParentElement.getelementsbytagname("a")
For Each link In Links
Cells(Range("A" & Rows.Count).End(xlUp).Row + 1, 1) = link.href
Next
End If
Next
MsgBox "done"
End Sub

VBA: copy data from website into excel

I have a VBA code that selects info from drop-down menus on a government website and then submits the query. The requested data then opens up in another IE page. I am trying to copy this data into excel; however, I am unable to do so.
My code currently copies the text on the first IE page that contains the drop-down menus. The government website is: http://www.osfi-bsif.gc.ca/Eng/wt-ow/Pages/FINDAT.aspx
I have look all over the internet for a solution but nothing seems to work...
Here is my code:
Sub GetOsfiFinancialData()
Dim UrlAddress As String
UrlAddress = "http://ws1.osfi-bsif.gc.ca/WebApps/FINDAT/DTIBanks.aspx?T=0&LANG=E"
Dim ie As Object
Set ie = CreateObject("internetexplorer.application")
With ie
.Silent = True
.Visible = False
.navigate UrlAddress
End With
Do Until Not ie.Busy And ie.readyState = 4
DoEvents
Loop
Application.Wait (Now() + TimeValue("00:00:05"))
'Select Bank
ie.document.getElementById("DTIWebPartManager_gwpDTIBankControl1_DTIBankControl1_institutionTypeCriteria_institutionsDropDownList").Value = Z005
'open window with financial data
Dim objButton
Set objButton = ie.document.getElementById("DTIWebPartManager_gwpDTIBankControl1_DTIBankControl1_submitButton")
objButton.Focus
objButton.Click
'select new pop-up window
marker = 0
Set objshell = CreateObject("Shell.Application")
IE_count = objshell.Windows.Count
For x = 0 To (IE_count - 1)
On Error Resume Next ' sometimes more web pages are counted than are open
my_title = objshell.Windows(x).document.Title
If my_title Like "Consolidated Monthly Balance Sheet" & "*" Then 'compare to find if the desired web page is already open
Set ie = objshell.Windows(x)
marker = 1
Exit For
Else
End If
Next
Do Until Not ie.Busy And ie.readyState = 4
DoEvents
Loop
Application.Wait (Now() + TimeValue("00:00:05"))
Dim doc As MSHTML.HTMLDocument
Dim tables As MSHTML.IHTMLElementCollection
Dim table As MSHTML.HTMLTable
Dim clipboard As MSForms.DataObject
Set doc = ie.document
Set tables = doc.getElementsByTagName("body")
Set table = tables(0)
Set clipboard = New MSForms.DataObject
'paste in sheets
Dim test
Set test = ActiveWorkbook.Sheets("Test")
clipboard.SetText table.outerHTML
clipboard.PutInClipboard
test.Range("A1").PasteSpecial xlPasteAll
clipboard.Clear
MsgBox ("Task Completed")
End Sub
Your help is greatly appreciated!
You were using the current test with document.Title. I found that For Each of all windows looking for the full title worked in combination with copy pasting the pop-up window outerHTML. No additional wait time was required.
Inside the For Each Loop, after you reset the IE instance to the new window, you can obtain the new URL with ie.document.url. As you already have the data loaded you might as well just copy paste it straight away in my opinion.
Code:
Option Explicit
Public Sub GetOsfiFinancialData()
Dim UrlAddress As String, objButton, ie As Object
UrlAddress = "http://ws1.osfi-bsif.gc.ca/WebApps/FINDAT/DTIBanks.aspx?T=0&LANG=E"
Set ie = CreateObject("internetexplorer.application")
With ie
.Silent = True
.Visible = False
.navigate UrlAddress
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("DTIWebPartManager_gwpDTIBankControl1_DTIBankControl1_institutionTypeCriteria_institutionsDropDownList").Value = "Z005"
Set objButton = .document.getElementById("DTIWebPartManager_gwpDTIBankControl1_DTIBankControl1_submitButton")
objButton.Focus
objButton.Click
Dim objShellWindows As New SHDocVw.ShellWindows, currentWindow As IWebBrowser2
For Each currentWindow In objShellWindows
If currentWindow.document.Title = "Consolidated Monthly Balance Sheet - Banks, Trust and Loan" Then
Set ie = currentWindow
Exit For
End If
Next
Dim clipboard As Object
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
clipboard.SetText ie.document.body.outerHTML
clipboard.PutInClipboard
ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).PasteSpecial
.Quit
End With
End Sub
References (VBE > Tools > References):
Microsoft Internet Controls
I don't have time to get into all the stuff about controlling one browser from another, but I think you can figure that part out, especially since you made some great progress on this already. Get URL#2 from URL#1, like you are doing, but with some better data controls around it, and then do this...
Option Explicit
Sub Web_Table_Option_One()
Dim xml As Object
Dim html As Object
Dim objTable As Object
Dim result As String
Dim lRow As Long
Dim lngTable As Long
Dim lngRow As Long
Dim lngCol As Long
Dim ActRw As Long
Set xml = CreateObject("MSXML2.XMLHTTP.6.0")
With xml
.Open "GET", "http://ws1.osfi-bsif.gc.ca/WebApps/Temp/2f40b7ef-d024-4eca-a8a3-fb82153efafaFinancialData.aspx", False
.send
End With
result = xml.responseText
Set html = CreateObject("htmlfile")
html.body.innerHTML = result
Set objTable = html.getElementsByTagName("Table")
For lngTable = 0 To objTable.Length - 1
For lngRow = 0 To objTable(lngTable).Rows.Length - 1
For lngCol = 0 To objTable(lngTable).Rows(lngRow).Cells.Length - 1
ThisWorkbook.Sheets("Sheet1").Cells(ActRw + lngRow + 1, lngCol + 1) = objTable(lngTable).Rows(lngRow).Cells(lngCol).innerText
Next lngCol
Next lngRow
ActRw = ActRw + objTable(lngTable).Rows.Length + 1
Next lngTable
End Sub

runtime error 462 with internet explorer

I am trying to access a webpage from excel using VBA. I can get internet explorer to launch, and I see the webpage come up, but I get a runtime error 462 when I hit the Do Until internet.ReadyState >= 4 line of code. any ideas? ultimately I want to be able to parse a site and get a list of the links on that site and pick one, and then "click" on that link. suggestions and help would be great. here is the function (that I found on the web) that I am using:
Public Sub clicklick()
Dim internet As Object
Dim internetdata As Object
Dim div_result As Object
Dim header_links As Object
Dim link As Object
Dim URL As String
Set internet = CreateObject("InternetExplorer.Application")
internet.Visible = True
URL = "https://www.google.co.in/search?q=how+to+program+in+vba"
internet.Navigate URL
Do Until internet.ReadyState >= 4
DoEvents
Loop
Application.Wait Now + TimeSerial(0, 0, 5)
Set internetdata = internet.Document
Set div_result = internetdata.getelementbyid("res")
Set header_links = div_result.getelementsbytagname("h3")
For Each h In header_links
Set link = h.ChildNodes.Item(0)
Cells(Range("A" & Rows.count).End(xlUp).row + 1, 1) = link.href
Next
MsgBox "done"
End Sub
thank you,
alan
I had a similar error with VBA and
using Set objIE = New InternetExplorerMedium instead of Set objIE = CreateObject("InternetExplorer.Application") did not solve the problem. I solved the issue by going to IE Settings -> Internet Options -> Security and unchecking the "Enable protected Mode" option
Try replcaing your lines :
Do Until internet.ReadyState >= 4
DoEvents
Loop
Application.Wait Now + TimeSerial(0, 0, 5)
With:
While internet.busy
DoEvents
Wend
Full Code (tested)
Option Explicit
Public Sub clicklick()
Dim internet As Object
Dim internetdata As Object
Dim div_result As Object
Dim header_links As Object
Dim link As Object
Dim h As Object
Dim URL As String
Set internet = CreateObject("InternetExplorer.Application")
internet.Visible = True
URL = "https://www.google.co.in/search?q=how+to+program+in+vba"
internet.Navigate URL
internet.Visible = True
While internet.busy
DoEvents
Wend
Set internetdata = internet.Document
Set div_result = internetdata.getelementbyid("res")
Set header_links = div_result.getelementsbytagname("h3")
For Each h In header_links
Set link = h.ChildNodes.Item(0)
Cells(Range("A" & Rows.Count).End(xlUp).Row + 1, 1) = link.href
Next
MsgBox "done"
End Sub

Resources