Excel VBA - Extracting data from web page - excel

I am trying to pull seller information from Amazon page with price by automating web browser. I am trying to run the below code, but the error I am getting is:
Object Variable or With Block variable not set.
Can someone guide me where i am going wrong.
Option Explicit
Sub RunNewModule()
Dim ie As InternetExplorer
Dim html As HTMLDocument
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = False
ie.Navigate "http://www.amazon.com/gp/offer-listing/B00SVA81Z2/ref=dp_olp_new_mbc?ie=UTF8&condition=new"
Dim priceData As Variant
Dim sellerdata As Variant
Dim item As Variant
Dim cntr As Integer
priceData = html.getElementsByClassName("olpOfferPrice").getElementsByTagName("span")(0).innerText
cntr = 1
For Each item In priceData
Range("B" & cntr) = item.innerText
cntr = cntr + 1
Next item
sellerdata = html.getElementsByClassName("olpSellerName").getElementsByTagName("span")(0).innerText
cntr = 1
For Each item In sellerdata
Range("A" & cntr) = item.innerText
cntr = cntr + 1
Next item
End Sub

You didn't assign html and it's null now.
You should assign it this way:
Set html= ie.Document
To get an element by it's class name:
Dim ie As InternetExplorer
Dim html As IHTMLDocument
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = False
ie.Navigate "http://stackoverflow.com/questions/34463544/vba-fetching-data-from-class-name"
While ie.Busy
DoEvents
Wend
While ie.ReadyState < 4
DoEvents
Wend
Set html = ie.Document
Dim elements As IHTMLElementCollection
Set elements = html.getElementsByClassName("question-hyperlink")
If elements.Length > 0 Then
MsgBox elements(0).innerText
End If
ie.Quit
Set ie = Nothing
Don't forget to add reference to:
Microsoft Internet Controls
Microsoft Html Object library
For that amazon link:
Dim ie As InternetExplorer
Dim html As HTMLDocument
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = False
ie.Navigate "http://www.amazon.in/gp/offer-listing/B00EYCBGNA/ref=dp_olp_new_mbc?ie=UTF8&condition=new"
While ie.Busy
DoEvents
Wend
While ie.ReadyState < 4
DoEvents
Wend
Set html = ie.Document
Dim elements As IHTMLElementCollection
Set elements = html.getElementsByClassName("olpOfferPrice")
For i = 0 To elements.Length - 1
Sheet1.Range("A" & (i + 1)) = elements(i).innerText
Next i
Set elements = html.getElementsByClassName("olpSellerName")
For i = 0 To elements.Length - 1
Sheet1.Range("B" & (i + 1)) = elements(i).innerText
Next i
ie.Quit
Set ie = Nothing

Related

How to scrape a table?

The website I am trying to scrape: https://apps.nl.tele2.nl/zakelijk/postcodecheck/business-extern
It is a tool that scrapes zip codes + availabilities
Sub Pulldatafromweb()
Dim IE As Object
Dim doc As HTMLDocument
Dim ieobj As InternetExplorer
Dim htmlEle As IHTMLElement
Dim i As Integer
i = 1
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.navigate "https://apps.nl.tele2.nl/zakelijk/postcodecheck/business-extern"
Do While IE.Busy Or IE.readyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
Set doc = IE.document
doc.getElementById("postcode").Value = ThisWorkbook.Sheets("sheet2").Range("A3").Value
doc.getElementById("housenumber").Value = ThisWorkbook.Sheets("sheet2").Range("b3").Value
doc.getElementById("submit").Click
For Each htmlelement In ieobj.document.getElementsByClassName("wrapper")(0).getElementsByTagName("TR")
With ActiveSheet
.Range("A" & i).Value = htmlEle.Children(0).textContent
.Range("B" & i).Value = htmlEle.Children(1).textContent
.Range("C" & i).Value = htmlEle.Children(2).textContent
End With
i = i + 1
Next
End Sub
It seems there is an issue with the name of the table.
You appear to be looking for the wrong element, the table you want has the classname 'infotable'.
This code returns the data from that table.
Sub Pulldatafromweb()
Dim IE As Object
Dim doc As HTMLDocument
Dim ieobj As InternetExplorer
Dim htmlElement As IHTMLElement
Dim i As Long
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.navigate "https://apps.nl.tele2.nl/zakelijk/postcodecheck/business-extern"
Do While IE.Busy Or IE.readyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
Set doc = IE.document
doc.getElementById("postcode").Value = ThisWorkbook.Sheets("sheet2").Range("A3").Value
doc.getElementById("housenumber").Value = ThisWorkbook.Sheets("sheet2").Range("b3").Value
doc.getElementById("submit").Click
Set doc = IE.document
For Each htmlElement In doc.getElementsByClassName("infotable")(0).getElementsByTagName("tr")
i = i + 1
With ActiveSheet
.Range("A" & i).Value = htmlElement.Children(0).textContent
.Range("B" & i).Value = htmlElement.Children(1).textContent
.Range("C" & i).Value = htmlElement.Children(2).textContent
End With
Next htmlElement
End Sub

Scrape CDC vaccination data using VBA

I am trying to scrape the vaccination data from the below CDC website:
https://covid.cdc.gov/covid-data-tracker/#vaccinations
I have tried querySelectorAll but no luck. Can anyone help take a look? Much appreciated!
Sub useClassnames()
Dim appIE As Object
Set appIE = CreateObject("internetexplorer.application")
With appIE
.navigate "https://covid.cdc.gov/covid-data-tracker/#vaccinations"
.Visible = False
End With
Do While appIE.Busy
DoEvents
Loop
Set allRowOfData = appIE.document.getElementById("maincontent")
Debug.Print allRowOfData.innerHTML
'Set element = appIE.document.querySelectorAll(".container mt-5")
'For Each elements In element
' Debug.Print elements
'Next elements
'For Each element In allRowOfData
'Debug.Print element
'Next element
End Sub
Here you have, just change your worksheet name or number :)
Option Explicit
Const updatedCol = 1
Const dosesDistributedColVal = 2
Const peopleInicVaccColVal = 3
Sub useClassnames()
'declare worksheet variable and set headers
Dim targetWsh As Worksheet: Set targetWsh = ThisWorkbook.Sheets(1)
targetWsh.Cells(1, 1).Value = "Last Update"
targetWsh.Cells(1, 2).Value = "Doses Distributed"
targetWsh.Cells(1, 3).Value = "People Initiating Vaccination"
Dim lstRegisterRow As Long: lstRegisterRow = targetWsh.Range("A" & targetWsh.Rows.Count).End(xlUp).Row + 1
'open IE and navigate to site
Dim appIE As InternetExplorer: Set appIE = New InternetExplorer
appIE.navigate "https://covid.cdc.gov/covid-data-tracker/#vaccinations"
appIE.Visible = False
While appIE.Busy = True Or appIE.readyState < 4: DoEvents: Wend
Dim oHtmlDoc As HTMLDocument: Set oHtmlDoc = appIE.document
Dim oHtmlElementColl As IHTMLElementCollection
'Get and write last update date
Application.Wait (Now + TimeValue("0:00:02")) 'wait 2 secs to avoid error, if recieve error, add seconds as needed
Set oHtmlElementColl = oHtmlDoc.getElementsByTagName("small")
targetWsh.Cells(lstRegisterRow, updatedCol) = oHtmlElementColl(0).innerHTML
'Get and write Doses Distributed and People Initiating Vaccination
Set oHtmlElementColl = oHtmlDoc.GetElementsByClassName("card-number")
targetWsh.Cells(lstRegisterRow, dosesDistributedColVal) = oHtmlElementColl(0).innerText
targetWsh.Cells(lstRegisterRow, peopleInicVaccColVal) = oHtmlElementColl(1).innerText
appIE.Quit
End Sub

VBA Get webpage Link and Download File to

Is a long time ago to programming VBA. I want to fetch in a web Table all Linked PDF Files in coll um (x). First I have created the logins and navigate to the site to fetch file.
Site table:
Sub goToShopWH()
Dim ieApp As InternetExplorer
Dim ieDoc As Object
Dim clip As Object
'Table Grapping
Dim ieTable As Object
'create a new instance of ie
'Set ieApp = New InternetExplorer
Set ieApp = CreateObject("InternetExplorer.Application")
'you don’t need this, but it’s good for debugging
ieApp.Visible = True
'assume we’re not logged in and just go directly to the login page
ieApp.Navigate "https://website.com/ishop/Home.html"
Do While ieApp.Busy: DoEvents: Loop
Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents:
Loop
' Login to site
Set ieDoc = ieApp.Document
'fill in the login form – View Source from your browser to get the control names
With ieDoc '.forms("loginForm_0")
.getElementById("loginField").Value = "username"
.getElementById("password").Value = "Password"
.getElementById("loginSubmit").Click
Do While ieApp.Busy: DoEvents: Loop
Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop
'now that we’re in, go to the page we want
'Switsh to search form
ieApp.Navigate "https://website.com/ishop/account/MyAccount,$comp$account$AccountNavigation.orderHistory.sdirect?sp=Saccount%2FOrderHistory"
Do While ieApp.Busy: DoEvents: Loop
Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop
' fillout form
Set ieDoc = ieApp.Document
With ieDoc '.forms("Form_0")
.getElementById("PropertySelection_2").Value = "7"
.getElementById("commtxt").Value = "190055" 'Projekt Nummer oder Text.
.getElementById("Submit_0").Click
End With
Do
DoEvents
Loop Until ieApp.ReadyState = 4
Application.Wait Now + TimeSerial(0, 0, 5)
With ieDoc '.forms("Form_1")
.getElementById("PropertySelection").Value = "3"
.getElementById("PropertySelection").FireEvent ("onchange")
End With
Do
DoEvents
Loop Until ieApp.ReadyState = 4
Application.Wait Now + TimeSerial(0, 0, 5)
End With
Set webpage = ieApp.Document
Set table_data = webpage.getElementsByTagName("tr")
End Sub
Please help my to solve this to get I want import Table to sheet2 and download all PDF in ("Table"),("tbody")(tr)(1 to X),(td)(10),(href).click
I think you are after something like this.
Sub webpage()
Dim internet As Object
Dim internetdata As Object
Dim div_result As Object
Dim header_links As Object
Dim link As Object
Dim URL As String
Set internet = CreateObject("InternetExplorer.Application")
internet.Visible = True
URL = "https://www.google.co.in/search?q=how+to+program+in+vba"
internet.Navigate URL
Do Until internet.ReadyState >= 4
DoEvents
Loop
Application.Wait Now + TimeSerial(0, 0, 5)
Set internetdata = internet.Document
Set div_result = internetdata.getelementbyid("res")
Set header_links = div_result.getelementsbytagname("h3")
For Each h In header_links
Set link = h.ChildNodes.Item(0)
Cells(Range("A" & Rows.Count).End(xlUp).Row + 1, 1) = link.href
Next
MsgBox "done"
End Sub
Or, perhaps this.
' place your URL in cell L1
Sub HREF_Web()
Dim doc As HTMLDocument
Dim output As Object
Set IE = New InternetExplorer
IE.Visible = False
IE.navigate Range("L1")
Do
'DoEvents
Loop Until IE.readyState = READYSTATE_COMPLETE
Set doc = IE.document
Set output = doc.getElementsByTagName("a")
i = 5
For Each link In output
'If link.InnerHTML = "" Then
Range("A" & i).Value2 = link
' End If
i = i + 1
Next
MsgBox "Done!"
End Sub

Extracting HTML code from websites by looping through list of URLs

I'm using Excel VBA to launch an IE browser tab based on the URL in each of the rows in column D. Then the relevant HTML code is extracted based on pre-defined classes and populated in columns A - C.
Pretty sure I missed a step. The process stops at D2 and does not proceed to extract HTML from the next URLs (in cells D3, D4, etc).
Thanks in advance for any suggestions!
Sub useClassnames()
Dim element As IHTMLElement
Dim elements As IHTMLElementCollection
Dim IE As InternetExplorer
Dim html As HTMLDocument
Dim shellWins As New ShellWindows
Dim IE_TabURL As String
Dim intRowPosition As Integer
Set IE = New InternetExplorer
IE.Visible = False
intRowPosition = 2
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.navigate Sheet1.Range("D" & intRowPosition)
While IE.Busy
DoEvents
Wend
intRowPosition = intRowPosition + 1
While Sheet1.Range("D" & intRowPosition) <> vbNullString
IE.navigate Sheet1.Range("D" & intRowPosition), CLng(2048)
While IE.Busy
DoEvents
Wend
intRowPosition = intRowPosition + 1
Wend
Do While IE.readyState <> READYSTATE_COMPLETE
Application.StatusBar = "Loading Web page…"
DoEvents
Loop
Set html = IE.document
Set elements = html.getElementsByClassName("container-bs")
Dim count As Long
Dim erow As Long
count = 0
For Each element In elements
If element.className = "container-bs" Then
erow = Sheet1.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
Cells(erow, 1) = html.getElementsByClassName("pull-left")(count + 1).innerHTML
Cells(erow, 2) = html.getElementsByClassName("description")(count).innerHTML
Cells(erow, 3) = html.getElementsByClassName("related-articles")(count).innerHTML
count = count + 1
End If
Next element
Range("A2:C2000").Select
Columns("A:A").EntireColumn.AutoFit
Columns("B:B").ColumnWidth = 36
End Sub
Your lines
Set html = IE.document
Set elements = html.getElementsByClassName("container-bs")
etc happen after the While loop. It needs to be inside.
Your If statement:
If element.className = "container-bs"
should be redundant as you are already looping over a collection of that classname; so I have removed this.
You are not working off element in the loop, so essentially you are using it to control your incremented counter variable. This suggests you can use a better coding strategy for retrieving the items of interest.
Always state the parent worksheet and don't rely on implicit Activesheet references - that is bug prone.
I would expect a structure more like as follows (I cannot account for refactoring to remove element)
Option Explicit
Public Sub UseClassnames()
Dim element As IHTMLElement, elements As IHTMLElementCollection, ie As InternetExplorer
Dim html As HTMLDocument, intRowPosition As Long
intRowPosition = 2
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
While Sheet1.Range("D" & intRowPosition) <> vbNullString
If intRowPosition = 2 Then
ie.navigate Sheet1.Range("D" & intRowPosition)
Else
ie.navigate Sheet1.Range("D" & intRowPosition), CLng(2048)
End If
While ie.Busy Or ie.readyState < 4: DoEvents: Wend
Set html = ie.document
Set elements = html.getElementsByClassName("container-bs")
Dim count As Long, erow As Long
count = 0
For Each element In elements
erow = Sheet1.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
With Sheet1
.Cells(erow, 1) = html.getElementsByClassName("pull-left")(count + 1).innerHTML
.Cells(erow, 2) = html.getElementsByClassName("description")(count).innerHTML
.Cells(erow, 3) = html.getElementsByClassName("related-articles")(count).innerHTML
End With
count = count + 1
Next element
intRowPosition = intRowPosition + 1
Wend
With Sheet1
.Range("A2:C2000").Select
.Columns("A:A").EntireColumn.AutoFit
.Columns("B:B").ColumnWidth = 36
End With
End Sub

scrape with excel vba changing input data before scraping

In am trying to change the data on the website for an input field and have that information refreshed on the page. I have updated the input field, but I am not sure how to refresh the page so that the inner table uses the new data from the input field
Below is my code:
Dim IE As InternetExplorer
Dim htmldoc As HTMLDocument
Dim ieURL As String
Dim sPicker As String
ieURL = "https://www.investing.com/commodities/crude-oil-historical-data"
sPicker = "10/01/2017 - 12/31/2017"
'Open InternetExplorer
Set IE = New InternetExplorer
IE.Visible = True
IE.Navigate ieURL
Set htmldoc = IE.document 'Document webpage
' wait until the page loads before doing anything
Do Until (IE.readyState = 4 And Not IE.Busy)
DoEvents ' DoEvents releases the macro and lets excel do other thing while it waits
Loop
Dim drp As HTMLFormElement
Set drp = htmldoc.getElementById("widgetFieldDateRange")
drp.innerText = sPicker 'Set the new timeframe for scraping
Dim inpt As HTMLInputElement
Set inpt = htmldoc.getElementById("picker")
inpt.Value = sPicker 'Set the new timeframe for scraping
' wait until the page loads before doing anything
Do Until (IE.readyState = 4 And Not IE.Busy)
DoEvents ' DoEvents releases the macro and lets excel do other thing while it waits
Loop
Thanks for your help
Try the below script. It should solve the issue.
Sub Web_Data()
Dim IE As New InternetExplorer, html As New HTMLDocument
Dim post As Object, elem As Object, t_data As Object
Dim trow As Object, tcel As Object
With IE
.Visible = True
.navigate "https://www.investing.com/commodities/crude-oil-historical-data"
While .readyState < 4: DoEvents: Wend
Set html = .document
End With
Application.Wait Now + TimeValue("00:00:05")
html.getElementById("widgetFieldDateRange").Click
Application.Wait Now + TimeValue("00:00:03")
Set post = html.getElementById("startDate")
post.innerText = ""
post.Focus
Application.SendKeys "10/01/2017"
Application.Wait Now + TimeValue("00:00:03")
Set elem = html.getElementById("endDate")
elem.innerText = ""
elem.Focus
Application.SendKeys "12/31/2017"
Application.Wait Now + TimeValue("00:00:03")
html.getElementById("applyBtn").Click
Application.Wait Now + TimeValue("00:00:03")
Set t_data = html.getElementById("curr_table")
For Each trow In t_data.Rows
For Each tcel In trow.Cells
y = y + 1: Cells(x + 1, y) = tcel.innerText
Next tcel
y = 0
x = x + 1
Next trow
End Sub
Reference to add to the library:
1. Microsoft Internet Controls
2. Microsoft HTML Object Library

Resources