scrape with excel vba changing input data before scraping - excel

In am trying to change the data on the website for an input field and have that information refreshed on the page. I have updated the input field, but I am not sure how to refresh the page so that the inner table uses the new data from the input field
Below is my code:
Dim IE As InternetExplorer
Dim htmldoc As HTMLDocument
Dim ieURL As String
Dim sPicker As String
ieURL = "https://www.investing.com/commodities/crude-oil-historical-data"
sPicker = "10/01/2017 - 12/31/2017"
'Open InternetExplorer
Set IE = New InternetExplorer
IE.Visible = True
IE.Navigate ieURL
Set htmldoc = IE.document 'Document webpage
' wait until the page loads before doing anything
Do Until (IE.readyState = 4 And Not IE.Busy)
DoEvents ' DoEvents releases the macro and lets excel do other thing while it waits
Loop
Dim drp As HTMLFormElement
Set drp = htmldoc.getElementById("widgetFieldDateRange")
drp.innerText = sPicker 'Set the new timeframe for scraping
Dim inpt As HTMLInputElement
Set inpt = htmldoc.getElementById("picker")
inpt.Value = sPicker 'Set the new timeframe for scraping
' wait until the page loads before doing anything
Do Until (IE.readyState = 4 And Not IE.Busy)
DoEvents ' DoEvents releases the macro and lets excel do other thing while it waits
Loop
Thanks for your help

Try the below script. It should solve the issue.
Sub Web_Data()
Dim IE As New InternetExplorer, html As New HTMLDocument
Dim post As Object, elem As Object, t_data As Object
Dim trow As Object, tcel As Object
With IE
.Visible = True
.navigate "https://www.investing.com/commodities/crude-oil-historical-data"
While .readyState < 4: DoEvents: Wend
Set html = .document
End With
Application.Wait Now + TimeValue("00:00:05")
html.getElementById("widgetFieldDateRange").Click
Application.Wait Now + TimeValue("00:00:03")
Set post = html.getElementById("startDate")
post.innerText = ""
post.Focus
Application.SendKeys "10/01/2017"
Application.Wait Now + TimeValue("00:00:03")
Set elem = html.getElementById("endDate")
elem.innerText = ""
elem.Focus
Application.SendKeys "12/31/2017"
Application.Wait Now + TimeValue("00:00:03")
html.getElementById("applyBtn").Click
Application.Wait Now + TimeValue("00:00:03")
Set t_data = html.getElementById("curr_table")
For Each trow In t_data.Rows
For Each tcel In trow.Cells
y = y + 1: Cells(x + 1, y) = tcel.innerText
Next tcel
y = 0
x = x + 1
Next trow
End Sub
Reference to add to the library:
1. Microsoft Internet Controls
2. Microsoft HTML Object Library

Related

Use Excel VBA to Extract Data From a Webpage

I'm trying to use Excel VBA to extract some data from a webpage (https://www.churchofjesuschrist.org/maps/meetinghouses/lang=eng&q=1148+W+100+N). The code I'm using will open Internet Explorer, navigate to the website, and it will extract the top most result. But I can't seem to figure out how to extract the rest of the results (i.e. ward, language, contact name, contact #). Thoughts?
Sub MeethinghouseLocator()
Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE.navigate Sheets("Sheet1").Range("A1").Value
IE.Visible = True
While IE.Busy
DoEvents
Wend
Application.Wait (Now + TimeValue("0:00:01"))
IE.document.querySelector("button.search-input__execute.button--primary").Click
Dim Doc As HTMLDocument
Set Doc = IE.document
Application.Wait (Now + TimeValue("0:00:01"))
'WardName
Dim aaaaFONT As String
aaaaFONT = Trim(Doc.getElementsByClassName("location-header__name ng-binding")(0).innerText)
Sheets("Sheet1").Range("D6").Value = aaaaFONT
Application.Wait (Now + TimeValue("0:00:01"))
'Language
Dim aaabFONT As String
aaabFONT = Trim(Doc.getElementsByClassName("location-header__language ng-binding ng-scope")(0).innerText)
Sheets("Sheet1").Range("E6").Value = aaabFONT
'Click 1st Link
IE.document.getElementsByClassName("location-header__name ng-binding")(0).Click
Application.Wait (Now + TimeValue("0:00:01"))
'Contact Name
Dim aaacFONT As String
aaacFONT = Trim(Doc.getElementsByClassName("maps-card__group maps-card__group--inline ng-scope")(2).innerText)
Sheets("Sheet1").Range("H6").Value = aaacFONT
'Contact Name Function
Range("F6").Select
ActiveCell.FormulaR1C1 = _
"=LEFT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),FIND(RIGHT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),LEN(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-FIND(CHAR(10),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-1)"
'Contact Phone Number
Dim aaadFONT As String
aaadFONT = Trim(Doc.getElementsByClassName("phone ng-binding")(0).innerText)
Sheets("Sheet1").Range("G6").Value = aaadFONT
IE.Quit
End Sub
Most of your code works actually so I'm not sure what issue are you facing but you didn't account for the loading after clicking each link so I have added While loop to check for its Ready and ReadyState property before continuing.
EDIT: The code now loops through all the wards listed in the result, the idea is to keep the first IE at the result page and pass the URL of the ward and the input row to sub ExtractWard where it will open another IE, navigate to the given URL and extract the ward details.
Sub MeethinghouseLocator()
Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE.navigate Sheets("Sheet1").Range("A1").Value
IE.Visible = True
While IE.Busy Or IE.readyState <> 4
DoEvents
Wend
IE.document.querySelector("button.search-input__execute.button--primary").Click
While IE.Busy Or IE.readyState <> 4
DoEvents
Wend
Dim Doc As HTMLDocument
Set Doc = IE.document
Application.Wait (Now + TimeValue("0:00:01"))
Dim wardContent As Object
Set wardContent = Doc.getElementsByClassName("maps-card__content")(2)
Dim wardCollection As Object
Set wardCollection = wardContent.getElementsByClassName("location-header")
Dim rowNum As Long
rowNum = 6
Dim i As Long
For i = 0 To wardCollection.Length - 1
With wardCollection(i)
'WardName
Dim aaaaFONT As String
aaaaFONT = Trim(.getElementsByClassName("location-header__name ng-binding")(0).innerText)
Sheets("Sheet1").Cells(rowNum, "D").Value = aaaaFONT
'Language
Dim aaabFONT As String
aaabFONT = Trim(.getElementsByClassName("location-header__language ng-binding ng-scope")(0).innerText)
Sheets("Sheet1").Cells(rowNum, "E").Value = aaabFONT
Dim wardURL As String
wardURL = .getElementsByClassName("location-header__name ng-binding")(0).href
ExtractWard wardURL, rowNum
End With
rowNum = rowNum + 1
Next i
Set Doc = Nothing
IE.Quit
Set IE = Nothing
End Sub
Private Sub ExtractWard(argURL As String, argRow As Long)
Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE.navigate argURL
IE.Visible = True
While IE.Busy Or IE.readyState <> 4
DoEvents
Wend
Dim Doc As HTMLDocument
Set Doc = IE.document
'Contact Name
Dim aaacFONT As String
aaacFONT = Trim(Doc.getElementsByClassName("maps-card__group maps-card__group--inline ng-scope")(2).innerText)
Sheets("Sheet1").Cells(argRow, "H").Value = aaacFONT
'Contact Name Function
Sheets("Sheet1").Cells(argRow, "F").FormulaR1C1 = _
"=LEFT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),FIND(RIGHT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),LEN(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-FIND(CHAR(10),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-1)"
'Contact Phone Number
Dim aaadFONT As String
aaadFONT = Trim(Doc.getElementsByClassName("phone ng-binding")(0).innerText)
Sheets("Sheet1").Cells(argRow, "G").Value = aaadFONT
Set Doc = Nothing
IE.Quit
Set IE = Nothing
End Sub

VBA Get webpage Link and Download File to

Is a long time ago to programming VBA. I want to fetch in a web Table all Linked PDF Files in coll um (x). First I have created the logins and navigate to the site to fetch file.
Site table:
Sub goToShopWH()
Dim ieApp As InternetExplorer
Dim ieDoc As Object
Dim clip As Object
'Table Grapping
Dim ieTable As Object
'create a new instance of ie
'Set ieApp = New InternetExplorer
Set ieApp = CreateObject("InternetExplorer.Application")
'you don’t need this, but it’s good for debugging
ieApp.Visible = True
'assume we’re not logged in and just go directly to the login page
ieApp.Navigate "https://website.com/ishop/Home.html"
Do While ieApp.Busy: DoEvents: Loop
Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents:
Loop
' Login to site
Set ieDoc = ieApp.Document
'fill in the login form – View Source from your browser to get the control names
With ieDoc '.forms("loginForm_0")
.getElementById("loginField").Value = "username"
.getElementById("password").Value = "Password"
.getElementById("loginSubmit").Click
Do While ieApp.Busy: DoEvents: Loop
Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop
'now that we’re in, go to the page we want
'Switsh to search form
ieApp.Navigate "https://website.com/ishop/account/MyAccount,$comp$account$AccountNavigation.orderHistory.sdirect?sp=Saccount%2FOrderHistory"
Do While ieApp.Busy: DoEvents: Loop
Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop
' fillout form
Set ieDoc = ieApp.Document
With ieDoc '.forms("Form_0")
.getElementById("PropertySelection_2").Value = "7"
.getElementById("commtxt").Value = "190055" 'Projekt Nummer oder Text.
.getElementById("Submit_0").Click
End With
Do
DoEvents
Loop Until ieApp.ReadyState = 4
Application.Wait Now + TimeSerial(0, 0, 5)
With ieDoc '.forms("Form_1")
.getElementById("PropertySelection").Value = "3"
.getElementById("PropertySelection").FireEvent ("onchange")
End With
Do
DoEvents
Loop Until ieApp.ReadyState = 4
Application.Wait Now + TimeSerial(0, 0, 5)
End With
Set webpage = ieApp.Document
Set table_data = webpage.getElementsByTagName("tr")
End Sub
Please help my to solve this to get I want import Table to sheet2 and download all PDF in ("Table"),("tbody")(tr)(1 to X),(td)(10),(href).click
I think you are after something like this.
Sub webpage()
Dim internet As Object
Dim internetdata As Object
Dim div_result As Object
Dim header_links As Object
Dim link As Object
Dim URL As String
Set internet = CreateObject("InternetExplorer.Application")
internet.Visible = True
URL = "https://www.google.co.in/search?q=how+to+program+in+vba"
internet.Navigate URL
Do Until internet.ReadyState >= 4
DoEvents
Loop
Application.Wait Now + TimeSerial(0, 0, 5)
Set internetdata = internet.Document
Set div_result = internetdata.getelementbyid("res")
Set header_links = div_result.getelementsbytagname("h3")
For Each h In header_links
Set link = h.ChildNodes.Item(0)
Cells(Range("A" & Rows.Count).End(xlUp).Row + 1, 1) = link.href
Next
MsgBox "done"
End Sub
Or, perhaps this.
' place your URL in cell L1
Sub HREF_Web()
Dim doc As HTMLDocument
Dim output As Object
Set IE = New InternetExplorer
IE.Visible = False
IE.navigate Range("L1")
Do
'DoEvents
Loop Until IE.readyState = READYSTATE_COMPLETE
Set doc = IE.document
Set output = doc.getElementsByTagName("a")
i = 5
For Each link In output
'If link.InnerHTML = "" Then
Range("A" & i).Value2 = link
' End If
i = i + 1
Next
MsgBox "Done!"
End Sub

Web scraping in Investing.com with Excel vba

I have no knowledge of vba. Only the macro recorder is used.
I need to download the data from a web page to an Excel spreadsheet and with my knowledge of vba I am not capable.
In particular, what I want to do a macro to download to Excel a data table of the page: https://www.investing.com/equities/cellnex-telecom-historical-data
This download would have to be configured in terms of time, date range and ordering.
The steps would be the following:
1.- The objective is to copy the data from the "CLNX historical data" table to an Excel spreadsheet.
2.- That download should be done by previously selecting "Monthly" in the drop-down menu by calling "Term".
3.- That the download is made by previously selecting the range of dates for the last 2 years.
4.- Finally, order the table in descending order by the column "Maximum".
5.- Once the term, the date range and the order are selected, copy the data from the "CLNX historical data" table to an Excel spreadsheet.
I have tried with the macro recorder but I am not able to configure the term, the date range or the ordering.
Could someone help me?
Thanks for your help.
The code:
Sub DataInvesting()
Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE.navigate "https://www.investing.com/equities/cellnex-telecom-historical-data"
Do Until IE.readyState = 4
DoEvents
Loop
IE.Document.getElementsByClassName("newInput selectBox float_lang_base_1")(0).Value = "Monthly"
IE.Visible = True
Set IE = Nothing
Set appIE = Nothing
End Sub
I have just tested the following code and it works, instead of creating an instance of internet explorer every time we need to run this macro, we will use xmlhttp requests. Just copy the entire code and paste it into a module in vba. Don't forget to add references (Tools/References) to Microsoft HTML Object Library and Microsoft XML v6.0.
Option Explicit
Sub Export_Table()
'Html Objects---------------------------------------'
Dim htmlDoc As MSHTML.HTMLDocument
Dim htmlBody As MSHTML.htmlBody
Dim ieTable As MSHTML.HTMLTable
Dim Element As MSHTML.HTMLElementCollection
'Workbooks, Worksheets, Ranges, LastRow, Incrementers ----------------'
Dim wb As Workbook
Dim Table As Worksheet
Dim i As Long
Set wb = ThisWorkbook
Set Table = wb.Worksheets("Sheet1")
'-------------------------------------------'
Dim xmlHttpRequest As New MSXML2.XMLHTTP60 '
'-------------------------------------------'
i = 2
'Web Request --------------------------------------------------------------------------'
With xmlHttpRequest
.Open "POST", "https://www.investing.com/instruments/HistoricalDataAjax", False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.setRequestHeader "X-Requested-With", "XMLHttpRequest"
.send "curr_id=951681&smlID=1695217&header=CLNX+Historical+Data&st_date=01%2F01%2F2017&end_date=03%2F01%2F2019&interval_sec=Monthly&sort_col=date&sort_ord=DESC&action=historical_data"
If .Status = 200 Then
Set htmlDoc = CreateHTMLDoc
Set htmlBody = htmlDoc.body
htmlBody.innerHTML = xmlHttpRequest.responseText
Set ieTable = htmlDoc.getElementById("curr_table")
For Each Element In ieTable.getElementsByTagName("tr")
Table.Cells(i, 1) = Element.Children(0).innerText
Table.Cells(i, 2) = Element.Children(1).innerText
Table.Cells(i, 3) = Element.Children(2).innerText
Table.Cells(i, 4) = Element.Children(3).innerText
Table.Cells(i, 5) = Element.Children(4).innerText
Table.Cells(i, 6) = Element.Children(5).innerText
Table.Cells(i, 7) = Element.Children(6).innerText
i = i + 1
DoEvents: Next Element
End If
End With
Set xmlHttpRequest = Nothing
Set htmlDoc = Nothing
Set htmlBody = Nothing
Set ieTable = Nothing
Set Element = Nothing
End Sub
Public Function CreateHTMLDoc() As MSHTML.HTMLDocument
Set CreateHTMLDoc = CreateObject("htmlfile")
End Function
I can't test this as despite setting up a free account it keeps saying the password is wrong. Fed up with 5 password resets and same problem and suspect it want my social media details.
The following broadly outlines steps I would consider though some timed waits are most likely needed.
Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub Info()
Dim ie As New InternetExplorer
Const URL As String = ""https://www.investing.com/equities/cellnex-telecom-historical-data""
With ie
.Visible = True
.Navigate2 URL
While .Busy Or .readyState < 4: DoEvents: Wend
.document.querySelector(".login").Click
While .Busy Or .readyState < 4: DoEvents: Wend
.Navigate2 URL
While .Busy Or .readyState < 4: DoEvents: Wend
With .document.querySelector("#loginFormUser_email")
.Focus
.Value = "Bob#gmail.com"
End With
With .document.querySelector("#loginForm_password")
.Focus
.Value = "systemSucksDoesn'tAcceptMyPassword"
End With
Application.Wait Now + TimeSerial(0, 0, 2)
.document.querySelector("[onclick*=submitLogin]").Click
While .Busy Or .readyState < 4: DoEvents: Wend
.document.querySelector("#data_interval").Click
.document.querySelector("[value=Monthly]").Click
With .document.querySelector("#picker")
.Focus
.Value = "03/08/2017 - 03/08/2019"
.FireEvent "onchange"
End With
'TODO Sorting column when clarified which column
.document.querySelector("[title='Download Data']").Click
Application.Wait Now + TimeSerial(0, 0, 10)
Stop
.Quit
End With
End Sub
Try this.
Sub Web_Table_Option()
Dim HTMLDoc As New HTMLDocument
Dim objTable As Object
Dim lRow As Long
Dim lngTable As Long
Dim lngRow As Long
Dim lngCol As Long
Dim ActRw As Long
Dim objIE As InternetExplorer
Set objIE = New InternetExplorer
objIE.Navigate "https://www.investing.com/equities/cellnex-telecom-historical-data"
Do Until objIE.ReadyState = 4 And Not objIE.Busy
DoEvents
Loop
Application.Wait (Now + TimeValue("0:00:03")) 'wait for java script to load
HTMLDoc.body.innerHTML = objIE.Document.body.innerHTML
With HTMLDoc.body
Set objTable = .getElementsByTagName("curr_table")
For lngTable = 0 To objTable.Length - 1
For lngRow = 0 To objTable(lngTable).Rows.Length - 1
For lngCol = 0 To objTable(lngTable).Rows(lngRow).Cells.Length - 1
ThisWorkbook.Sheets("Sheet1").Cells(ActRw + lngRow + 1, lngCol + 1) = objTable(lngTable).Rows(lngRow).Cells(lngCol).innerText
Next lngCol
Next lngRow
ActRw = ActRw + objTable(lngTable).Rows.Length + 1
Next lngTable
End With
objIE.Quit
End Sub

VBA: copy data from website into excel

I have a VBA code that selects info from drop-down menus on a government website and then submits the query. The requested data then opens up in another IE page. I am trying to copy this data into excel; however, I am unable to do so.
My code currently copies the text on the first IE page that contains the drop-down menus. The government website is: http://www.osfi-bsif.gc.ca/Eng/wt-ow/Pages/FINDAT.aspx
I have look all over the internet for a solution but nothing seems to work...
Here is my code:
Sub GetOsfiFinancialData()
Dim UrlAddress As String
UrlAddress = "http://ws1.osfi-bsif.gc.ca/WebApps/FINDAT/DTIBanks.aspx?T=0&LANG=E"
Dim ie As Object
Set ie = CreateObject("internetexplorer.application")
With ie
.Silent = True
.Visible = False
.navigate UrlAddress
End With
Do Until Not ie.Busy And ie.readyState = 4
DoEvents
Loop
Application.Wait (Now() + TimeValue("00:00:05"))
'Select Bank
ie.document.getElementById("DTIWebPartManager_gwpDTIBankControl1_DTIBankControl1_institutionTypeCriteria_institutionsDropDownList").Value = Z005
'open window with financial data
Dim objButton
Set objButton = ie.document.getElementById("DTIWebPartManager_gwpDTIBankControl1_DTIBankControl1_submitButton")
objButton.Focus
objButton.Click
'select new pop-up window
marker = 0
Set objshell = CreateObject("Shell.Application")
IE_count = objshell.Windows.Count
For x = 0 To (IE_count - 1)
On Error Resume Next ' sometimes more web pages are counted than are open
my_title = objshell.Windows(x).document.Title
If my_title Like "Consolidated Monthly Balance Sheet" & "*" Then 'compare to find if the desired web page is already open
Set ie = objshell.Windows(x)
marker = 1
Exit For
Else
End If
Next
Do Until Not ie.Busy And ie.readyState = 4
DoEvents
Loop
Application.Wait (Now() + TimeValue("00:00:05"))
Dim doc As MSHTML.HTMLDocument
Dim tables As MSHTML.IHTMLElementCollection
Dim table As MSHTML.HTMLTable
Dim clipboard As MSForms.DataObject
Set doc = ie.document
Set tables = doc.getElementsByTagName("body")
Set table = tables(0)
Set clipboard = New MSForms.DataObject
'paste in sheets
Dim test
Set test = ActiveWorkbook.Sheets("Test")
clipboard.SetText table.outerHTML
clipboard.PutInClipboard
test.Range("A1").PasteSpecial xlPasteAll
clipboard.Clear
MsgBox ("Task Completed")
End Sub
Your help is greatly appreciated!
You were using the current test with document.Title. I found that For Each of all windows looking for the full title worked in combination with copy pasting the pop-up window outerHTML. No additional wait time was required.
Inside the For Each Loop, after you reset the IE instance to the new window, you can obtain the new URL with ie.document.url. As you already have the data loaded you might as well just copy paste it straight away in my opinion.
Code:
Option Explicit
Public Sub GetOsfiFinancialData()
Dim UrlAddress As String, objButton, ie As Object
UrlAddress = "http://ws1.osfi-bsif.gc.ca/WebApps/FINDAT/DTIBanks.aspx?T=0&LANG=E"
Set ie = CreateObject("internetexplorer.application")
With ie
.Silent = True
.Visible = False
.navigate UrlAddress
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("DTIWebPartManager_gwpDTIBankControl1_DTIBankControl1_institutionTypeCriteria_institutionsDropDownList").Value = "Z005"
Set objButton = .document.getElementById("DTIWebPartManager_gwpDTIBankControl1_DTIBankControl1_submitButton")
objButton.Focus
objButton.Click
Dim objShellWindows As New SHDocVw.ShellWindows, currentWindow As IWebBrowser2
For Each currentWindow In objShellWindows
If currentWindow.document.Title = "Consolidated Monthly Balance Sheet - Banks, Trust and Loan" Then
Set ie = currentWindow
Exit For
End If
Next
Dim clipboard As Object
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
clipboard.SetText ie.document.body.outerHTML
clipboard.PutInClipboard
ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).PasteSpecial
.Quit
End With
End Sub
References (VBE > Tools > References):
Microsoft Internet Controls
I don't have time to get into all the stuff about controlling one browser from another, but I think you can figure that part out, especially since you made some great progress on this already. Get URL#2 from URL#1, like you are doing, but with some better data controls around it, and then do this...
Option Explicit
Sub Web_Table_Option_One()
Dim xml As Object
Dim html As Object
Dim objTable As Object
Dim result As String
Dim lRow As Long
Dim lngTable As Long
Dim lngRow As Long
Dim lngCol As Long
Dim ActRw As Long
Set xml = CreateObject("MSXML2.XMLHTTP.6.0")
With xml
.Open "GET", "http://ws1.osfi-bsif.gc.ca/WebApps/Temp/2f40b7ef-d024-4eca-a8a3-fb82153efafaFinancialData.aspx", False
.send
End With
result = xml.responseText
Set html = CreateObject("htmlfile")
html.body.innerHTML = result
Set objTable = html.getElementsByTagName("Table")
For lngTable = 0 To objTable.Length - 1
For lngRow = 0 To objTable(lngTable).Rows.Length - 1
For lngCol = 0 To objTable(lngTable).Rows(lngRow).Cells.Length - 1
ThisWorkbook.Sheets("Sheet1").Cells(ActRw + lngRow + 1, lngCol + 1) = objTable(lngTable).Rows(lngRow).Cells(lngCol).innerText
Next lngCol
Next lngRow
ActRw = ActRw + objTable(lngTable).Rows.Length + 1
Next lngTable
End Sub

Excel VBA - Extracting data from web page

I am trying to pull seller information from Amazon page with price by automating web browser. I am trying to run the below code, but the error I am getting is:
Object Variable or With Block variable not set.
Can someone guide me where i am going wrong.
Option Explicit
Sub RunNewModule()
Dim ie As InternetExplorer
Dim html As HTMLDocument
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = False
ie.Navigate "http://www.amazon.com/gp/offer-listing/B00SVA81Z2/ref=dp_olp_new_mbc?ie=UTF8&condition=new"
Dim priceData As Variant
Dim sellerdata As Variant
Dim item As Variant
Dim cntr As Integer
priceData = html.getElementsByClassName("olpOfferPrice").getElementsByTagName("span")(0).innerText
cntr = 1
For Each item In priceData
Range("B" & cntr) = item.innerText
cntr = cntr + 1
Next item
sellerdata = html.getElementsByClassName("olpSellerName").getElementsByTagName("span")(0).innerText
cntr = 1
For Each item In sellerdata
Range("A" & cntr) = item.innerText
cntr = cntr + 1
Next item
End Sub
You didn't assign html and it's null now.
You should assign it this way:
Set html= ie.Document
To get an element by it's class name:
Dim ie As InternetExplorer
Dim html As IHTMLDocument
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = False
ie.Navigate "http://stackoverflow.com/questions/34463544/vba-fetching-data-from-class-name"
While ie.Busy
DoEvents
Wend
While ie.ReadyState < 4
DoEvents
Wend
Set html = ie.Document
Dim elements As IHTMLElementCollection
Set elements = html.getElementsByClassName("question-hyperlink")
If elements.Length > 0 Then
MsgBox elements(0).innerText
End If
ie.Quit
Set ie = Nothing
Don't forget to add reference to:
Microsoft Internet Controls
Microsoft Html Object library
For that amazon link:
Dim ie As InternetExplorer
Dim html As HTMLDocument
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = False
ie.Navigate "http://www.amazon.in/gp/offer-listing/B00EYCBGNA/ref=dp_olp_new_mbc?ie=UTF8&condition=new"
While ie.Busy
DoEvents
Wend
While ie.ReadyState < 4
DoEvents
Wend
Set html = ie.Document
Dim elements As IHTMLElementCollection
Set elements = html.getElementsByClassName("olpOfferPrice")
For i = 0 To elements.Length - 1
Sheet1.Range("A" & (i + 1)) = elements(i).innerText
Next i
Set elements = html.getElementsByClassName("olpSellerName")
For i = 0 To elements.Length - 1
Sheet1.Range("B" & (i + 1)) = elements(i).innerText
Next i
ie.Quit
Set ie = Nothing

Resources