Scrape text from a website using Excel VBA - excel

I found this article explaining how to scrape certain tags from a website using Excel VBA.
The code below gets the content from the first <p> tag that it finds:
Sub get_title_header()
Dim wb As Object
Dim doc As Object
Dim sURL As String
Dim lastrow As Long
lastrow = Sheet1.Cells(Rows.Count, "A").End(xlUp).Row
For i = 2 To lastrow
Set wb = CreateObject("internetExplorer.Application")
sURL = Cells(i, 1)
wb.navigate sURL
wb.Visible = True
While wb.Busy
DoEvents
Wend
'HTML document
Set doc = wb.document
Cells(i, 2) = doc.title
On Error GoTo err_clear
Cells(i, 3) = doc.GetElementsByTagName("p")(0).innerText
err_clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
wb.Quit
Range(Cells(i, 1), Cells(i, 3)).Columns.AutoFit
Next i
End Sub
I'd like to make the scraper get all the content that is within a <p> tag on a webpage. So I guess a foreach functionality of some kind is missing.
How can the content from multiple <p> tags be collected?
UPDATE
The working code!
Sub get_title_header()
Dim wb As Object
Dim doc As Object
Dim sURL As String
Dim lastrow As Long
Dim i As Integer
lastrow = Sheet1.Cells(Rows.Count, "A").End(xlUp).Row
For i = 2 To lastrow
Set wb = CreateObject("internetExplorer.Application")
sURL = Cells(i, 1)
wb.navigate sURL
wb.Visible = True
While wb.Busy
DoEvents
Wend
'HTML document
Set doc = wb.document
Cells(i, 2) = doc.Title
On Error GoTo err_clear
Dim el As Object
For Each el In doc.GetElementsByTagName("p")
counter = counter + 1
Cells(i, counter + 2).Value = Cells(counter + 1).Value & el.innerText
Next el
counter = 0
err_clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
wb.Quit
Range(Cells(i, 1), Cells(i, 10)).Columns.AutoFit
Next i
End Sub

You're almost there! doc.GetElementsByTagName("p") returns a collection of HTMLParagraphElement objects of which you accessed the first entry using doc.GetElementsByTagName("p")(0). As you allude to, a For Each loop would let you access each in turn:
Sub get_title_header()
Dim wb As Object
Dim doc As Object
Dim sURL As String
Dim lastrow As Long
Dim i As Integer
lastrow = Sheet1.Cells(Rows.Count, "A").End(xlUp).Row
For i = 2 To lastrow
Set wb = CreateObject("internetExplorer.Application")
sURL = Cells(i, 1)
wb.navigate sURL
wb.Visible = True
While wb.Busy
DoEvents
Wend
'HTML document
Set doc = wb.document
Cells(i, 2) = doc.Title
On Error GoTo err_clear
Dim el As Object
For Each el In doc.GetElementsByTagName("p")
Cells(i, 3).Value = Cells(i, 3).Value & ", " & el.innerText
Next el
err_clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
wb.Quit
Range(Cells(i, 1), Cells(i, 3)).Columns.AutoFit
Next i
End Sub

If you just need to get the content of the webpage in plain text this code is more concise
Function WEBSITE_TEXT(Destination As String) As String
' Requires a reference to Microsoft XML, v6.0
' Draws on the stackoverflow answer at bit.ly/parseXML
Dim myRequest As XMLHTTP60
Dim myDomDoc As DOMDocument60
' Check and clean inputs
On Error GoTo exitRoute
If Destination = "" Then
WEBSITE_TEXT = ""
Exit Function
End If
' Read the XML data from the Google Maps API
Set myRequest = New XMLHTTP60
myRequest.Open "GET", Destination, False
myRequest.send
' Parse HTML content
Dim html As New HTMLDocument
Dim text As String
html.body.innerHTML = myRequest.responseText
' Return the website content
text = html.body.innerText
If Not html Is Nothing Then WEBSITE_TEXT = text
exitRoute:
' Tidy up
text = ""
Set myRequest = Nothing
End Function

Related

Excel Macro to change page number mentioned in javascript:__doPostBack when called by IE.Navigate

I wrote excel macro to fetch data from multiple pages ( here around 25-40 pages ) . I have managed to change pages and scrape all pages from every page .
Sub Fetch_Data()
Dim IE As Object
Dim httpReq As Object
Dim HTMLdoc As Object
Dim resultsTable As Object
Dim tRow As Object, tCell As Object
Dim destCell As Range
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = False
'Application.ScreenUpdating = False
Application.StatusBar = "Data Fetching in progress, please wait..."
IE.Navigate "https://www.bseindia.com/markets/debt/TradenSettlement.aspx" 'load the Backshop Loan Locator page
Do
DoEvents
Loop Until IE.ReadyState = 4
Set HTMLdoc = IE.Document
'LR = Cells(Rows.Count, 1).End(xlUp).Row
With ActiveSheet
'.Cells.ClearContents
Set destCell = .Range("A1")
End With
Set resultsTable = HTMLdoc.getElementById("ContentPlaceHolder1_GridViewrcdsFC")
For Each tRow In resultsTable.Rows
For Each tCell In tRow.Cells
destCell.Offset(tRow.RowIndex, tCell.cellIndex).Value = tCell.innerText
Next
Next
'________________________________________________________________________________________________________________________
'Go to Next page
'IE.Navigate "javascript:__doPostBack('ctl00$ContentPlaceHolder1$GridViewrcdsFC','Page$2')"
i = 2
For i = 2 To 50
If i = 2 Then
Url = "javascript:__doPostBack('ctl00$ContentPlaceHolder1$GridViewrcdsFC','Page$2')"
On Error GoTo ErrorHandler
ElseIf i = 3 Then
Url = "javascript:__doPostBack('ctl00$ContentPlaceHolder1$GridViewrcdsFC','Page$3')"
On Error GoTo ErrorHandler
ElseIf i = 4 Then
Url = "javascript:__doPostBack('ctl00$ContentPlaceHolder1$GridViewrcdsFC','Page$4')"
On Error GoTo ErrorHandler
ElseIf i = 5 Then
Url = "javascript:__doPostBack('ctl00$ContentPlaceHolder1$GridViewrcdsFC','Page$5')"
On Error GoTo ErrorHandler
ErrorHandler:
GoTo XYZ
End If
IE.Navigate Url
Do
DoEvents
Loop Until IE.ReadyState = 4
Url = ""
LR = Cells(Rows.Count, 1).End(xlUp).Row - 1
With ActiveSheet
'.Cells.ClearContents
Set destCell = .Range("A" & LR)
End With
Set resultsTable = HTMLdoc.getElementById("ContentPlaceHolder1_GridViewrcdsFC")
For Each tRow In resultsTable.Rows
For Each tCell In tRow.Cells
destCell.Offset(tRow.RowIndex, tCell.cellIndex).Value = tCell.innerText
Next
Next
Next i
'________________________________________________________________________________________________________________________
XYZ: IE.Quit
Application.StatusBar = "Data Fetching Completed"
MsgBox ("Data Successfully Fetched")
Application.StatusBar = ""
Dim lrow As Long
Dim index As Long
Dim header As String
header = Range("A1").Value
lrow = Range("A" & Rows.Count).End(xlUp).Row
For index = 2 To lrow
If Range("A" & index).Value = header Then Rows(index).Delete
Next
End Sub
I want to change pages automatically without writing every page , I tried something like below , but pages are not getting changed , how to loop through pages :
For i = 2 To 4
x = "Page$" + CStr(i)
Url = "javascript:__doPostBack('ctl00$ContentPlaceHolder1$GridViewrcdsFC'," & x & ")"
On Error GoTo ErrorHandler
ErrorHandler:
GoTo XYZ
You have to look if there are url links to the other pages on the current page, find a tag and loop all the webpages. You can also look voor the url of each page and hardcode it.
Example with urls beneath tag "a":
Set AElements = HTMLDoc.getElementsByTagName("a")
For Each AElement In AElements
If AElement.id = "xxxxxxxxx" Then
Cells(Cell, 27) = AElement.src 'I write URL in the 27th column
'AElement.href
End If
Next AElement

How to scrape div.address text?

I'm trying to access webpage elements that I may need to scrape.
I have a handle on how to access headers by tag name but I am having trouble with the div.address tag in the image.
The Inspect Element view
My code. Problem line is marked with +++++++:
Sub gettitleheader()
Dim wb As Object
Dim doc As Object
Dim sURL As String
Dim lastrow As Long
lastrow = Sheet1.Cells(Rows.Count, "A").End(xlUp).Row
For i = 2 To lastrow
Set wb = CreateObject("internetExplorer.Application")
sURL = Cells(i, 1)
wb.navigate sURL
wb.Visible = True
While wb.Busy
DoEvents
Wend
'HTML document
Set doc = wb.document
On Error GoTo err_clear
Cells(i, 2) = doc.getelementsbytagname("h1")(0).innerText
err_clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
wb.Quit
On Error GoTo err2_clear
Cells(i, 3) = doc.getelementsbytagname("address")(0).innerHTML '+++++++
err2_clear:
If Err2 <> 0 Then
Err2.Clear
Resume Next
End If
wb.Quit
Range(Cells(i, 1), Cells(i, 4)).Columns.AutoFit
Next i
End Sub
In the image below, the h1 tag is scraped and populated but not the address text.
Final output image, (I'm trying to fill that empty box)
Answered my own question! so proud!
Sub gettitleheader()
Dim wb As Object
Dim doc As Object
Dim y As String
Dim z As String
Dim sURL As String
Dim lastrow As Long
lastrow = Sheet1.Cells(Rows.Count, "A").End(xlUp).Row
For i = 2 To lastrow
Set wb = CreateObject("internetExplorer.Application")
sURL = Cells(i, 1)
wb.navigate sURL
wb.Visible = True
While wb.Busy
DoEvents
Wend
'HTML document
Set doc = wb.document
On Error GoTo err_clear
Cells(i, 2) = doc.getElementsByTagName("h1")(0).innerText
y = doc.getElementsByClassName("address")(0).innerText
Do While (InStr(y, Chr(10)))
y = Right(y, Len(y) - 1)
Loop
z = y
Cells(i, 3) = z
err_clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
wb.Quit

Web scraping with getElementsByTagName()

I want to import restaurant data like Restaurant name, phone number, website & address to excel but unfortunately I am getting ads & garbage data. I have created a code using http://automatetheweb.net/vba-getelementsbytagname-method/ website but it is not helping out. Please rectify the issue in my code.
Website:https://www.yellowpages.com/atlanta-ga/attorneys
Please donot refer json as it is not working on other webs.
Sub Yellowcom()
'Dim ieObj As InternetExplorer
Dim htmlELe As IHTMLElement
Dim HTML As HTMLDocument
Dim i As Integer
Dim URL As String
Dim URLParameter As String
Dim page As Long
Dim links As Object
Dim IE As Object
i = 1
Set IE = CreateObject("InternetExplorer.Application")
'Set ieObj = New InternetExplorer
IE.Visible = True
URL = "https://www.yellowpages.com/atlanta-ga/attorneys"
'Application.Wait Now + TimeValue("00:00:05")
For page = 2 To 4
If page > 1 Then URLParameter = "?page=" & page
IE.navigate URL & URLParameter
' Wait for the browser to load the page
Do Until IE.readyState = 4
DoEvents
Loop
Set HTML = IE.document
Set links = HTML.getElementsByClassName("info")
For Each htmlELe In links
With ActiveSheet
.Range("A" & i).Value = htmlELe.Children(0).textContent
.Range("B" & i).Value = htmlELe.getElementsByTagName("a")(0).href
.Range("C" & i).Value = htmlELe.Children(2).textContent
.Range("D" & i).Value = htmlELe.Children(2).querySelector("a[href]")
'links2 = htmlELe.getElementsByClassName("links")(1)
' .Range("D" & i).Value = links2.href
End With
i = i + 1
Next htmlELe
Next page
IE.Quit
Set IE = Nothing
End Sub
Required Output should be like this
I would use xhr rather than a browser and store data in an array for each page and write that out to sheet. You could really dimension one array to hold all results in advance based on results per page and number of pages but the below is still efficient
Option Explicit
Public Sub GetListings()
Dim html As HTMLDocument, page As Long, html2 As HTMLDocument
Dim results As Object, headers(), ws As Worksheet, i As Long
Const START_PAGE As Long = 1
Const END_PAGE As Long = 2
Set ws = ThisWorkbook.Worksheets("Sheet1")
headers = Array("Name", "Phone", "Website", "Address")
Application.ScreenUpdating = False
Set html = New HTMLDocument
Set html2 = New HTMLDocument
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
With CreateObject("MSXML2.XMLHTTP")
For page = START_PAGE To END_PAGE
.Open "GET", "https://www.yellowpages.com/atlanta-ga/attorneys?page=" & page, False
.send
html.body.innerHTML = .responseText
Set results = html.querySelectorAll(".organic .result")
Dim output(), r As Long
ReDim output(1 To results.Length, 1 To 4)
r = 1
For i = 0 To results.Length - 1
On Error Resume Next
html2.body.innerHTML = results.item(i).outerHTML
output(r, 1) = html2.querySelector(".business-name").innerText
output(r, 2) = html2.querySelector(".phone").innerText
output(r, 3) = html2.querySelector(".track-visit-website").href
output(r, 4) = html2.querySelector(".street-address").innerText & " " & html2.querySelector(".locality").innerText
On Error GoTo 0
r = r + 1
Next
ws.Cells(GetLastRow(ws, 1) + 1, 1).Resize(UBound(output, 1), UBound(output, 2)) = output
Next
End With
Application.ScreenUpdating = True
End Sub
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
With ws
GetLastRow = .Cells(.rows.Count, columnNumber).End(xlUp).Row
End With
End Function
Sample of output:
The info class is also used for the advertisements. You first need to go to the collection where the classname is "search-results organic" and in there find all the "info" classes.
This means that you need an extra collection variable:
Set HTML = IE.document
Set OrganicLinks = HTML.getElementsByClassName("search-results organic")
Set links = OrganicLinks.item(0).getElementsByClassName("info")
For getting the right website, you need to use another reference. It's better to get it by classname, since that one is more unique:
On Error Resume Next
.Range("B" & i).Value = htmlELe.getElementsByClassName("track-visit-website")(0).href
On Error GoTo 0

VBA HTML Listing Info Pull

I am looking to follow a series of URL's that are found in column A (example: https://www.ebay.com/itm/Apple-iPhone-7-GSM-Unlocked-Verizon-AT-T-TMobile-Sprint-32GB-128GB-256GB/352381131997?epid=225303158&hash=item520b8d5cdd:m:mWgYDe4a79NeLuAlV-RmAQA:rk:7:pf:0) and pull the following information from them:
- Title
- Price
- Description
I think there are multiple issues with my code... For one, I can't get the program to follow specific URL's listed in the Excel (only if I specify one within the code). Also, pulling multiple fields has given me issues.
Option Explicit
Public Sub ListingInfo()
Dim ie As New InternetExplorer, ws As Worksheet, t As Date
Dim i As Integer
i = 0
Do While Worksheets("Sheet1").Cells(i, 1).Value <> ""
Const MAX_WAIT_SEC As Long = 10
Set ws = ThisWorkbook.Worksheets("Sheet1")
With ie
.Visible = True
.Navigate2 Worksheets("Sheet1").Cells(i, 1).Value
While .Busy Or .readyState < 4: DoEvents: Wend
Dim Links As Object, i As Long, count As Long
t = Timer
Do
On Error Resume Next
Set Title = .document.querySelectorAll("it-ttl")
Set price = .document.querySelectorAll("notranslate")
Set Description = .document.querySelectorAll("ds_div")
count = Links.Length
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While count = 0
For i = 0 To Title.Length - 1
ws.Cells(i + 1, 1) = Title.item(i)
ws.Cells(i + 1, 2) = price.item(i)
ws.Cells(i + 1, 3) = Description.item(i)
Next
.Quit
i = i + 1
Loop
End With
End Sub
I would use late binding for MSXML2.XMLHTTP and set a reference to the Microsoft HTML Object Library for the HTMLDocument.
Note: querySelector() references the first item it finds that matches its search string.
Here is the short version:
Public Sub ListingInfo()
Dim cell As Range
With ThisWorkbook.Worksheets("Sheet1")
For Each cell In .Range("A1", .Cells(.Rows.Count, 1).End(xlUp))
Dim Document As MSHTML.HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", cell.Value, False
.send
Set Document = New MSHTML.HTMLDocument
Document.body.innerHTML = .responseText
End With
cell.Offset(0, 1).Value = Document.getElementByID("itemTitle").innerText
cell.Offset(0, 2).Value = Document.getElementByID("prcIsum").innerText
If Not Document.querySelector(".viSNotesCnt") Is Nothing Then
cell.Offset(0, 3).Value = Document.querySelector(".viSNotesCnt").innerText
Else
'Try Something Else
End If
Next
End With
End Sub
A more elaborate solution would be to break the code up into smaller routines and load the data into an Array. The main advantage of this is that you can test each subroutine separately.
Option Explicit
Public Type tListingInfo
Description As String
Price As Currency
Title As String
End Type
Public Sub ListingInfo()
Dim source As Range
Dim data As Variant
With ThisWorkbook.Worksheets("Sheet1")
Set source = .Range("A1:D1", .Cells(.Rows.count, 1).End(xlUp))
data = source.Value
End With
Dim r As Long
Dim record As tListingInfo
Dim url As String
For r = 1 To UBound(data)
record = getListingInfo()
url = data(r, 1)
record = getListingInfo(url)
With record
data(r, 2) = .Description
data(r, 3) = .Price
data(r, 4) = .Title
End With
Next
source.Value = data
End Sub
Public Function getListingInfo(url As String) As tListingInfo
Dim ListingInfo As tListingInfo
Dim Document As MSHTML.HTMLDocument
Set Document = getHTMLDocument(url)
With ListingInfo
.Description = Document.getElementByID("itemTitle").innerText
.Price = Split(Document.getElementByID("prcIsum").innerText)(1)
.Title = Document.querySelectorAll(".viSNotesCnt")(0).innerText
Debug.Print .Description, .Price, .Title
End With
End Function
Public Function getHTMLDocument(url As String) As MSHTML.HTMLDocument
Const READYSTATE_COMPLETE As Long = 4
Dim Document As MSHTML.HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", url, False
.send
If .readyState = READYSTATE_COMPLETE And .Status = 200 Then
Set Document = New MSHTML.HTMLDocument
Document.body.innerHTML = .responseText
Set getHTMLDocument = Document
Else
MsgBox "URL: " & vbCrLf & "Ready state: " & .readyState & vbCrLf & "HTTP request status: " & .Status, vbInformation, "URL Not Responding"
End If
End With
End Function
There are a lot of things to fix in your code. It is late here so I will just give pointers (and update fully later) and working code below:
Declare all variables and use appropriate type
Review For Loops and how transpose can be used to create a 1d array of urls pulled from sheet to loop over
Review the difference between querySelector and querySelectorAll methods
Review CSS selectors (you are specifying everything as type selector when in fact you are not selecting by tag for the elements of interest; nor by your stated text)
Think about placement of your IE object creation and of your .Navigate2 to make use of existing object
Make sure to use distinct loop counters
Be sure not to overwrite values in sheet
Code:
Option Explicit
Public Sub ListingInfo()
Dim ie As New InternetExplorer, ws As Worksheet
Dim i As Long, urls(), rowCounter As Long
Dim title As Object, price As Object, description As Object
Set ws = ThisWorkbook.Worksheets("Sheet1")
urls = Application.Transpose(ws.Range("A1:A2").Value) '<= Adjust
With ie
.Visible = True
For i = LBound(urls) To UBound(urls)
If InStr(urls(i), "http") > 0 Then
rowCounter = rowCounter + 1
.Navigate2 urls(i)
While .Busy Or .readyState < 4: DoEvents: Wend
Set title = .document.querySelector(".it-ttl")
Set price = .document.querySelector("#prcIsum")
Set description = .document.querySelector("#viTabs_0_is")
ws.Cells(rowCounter, 3) = title.innerText
ws.Cells(rowCounter, 4) = price.innerText
ws.Cells(rowCounter, 5) = description.innerText
Set title = Nothing: Set price = Nothing: Set description = Nothing
End If
Next
.Quit
End With
End Sub
Here's an approach using Web Requests, using MSXML. It should be significantly faster than using IE, and I'd encourage you to strongly consider using this approach wherever possible.
You'll need references to Microsoft HTML Object Library and Microsoft XML v6.0 to get this working.
Option Explicit
Public Sub SubmitRequest()
Dim URLs As Excel.Range
Dim URL As Excel.Range
Dim LastRow As Long
Dim wb As Excel.Workbook: Set wb = ThisWorkbook
Dim ws As Excel.Worksheet: Set ws = wb.Worksheets(1)
Dim ListingDetail As Variant
Dim i As Long
Dim j As Long
Dim html As HTMLDocument
ReDim ListingDetail(0 To 2, 0 To 10000)
'Get URLs
With ws
LastRow = .Cells(.Rows.Count, 1).End(xlUp).Row
Set URLs = .Range(.Cells(1, 1), .Cells(LastRow, 1))
End With
'Update the ListingDetail
For Each URL In URLs
Set html = getHTML(URL.Value2)
ListingDetail(0, i) = html.getElementByID("itemTitle").innertext 'Title
ListingDetail(1, i) = html.getElementByID("prcIsum").innertext 'Price
ListingDetail(2, i) = html.getElementsByClassName("viSNotesCnt")(0).innertext 'Seller Notes
i = i + 1
Next
'Resize array
ReDim Preserve ListingDetail(0 To 2, 0 To i - 1)
'Dump in Column T,U,V of existing sheet
ws.Range("T1:V" & i).Value = WorksheetFunction.Transpose(ListingDetail)
End Sub
Private Function getHTML(ByVal URL As String) As HTMLDocument
'Add a reference to Microsoft HTML Object Library
Set getHTML = New HTMLDocument
With New MSXML2.XMLHTTP60
.Open "GET", URL
.send
getHTML.body.innerHTML = .responseText
End With
End Function

auto increase the date in the middle of the URL in macros

I have made the macros script which retrieves the data from the URL. What I need is that, I need to increase the date one by one and get the data for each. the URL is like this :
https://www.ukdogracing.net/racecards/01-05-2017/monmore
Ia m able to get the data with this script :
Sub GetData()
Dim IE As Object
Dim doc As Object
Dim strURL As String
Dim I As Integer
For I = 1 To 5
strURL = "https://www.ukdogracing.net/racecards/01-05-2017/monmore" + Trim(Str(I))
Set IE = CreateObject("InternetExplorer.Application")
With IE
.navigate strURL
Do Until .ReadyState = 4: DoEvents: Loop
Do While .Busy: DoEvents: Loop
Set doc = IE.Document
GetAllTables doc
.Quit
End With
Next I
End Sub
Sub GetAllTables(doc As Object)
Dim ws As Worksheet
Dim rng As Range
Dim tbl As Object
Dim rw As Object
Dim cl As Object
Dim tabno As Long
Dim nextrow As Long
Dim I As Long
Dim ThisLink As Object 'variable for <a> tags
Set ws = Worksheets.Add
For Each tbl In doc.getElementsByTagName("TABLE")
tabno = tabno + 1
nextrow = nextrow + 1
Set rng = ws.Range("B" & nextrow)
rng.Offset(, -1) = "Table " & tabno
For Each rw In tbl.Rows
For Each cl In rw.Cells
rng.Value = cl.outerText
Set rng = rng.Offset(, 1)
I = I + 1
Next cl
nextrow = nextrow + 1
Set rng = rng.Offset(1, -I)
I = 0
Next rw
Next tbl
I = Range("B" & Rows.Count).End(xlUp).Row 'last row with data
Do While Cells(I, 1).Value = "" 'will loop until first not blank found in column A (starting from last row of data, from end to start)
For Each ThisLink In doc.getElementsByTagName("a") 'we check all <a> tags
If ThisLink.innerText = Cells(I, 2).Value Then Cells(I, 1).Value = ThisLink.href 'If the innertext is the name of the race, in column A we add link
Next ThisLink
I = I - 1 'we decrease row position
Loop
End Sub
But I need the script takes the date part of the URL and add one day each time till today and get the data. for example :
https://www.ukdogracing.net/racecards/01-06-2017/monmore
https://www.ukdogracing.net/racecards/01-07-2017/monmore
etc... How can I make the script to get the data for each day adding one each time.
Thanks in advance.
Replace the first sub with this one and it will run for the specified dates. I couldn't see I having any purpose so i removed it.
Sub GetData()
Dim IE As Object, doc As Object
Dim strURL As String, myDate As Date
Set IE = CreateObject("InternetExplorer.Application")
With IE
For myDate = CDate("01-05-2017") To CDate("01-09-2017")
strURL = "https://www.ukdogracing.net/racecards/" & Format(myDate, "mm-dd-yyyy") & "/monmore" ' Trim(Str(I))
.navigate strURL
Do Until .ReadyState = 4: DoEvents: Loop
Do While .Busy: DoEvents: Loop
Set doc = IE.Document
GetAllTables doc
Next myDate
.Quit
End With
End Sub

Resources