Paste table from web into range of cells - excel

I am trying to get a table from a website into my excel sheet. Since the website has a log in and I need to click a few buttons to get to the table, I am using VBA.
The code I have so far is just a test, it is not the actual website that I am trying to log into.
So far, the code is able to launch the website and get the inner text from the table, but it only pastes it into a single cell. How can I paste the table by keeping the same formatting?
Sub test()
Set IE = CreateObject("InternetExplorer.application")
IE.Visible = True
IE.navigate ("https://www.w3schools.com/html/html_tables.asp")
Do
If IE.readyState = 4 Then
IE.Visible = True
Exit Do
Else
DoEvents
End If
Loop
'get data
Dim tbl As HTMLTable
Set tbl = IE.document.getElementById("customers")
Cells(1, 1) = tbl.innerText
End Sub

You may perform webscraping using the following code enhancement, it work perfectly :
Sub test()
Dim IE As Object
Set IE = CreateObject("InternetExplorer.application")
IE.Visible = True
IE.navigate ("https://www.w3schools.com/html/html_tables.asp")
Do
If IE.readyState = 4 Then
IE.Visible = True
Exit Do
Else
DoEvents
End If
Loop
'get data
Dim tbl As HTMLTable
Dim class1 As IHTMLElement, rowText As IHTMLElement, item As IHTMLElement
Dim rowNum As Long, colNum As Long
Set class1 = IE.document.getElementById("customers").children(0)
rowNum = 0
For Each rowText In class1.children
rowNum = rowNum + 1
colNum = 0
For Each item In rowText.children
colNum = colNum + 1
Sheet1.Cells(rowNum, colNum).Value = item.innerText
Next
Next
End Sub

Related

Web Scraping Table into Excel Worksheet

Good afternoon,
First off....I know using ie for anything isn't great since its not being supported anymore.
I have figured out how to scrape a table but I need to have the table data to be placed in cell A5. I tried adding .range("A5") to parts of the code but haven't been able to get it to work. Please see code below:
Private Sub CommandButton3_Click()
'Clear the range before scraping
ActiveSheet.Range("A5:k5000").ClearContents
'Navigating to webpage
Dim ie As Object
Dim url As String
url = "https://www.myfueltanksolutions.com/validate.asp"
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
ie.navigate url
Do While ie.Busy: DoEvents: Loop
Do Until ie.readyState = 4: DoEvents: Loop
'Login credentails and submit
Dim idoc As MSHTML.HTMLDocument
Set idoc = ie.document
idoc.all.CompanyID.Value = "CompanyID"
idoc.all.UserId.Value = "UserID"
idoc.all.Password.Value = "Password"
idoc.parentWindow.execScript "submitForm();"
Do While ie.Busy: DoEvents: Loop
Do Until ie.readyState = 4: DoEvents: Loop
'Scrapging table
Dim tbl As HTMLTable
Set tbl = ie.document.getElementById("RecentInventorylistform")
Dim rowcounter As Integer
Dim colcounter As Integer
rowcounter = 1
colcounter = 1
Dim tr As HTMLTableRow
Dim td As HTMLTableCell
Dim th
Dim mySh As Worksheet
Set mySh = ThisWorkbook.Sheets("Sheet1")
For Each tr In tbl.getElementsByTagname("tr")
'Loop thru table cells
For Each td In tr.getElementsByTagname("td")
mySh.Cells(rowcounter, colcounter).Value = td.innerText
colcounter = colcounter + 1
Next td
colcounter = 1
rowcounter = rowcounter + 1
Next tr
'Log out and close website
ie.navigate ("https://www.myfueltanksolutions.com/signout.asp?action=rememberlogin")
ie.Quit
'Last updated and message box at completion
Range("N1") = Now()
MsgBox "Data Imported Successfully. Press Ok to Continue."
End Sub
Thank you so much for the help!
Do you want to start from cell A5? If so, you should change the value in mySh.Cells(rowcounter, colcounter).Value. Cell A5 is Cells(5, 1), so you should start from Cells(5, 1). You can try to change the code like this:
Dim rowcounter As Integer
Dim colcounter As Integer
rowcounter = 5
colcounter = 1

Scrape CDC vaccination data using VBA

I am trying to scrape the vaccination data from the below CDC website:
https://covid.cdc.gov/covid-data-tracker/#vaccinations
I have tried querySelectorAll but no luck. Can anyone help take a look? Much appreciated!
Sub useClassnames()
Dim appIE As Object
Set appIE = CreateObject("internetexplorer.application")
With appIE
.navigate "https://covid.cdc.gov/covid-data-tracker/#vaccinations"
.Visible = False
End With
Do While appIE.Busy
DoEvents
Loop
Set allRowOfData = appIE.document.getElementById("maincontent")
Debug.Print allRowOfData.innerHTML
'Set element = appIE.document.querySelectorAll(".container mt-5")
'For Each elements In element
' Debug.Print elements
'Next elements
'For Each element In allRowOfData
'Debug.Print element
'Next element
End Sub
Here you have, just change your worksheet name or number :)
Option Explicit
Const updatedCol = 1
Const dosesDistributedColVal = 2
Const peopleInicVaccColVal = 3
Sub useClassnames()
'declare worksheet variable and set headers
Dim targetWsh As Worksheet: Set targetWsh = ThisWorkbook.Sheets(1)
targetWsh.Cells(1, 1).Value = "Last Update"
targetWsh.Cells(1, 2).Value = "Doses Distributed"
targetWsh.Cells(1, 3).Value = "People Initiating Vaccination"
Dim lstRegisterRow As Long: lstRegisterRow = targetWsh.Range("A" & targetWsh.Rows.Count).End(xlUp).Row + 1
'open IE and navigate to site
Dim appIE As InternetExplorer: Set appIE = New InternetExplorer
appIE.navigate "https://covid.cdc.gov/covid-data-tracker/#vaccinations"
appIE.Visible = False
While appIE.Busy = True Or appIE.readyState < 4: DoEvents: Wend
Dim oHtmlDoc As HTMLDocument: Set oHtmlDoc = appIE.document
Dim oHtmlElementColl As IHTMLElementCollection
'Get and write last update date
Application.Wait (Now + TimeValue("0:00:02")) 'wait 2 secs to avoid error, if recieve error, add seconds as needed
Set oHtmlElementColl = oHtmlDoc.getElementsByTagName("small")
targetWsh.Cells(lstRegisterRow, updatedCol) = oHtmlElementColl(0).innerHTML
'Get and write Doses Distributed and People Initiating Vaccination
Set oHtmlElementColl = oHtmlDoc.GetElementsByClassName("card-number")
targetWsh.Cells(lstRegisterRow, dosesDistributedColVal) = oHtmlElementColl(0).innerText
targetWsh.Cells(lstRegisterRow, peopleInicVaccColVal) = oHtmlElementColl(1).innerText
appIE.Quit
End Sub

Transform InternetExplorer to XML

I just discovered xml or xmlhttp and this is entirely new to me.
I am trying to create a macro wherein it would go through all the list of websites in column J starting at row 2 (header at row 1). Get the information that I want from each website, then display them in column K, which is right next to the websites where the information was taken from.
Column J has a list of websites, starting at J2. Let's say it would go all the way down to J10. From each website, there is a certain information I want to get, so the macro will visit the website at J2, get that information and paste it in K2, then visit the website in J3, paste that information in K3, and so on. I already have an existing list of website at column J, which also happens to be dynamic.
This is the current code that I have using IE that I want to convert into xml/xmlhttp.
Sub CommandButton1_Click()
Dim ie As Object
Dim lastrow As Integer
Dim i As Integer
Dim myURL As String
Dim sdd As String
Dim add As Variant
Dim html As Object
Dim mylinks As Object
Dim mylink As Object
Dim result As String
' Create InternetExplorer Object
Set ie = CreateObject("InternetExplorer.Application")
lastrow = Sheet1.Cells(Rows.Count, "J").End(xlUp).Row
For i = 2 To lastrow
myURL = Sheet1.Cells(i, "J").Value
' Hide InternetExplorer
ie.Visible = False
' URL to get data from
ie.navigate myURL
' Loop until page fully loads
Do While ie.readystate <> READYSTATE_COMPLETE
Loop
' Information i want to get from the URLs
sdd = ie.document.getelementsbyclassname("timeline-text")(0).innerText
' Format the result
add = Split(sdd, "$")
Range("K3") = add(1)
' Close InternetExplorer
ie.Quit
'Return to Normal?
ie.Visible = True
End
Next
' Clean up
Set ie = Nothing
Application.StatusBar = ""
End Sub
I am trying to get the "85100", not the $85,100
<span class="font-size-base font-normal">Est.</span>
<span itemprop="price" content="85100">
$85,100
</span>
I'm hoping you could help me with this problem.
Thank you in advance.
I would structure something like as follows, where the IE object is created outside the loop. You use css selectors throughout. You may need a timed loop to ensure element is present on page. Use a proper page load wait as shown.
Use an explicit worksheet name to put the worksheet in a variable to work with.
You might want to add a test that myURL has http/https in it as you may have blank cells in range and only want to work with likely urls values.
Option Explicit
Public Sub CommandButton1_Click()
Dim ie As Object, lastrow As Long, i As Long
Dim myURL As String, sdd As String, ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1") ' <change as required
Set ie = CreateObject("InternetExplorer.Application")
lastrow = ws.Cells(Rows.Count, "J").End(xlUp).Row
With ie
.Visible = False
For i = 2 To lastrow
myURL = ws.Cells(i, "J").Value
.navigate2 myURL
While .Busy Or .readyState < 4: DoEvents: Wend
sdd = .document.querySelector(".price").getAttribute("content")
ws.Cells(i, "K") = sdd
Next
.Quit
End With
'Application.StatusBar = ""
End Sub
With a timed loop:
Public Sub CommandButton1_Click()
Dim ie As Object, lastrow As Long, i As Long, t As Date, ele As Object
Const MAX_WAIT_SEC As Long = 10
Dim myURL As String, sdd As String, ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1") ' <change as required
Set ie = CreateObject("InternetExplorer.Application")
lastrow = ws.Cells(rows.Count, "J").End(xlUp).Row
With ie
.Visible = False
For i = 2 To lastrow
myURL = ws.Cells(i, "J").Value
.Navigate2 myURL
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
On Error Resume Next
Set ele = HTMLDoc.querySelector(".price")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While ele Is Nothing
If Not ele Is Nothing Then
sdd = ele.getAttribute("content")
ws.Cells(i, "K") = sdd
End If
Next
.Quit
End With
'Application.StatusBar = vbnullstring
End Sub

Run Through A List Then Paste Result To Its Next Cell

I was able to create a macro wherein it would go through all the list of websites in column J starting at row 2 (header at row 1). Get the information that I want from each website, then display them in column K, which is right next to the websites where the information was taken from.
The only problem is that, I cant make the macro go to each of the websites.
I dont know what is wrong with the code.
Sub CommandButton1_Click()
Dim ie As Object
Dim lastrow As Integer
Dim i As Integer
Dim myURL As String
Dim sdd As String
Dim add As Variant
Dim html As Object
Dim mylinks As Object
Dim mylink As Object
Dim result As String
' Create InternetExplorer Object
Set ie = CreateObject("InternetExplorer.Application")
lastrow = Sheet1.Cells(Rows.Count, "J").End(xlUp).Row
For i = 2 To lastrow
myURL = Sheet1.Cells(i, "J").Value
' Hide InternetExplorer
ie.Visible = False
' URL to get data from
ie.navigate myURL
' Loop until page fully loads
Do While ie.readystate <> READYSTATE_COMPLETE
Loop
' Information i want to get from the URLs
sdd = ie.document.getelementsbyclassname("timeline-text")(0).innerText
' Format the result
add = Split(sdd, "$")
Range("K3") = add(1)
' Close InternetExplorer
ie.Quit
'Return to Normal?
ie.Visible = True
End
Next
' Clean up
Set ie = Nothing
Application.StatusBar = ""
End Sub
$85,100 is the information I want to get from one of the URLs.
<span class="font-size-base font-normal">Est.</span>
<span itemprop="price" content="85100">$85,100</span>
Don't quit IE until you've completed the loop. Simply navigate to the next URL and record the new page's information on the same row as the URL was collected from.
Sub CommandButton1_Click()
Dim lastrow As Long, i As Long
Dim sdd As String, myURL As String, result As String
Dim add As Variant
Dim ie As Object, html As Object, mylinks As Object, mylink As Object
' Create InternetExplorer Object
Set ie = CreateObject("InternetExplorer.Application")
' Hide InternetExplorer
ie.Visible = False
lastrow = Sheet1.Cells(Rows.Count, "J").End(xlUp).Row
For i = 2 To lastrow
'collect next web page url
myURL = Sheet1.Cells(i, "J").Value
' URL to get data from
ie.navigate myURL
' Loop until page fully loads
Do While ie.readystate <> READYSTATE_COMPLETE
'allow other process through the message queue
DoEvents
Loop
' Information i want to get from the URLs
sdd = ie.document.getelementsbyclassname("timeline-text")(0).innerText
' Format the result
add = Split(sdd, "$")
Sheet1.Cells(i, "K") = add(1)
Next i
'Return to Normal?
ie.Visible = True
' Close InternetExplorer
ie.Quit
' Clean up
Set ie = Nothing
Application.StatusBar = ""
End Sub
I've also added DoEvents within your page load wait loop. This allows other processes to execute instead of tying up resources in the single threaded VBA process.
there is something you have to take note of while programming. Always try to echo a visible result.
Do this before the loop after the lastrow
msgbox lastrow
E.g in between the for loop, do something like
msgbox sdd
You should be able to get where the errors lie.
I hope this helps.

VBA: copy data from website into excel

I have a VBA code that selects info from drop-down menus on a government website and then submits the query. The requested data then opens up in another IE page. I am trying to copy this data into excel; however, I am unable to do so.
My code currently copies the text on the first IE page that contains the drop-down menus. The government website is: http://www.osfi-bsif.gc.ca/Eng/wt-ow/Pages/FINDAT.aspx
I have look all over the internet for a solution but nothing seems to work...
Here is my code:
Sub GetOsfiFinancialData()
Dim UrlAddress As String
UrlAddress = "http://ws1.osfi-bsif.gc.ca/WebApps/FINDAT/DTIBanks.aspx?T=0&LANG=E"
Dim ie As Object
Set ie = CreateObject("internetexplorer.application")
With ie
.Silent = True
.Visible = False
.navigate UrlAddress
End With
Do Until Not ie.Busy And ie.readyState = 4
DoEvents
Loop
Application.Wait (Now() + TimeValue("00:00:05"))
'Select Bank
ie.document.getElementById("DTIWebPartManager_gwpDTIBankControl1_DTIBankControl1_institutionTypeCriteria_institutionsDropDownList").Value = Z005
'open window with financial data
Dim objButton
Set objButton = ie.document.getElementById("DTIWebPartManager_gwpDTIBankControl1_DTIBankControl1_submitButton")
objButton.Focus
objButton.Click
'select new pop-up window
marker = 0
Set objshell = CreateObject("Shell.Application")
IE_count = objshell.Windows.Count
For x = 0 To (IE_count - 1)
On Error Resume Next ' sometimes more web pages are counted than are open
my_title = objshell.Windows(x).document.Title
If my_title Like "Consolidated Monthly Balance Sheet" & "*" Then 'compare to find if the desired web page is already open
Set ie = objshell.Windows(x)
marker = 1
Exit For
Else
End If
Next
Do Until Not ie.Busy And ie.readyState = 4
DoEvents
Loop
Application.Wait (Now() + TimeValue("00:00:05"))
Dim doc As MSHTML.HTMLDocument
Dim tables As MSHTML.IHTMLElementCollection
Dim table As MSHTML.HTMLTable
Dim clipboard As MSForms.DataObject
Set doc = ie.document
Set tables = doc.getElementsByTagName("body")
Set table = tables(0)
Set clipboard = New MSForms.DataObject
'paste in sheets
Dim test
Set test = ActiveWorkbook.Sheets("Test")
clipboard.SetText table.outerHTML
clipboard.PutInClipboard
test.Range("A1").PasteSpecial xlPasteAll
clipboard.Clear
MsgBox ("Task Completed")
End Sub
Your help is greatly appreciated!
You were using the current test with document.Title. I found that For Each of all windows looking for the full title worked in combination with copy pasting the pop-up window outerHTML. No additional wait time was required.
Inside the For Each Loop, after you reset the IE instance to the new window, you can obtain the new URL with ie.document.url. As you already have the data loaded you might as well just copy paste it straight away in my opinion.
Code:
Option Explicit
Public Sub GetOsfiFinancialData()
Dim UrlAddress As String, objButton, ie As Object
UrlAddress = "http://ws1.osfi-bsif.gc.ca/WebApps/FINDAT/DTIBanks.aspx?T=0&LANG=E"
Set ie = CreateObject("internetexplorer.application")
With ie
.Silent = True
.Visible = False
.navigate UrlAddress
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("DTIWebPartManager_gwpDTIBankControl1_DTIBankControl1_institutionTypeCriteria_institutionsDropDownList").Value = "Z005"
Set objButton = .document.getElementById("DTIWebPartManager_gwpDTIBankControl1_DTIBankControl1_submitButton")
objButton.Focus
objButton.Click
Dim objShellWindows As New SHDocVw.ShellWindows, currentWindow As IWebBrowser2
For Each currentWindow In objShellWindows
If currentWindow.document.Title = "Consolidated Monthly Balance Sheet - Banks, Trust and Loan" Then
Set ie = currentWindow
Exit For
End If
Next
Dim clipboard As Object
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
clipboard.SetText ie.document.body.outerHTML
clipboard.PutInClipboard
ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).PasteSpecial
.Quit
End With
End Sub
References (VBE > Tools > References):
Microsoft Internet Controls
I don't have time to get into all the stuff about controlling one browser from another, but I think you can figure that part out, especially since you made some great progress on this already. Get URL#2 from URL#1, like you are doing, but with some better data controls around it, and then do this...
Option Explicit
Sub Web_Table_Option_One()
Dim xml As Object
Dim html As Object
Dim objTable As Object
Dim result As String
Dim lRow As Long
Dim lngTable As Long
Dim lngRow As Long
Dim lngCol As Long
Dim ActRw As Long
Set xml = CreateObject("MSXML2.XMLHTTP.6.0")
With xml
.Open "GET", "http://ws1.osfi-bsif.gc.ca/WebApps/Temp/2f40b7ef-d024-4eca-a8a3-fb82153efafaFinancialData.aspx", False
.send
End With
result = xml.responseText
Set html = CreateObject("htmlfile")
html.body.innerHTML = result
Set objTable = html.getElementsByTagName("Table")
For lngTable = 0 To objTable.Length - 1
For lngRow = 0 To objTable(lngTable).Rows.Length - 1
For lngCol = 0 To objTable(lngTable).Rows(lngRow).Cells.Length - 1
ThisWorkbook.Sheets("Sheet1").Cells(ActRw + lngRow + 1, lngCol + 1) = objTable(lngTable).Rows(lngRow).Cells(lngCol).innerText
Next lngCol
Next lngRow
ActRw = ActRw + objTable(lngTable).Rows.Length + 1
Next lngTable
End Sub

Resources