Export Web Text to excel vbs - excel

Hi all i have been searching the web like mad and came up with the following vba code in excel the problem is it exports the table but not the text within the table it seems to only recover all text in the table that you can click on but not any set text.
If it will be easier for anyone to assist with their own code to do what i need it to help will be highly appreciated.
Sub my_Procedure()
Dim htm As Object
Dim Tr As Object
Dim Td As Object
Dim Tab1 As Object
Dim http As Object, html As New HTMLDocument
Dim paras As Object, para As Object, i As Long
Set http = CreateObject("MSXML2.XMLHTTP")
Set http = CreateObject("MSXML2.XMLHTTP")
http.Open "GET", "https://www.plus500.co.uk/?id=113082&tags=g_sr%2B1711614451_cpi%2BUKSearchBrand_cp%2B70887725030_agi%2BPlus500Core_agn%2Bplus%20500_ks%2Bkwd-842162906_tid%2Be_mt%2Bc_de%2Bg_nt%2B_ext%2B1006989_loc%2BUURL&%D7%90&gclid=CjwKCAjw1cX0BRBmEiwAy9tKHqylty6Mz9TbIA5VzgOiqxOcWg7biR652Hg9tksIR97hlUuAHLZilhoCTq0QAvD_BwE", False
http.send
html.body.innerHTML = http.responseText
Set paras = html.getElementsByTagName("Tbody")
i = 1
For Each para In paras
ThisWorkbook.Worksheets("Sheet3").Cells(i, 1).Value = para.innerText
i = i + 1
Next
Dim Doc As HTMLDocument
'Replace the URL of the webpage that you want to download
Web_URL = VBA.Trim(Sheets(1).Cells(1, 1))
'Create HTMLFile Object
Set HTML_Content = CreateObject("htmlfile")
Dim tdd As String
'Get the WebPage Content to HTMLFile Object
With CreateObject("msxml2.xmlhttp")
.Open "GET", Web_URL, False
.send
HTML_Content.body.innerHTML = http.responseText
End With
Column_Num_To_Start = 1
iRow = 2
iCol = Column_Num_To_Start
iTable = 0
'Loop Through Each Table and Download it to Excel in Proper Format
For Each Tab1 In HTML_Content.getElementsByTagName("tbody")
With HTML_Content.getElementsByTagName("tbody")(iTable)
For Each Tr In .Rows
For Each Td In Tr.Cells
Sheets(1).Cells(iRow, iCol).Select
Sheets(1).Cells(iRow, iCol) = Td.innerText
iCol = iCol + 1
Next Td
iCol = Column_Num_To_Start
iRow = iRow + 1
Next Tr
End With
iTable = iTable + 1
iCol = Column_Num_To_Start
iRow = iRow + 1
Next Tab1
MsgBox "Process Completed"
Call StartTimer
End Sub

Related

VBA Macro- How to scrape these tables from website

I am trying to fetch all web tables but I am not able to fetch them. I have tried same program for other website and its working but for this particular website its not working at all. I have used other api url as well but its not fetching data properly
Sub Export_HTML_Table_To_Excel()
Dim htm As Object
Dim Tr As Object
Dim Td As Object
Dim Tab1 As Object
'Replace the URL of the webpage that you want to download
Web_URL = "https://www.indiaratings.co.in/pressrelease/60901"
'Web_Url="https://www.indiaratings.co.in/pressReleases/GetPressreleaseData pressReleaseId=60901&uniqueIdentifier=122.186.172.34-20230209"
'Create HTMLFile Object
Set HTML_Content = CreateObject("htmlfile")
'Get the WebPage Content to HTMLFile Object
With CreateObject("msxml2.xmlhttp")
.Open "GET", Web_URL, False
.send
HTML_Content.body.innerHTML = .responseText
End With
Column_Num_To_Start = 1
iRow = 2
iCol = Column_Num_To_Start
iTable = 0
'Loop Through Each Table and Download it to Excel in Proper Format
For Each Tab1 In HTML_Content.getElementsByTagName("table")
With HTML_Content.getElementsByTagName("table")(iTable)
For Each Tr In .Rows
For Each Td In Tr.Cells
Worksheets("ABC").Cells(iRow, iCol).Select
Worksheets("ABC").Cells(iRow, iCol) = Td.innerText
iCol = iCol + 1
Next Td
iCol = Column_Num_To_Start
iRow = iRow + 1
Next Tr
End With
iTable = iTable + 1
iCol = Column_Num_To_Start
iRow = iRow + 1
Next Tab1
MsgBox "Process Completed"
End Sub
Kindly guide me for how to fetch this data properly

Scraping from VBA with queryselector

I have used code from this website to pull data from site:
Option Explicit
Public Sub GetInfo()
Dim sResponse As String, i As Long, Html As New HTMLDocument
Dim prices As Object, info As Object
Application.ScreenUpdating = False
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://bazashifer.ru/proflist-profnastil", False
.send
sResponse = .responseText
End With
With Html
.body.innerHTML = sResponse
Set info = .querySelectorAll("div.views-field.views-field-title")
Set prices = .querySelectorAll("div.views-field.views-field-field-cena-tovara")
End With
With Worksheets(2)
For i = 0 To info.Length - 1
.Cells(i + 1, 1) = info(i).innerText
.Cells(i + 1, 2) = prices(i).innerText
Next i
End With
Application.ScreenUpdating = True
End Sub
The code above works just as intended. I implemented code to take multiply links ( link 1, link 2, link 3 ) :
Option Explicit
Public Sub GetInfoAll()
Dim wsSheet As Worksheet, Rows As Long, http As New XMLHTTP60, Html As New HTMLDocument, links As Variant, link As Variant
Dim prices As Object, info As Object, i As Long, sResponse As String
Set wsSheet = Sheets(1)
Rows = wsSheet.Cells(wsSheet.Rows.Count, "A").End(xlUp).row
links = wsSheet.Range("A1:A" & Rows)
With http
For Each link In links
.Open "GET", link, False
.send
sResponse = .responseText
With Html
.body.innerHTML = sResponse
Set info = .querySelectorAll("div.views-field.views-field-title")
Set prices = .querySelectorAll("div.views-field.views-field-field-cena-tovara")
End With
With Worksheets(2)
For i = 0 To info.Length - 1
.Cells(i + 1, 1) = info(i).innerText
.Cells(i + 1, 2) = prices(i).innerText
Next i
End With
Next link
End With
End Sub
The above code works and should pull data into columns, but for the next link the code re-writes the data.
Any help would be great. Thanks
Rows = wsSheet.Cells(wsSheet.Rows.Count, "A").End(xlUp).row
You need to have something like this during your output loop on Sheet 2 because you cant hard code the number of results.
Edit here's what I really meant about the output
Public Sub GetInfoAll()
Dim wsSheet As Worksheet, Rows As Long, http As New XMLHTTP60, Html As New HTMLDocument, links As Variant, link As Variant
Dim prices As Object, info As Object, i As Long, sResponse As String, offsetRows As Long
Dim wb As Workbook
Set wb = Application.Workbooks("Book1")
Set wsSheet = wb.Sheets(1)
Rows = wsSheet.Cells(wsSheet.Rows.Count, "A").End(xlUp).Row
links = wsSheet.Range("A1:A" & Rows)
With http
For Each link In links
.Open "GET", link, False
.send
sResponse = .responseText
With Html
.body.innerHTML = sResponse
Set info = .querySelectorAll("div.views-field.views-field-title")
Set prices = .querySelectorAll("div.views-field.views-field-field-cena-tovara")
End With
With wb.Worksheets(2)
For i = 0 To info.Length - 1
offsetRows = 0
offsetRows = wb.Worksheets(2).Cells(wb.Worksheets(2).Rows.Count, "A").End(xlUp).Row + 1
.Cells(offsetRows, 1) = info(i).innerText
.Cells(offsetRows, 2) = prices(i).innerText
Next i
End With
Next link
End With
End Sub
I think it is ideal to make use of container and then loop through it to parse the desired content. Consider the following an example. You can always append the rest to suit your need.
Public Sub GetInfo()
Dim Html As New HTMLDocument, Htmldoc As New HTMLDocument
Dim Wb As Workbook, ws As Worksheet, R&, I&
Dim link As Variant, linklist As Variant
Set Wb = ThisWorkbook
Set ws = Wb.Worksheets("output")
linklist = Array( _
"https://bazashifer.ru/armatura-stekloplastikovaya", _
"https://bazashifer.ru/truby-0", _
"https://bazashifer.ru/setka-stekloplastikovaya" _
)
For Each link In linklist
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", link, False
.send
Html.body.innerHTML = .responseText
End With
With Html.querySelectorAll(".view-content > .views-row")
For I = 0 To .Length - 1
Htmldoc.body.innerHTML = .item(I).outerHTML
R = R + 1: ws.Cells(R, 1) = Htmldoc.querySelector(".views-field-title a").innerText
ws.Cells(R, 2) = Htmldoc.querySelector("[class*='cena-tovara'] > .field-content").innerText
Next I
End With
Next link
End Sub
I think the problem is that your columns aren't updated for each link.
For i = 0 To info.Length - 1
.Cells(i + 1, 1) = info(i).innerText
.Cells(i + 1, 2) = prices(i).innerText
Next i
In this part you write everything to the first and second column. This should be updated everytime you move to a new link.
So maybe add a 'colcount' variable which updates just before you move to the next link?
something like this:
Infocol = 1
Pricecol = 2
For Each link In links
....
.Cells(i + 1, Infocol) = info(i).innerText
.Cells(i + 1, Priceol) = prices(i).innerText
....
Infocol = infocol + 2
Pricecol = Pricecol + 2
Next link
You go +2 so you don't overwrite your price column with your new info.

Using getElementById VBA webscraping

The following macro works fine in extracting data from webpages in a range using getElementsByClassName but I need it changed to getElementsById since the class name is not unique. Any help here will be appreciated
Dim oHtml As HTMLDocument
Dim oElement As Object
Dim url As String
Set oHtml = New HTMLDocument
Application.ScreenUpdating = False
Sheets("ASIN").Range("A1:A100").ClearContents
url = Sheets("ASIN").Range("L2").Value
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", url, False
.send
oHtml.body.innerHTML = .responseText
End With
Set oElement = oHtml.getElementsByClassName("a-color-price")
For i = 0 To oElement.Length - 1
Sheets("ASIN").Range("A" & (i + 1)) = oElement(i).innerText
Next i
Application.ScreenUpdating = True
Any help to use getElementById will be appreciated.
Webpage inspection screenshot attached
I'm not sure what URL you are point to, but I see some TR elements in the screen shot, as well as some TD elements. See the sample code below, and try to convert that to your specific use case.
Sub Dow_HistoricalData()
Dim xmlHttp As Object
Dim TR_col As Object, Tr As Object
Dim TD_col As Object, Td As Object
Dim row As Long, col As Long
ThisSheet = ActiveSheet.Name
Range("A2").Select
Do Until ActiveCell.Value = ""
Symbol = ActiveCell.Value
Sheets(ThisSheet).Select
Sheets.Add
Set xmlHttp = CreateObject("MSXML2.XMLHTTP.6.0")
myURL = "https://www.fxstreet.com/economic-calendar"
xmlHttp.Open "GET", myURL, False
xmlHttp.setRequestHeader "Content-Type", "text/xml"
xmlHttp.send
Dim html As Object
Set html = CreateObject("htmlfile")
html.body.innerHTML = xmlHttp.responseText
Dim tbl As Object
Set tbl = html.getElementById("fxst-calendartable")
row = 1
col = 1
Set TR_col = html.getElementsByTagName("TR")
For Each Tr In TR_col
Set TD_col = Tr.getElementsByTagName("TD")
For Each Td In TD_col
Cells(row, col) = Td.innerText
col = col + 1
Next
col = 1
row = row + 1
Next
Sheets(ActiveSheet.Name).Name = Symbol
Sheets(ThisSheet).Select
ActiveCell.Offset(1, 0).Select
Loop
End Sub

VBA copy html table data to excel worksheet

I need a VBA script that can extract local html table data to an Excel worksheet. I have some code (found it somewhere on the web) that works by using a URL link, but what I want is to be able to do it using my locally stored html file. The error is I get is 'app defined or object defined error'.
Sub HTML_Table_To_Excel()
Dim htm As Object
Dim Tr As Object
Dim Td As Object
Dim Tab1 As Object
'Replace the URL of the webpage that you want to download
Web_URL = "http://espn.go.com/nba/"
'Create HTMLFile Object
Set HTML_Content = CreateObject("htmlfile")
'Get the WebPage Content to HTMLFile Object
With CreateObject("msxml2.xmlhttp")
.Open "GET", Web_URL, False
.send
HTML_Content.body.innerHTML = .responseText 'this is the highlighted part for the error
End With
Column_Num_To_Start = 1
iRow = 2
iCol = Column_Num_To_Start
iTable = 0
'Loop Through Each Table and Download it to Excel in Proper Format
For Each Tab1 In HTML_Content.getElementsByTagName("table")
With HTML_Content.getElementsByTagName("table")(iTable)
For Each Tr In .Rows
For Each Td In Tr.Cells
Sheets(1).Cells(iRow, iCol).Select
Sheets(1).Cells(iRow, iCol) = Td.innerText
iCol = iCol + 1
Next Td
iCol = Column_Num_To_Start
iRow = iRow + 1
Next Tr
End With
iTable = iTable + 1
iCol = Column_Num_To_Start
iRow = iRow + 1
Next Tab1
MsgBox "Process Completed"
End Sub
I had the same problem and to solve it I used the original code of the question, but instead of downloading the html, I opened the html as a text file and the result was passed to the object HTML_Content.body.innerHtml the rest of the code is same.
Sub HTML_Table_To_Excel()
Dim htm As Object
Dim Tr As Object
Dim Td As Object
Dim Tab1 As Object
Dim file as String
'Replace the file path with your own
file = "c:\your_File.html"
'Determine the next file number available for use by the FileOpen function
TextFile = FreeFile
'Open the text file
Open file For Input As TextFile
'Create HTMLFile Object
Set HTML_Content = CreateObject("htmlfile")
HTML_Content.body.innerHtml = Input(LOF(TextFile), TextFile)
Column_Num_To_Start = 1
iRow = 2
iCol = Column_Num_To_Start
iTable = 0
'Loop Through Each Table and Download it to Excel in Proper Format
For Each Tab1 In HTML_Content.getElementsByTagName("table")
With HTML_Content.getElementsByTagName("table")(iTable)
For Each Tr In .Rows
For Each Td In Tr.Cells
Sheets(1).Cells(iRow, iCol).Select
Sheets(1).Cells(iRow, iCol) = Td.innerText
iCol = iCol + 1
Next Td
iCol = Column_Num_To_Start
iRow = iRow + 1
Next Tr
End With
iTable = iTable + 1
iCol = Column_Num_To_Start
iRow = iRow + 1
Next Tab1
MsgBox "Process Completed"
End Sub
Not sure if i've followed the conventions, but i somehow managed to get an HTML table exported to excel successfully. Here's my vb script. Any optimizations/corrections are welcome! Thanks.
Sub Export()
rowsLength =document.all.yourHTMLTableId.rows.length
cellLength= (document.all.yourHTMLTableId.Cells.length/rowsLength) 'Because i dont know how to get no.of cells in a row,so used a simple division
Set crr = CreateObject("WScript.Shell")
fileNm= "Export"
dir= crr.CurrentDirectory&"\"&fileNm&".xlsx"
Set objExcel = CreateObject("Excel.Application")
Set objWorkbook = objExcel.Workbooks.Add()
Set objWorksheet= objWorkbook.Worksheets(1)
i=0
j=0
do until i=rowsLength
do until j=cellLength
objWorksheet.cells(i+1,j+1).value = document.all.yourHTMLTableId.rows(i).cells(j).innerHTML
msgbox document.all.yourHTMLTableId.rows(i).cells(j).innerHTML
j= j+1
Loop
j=0
i=i+1
Loop
objWorkbook.SaveAs(dir)
objWorkbook.close
objExcel.Quit
Set objExcel = Nothing
End Sub

Failed to use VBA capturing the web table

I would like to capture the table showing here
But the number in the web table shows up to be "-" in my Excel file.
Here is the VBA scripts that I wrote for capturing the table in the middle:
Sub data()
Dim xmlHttp As Object
Dim TR_col As Object, TR As Object
Dim TD_col As Object, TD As Object
Dim row As Long, col As Long
Set xmlHttp = CreateObject("MSXML2.XMLHTTP.6.0")
xmlHttp.Open "GET", "http://mis.twse.com.tw/stock/fibest.jsp?stock=29151&lang=zh_tw", False
xmlHttp.setRequestHeader "Content-Type", "text/html; charset=utf-8"
xmlHttp.send
Dim html As Object
Set html = CreateObject("htmlfile")
html.body.innerHTML = xmlHttp.responseText
Dim tbl As Object
Set tbl = html.getElementById("hor-minimalist-tb")
row = 1
col = 1
Set TR_col = html.getElementsByTagName("TR")
For Each TR In TR_col
Set TD_col = TR.getElementsByTagName("TD")
For Each TD In TD_col
Cells(row, col) = TD.innerText
col = col + 1
Next
col = 1
row = row + 1
Next
End Sub
The table is slow to load. XHR is too fast. Swop to Selenium or Internet Explorer where you can take advantage of waits.
Here is an example of using IE to scrape the table including using a wait to ensure over the scheduled 5s refresh time. I have used the English version of site:
Image on web:
Output from code:
Code:
Option Explicit
Public Sub MakeSelectionGetData()
Dim ie As New InternetExplorer
Const url = "http://mis.twse.com.tw/stock/fibest.jsp?stock=2402&lang=en_us"
Application.ScreenUpdating = False
With ie
.Visible = True
.navigate url
While .Busy Or .readyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 6)
Dim nTable As HTMLTable
Set nTable = .document.getElementById("hor-minimalist-tb")
Dim Headers()
Headers = Array("Best Bid Volume", "Best Bid Price", "Best Ask Price", "Best Ask Volume")
Dim TR As Object, TD As Object, r As Long, c As Long
With ActiveSheet
r = 2
c = 1
Dim TR_col As Object, TD_col As Object
Set TR_col = nTable.getElementsByTagName("TR")
.Range("A1").Resize(1, UBound(Headers) + 1) = Headers
For Each TR In TR_col
Set TD_col = TR.getElementsByTagName("TD")
For Each TD In TD_col
.Cells(r, c) = TD.innerText
c = c + 1
Next
c = 1
r = r + 1
Next
End With
.Quit
End With
Application.ScreenUpdating = True
End Sub

Resources