Excel import data from java table - excel

I am trying to import data from a table generated by java script into Excel 2010 from this webpage: https://spotwx.com/products/grib_index.php?model=nam_awphys&lat=30.26678&lon=-97.76905&tz=America/Chicago&display=table
My code (which I stole from another post and have altered) as follows:
Sub SpotWx_NAM()
Dim xmlHttp As Object
Dim TR_col As Object, TR As Object
Dim TD_col As Object, TD As Object
Dim row As Long, col As Long
Set xmlHttp = CreateObject("MSXML2.XMLHTTP.6.0")
xmlHttp.Open "GET", "https://spotwx.com/products/grib_index.php?model=nam_awphys&lat=55.81035&lon=-122.26822&tz=America/Dawson_Creek&display=table", False
xmlHttp.setRequestHeader "Content-Type", "text/xml"
xmlHttp.send
Dim html As Object
Set html = CreateObject("htmlfile")
html.body.innerHTML = xmlHttp.responseText
Dim tbl As Object
Set tbl = html.getElementById("example")
row = 1
col = 1
Set TR_col = html.getElementsByTagName("TR")
For Each TR In TR_col
Set TD_col = TR.getElementsByTagName("TD")
For Each TD In TD_col
Cells(row, col) = TD.innerText
col = col + 1
Next
col = 1
row = row + 1
Next
End Sub
Thanks in advance!

The data isnt stored in a traditional HTML table so you will have to do some data gymnastics. I would recommend leveraging regex to strip out the data you need and then parse the results.
The below code will get you part of the way there by presenting you each row in an array. I'll leave it to you to 'split' each resulting row accordingly to get each item by iterating through the rowsarray.
you will also need to trim the square brackets from the first and last array items.
Sub SpotWx_NAM()
Dim xmlHttp As Object
Dim TR_col As Object, TR As Object
Dim TD_col As Object, TD As Object
Dim row As Long, col As Long
Set xmlHttp = CreateObject("MSXML2.XMLHTTP.6.0")
xmlHttp.Open "GET", "https://spotwx.com/products/grib_index.php?model=nam_awphys&lat=55.81035&lon=-122.26822&tz=America/Dawson_Creek&display=table", False
xmlHttp.setRequestHeader "Content-Type", "text/xml"
xmlHttp.send
Dim html As Object
Set html = CreateObject("htmlfile")
html.body.innerHTML = xmlHttp.responseText
Dim rowsArr As Variant
rowsArr = Split(extractRows(xmlHttp.responseText), "],[", -1, 0)
For i = 0 To UBound(rowsArr)
Sheets(1).Range("A1").Resize(, UBound(rowsArr)).Offset(i) = Split(rowsArr(i), "','")
Next
End Sub
Function extractRows(ByVal text As String) As Variant
Dim matches As Object
Dim RE As Object
Set RE = CreateObject("vbscript.regexp")
Dim arr() As Variant
Dim result As Variant
RE.Pattern = "\['.*'\]"
RE.Global = True
RE.IgnoreCase = True
Set matches = RE.Execute(text)
extractRows = matches(0)
End Function
This will get your data out into sheet 1, you'll need to clean it up a little. removing a brace from the first & last cell and a ' from the end cells.

Related

praise data from an attribute

want to get value of "data-defaultasin" attribute ,in b,c,d columns and so on from these elements (screenshot given).
product url- https://www.amazon.in/dp/B06XTB2N7P
inspect ss product page worksheet
Sub praseasin()
Dim ASIN
Dim doc As HTMLDocument
Dim htmTable As HTMLTable
Dim wb As Workbook: Set wb = ThisWorkbook
Dim ws As Worksheet: Set ws = wb.Worksheets("Sheet1")
Dim img As HTMLDocument
Dim i, lastRow As Long
lastRow = ws.Range("A" & ws.Rows.Count).End(xlUp).Row
For i = 3 To lastRow
Set doc = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", ws.Cells(i, 1), True
.send
Do: DoEvents: Loop Until .readyState = 4
Dim resp
resp = .responseText
.abort
End With
' On Error Resume Next
doc.body.innerHTML = resp
Set ASIN = doc.getelementsbyclassname("swatchAvailable")
' On Error Resume Next
r.Offset(0, 1).Value = li.getAttribute("data-defaultasin")
Next
End Sub
Try the following to get the asins from the different colors and put them in different columns. It's a demo script in which I've used one link multiple times. Modify it to suit your need. Thanks.
Sub FetchAsin()
Dim Http As New XMLHTTP60
Dim Html As New HTMLDocument, I&, R&, C&
Dim linkList As Variant, link As Variant
linkList = Array( _
"https://www.amazon.in/dp/B06XTB2N7P", _
"https://www.amazon.in/dp/B06XTB2N7P", _
"https://www.amazon.in/dp/B06XTB2N7P" _
)
For Each link In linkList
With Http
.Open "GET", link, False
.send
Html.body.innerHTML = .responseText
End With
With Html.querySelectorAll("[class^='swatch'][data-defaultAsin]")
For I = 0 To .Length - 1
C = C + 1: Cells(R + 1, C) = .Item(I).getAttribute("data-defaultAsin")
Next I
C = 0: R = R + 1
End With
Next link
End Sub
Reference to add to the library:
Microsoft XML, v6.0

Export Web Text to excel vbs

Hi all i have been searching the web like mad and came up with the following vba code in excel the problem is it exports the table but not the text within the table it seems to only recover all text in the table that you can click on but not any set text.
If it will be easier for anyone to assist with their own code to do what i need it to help will be highly appreciated.
Sub my_Procedure()
Dim htm As Object
Dim Tr As Object
Dim Td As Object
Dim Tab1 As Object
Dim http As Object, html As New HTMLDocument
Dim paras As Object, para As Object, i As Long
Set http = CreateObject("MSXML2.XMLHTTP")
Set http = CreateObject("MSXML2.XMLHTTP")
http.Open "GET", "https://www.plus500.co.uk/?id=113082&tags=g_sr%2B1711614451_cpi%2BUKSearchBrand_cp%2B70887725030_agi%2BPlus500Core_agn%2Bplus%20500_ks%2Bkwd-842162906_tid%2Be_mt%2Bc_de%2Bg_nt%2B_ext%2B1006989_loc%2BUURL&%D7%90&gclid=CjwKCAjw1cX0BRBmEiwAy9tKHqylty6Mz9TbIA5VzgOiqxOcWg7biR652Hg9tksIR97hlUuAHLZilhoCTq0QAvD_BwE", False
http.send
html.body.innerHTML = http.responseText
Set paras = html.getElementsByTagName("Tbody")
i = 1
For Each para In paras
ThisWorkbook.Worksheets("Sheet3").Cells(i, 1).Value = para.innerText
i = i + 1
Next
Dim Doc As HTMLDocument
'Replace the URL of the webpage that you want to download
Web_URL = VBA.Trim(Sheets(1).Cells(1, 1))
'Create HTMLFile Object
Set HTML_Content = CreateObject("htmlfile")
Dim tdd As String
'Get the WebPage Content to HTMLFile Object
With CreateObject("msxml2.xmlhttp")
.Open "GET", Web_URL, False
.send
HTML_Content.body.innerHTML = http.responseText
End With
Column_Num_To_Start = 1
iRow = 2
iCol = Column_Num_To_Start
iTable = 0
'Loop Through Each Table and Download it to Excel in Proper Format
For Each Tab1 In HTML_Content.getElementsByTagName("tbody")
With HTML_Content.getElementsByTagName("tbody")(iTable)
For Each Tr In .Rows
For Each Td In Tr.Cells
Sheets(1).Cells(iRow, iCol).Select
Sheets(1).Cells(iRow, iCol) = Td.innerText
iCol = iCol + 1
Next Td
iCol = Column_Num_To_Start
iRow = iRow + 1
Next Tr
End With
iTable = iTable + 1
iCol = Column_Num_To_Start
iRow = iRow + 1
Next Tab1
MsgBox "Process Completed"
Call StartTimer
End Sub

Retrieving data ( Balance sheets not stocks) from yahoo finance with VBA

I try for few days to get Balance sheet data from yahoo finance with VBA instruction like ".getElementsByTagName" for companies but it's not working... nothing occuring, I am stuck...
Any ideas ? see code below :
Sub Get_Data()
Dim url As String
Dim http As Object
Dim TR_col As Object, Tr As Object
Dim TD_col As Object, Td As Object
Dim row As Long, col As Long
Dim html As Object
Dim tbl As Object
'
On Error Resume Next
Set http = CreateObject("MSXML2.ServerXMLHTTP.6.0") 'CreateObject("MSXML2.ServerXMLHTTP.6.0")
url = "https//finance.yahoo.com/quote/AAPL/balance-sheet?p=AAPL"
http.Open "POST", url, False
http.setRequestHeader "Content-Type", "text/xml"
http.Send
MsgBox http.responseText
Set html = CreateObject("htmlfile")
html.body.innerHTML = http.responseText
Set tbl = html.getElementById("Pos(r)")
row = 1
col = 1
Set TR_col = html.getElementsByTagName("TR")
For Each Tr In TR_col
Set TD_col = Tr.getElementsByTagName("TD")
For Each Td In TD_col
Cells(row, col) = Td.innerText
col = col + 1
Next
col = 1
row = row + 1
Next
End Sub
I think this will do the trick. I had to add ':' to the URL, changed 'POST' to 'GET' and set 2 request headers, the first is a cookie, which I think will expire in a year's time, this was found out by running the following:
Cells(1, 1) = http.responseText
Dim responseHeader As String
responseHeader = http.getAllResponseHeaders
Cells(2, 1) = responseHeader
and reading the setcookie line. I then simply hardcoded that into the request header. Running code is as follows:
Sub Get_Data()
Dim url As String
Dim http As Object
Dim TR_col As Object, Tr As Object
Dim TD_col As Object, Td As Object
Dim row As Long, col As Long
Dim html As Object
Dim tbl As Object
'On Error Resume Next
Set http = CreateObject("MSXML2.ServerXMLHTTP.6.0")
'CreateObject("MSXML2.ServerXMLHTTP.6.0")
url = "https://uk.finance.yahoo.com/quote/AAPL/balance-sheet?p=aapl"
http.Open "GET", url, False
http.setRequestHeader "Cookie", "B=22guonpesgnqg&b=3&s=5p"
http.setRequestHeader "Content-Type", "text/xml"
http.Send
Cells(1, 1) = http.responseText
'Dim responseHeader As String
'responseHeader = http.getAllResponseHeaders
'Cells(2, 1) = responseHeader
Set html = CreateObject("htmlfile")
html.body.innerHTML = http.responseText
Set tbl = html.getElementById("Pos(r)")
row = 1
col = 1
Set TR_col = html.getElementsByTagName("TR")
For Each Tr In TR_col
Set TD_col = Tr.getElementsByTagName("TD")
For Each Td In TD_col
Cells(row, col) = Td.innerText
col = col + 1
Next
col = 1
row = row + 1
Next
End Sub
Hopefully that is what you wanted, the returned data looks similar to what my browser shows.

Using getElementById VBA webscraping

The following macro works fine in extracting data from webpages in a range using getElementsByClassName but I need it changed to getElementsById since the class name is not unique. Any help here will be appreciated
Dim oHtml As HTMLDocument
Dim oElement As Object
Dim url As String
Set oHtml = New HTMLDocument
Application.ScreenUpdating = False
Sheets("ASIN").Range("A1:A100").ClearContents
url = Sheets("ASIN").Range("L2").Value
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", url, False
.send
oHtml.body.innerHTML = .responseText
End With
Set oElement = oHtml.getElementsByClassName("a-color-price")
For i = 0 To oElement.Length - 1
Sheets("ASIN").Range("A" & (i + 1)) = oElement(i).innerText
Next i
Application.ScreenUpdating = True
Any help to use getElementById will be appreciated.
Webpage inspection screenshot attached
I'm not sure what URL you are point to, but I see some TR elements in the screen shot, as well as some TD elements. See the sample code below, and try to convert that to your specific use case.
Sub Dow_HistoricalData()
Dim xmlHttp As Object
Dim TR_col As Object, Tr As Object
Dim TD_col As Object, Td As Object
Dim row As Long, col As Long
ThisSheet = ActiveSheet.Name
Range("A2").Select
Do Until ActiveCell.Value = ""
Symbol = ActiveCell.Value
Sheets(ThisSheet).Select
Sheets.Add
Set xmlHttp = CreateObject("MSXML2.XMLHTTP.6.0")
myURL = "https://www.fxstreet.com/economic-calendar"
xmlHttp.Open "GET", myURL, False
xmlHttp.setRequestHeader "Content-Type", "text/xml"
xmlHttp.send
Dim html As Object
Set html = CreateObject("htmlfile")
html.body.innerHTML = xmlHttp.responseText
Dim tbl As Object
Set tbl = html.getElementById("fxst-calendartable")
row = 1
col = 1
Set TR_col = html.getElementsByTagName("TR")
For Each Tr In TR_col
Set TD_col = Tr.getElementsByTagName("TD")
For Each Td In TD_col
Cells(row, col) = Td.innerText
col = col + 1
Next
col = 1
row = row + 1
Next
Sheets(ActiveSheet.Name).Name = Symbol
Sheets(ThisSheet).Select
ActiveCell.Offset(1, 0).Select
Loop
End Sub

Failed to use VBA capturing the web table

I would like to capture the table showing here
But the number in the web table shows up to be "-" in my Excel file.
Here is the VBA scripts that I wrote for capturing the table in the middle:
Sub data()
Dim xmlHttp As Object
Dim TR_col As Object, TR As Object
Dim TD_col As Object, TD As Object
Dim row As Long, col As Long
Set xmlHttp = CreateObject("MSXML2.XMLHTTP.6.0")
xmlHttp.Open "GET", "http://mis.twse.com.tw/stock/fibest.jsp?stock=29151&lang=zh_tw", False
xmlHttp.setRequestHeader "Content-Type", "text/html; charset=utf-8"
xmlHttp.send
Dim html As Object
Set html = CreateObject("htmlfile")
html.body.innerHTML = xmlHttp.responseText
Dim tbl As Object
Set tbl = html.getElementById("hor-minimalist-tb")
row = 1
col = 1
Set TR_col = html.getElementsByTagName("TR")
For Each TR In TR_col
Set TD_col = TR.getElementsByTagName("TD")
For Each TD In TD_col
Cells(row, col) = TD.innerText
col = col + 1
Next
col = 1
row = row + 1
Next
End Sub
The table is slow to load. XHR is too fast. Swop to Selenium or Internet Explorer where you can take advantage of waits.
Here is an example of using IE to scrape the table including using a wait to ensure over the scheduled 5s refresh time. I have used the English version of site:
Image on web:
Output from code:
Code:
Option Explicit
Public Sub MakeSelectionGetData()
Dim ie As New InternetExplorer
Const url = "http://mis.twse.com.tw/stock/fibest.jsp?stock=2402&lang=en_us"
Application.ScreenUpdating = False
With ie
.Visible = True
.navigate url
While .Busy Or .readyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 6)
Dim nTable As HTMLTable
Set nTable = .document.getElementById("hor-minimalist-tb")
Dim Headers()
Headers = Array("Best Bid Volume", "Best Bid Price", "Best Ask Price", "Best Ask Volume")
Dim TR As Object, TD As Object, r As Long, c As Long
With ActiveSheet
r = 2
c = 1
Dim TR_col As Object, TD_col As Object
Set TR_col = nTable.getElementsByTagName("TR")
.Range("A1").Resize(1, UBound(Headers) + 1) = Headers
For Each TR In TR_col
Set TD_col = TR.getElementsByTagName("TD")
For Each TD In TD_col
.Cells(r, c) = TD.innerText
c = c + 1
Next
c = 1
r = r + 1
Next
End With
.Quit
End With
Application.ScreenUpdating = True
End Sub

Resources