Concatenate referenced URL into XML HTTP Request - excel

The following snippet of code sends a XML request to the following site
Sub GetContents()
Dim XMLReq As New MSXML2.XMLHTTP60
XMLReq.Open "Get", "https://echa.europa.eu/brief-profile/-/briefprofile/100.028.723", False
XMLReq.send
End Sub
I have another Sub routine GetURL() which prints out the desired URL in this case: https://echa.europa.eu/brief-profile/-/briefprofile/100.028.723
How can I essentially concatenate the output of GetURL() into the BstrUrl? i.e.
XMLReq.Open "Get", "x", False where x is the output of GetURL()
Despite various attempts the syntax is not accepted as a URL.

Assuming you are combining from your earlier question then you need to ensure you write a function which returns the url (as Tim Williams has pointed out). I would expand upon this, in that I think you would need to consider adding a test to ensure both the request succeeded, there were results, and to pass the searchKeyWord as an argument to make your function more reusable. Along the same lines, you could pass the xmlhttp object into the function, so as to avoid continually creating and destroying them.
Avoid auto-instantiation, as you can get unexpected results, and Hungarian style notation. Personally, I also avoid those type characters, as they are harder to read.
vbNullString will offer faster assignment than = "".
I would also use a shorter, faster, and more reliable css pattern to retrieve the url, based on classes and a parent child relationship of two elements.
Public Sub GetContents()
Dim searchKeyWord As String, xmlReq As MSXML2.XMLHTTP60, html As MSHTML.HTMLDocument, url As String
searchKeyWord = "Acetone"
Set xmlReq = New MSXML2.XMLHTTP60
url = GetUrl(searchKeyWord, xmlReq)
Set html = New MSHTML.HTMLDocument
If url <> "N/A" Then
With xmlReq
.Open "GET", url, False
.send
If .Status = 200 Then
html.body.innerHTML = .responseText
Debug.Print html.querySelector("title").innerText
End If
End With
End If
End Sub
Public Function GetUrl(ByVal searchKeyWord As String, ByVal http As MSXML2.XMLHTTP60) As String
Const url = "https://echa.europa.eu/search-for-chemicals?p_auth=5ayUnMyz&p_p_id=disssimplesearch_WAR_disssearchportlet&p_p_lifecycle=1&p_p_state=normal&p_p_col_id=_118_INSTANCE_UFgbrDo05Elj__column-1&p_p_col_count=1&_disssimplesearch_WAR_disssearchportlet_javax.portlet.action=doSearchAction&_disssimplesearch_WAR_disssearchportlet_backURL=https%3A%2F%2Fecha.europa.eu%2Finformation-on-chemicals%3Fp_p_id%3Ddisssimplesearchhomepage_WAR_disssearchportlet%26p_p_lifecycle%3D0%26p_p_state%3Dnormal%26p_p_mode%3Dview%26p_p_col_id%3D_118_INSTANCE_UFgbrDo05Elj__column-1%26p_p_col_count%3D1%26_disssimplesearchhomepage_WAR_disssearchportlet_sessionCriteriaId%3D"
Dim html As MSHTML.HTMLDocument, dict As Object, i As Long, r As Long
Dim dictKey As Variant, payload$, ws As Worksheet
Set html = New MSHTML.HTMLDocument
Set dict = CreateObject("Scripting.Dictionary")
Set ws = ThisWorkbook.Worksheets("Sheet1")
dict("_disssimplesearchhomepage_WAR_disssearchportlet_formDate") = "1621017052777" 'timestamp
dict("_disssimplesearch_WAR_disssearchportlet_searchOccurred") = "true"
dict("_disssimplesearch_WAR_disssearchportlet_sskeywordKey") = searchKeyWord
dict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
dict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
payload = vbNullString
For Each dictKey In dict
payload = IIf(Len(dictKey) = 0, WorksheetFunction.EncodeURL(dictKey) & "=" & WorksheetFunction.EncodeURL(dict(dictKey)), _
payload & "&" & WorksheetFunction.EncodeURL(dictKey) & "=" & WorksheetFunction.EncodeURL(dict(dictKey)))
Next dictKey
With http
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send (payload)
If .Status = 200 Then
html.body.innerHTML = .responseText
Else
GetUrl = "N/A"
Exit Function
End If
End With
Dim result As Boolean
result = html.querySelectorAll(".lfr-search-container .substanceNameLink").Length > 0
GetUrl = IIf(result, html.querySelector(".lfr-search-container .substanceNameLink").href, "N/A")
End Function

If GetURL is a function returning a string then this should work:
Sub GetContents()
Dim XMLReq As New MSXML2.XMLHTTP60
Dim url
url = GetURL()
XMLReq.Open "Get", url, False
XMLReq.send
End Sub

Related

(XMLHTTP60) no longer returning required data

Help gratefully received on this one. I have some VBA running in Excel that inspects a series of webpages displaying betting odds for football matches and puts the odds into my spreadsheet. It has been working perfectly for months and has stopped working in the last few weeks. Here is a simplified version of the code I'm using:
Sub TestImport()
Dim http As New MSXML2.XMLHTTP60
Dim html As New MSHTML.HTMLDocument
Dim htmlEle1 As MSHTML.IHTMLElement
Dim columncounter As Integer
Dim rowccounter As Integer
Dim targetwebpage As String
Dim ColumnHeader As Variant
On Error GoTo ErrorStop
trowNum = 1
targetwebpage = "https://www.oddschecker.com/football/english/premier-league"
With http
.Open "get", targetwebpage, False
.send
End With
Set table_data = html.getElementsByTagName("tr")
If table_data.Length = 0 Then GoTo SkipLeague
For Each trow In table_data
For Each tcell In trow.Children
If tcell.innerText <> "TIP" Then 'Ignore this
tcellNum = tcellNum + 1
Cells(trowNum, tcellNum) = tcell.innerText
End If
Next tcell
Cells(trowNum, 1) = Worksheets("Leagues").Cells(j, 1)
trowNum = trowNum + 1
tcellNum = 1
Next trow
SkipLeague:
ErrorStop:
End Sub
No data gets returned because [table_data] is always null. It's always null because there are no tags in my variable, [html]. Instead, [html] seems to be simply this:
"<HEAD></HEAD>
<BODY>
<P> </P></BODY>"
Why would [html] return this value when the actual webpage (https://www.oddschecker.com/football/english/premier-league) is much more complex when displayed in my browser? And why has this problem only started in the last few weeks?
I'd be grateful for any help on this.
I did a quick test and had no issue. Some page, like Google require the User-Agent to be sent, but not the oddschecker page.
Sub TestURL()
Debug.Print GetResult("https://www.oddschecker.com/football/english/premier-league")
End Sub
Function GetResult(url As String) As String
Dim XMLHTTP As Object, ret As String
Set XMLHTTP = CreateObject("MSXML2.ServerXMLHTTP")
XMLHTTP.Open "GET", url, False
XMLHTTP.setRequestHeader "Content-Type", "text/xml"
XMLHTTP.setRequestHeader "Cache-Control", "no-cache"
XMLHTTP.setRequestHeader "Pragma", "no-cache"
XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
XMLHTTP.send
ret = XMLHTTP.responseText
GetResult = ret
End Function

Connecting output of module to another and looping this logic for a list of values

I am trying to connect 2 modules in vba such that the output of the first module (geturl) feeds into the other (getdata).
Get Url to look up the dossier URL online for substances entered in column A e.g. Acetone or alternatively the CAS number in column B can be used (see image below). Note: currently only looks up for substance info in A1 or B1.
Public Function GetUrl() As String
Const Url = "https://echa.europa.eu/information-on-chemicals/registered-substances?p_p_id=dissregisteredsubstances_WAR_dissregsubsportlet&p_p_lifecycle=1&p_p_state=normal&p_p_mode=view&_dissregisteredsubstances_WAR_dissregsubsportlet_javax.portlet.action=dissRegisteredSubstancesAction"
Set oHtml = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set MyDict = CreateObject("Scripting.Dictionary")
SubstanceName = Cells(1, 1)
CASNumber = Cells(1, 2)
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_name") = SubstanceName
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_cas-number") = CASNumber
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
payload = vbNullString
For Each DictKey In MyDict
payload = IIf(Len(DictKey) = 0, WorksheetFunction.EncodeURL(DictKey) & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey)), _
payload & "&" & WorksheetFunction.EncodeURL(DictKey) & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey)))
Next DictKey
With oHttp
.Open "POST", Url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send (payload)
oHtml.body.innerHTML = .responseText
End With
GetUrl = oHtml.querySelector(".details").getAttribute("href")
Debug.Print oHtml.querySelector(".substanceNameLink ").innerText
Debug.Print oHtml.querySelector(".details").getAttribute("href")
End Function
If run this should return
Acetone https://echa.europa.eu/registration-dossier/-/registered-dossier/15460
Get Data uses the Url from geturl to return "DNEL" values:
Sub GetData()
'Start ECHA Search via XML HTTP Request
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim Route(1 To 3) As String
Route(1) = "sGeneralPopulationHazardViaInhalationRoute"
Route(2) = "sGeneralPopulationHazardViaDermalRoute"
Route(3) = "sGeneralPopulationHazardViaOralRoute"
XMLReq.Open "Get", GetUrl & "/7/1", False
XMLReq.send
If XMLReq.Status <> 200 Then
MsgBox "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Exit Sub
End If
HTMLDoc.body.innerHTML = XMLReq.responseText
'Loops through each element
For c = 1 To UBound(Route, 1)
Set Info = HTMLDoc.getElementById(Route(c))
Debug.Print Info.innerText
Set Info = HTMLDoc.getElementById(Route(c)).NextSibling.NextSibling.NextSibling
Set Data = Info.getElementsByTagName("dd")(0)
Debug.Print Data.innerText
Set Data = Info.getElementsByTagName("dd")(1)
Debug.Print Data.innerText
'Cells(r, c + 2) = Data.innerText
Next c
End Sub
For Acetone in Cell(1,1) This should Return:
Acetone
https://echa.europa.eu/registration-dossier/-/registered-dossier/15460
General Population - Hazard via inhalation route
DNEL (Derived No Effect Level)
200 mg/m³
General Population - Hazard via dermal route
DNEL (Derived No Effect Level)
62 mg/kg bw/day
General Population - Hazard via oral route
DNEL (Derived No Effect Level)
62 mg/kg bw/day
Instead of just relying on Cell A1 however, I wish to have the entire code loop for each cell with a substance in columnA/ColumnB. So in this case the URL for Acetone is found and the corresponding data is then pulled then the same occurs for Oxydipropanol.
Note in this image Substances can be looked up online using either the substance name, CAS number in columnB, or a combination of both.
Trying to connect the two modules, zo far I have only been able to get the geturl module to cycle through for each substance. I have also tried to combine both into 1 module but cant figure out how to correctly nest the for loops.
A quick google search states that you cant nest functions in vba. This makes me wonder if what I'm doing is even the right way to approach this. But I've seen similar things achieved In the past so I'm sure it's possible.
Note: If testing please use the example substances for testing. Using a random chemical say Benzene may result in an error as the tox profile for this substance doesn't exist. I still need to implement handling errors but this can be ignored for now.
I Will update you here with any further progress made, Thanks.
This worked for me:
Sub PopulateExposures()
Dim url, rw As Range
Set rw = Sheets("data").Range("A1:E1") 'first row with inputs
Do While Application.CountA(rw) > 0
url = SubstanceUrl(rw.Cells(1).Value, rw.Cells(2).Value) 'get the URL
rw.Cells(3).Resize(1, 3).Value = ExposureData(url) 'get exposure data (as array) and add to row
Set rw = rw.Offset(1, 0) 'next substance
Loop
End Sub
Public Function SubstanceUrl(SubstanceName, CASNumber) As String
Const url = "https://echa.europa.eu/information-on-chemicals/registered-substances?" & _
"p_p_id=dissregisteredsubstances_WAR_dissregsubsportlet&p_p_lifecycle=1&" & _
"p_p_state=normal&p_p_mode=view&" & _
"__dissregisteredsubstances_WAR_dissregsubsportlet_javax.portlet.action=dissRegisteredSubstancesAction"
Dim oHTML, oHttp, MyDict, payload, DictKey, sep
Set oHTML = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set MyDict = CreateObject("Scripting.Dictionary")
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_name") = SubstanceName
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_cas-number") = CASNumber
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
payload = ""
For Each DictKey In MyDict
payload = payload & sep & DictKey & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey))
sep = "&"
Next DictKey
With oHttp
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send payload
oHTML.body.innerHTML = .responseText
End With
SubstanceUrl = oHTML.querySelector(".details").getAttribute("href")
End Function
Function ExposureData(urlToGet)
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As HTMLDocument, dds
Dim Route(1 To 3) As String, Results(1 To 3) As String, c, Info, Data
Route(1) = "sGeneralPopulationHazardViaInhalationRoute"
Route(2) = "sGeneralPopulationHazardViaDermalRoute"
Route(3) = "sGeneralPopulationHazardViaOralRoute"
XMLReq.Open "Get", urlToGet & "/7/1", False
XMLReq.send
If XMLReq.Status <> 200 Then
Results(1) = "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Else
Set HTMLDoc = New HTMLDocument
HTMLDoc.body.innerHTML = XMLReq.responseText
For c = 1 To UBound(Route, 1)
Set Info = HTMLDoc.getElementById(Route(c))
If Not Info Is Nothing Then
Set Info = Info.NextSibling.NextSibling.NextSibling
Set dds = Info.getElementsByTagName("dd")
If dds.Length > 1 Then
Results(c) = dds(1).innerText
Else
Results(c) = "hazard unknown"
End If
Else
Results(c) = "no info"
End If
Next c
End If
ExposureData = Results
End Function

How to pull image from Amazon UK

I am trying to download the hi-res pictures from Amazon.co.uk. I tried the code given in the
[thread][1] and I am getting some issues.
the code by #QHarr works well for given Amazon.in website but when I try for Amazon.co.uk the
.querySelector("#landingImage").getAttribute("data-old-hires") returns nothing. here is the code I am testing.
Public Sub GetInfo()
Dim Html As HTMLDocument, results()
Set Html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.amazon.co.uk/dp/product/B01GFJWHZ0", True
.send
Html.body.innerHTML = .responseText
With ThisWorkbook.Worksheets("Sheet1")
.Cells(1, 2) = Html.querySelector("#productTitle").innerText
.Cells(1, 2).Offset(0, 1) = Html.querySelector("#landingImage").getAttribute("data-old-hires")
End With
End With
End Sub
any idea what exactly I am doing wrong?
My guess is the response html is mangled. You can easily regex out that bit from the response string however.
Note that the async argument is also set to False to allow time for data to load. I'm somewhat surprised your code got as far as you said it did. It should also have tripped over on the fact there is no match for that selector in the HTMLDocument.
Public Sub GetInfo()
'tools > references > Microsoft HTML Object Library
Dim re As Object, html As MSHTML.HTMLDocument, xhr As Object, s As String
Set re = CreateObject("VBScript.RegExp")
Set xhr = CreateObject("MSXML2.XMLHTTP")
Set html = New MSHTML.HTMLDocument
re.Pattern = """hiRes"":""(.*?)"""
With xhr
.Open "GET", "https://www.amazon.co.uk/dp/product/B01GFJWHZ0", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responseText
html.body.innerHTML = s
End With
With ThisWorkbook.Worksheets("Sheet1")
.Cells(1, 2) = html.querySelector("#productTitle").innerText
.Cells(1, 3) = re.Execute(s)(0).SubMatches(0)
End With
End Sub
Regex:

How to replace characters in a string in VBA?

I want to change a URL to make it into the API compatible form.
If I have the string: "https://MYSERVER.com/browse/BIT-1234?jql=projectXXXXXX"
I want to change it into: "https://MYSERVER.com/rest/api/latest/search?jql=projectXXXXXX"
I use an InputBox to get the URL and I want to change it to that form and put it back into the script as shown below. How do I this?
Dim response As String
With CreateObject("Microsoft.XMLHTTP")
myURL= Application.InputBox("Enter the URL")
//CHANGE THE URL SOMEHOW
.Open "GET", myURL(changed URL goes here), False, **USERNAME**, **PASSWORD**
.send
response = .responseText
End With
Dim response As String, arr
myURL= Application.InputBox("Enter the URL")
arr = Split(myUrl, "?")
If ubound(arr) = 1 Then
With CreateObject("Microsoft.XMLHTTP")
myUrl = "https://MYSERVER.com/rest/api/latest/search?" & arr(1)
.Open "GET", myURL, False, **USERNAME**, **PASSWORD**
.send
response = .responseText
End With
Else
Msgbox "URL has no querystring!"
End If

Google Translate via VBA setting for accept-encoding

I'm trying to use the google translate API via VBA (works in python so far), and I found that the only difference between the python request and the VBA one is on the header "accept-encoding", the python one uses the "application/gzip" and works, but the VBA one is automatically changed to "gzip,deflate" even if I change it through code. Here is the code:
Function Test_GoogleTranslate()
Dim strTranslate As String
Dim objRequest As Object
Dim strUrl As String
Dim blnAsync As Boolean
Dim strResponse As String
Dim strWeather As String
Dim payload As String
Set objRequest = CreateObject("MSXML2.XMLHTTP")
payload = "target=es&q=something&source=en"
strTranslate = "https://google-translate1.p.rapidapi.com/language/translate/v2"
strTranslate = strTranslate & "?" & payload
With objRequest
.Open "POST", strTranslate, True
.setRequestHeader "host", "google-translate1.p.rapidapi.com"
.setRequestHeader "x-forwarded-port", "443"
.setRequestHeader "x-forwarded-proto", "https"
.setRequestHeader "connection", "keep-alive"
.setRequestHeader "content-type", "application/x-www-form-urlencoded"
.setRequestHeader "accept-encoding", "application/gzip"
.setRequestHeader "x-rapidapi-host", "google-translate1.p.rapidapi.com"
.setRequestHeader "x-rapidapi-key", "856e8ba78dmsh443766612c5a923p14f661jsn72323e803261"
.Send
While objRequest.readyState <> 4
DoEvents
Wend
strResponse = .ResponseText
End With
MsgBox (strResponse)
End Function
When I changed the accept-encoding to "gzip, deflate" in python it crashed, so I figured that might be the problem.
Any help is greatly appreciated
I had some success with Google Translate and Excel VBA using the MSXML2.ServerXMLHTTP object. I note you are using MSXML2.XMLHTTP. The solution appears to work well only setting a User-Agent request header so I did not delve into accept-encoding etc.
The differences between MSXML2.ServerXMLHTTP and MSXML2.XMLHTTP are touched upon in this question which might be useful for you.
Working code using MSXML2.ServerXMLHTTP:
Option Explicit
Sub Test()
Debug.Print Translate("Hello", "en", "fr", True) ' french
Debug.Print Translate("Hello", "en", "de", True) ' german
Debug.Print Translate("Hello", "en", "pt", True) ' portuguese
Debug.Print Translate("Hello", "en", "ru", False) ' russian - use romanised alphabet
Debug.Print Translate("Hello", "en", "ru", True) ' russian - use cyrillic
' ThisWorkbook.Sheets(1).Range("A1").Value = Translate("Hello", "en", "ru", True)
Debug.Print Translate("Hello", "en", "zh-CN", False) ' chinese simplified - use romanised alphabet
Debug.Print Translate("Hello", "en", "zh-CN", True) ' chinese simplified - use chinese script
' ThisWorkbook.Sheets(1).Range("B1").Value = Translate("Hello", "en", "zh-CN", True)
End Sub
Public Function Translate(strInput As String, strFromLanguageCode As String, strToLanguageCode As String, blnTargetAlphabet As Boolean) As String
Dim strURL As String
Dim objHTTP As Object
Dim objHTML As Object
Dim objDivs As Object, objDiv
Dim strTranslatedT0 As String
Dim strTranslatedO1 As String
' send query to web page
strURL = "https://translate.google.com/m?hl=" & strFromLanguageCode & _
"&sl=" & strFromLanguageCode & _
"&tl=" & strToLanguageCode & _
"&ie=UTF-8&prev=_m&q=" & strInput
Set objHTTP = CreateObject("MSXML2.ServerXMLHTTP")
objHTTP.Open "GET", strURL, False
objHTTP.setRequestHeader "User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)"
objHTTP.send ""
' create a html document
Set objHTML = CreateObject("htmlfile")
With objHTML
.Open
.Write objHTTP.responseText
.Close
End With
' o1 has Anglicised translation, t0 as tranlsation in target language
Set objDivs = objHTML.getElementsByTagName("div")
For Each objDiv In objDivs
If objDiv.className = "o1" Then
strTranslatedO1 = objDiv.innerText
End If
If objDiv.className = "t0" Then
strTranslatedT0 = objDiv.innerText
End If
Next objDiv
' choose which to return
If blnTargetAlphabet Then
Translate = strTranslatedT0
Else
Translate = strTranslatedO1
End If
CleanUp:
Set objHTML = Nothing
Set objHTTP = Nothing
End Function
Result:
Bonjour
Hallo
Olá
Privet
??????
Ni hao
??
The VBA immediate window doesn't print Cyrillic or Chinese characters but you can see this feature working by outputing to a cell:
December 2020 update
Looks like this method will no longer work going back maybe to mid November.
Looking at the response
the div class names have changed to something more obscure
there's some esoteric c-wiz elements doing something wonderful...
also, I suspect that some client side script is calling for the actual translation after the document is retrieved
Options: Selenium, Microsoft Translate, free and paid tiers for Google translation APIs ;)

Resources