Download xlsx file from password protected website - excel

I'm trying to download a xlsx file from a password protected website to use in PBI.
On PBI I already tried to use Power Query and the Web Connector. I also tried using Power Automate (online version with HTTP connector, since my desktop version doesn't run on background).
And finally I'm using VBA. But all of them returns a file with the website HTML code, instead of the data which should be in the xlsx.
The code from the last try with VBA (which I found here is bellow (with a generic website URL)):
Sub DownloadFile()
Dim evalURL As String
Dim streamObject As Object
Dim winHttpRequest As Object
Set winHttpRequest = CreateObject("Microsoft.XMLHTTP")
evalURL = "https://generic_website.com/Excel_file.xslx" '
winHttpRequest.Open "GET", evalURL, False, "username", "password"
winHttpRequest.send
If winHttpRequest.Status = 200 Then
Set streamObject = CreateObject("ADODB.Stream")
streamObject.Open
streamObject.Type = 1
streamObject.Write winHttpRequest.responseBody
streamObject.SaveToFile "C:\Users\MyUser\Downloads\Excel_file.xslx", 2 ' 1 = no overwrite, 2 = overwrite
streamObject.Close
End If
End Sub
If I log into the website and open the URL directly in a browser, it downloads the .xlsx file.
Is there any way to do that? I have no idea what's happening, since the same code worked to other people.
UPDATE:
I tried the VBA code bellow, and get the results you can see in the image here.
Sub Login()
Dim response As String
With CreateObject("Microsoft.XMLHTTP")
.Open "GET", "https://generic_website.com/Excel_file.xslx", False, "username", "password"
.send
response = .responseText
End With
MsgBox response
End Sub

I do not know if this works as I cant login, and dont know the file. hopefully it can point you in the right direction.
' VBA Editor->Tools->References
' find and select the following
' Microsoft WinHTTP Services,version 5.1
' Microsoft HTML Object Library
Sub GetFile()
Dim URL As String: URL = "your url"
Dim File As String: File = "your url/file.xslx"
Dim Email As String: Email = "your#address.com"
Dim Password As String: Password = "Your Password"
Dim Cookie As String
Dim Token As String
Dim Message As String
Dim HTML As HTMLDocument: Set HTML = New HTMLDocument
Dim HTTP As WinHttpRequest: Set HTTP = New WinHttpRequest
' you potentially need the csrf_token to post the login form.
' so we get the token, and any cookies sent
HTTP.Open "GET", URL, True
HTTP.Send
HTTP.WaitForResponse
HTML.body.innerHTML = HTTP.ResponseText
Cookie = HTTP.GetResponseHeader("Set-Cookie")
Token = HTML.getElementsByName("csrf_token")(0).Value
Message = "csrf_token=" & Token & "&email=" & URLEncode(Email) & "&senha=" & URLEncode(Password)
HTTP.Open "POST", URL, True
HTTP.SetRequestHeader "Content-type", "application/x-www-form-urlencoded"
HTTP.SetRequestHeader "Cookie", Cookie
HTTP.Send Message
HTTP.WaitForResponse
Cookie = HTTP.GetResponseHeader("Set-Cookie")
' i dont have credentials so dont know what happens after this point and cannot test any further
HTTP.Open "GET", File, True
HTTP.SetRequestHeader "Cookie", Cookie
HTTP.Send
HTTP.WaitForResponse
msgbox HTTP.responseText
' if the runtime error still occurs then not sure what to do
' however,
' if the above message box looks like HTML, it didnt work.
' if it doesn't, it MIGHT have worked, you just need to
' figure out how to save the data to a file
'Dim FileNumber As Integer: FileNumber = FreeFile()
'Open "C:\destination.xslx" For Binary Access Write As #FileNumber
'Put #FileNumber, 1, HTTP.ResponseBody
'Close #FileNumber
End Sub
'https://stackoverflow.com/a/218199/212869
Public Function URLEncode(StringVal As String) As String
Dim StringLen As Long: StringLen = Len(StringVal)
If StringLen > 0 Then
ReDim result(StringLen) As String
Dim i As Long, CharCode As Integer
Dim Char As String
For i = 1 To StringLen
Char = Mid$(StringVal, i, 1)
CharCode = Asc(Char)
Select Case CharCode
Case 97 To 122, 65 To 90, 48 To 57, 45, 46, 95, 126
result(i) = Char
Case 32
result(i) = "+"
Case 0 To 15
result(i) = "%0" & Hex(CharCode)
Case Else
result(i) = "%" & Hex(CharCode)
End Select
Next i
URLEncode = Join(result, "")
End If
End Function

Related

VBA Run Time Error: The requested Header Was Not Found

I am using this code to fetch historical data from Yahoo Finance. This was working fine, until I started receiving this error:
VBA Run Time Error:'2147012746 (80072f76)': The requested Header Was Not Found.
When I debug the error, it seems it s occuring on this line:
cookie = Split(.getResponseHeader("Set-Cookie"), ";")(0)
Any suggestion?
Sub getCookieCrumb(crumb As String, cookie As String, validCookieCrumb As Boolean)
Dim i As Integer
Dim str As String
Dim crumbStartPos As Long
Dim crumbEndPos As Long
Dim objRequest
validCookieCrumb = False
For i = 0 To 5 'ask for a valid crumb 5 times
Set objRequest = CreateObject("WinHttp.WinHttpRequest.5.1")
With objRequest
.Open "GET", "https://finance.yahoo.com/lookup?s=bananas", False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
.send
.waitForResponse (10)
cookie = Split(.getResponseHeader("Set-Cookie"), ";")(0)
'crumbStartPos = InStr(1, .ResponseText, """CrumbStore"":{""crumb"":""",
vbBinaryCompare) + Len("""CrumbStore"":{""crumb"":""")
crumbStartPos = InStrRev(.ResponseText, """crumb"":""") + 9
crumbEndPos = crumbStartPos + 11 'InStr(crumbStartPos, .ResponseText, """",
vbBinaryCompare)
crumb = Mid(.ResponseText, crumbStartPos, crumbEndPos - crumbStartPos)
'Sheets("Parameters").Range("C30") = crumbStartPos
'Sheets("Parameters").Range("C31") = crumbEndPos
'Sheets("Parameters").Range("c32") = crumb
End With
If Len(crumb) = 11 Then 'a valid crumb is 11 characters long
validCookieCrumb = True
Exit For
End If:
'If i = 5 Then ' no valid crumb
' validCookieCrumb = False
'End If
Next i
End Sub
Eliminate the use of cookie and crumb entirely in your Yahoo! Finance data scrape request. So, don't call getCookieCrumb at all.
Then modify your call to call to getYahooFinanceData (or whatever you called your sub) to eliminate the cookie and crumb attribute.
Your tickerURL (or whatever you called it) will also drop the cookie and crumb from the end. Just use ticker, start and end period, interval, and event.

Image url extraction using excel vba

Iam working with excel 2016. I need to extract the link of an image from a website using VBA in excel.
Example, i have a website that shows a product with the link : https://www.hikvision.com/en/products/Turbo-HD-Products/Turbo-HD-Cameras/Value-Series/ds-2ce56d0t-vpir3f/
My image is into a div , like that :
<div class="slide-image" style="background-image: url('/content/dam/hikvision/products/HIKVISION/Turbo_HD_Products/Turbo_HD_Cameras/Value_Series/D0T_Series/DS-2CE56D0T-VPIR3F/images/2CE56D0T-半球11-正视图.png.thumb.1280.1280.png');"></div>
I tried this :
Private Sub btnExtractURL_Click()
Dim sourceString As String
Dim rowIdx As Integer, rowMax As Integer
Dim posFirst As Integer, posLast As Integer, chrLength As Integer
rowMax = Range("A3").End(xlDown).Row
' ---
For rowIdx = 3 To rowMax
If Cells(rowIdx, 1).Value <> "" Then
Cells(rowIdx, 2).Value = ""
sourceString = Cells(rowIdx, 1).Value
posFirst = InStr(sourceString, "http")
posLast = InStr(posFirst, sourceString, """")
chrLength = (posLast - 1) - (posFirst - 1)
Cells(rowIdx, 2).Value = Mid(sourceString, posFirst, chrLength)
End If
Next
' ---
MsgBox "finished"
End Sub
But i have an error with this solution... I tried to extract the text to see another method, and it work's but when i insert the class of that image, it doesn't work !
Sub Get_Web_Data()
Dim request As Object
Dim response As String
Dim html As New HTMLDocument
Dim website As String
Dim price As Variant
' Website to go to.
website = "https://www.hikvision.com/en/products/Turbo-HD-Products/Turbo-HD-Cameras/Value-Series/ds-2ce56d0t-vpir3f/"
' Create the object that will make the webpage request.
Set request = CreateObject("MSXML2.XMLHTTP")
' Where to go and how to go there - probably don't need to change this.
request.Open "GET", website, False
' Get fresh data.
request.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
' Send the request for the webpage.
request.send
' Get the webpage response data into a variable.
response = StrConv(request.responseBody, vbUnicode)
' Put the webpage into an html object to make data references easier.
html.body.innerHTML = response
' Get the price from the specified element on the page.
Name = html.getElementsByClassName("prod_name").Item(0).innerText
' Output the price into a message box.
MsgBox Name
End Sub
Can you give an idea to extract this image and copy the link into my excel ?

URL Issue retrieving data quotes in Yahoo finance

The URL from Yahoo is not working when I try to retrieve quotes from a particular stock. There are several discussion about it, However, it seems nothing is shown regarding VBA macro
Sub Get_Data()
Dim URL As String
Dim Ticker As String
Dim http As New WinHttpRequest
Dim sCotes As String
Dim Lignes
Dim Valeurs
Dim i As Long
Dim j As Long
Dim sLigne As String
Dim sValeur As String
Ticker = Range("Ticker")
URL = "https://query1.finance.yahoo.com/v7/finance/download/TECK?period1=1540456339&period2=1571992339&interval=1d&events=history&crumb=kjOZLFv6ch2"
http.Send
sCotes = http.ResponseText
MsgBox sCotes
Lignes = Split(sCotes, Chr(10))
For i = 1 To UBound(Lignes) 'until the end of the Lignes variable
sLigne = Lignes(i)
Valeurs = Split(sLigne, ",")
For j = 0 To UBound(Valeurs) - 1
Select Case j
Case 0
sValeur = DateSerial(CLng(Left(Valeurs(0), 4)), CLng(Mid(Valeurs(0), 6, 2)), CLng(Right(Valeurs(0), 2)))
Case 5
sValeur = CLng(Valeurs(5))
Case Else
sValeur = CDbl(Replace(Valeurs(j), ".", ","))
End Select
Range("A1").Offset(i, j) = sValeur
Application.StatusBar = Format(Cells(i, 1), "Short Date")
Next
Next
Application.StatusBar = False
End Sub
Execution error at the step Http.send : "This method cannot be called until the Open method has been called"
You would need to use the "open" method before attempting to send and GET is perfectly fine. However, a few things....
There is an easier way. The headers worth adding are the User-Agent and one to mitigate being served cached results. The following shows you how to get a json response from the server for a specified time period and write to Excel. Note: You need to concatenate the ticker into the url. You should probably also test the response code from server to ensure successful.
I use jsonconverter.bas as the json parser to handle response. Download raw code from here and add to standard module called JsonConverter . You then need to go VBE > Tools > References > Add reference to Microsoft Scripting Runtime. Remove the top Attribute line from the copied code.
The values for startDate and endDate need to be passed as unix timestamps. #TimWilliams wrote a nice function, toUnix, for converting Date to Unix here which I use. I have added my own function to manage the conversion in the opposite direction.
This method avoids using any session based identifiers so avoids your issue with invalid cookie crumb.
VBA:
Option Explicit
Public Sub GetYahooHistoricData()
Dim ticker As String, ws As Worksheet, url As String, s As String
Dim startDate As Long, endDate As Long
Set ws = ThisWorkbook.Worksheets("Sheet1")
ticker = ws.Range("ticker") 'Range A1. Above write out range
endDate = toUnix("2019-10-27")
startDate = toUnix("2018-10-25")
url = "https://query1.finance.yahoo.com/v8/finance/chart/" & ticker & "?region=US&lang=en-US&includePrePost=false&interval=1d&period1=" & startDate & "&period2=" & endDate & "&corsDomain=finance.yahoo.com&.tsrc=finance"
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
s = .responseText
End With
Dim json As Object
Set json = JsonConverter.ParseJson(s)("chart")("result")
Dim dates As Object, results(), rows As Object, adjClose As Object, r As Long, headers()
headers = Array("date", "close", "volume", "open", "high", "low", "adjclose")
Set dates = json(1)("timestamp")
ReDim results(1 To dates.Count, 1 To UBound(headers) + 1)
Set rows = json(1)("indicators")("quote")(1)
Set adjClose = json(1)("indicators")("adjclose")(1)("adjclose")
For r = 1 To dates.Count
results(r, 1) = GetDate(dates(r))
results(r, 2) = rows("close")(r)
results(r, 3) = rows("volume")(r)
results(r, 4) = rows("open")(r)
results(r, 5) = rows("high")(r)
results(r, 6) = rows("low")(r)
results(r, 7) = adjClose(r)
Next
With ws
.Cells(3, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(4, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetDate(ByVal t As Variant) As String
GetDate = Format$(t / 86400 + DateValue("1970-01-01"), "yyyy-mm-dd")
End Function
Public Function toUnix(ByVal dt As Variant) As Long
toUnix = DateDiff("s", "1/1/1970", dt)
End Function
Example top 10 rows:
Try replacing this code
URL = "https://query1.finance.yahoo.com/v7/finance/download/TECK?period1=1540456339&period2=1571992339&interval=1d&events=history&crumb=kjOZLFv6ch2"
http.Send
with this code:
set http = Server.Createobject("MSXML2.ServerXMLHTTP.6.0")
URL = "https://query1.finance.yahoo.com/v7/finance/download/TECK?period1=1540456339&period2=1571992339&interval=1d&events=history&crumb=kjOZLFv6ch2"
http.open "POST", URL, False
http.Send
The error is pretty clear: you need to call the open method before the Send method. Also this would be a POST request. You also may need to put these two lines after the open method:
http.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
http.setRequestHeader "Content-Length", 0
The question is about 99% duplicate as the one from here - How can I send an HTTP POST request to a server from Excel using VBA?. Anyway, the mistake is obviously, because the .Send() method simply sends a completely empty Dim http As New WinHttpRequest object.
To make the code work, copy the example from the duplcated question and print the http.ResponseText:
Sub TestMe()
Dim http As Object
Dim url As String
Set http = CreateObject("MSXML2.ServerXMLHTTP.6.0")
url = "https://query1.finance.yahoo.com/v7/finance/download/TECK?period1=1540456339&period2=1571992339&interval=1d&events=history&crumb=kjOZLFv6ch2"
http.Open "POST", url, False
http.Send
MsgBox http.responsetext
End Sub

how to get inner text of html under id?

I am trying to pull data pull inner text under id in excel cell.
This is for XML code.
Sub getelementbyid()
Dim XMLpage As New MSXML2.XMLHTTP60
Dim hdoc As New MSHTML.HTMLDocument
Dim HBEs As MSHTML.IHTMLElementCollection
Dim HBE As MSHTML.IHTMLElement
Dim ha As String
XMLpage.Open "GET","https://www.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuote.jsp?symbol=HAL", False
XMLpage.send
hdoc.body.innerHTML = XMLpage.responseText
ha = hdoc.getelementbyid("open").innerText
Range("K11").Value = ha
Debug.Print ha
End Sub
I expect output value, but it shows --.
Examine the response text. There is a difference in the way the page is rendered in the browser versus what is returned in the ResponseText.
I put the URL into a browser went into dev tools (F12), found the element, and noted the numeric value inside the HTML element.
Then I dumped the response text we're getting in VBA into a cell and copied the entire cell value into Notepad++. If you do that you'll see the initial value inside the #open element is indeed "--".
The real value appears to be getting written into the HTML via JavaScript, which is common practice. There is a JSON object at the top of the page, presumably injected into the document from the back-end of the website upon your request.
So you have to parse the JSON, not the HTML. I've provided code doing just that. Now, there may be a better way to do it, I feel this code is kind of "hacky" but it's getting the job done for your example URL.
Sub getelementbyid()
Dim XMLpage As New MSXML2.XMLHTTP60
Dim hdoc As New MSHTML.HTMLDocument
Dim HBEs As MSHTML.IHTMLElementCollection
Dim HBE As MSHTML.IHTMLElement
Dim ha As String
XMLpage.Open "GET", "https://www.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuote.jsp?symbol=HAL", False
XMLpage.send
'// sample: ,"open":"681.05",
Dim token As String
token = """open"":"""
Dim startPosition As Integer
startPosition = InStr(1, XMLpage.responseText, token)
Dim endPosition As Integer
endPosition = InStr(startPosition, XMLpage.responseText, ",")
Dim prop As String
prop = Mid(XMLpage.responseText, startPosition, endPosition - startPosition)
prop = Replace(prop, """", vbNullString)
prop = Replace(prop, "open:", vbNullString)
Dim val As Double
val = CDbl(prop)
ha = val
Range("K11").Value = ha
Debug.Print ha
End Sub
Here are two methods. 1) Using regex on the return text. Usually frowned upon but perfectly serviceable here. 2) Traditional extract json string and use json parser to parse out value.
The data you want is stored in a json string found both on the webpage and the xmlhtttp response, under the same element:
This means you can treat the html as a string and target just the pattern for the open price using regex as shown below, or parse the xmlhttp request into an html parser, grab the required element, extract its innerText and trim off the whitespace, then pass to a json parser to extract the open price.
In both methods you want to avoid being served cached results so the following header is an important addition to attempt to mitigate for this:
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
There is no need for addtional cell formatting. Full value comes out for both your tickers.
Regex:
It is present in a json string in the response. You can regex it out easily from return text.
Regex explanation:
VBA:
Option Explicit
Public Sub GetClosePrice()
Dim ws As Worksheet, re As Object, p As String, r As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
p = """open"":""(.*?)"""
Set re = CreateObject("VBScript.RegExp")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuote.jsp?symbol=HAL", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
If .Status = 200 Then
r = GetValue(re, .responseText, p)
Else
r = "Failed connection"
End If
End With
ws.Range("K11").Value = r
End Sub
Public Function GetValue(ByVal re As Object, ByVal inputString As String, ByVal pattern As String) As String
With re
.Global = True
.pattern = pattern
If .test(inputString) Then ' returns True if the regex pattern can be matched agaist the provided string
GetValue = .Execute(inputString)(0).submatches(0)
Else
GetValue = "Not found"
End If
End With
End Function
HTML and json parser:
This requires installing code for jsonparser from jsonconverter.bas in a standard module called JsonConverter and then going VBE>Tools>References>Add a reference to Microsoft Scripting Runtime and Microsoft HTML Object Library.
VBA:
Option Explicit
Public Sub GetClosePrice()
Dim ws As Worksheet, re As Object, r As String, json As Object
Set ws = ThisWorkbook.Worksheets("Sheet1")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuote.jsp?symbol=MRF", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
If .Status = 200 Then
Dim html As HTMLDocument
Set html = New HTMLDocument
html.body.innerHTML = .responseText
Set json = JsonConverter.ParseJson(Trim$(html.querySelector("#responseDiv").innerText))
r = json("data")(1)("open")
Else
r = "Failed connection"
End If
End With
ws.Range("K11").Value = r
End Sub

Podio items to excel sheet

I need to have a vba macro that downloads data from my Podio app to an excel sheet. Right now I am running a code I found on Podio community website, which I am pasting below:
Dim winHttpReq As Object
Dim access_token As String
Function ProjectsFromPodio()
Dim result As String
Dim postData As String
Dim myURL As String
Dim token_pos As Long
'Initialisierung
Set winHttpReq = CreateObject("WinHttp.WinHttpRequest.5.1")
myURL = "https://api.podio.com/oauth/token"
postData = "grant_type=app&app_id=ABC&app_token=DEF&client_id=GHI&client_secret=JKL"
If winHttpReq.Open("POST", myURL, False) = S_OK Then
'MsgBox ("Open successfull")
End If
winHttpReq.SetRequestHeader "Content-Type", "application/x-www-form-urlencoded"
winHttpReq.Send (postData)
result = winHttpReq.responseText
token_pos = InStr(result, "access_token")
access_token = Mid(result, token_pos + 15)
token_pos = InStr(access_token, Chr(34)) - 1
access_token = Mid(access_token, 1, token_pos)
OAuthAppAuthorization = result
'MsgBox (OAuthAppAuthorization)
Set winHttpReq = CreateObject("WinHttp.WinHttpRequest.5.1")
myURL = "https://api.podio.com/item/app/XYZ"
winHttpReq.Open "GET", myURL, False
winHttpReq.SetRequestHeader "Authorization", "OAuth2 " & access_token
winHttpReq.Send
result = winHttpReq.responseText
'MsgBox (result)
End Function
This works nice, data is downloaded and I can use it. My question is how to download this data in CSV format, rather than JSON? Is it possible?
If it is not possible, how can I parse it to a sheet in a smart way?
Thanks for any help
You may send the POST request to /item/app/{app_id}/export/{exporter}which creates a batch for exporting the items.
Here you can export the items as xls and xlsx formats.
Your myURL must be like,
myURL = "https://api.podio.com/item/app/XYZ/export/xls"
You will get a batch_id as a result of this request.
Then make a GET call to /batch/{batch_id} to get the status of the batch. You can download the output xls file by making a GET request to /file/{file_id} using the podio_file_id from the batch response.
References
Podio - Export Items
Podio - Get Batch
Podio - Get File

Resources