Website to excel data fetching - excel

Can anyone please help in fetching data from website to excel using VBA, i need property address in excel cell:
Option Explicit
Sub test()
Dim ie As New InternetExplorer
ie.navigate "xyz.org"
ie.Visible = True
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Dim doc As HTMLDocument
Set doc = ie.document
ie.document.all("input-search-field").Value = Range("b1").Value
Dim obj As Object
For Each obj In ie.document.getElementsByTagName("BUTTON")
If obj.className = "btn btn-default search-button" Then
obj.Click
Exit For
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Dim isPropertyAddr As Boolean
isPropertyAddr = ie.document.getElementById("LoanObject").ie.document.getelementbytagname("SPAN")
isPropertyAddr = True
Debug.Print ie.document.getattributes("classname"), ie.document.getattributes("tagname")
MsgBox obj.innerText
dom code

Try something like this
IsAddressFound = False
For Each obj In ie.document.getElementById("LoanObject").getElementsByTagname("SPAN")
If InStr(obj.Innertext, "Property Address") > 0 Then
IsAddressFound = True
End If
If IsAddressFound Then
Debug.Print obj.Innertext 'Your address populates here
IsAddressFound = False
End If
Next
Thanks

Related

Scrape data from multiple url in excel and save it in different row

getting url form sheet1 range a1:a1000 and after exporting saving data too sheet2
ie.navigate Workbooks("1000").Worksheets("sheet1").Range("A6").Value
Set ht = ie.document
Workbooks("1000").Worksheets("sheet2").Range("C6").Value = ht.getElementById("formNo").Value
Workbooks("1000").Worksheets("sheet2").Range("D6").Value = ht.getElementById("fullName").Value
Workbooks("1000").Worksheets("sheet2").Range("E6").Value = ht.getElementById("idNo").Value
I do not what exactly is that you want. But I am providing a code snippet with which you can achieve your task by simply modifying it.
Sub Scrape()
Dim url As String
Dim IE As New InternetExplorer
Dim doc As HTMLDocument
Set IE = New InternetExplorer
IE.Visible = False
For Each cell In Range("A1:A1000")
url = cell.Value
IE.navigate url
Do
DoEvents
Loop Until IE.readyState = READYSTATE_COMPLETE
Set doc = IE.document
Sheet2.Range("A1").Value = doc.getElementsByTagName("span")(0).innerText
Sheet2.Range("A2").Value = doc.getElementsByTagName("span")(1).innerText
Sheet2.Range("A3").Value = doc.getElementsByTagName("span")(2).innerText
Sheet2.Range("A4").Value = doc.getElementsByTagName("span")(3).innerText
Next cell
IE.Quit
End Sub

How can I loop the script 1000 times to get the data in different cells and get URL value from different cells?

How can I loop the script 1000 times to get the data in different cells and get URL value from different cells?
Private Sub CommandButton1_Click()
Dim ie As InternetExplorer
Dim ht As HTMLDocument
Set ie = New InternetExplorer
ie.Visible = True
ie.navigate Workbooks("FINAL-1").Worksheets("sheet1").Range("A2").Value
Do While ie.readyState <> READYSTATE_COMPLETE
Loop
Set ht = ie.document
Workbooks("FINAL-1").Worksheets("sheet1").Range("C").Value = ht.getElementById("addr1").Value
'Workbooks("FINAL-1").Worksheets("sheet1").Range("C2").Value = ht.getElementById("fullName").Value
'Workbooks("FINAL-1").Worksheets("sheet1").Range("d2").Value = ht.getElementById("addr1").Value
Workbooks("FINAL-1").Worksheets("sheet1").Range("D").Value = ht.getElementById("dob").Value
Workbooks("FINAL-1").Worksheets("sheet1").Range("E").Value = ht.getElementById("applyDte").Value
Workbooks("FINAL-1").Worksheets("sheet1").Range("B").Value = ht.getElementById("idNo").Value
'Workbooks("FINAL-1").Worksheets("sheet1").Range("G2").Value = ht.getElementById("genderMtdtId").Value
End Sub
Private Sub Worksheet_SelectionChange(ByVal Target As Range)
End Sub
Set ie = New InternetExplorer
ie.Visible = True
ie.navigate Workbooks("FINAL-1").Worksheets("sheet1").Range("A2").Value
Do While ie.readyState <> READYSTATE_COMPLETE
Loop

How to login a website using VBA and retrieve data to excel?

There are plenty of examples of how to login into a website, like gmail
But for some reason my code only opens the webpage without logging into the target page, using the provided username and password
I've enabled the HTML object library and Microsoft Internet Controls
Any thoughts , as I am fairly new to this object library?
Plus, how do I grab a table to import into excel from a website after I log in?
I tried macro recording but it needs a password to login into the website
An example would be helpful :) Thanks so much!
Dim HTMLDoc As HTMLDocument
Dim MyBrowser As InternetExplorer
Sub MyGmail()
Dim MyHTML_Element As IHTMLElement
Dim MyURL As String
On Error GoTo Err_Clear
MyURL = "https://www.google.com/accounts/Login"
Set MyBrowser = New InternetExplorer
MyBrowser.Silent = True
MyBrowser.Navigate MyURL
MyBrowser.Visible = True
Do
Loop Until MyBrowser.ReadyState = READYSTATE_COMPLETE
Set HTMLDoc = MyBrowser.Document
HTMLDoc.all.Email.Value = "nasterpizza87#gmail.com"
HTMLDoc.all.passwd.Value = "************" 'my password would be here
For Each MyHTML_Element In HTMLDoc.getElementsByTagName(“input”)
If MyHTML_Element.Type = “submit” Then _
MyHTML_Element.Click: Exit For
Next
Err_Clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
End Sub
Sub LoginToSite()
Const cURL = "https://singlepoint.usbank.com/cs70_banking/logon/sbuser"
Const ccompanyID = "YourCustomerIDHere"
Const cj_username = "YourUserIDHere"
Const cj_password = "YourPasswordHere"
Dim IE As InternetExplorer
Dim doc As HTMLDocument
Dim LoginForm As HTMLFormElement
Dim CustomerIDInputBox As HTMLInputElement
Dim UserIDInputBox As HTMLInputElement
Dim PasswordInputBox As HTMLInputElement
Dim objElement As Object
Dim objCollection As Object
Set IE = New InternetExplorer
IE.Visible = True
IE.navigate cURL
'Wait for initial page to load
Do While IE.readyState <> READYSTATE_COMPLETE Or IE.Busy: DoEvents: Loop
Set doc = IE.document
'Get the only form on the page
Set LoginForm = doc.forms(0)
'Get the CustomerID textbox and populate it
'input name="companyID" id="companyID" size="18" value="" type="text"
Set CustomerIDInputBox = doc.getElementById("companyID")
UsernameInputBox.Value = ccompanyID
'Get the UserID textbox and populate it
'input name="j_username" id="j_username" size="18" type="text"
Set UserIDInputBox = doc.getElementById("j_username")
UsernameInputBox.Value = cj_username
'Get the Password textbox and populate it
'input name="j_password" id="j_password" size="18" type="password"
Set PasswordInputBox = doc.getElementById("j_password")
PasswordInputBox.Value = cj_password
'Get the form input button and click it
'input name="submit_logon" size="18" type="image"
Set ElementCol = IE.document.getElementsByName("submit_logon")
ElementCol.Item(0).Click
'Wait for the new page to load
Do While IE.readyState <> READYSTATE_COMPLETE Or IE.Busy: DoEvents: Loop
End Sub
If it's specifically Gmail that you're trying to login to then the below code will work. I use it.
Sub Gmail()
'Set a reference (Visual Basic Editor) > Tools > References) to the following libraries:
' a) Microsoft Internet Controls
' b) Microsoft HTML Object Library
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLdoc As New MSHTML.HTMLDocument
Dim HTMLelement As MSHTML.IHTMLElement
With IE
.Visible = True
.Silent = True 'avoid any pop-up
.navigate "https://www.google.com/accounts/Login"""
Do While .Busy Or .readyState <> READYSTATE_COMPLETE
DoEvents
Loop
End With
Call myTimer
Set HTMLdoc = IE.document
HTMLdoc.all.identifier.Value = "YourUsernameHere"
HTMLdoc.all.identifierNext.Click
With IE
Do While .Busy Or .readyState <> READYSTATE_COMPLETE
DoEvents
Loop
End With
Call myTimer
For Each HTMLelement In HTMLdoc.getElementsByName("password")
If HTMLelement.getAttribute("type") = "password" Then
HTMLelement.Value = "YourPasswordHere"
Exit For
End If
Next HTMLelement
HTMLdoc.all.passwordNext.Click
Set IE = Nothing
Set HTMLdoc = Nothing
Set HTMLelement = Nothing
End Sub
Private Sub myTimer()
Dim timerStart As Single
Const pauseTIME As Integer = 1 'second
timerStart = Timer
Do Until Timer - timerStart > pauseTIME
DoEvents
Loop
End Sub

IE.Document.getElementsByName("name").Value = "Value" not working

When I try and do this, it throws a number of errors at me depending on other bits of my code, such as the method is not associated with that object or an unspecified error.
I am trying to go down a list of Excel cells with strings and use a web form to search for those strings.
My code currently looks like this:
Sub YSF_automation()
Application.ScreenUpdating = False
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.Navigate "URL"
While IE.Busy
DoEvents
Wend
IE.Document.getElementsByName("ElementName")(0).Value = "test"
End Sub
Try the below
Option Explicit
Dim IE as Object
Sub YSF_automation()
Application.ScreenUpdating = False
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.Navigate "URL"
While IE.Busy
DoEvents
Wend
set document = IE.document
with document
.getElementsByName("ElementName")(0).value="test"
end with
End Sub

Excel VBA code reads wrong innerHTML code

I am absolutely new in webscraping and have some minor previous VBA knowledge.
I am trying to make a scraper which enters a site makes a search and then scrapes the details of the search.
I am very annoyed that my scraper can make the search with the given parameters, but after the search is made and the website is loaded, I make a innerHTML read request within VBA and the results are NOT the source code of the new page. So I cannot extract any information because my VBA code does not see the actual webpage html data. Why is that happening? What is the source code that my VBA extracts?
Thank you very much for your help in advance!
Public Sub my_scraper()
Dim my_data1, my_data2 As String
Dim my_Coll As String
i = 1
my_data1 = ActiveSheet.Cells(1, 1).Value
my_data2 = ActiveSheet.Cells(1, 2).Value
my_Coll = profession_hu_scraper(my_data1, my_data2)
Cells(2, 2).Value = my_Coll
End Sub
Public Function profession_hu_scraper(ByVal my_data1 As String, ByVal my_data2 As String) As String
Dim objIE As InternetExplorer
Dim html As HTMLDocument
Dim Link As Object
Dim ElementCol As Object
Dim erow As Long
'Dim all_inp_el As Object
'Application.ScreenUpdating = False
Set objIE = CreateObject("InternetExplorer.Application")
With objIE
.Visible = True
.Navigate "https://www.profession.hu/"
Do While .ReadyState <> READYSTATE_COMPLETE
Application.StatusBar = "Loading website..."
DoEvents
Loop
Set html = .Document
Range("A16") = html.DocumentElement.innerHTML
.Document.getElementById("header_keyword").Value = my_data1
.Document.getElementById("header_location").Value = my_data2
Set my_classes = .Document.getElementsByClassName("p2_button_inner")
For Each my_class In my_classes
If my_class.getAttribute("value") = "Keresés" Then
Range("c4") = "Clicked"
my_class.Click
i = i + 1
End If
Next my_class
Do While .ReadyState <> READYSTATE_COMPLETE
Application.StatusBar = "Loading website..."
DoEvents
Loop
Set html = .Document
Range("B16") = html.DocumentElement.innerHTML
End With
Set objIE = Nothing
Application.StatusBar = "Finished"
'Application.StatusBar = ""
End Function
After a few days of struggling I finally was able to find out that the code works OK. The problem was that the max character of a cell is 32k so it could not show the whole html code.
If you are a beginner watch out for it!
Updated:
Public Function profession_Hu_Scraper(myData1 As String, my_data2 As String)
Dim ie As New InternetExplorer
Dim doc As HTMLDocument
Dim ws As Worksheet: Set ws = ActiveSheet
ie.navigate "https://www.profession.hu/"
Do While ie.readyState <> READYSTATE_COMPLETE
Loop
Set doc = ie.document
ws.Range("A16") = doc.getElementById(myData1).innerText
ws.Activate("B16") = doc.getElementById(mydata2).innerText
'whatever else you wish to do
End Function

Resources