I have been trying to scrape the web data using EXCEL VBA. Below code paste the date from excel to wen then initiated the submit button to go to the result page. which looks like this:
I want to copy and paste the first and the second line into excel like this:
if any individual gets first dose then put details of first dose, and return empty for the second dose if its empty. If both dose are not available then return empty for both.
I am not able to develop this last thing and struggling since couple of hours to achieve this (copy data and paste into excel)
it would required a ID number and date to submit for the result that i can provide in comments. I have been using following code to accomplish this your help will be much appreciated.
Option Explicit
Sub Newfunction()
Const Url As String = ""
Dim LogData As Worksheet
Set LogData = ThisWorkbook.Worksheets("Sheet1")
Dim IdNumber As String
Dim openDate As Date
IdNumber = LogData.Cells(3, "A").Value
openDate = LogData.Cells(3, "B").Value
Set LogData = Nothing
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Navigate Url
Do While .Busy Or .ReadyState <> 4
DoEvents
Loop
.Visible = True
Dim ieDoc As Object
Set ieDoc = .Document
End With
'Enter the CNIC
Dim IDdata As Object
Set IDdata = ieDoc.getElementById("checkEligibilityForm:cnic")
If Not IDdata Is Nothing Then IDdata.Value = IdNumber
Set IDdata = Nothing
'Enter Date
Dim puttdate As Object
Set puttdate = ieDoc.getElementById("checkEligibilityForm:issueDate_input")
If Not puttdate Is Nothing Then puttdate.Value = Format(openDate, "dd-mm-yyyy")
Set puttdate = Nothing
'Answering the captcha question
'Split the innerText to string array to determine the equation
Dim captchaQns As Object
Set captchaQns = ieDoc.getElementsByClassName("submit__generated")(0)
If Not captchaQns Is Nothing Then
Dim mathEq() As String
mathEq = Split(captchaQns.innerText, " ")
Set captchaQns = Nothing
'mathEq(0) = first number
'mathEq(1) = math operator
'mathEq(2) = second number
If IsNumeric(mathEq(0)) Then
Dim firstNum As Long
firstNum = CLng(mathEq(0))
If IsNumeric(mathEq(2)) Then
Dim secondNum As Long
secondNum = CLng(mathEq(2))
'Select Case statement used here in case you encounter other form of math question (e.g. - X /), expand cases to cater for other scenario
Dim mathAnswer As Long
Select Case mathEq(1)
Case "+": mathAnswer = firstNum + secondNum
End Select
End If
End If
If mathAnswer <> 0 Then
'Enter the answer to the box
Dim captchaAns As Object
Set captchaAns = ieDoc.getElementsByClassName("submit__input")(0)
If Not captchaAns Is Nothing Then captchaAns.Value = mathAnswer
Set captchaAns = Nothing
'Get the submit button element, remove "disabled" attribute to allow clicking
Dim submitBtn As Object
Set submitBtn = ieDoc.getElementsByName("checkEligibilityForm:j_idt79")(0)
submitBtn.removeAttribute "disabled"
submitBtn.Click
Set submitBtn = Nothing
End If
End If
Dim tbls, tbl, trs, tr, tds, td, r, c
Set tbl = ie.Document.getElementsByTagName("table")(0)
Set trs = tbl.getElementsByTagName("tr")
For r = 0 To trs.Length - 1
Set tds = trs(r).getElementsByTagName("tr")
'if no <td> then look for <th>
If tds.Length = 0 Then Set tds = trs(r).getElementsByTagName("td")
For c = 0 To tds.Length - 1
ActiveSheet.Range("C4").Offset(r, c).Value = tds(c).innerText
Next c
Next r
End Sub
Try this:
It will enter the data into Range("C4:F4") for first dose and Range("G4:J4") for second dose.
Sub Newfunction()
Const Url As String = "https://nims.nadra.gov.pk/nims/certificate"
Dim LogData As Worksheet
Set LogData = ThisWorkbook.Worksheets("Sheet1")
Dim lastRow As Long
lastRow = LogData.Range("A" & Rows.Count).End(xlUp).Row
Dim currentRow As Long
For currentRow = 3 to lastRow
Dim IdNumber As String
Dim openDate As Date
IdNumber = LogData.Cells(currentRow, 1).Value
openDate = LogData.Cells(currentRow, 2).Value
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
.navigate Url
Do While .Busy Or .readyState <> 4
DoEvents
Loop
.Visible = True
Dim ieDoc As Object
Set ieDoc = .document
End With
'Enter the CNIC
Dim IDdata As Object
Set IDdata = ieDoc.getElementById("checkEligibilityForm:cnic")
If Not IDdata Is Nothing Then IDdata.Value = IdNumber
Set IDdata = Nothing
'Enter Date
Dim puttdate As Object
Set puttdate = ieDoc.getElementById("checkEligibilityForm:issueDate_input")
If Not puttdate Is Nothing Then puttdate.Value = Format(openDate, "dd-mm-yyyy")
Set puttdate = Nothing
'Answering the captcha question
'Split the innerText to string array to determine the equation
Dim captchaQns As Object
Set captchaQns = ieDoc.getElementsByClassName("submit__generated")(0)
If Not captchaQns Is Nothing Then
Dim mathEq() As String
mathEq = Split(captchaQns.innerText, " ")
Set captchaQns = Nothing
'mathEq(0) = first number
'mathEq(1) = math operator
'mathEq(2) = second number
If IsNumeric(mathEq(0)) Then
Dim firstNum As Long
firstNum = CLng(mathEq(0))
If IsNumeric(mathEq(2)) Then
Dim secondNum As Long
secondNum = CLng(mathEq(2))
'Select Case statement used here in case you encounter other form of math question (e.g. - X /), expand cases to cater for other scenario
Dim mathAnswer As Long
Select Case mathEq(1)
Case "+": mathAnswer = firstNum + secondNum
End Select
End If
End If
Erase mathEq
If mathAnswer <> 0 Then
'Enter the answer to the box
Dim captchaAns As Object
Set captchaAns = ieDoc.getElementsByClassName("submit__input")(0)
If Not captchaAns Is Nothing Then captchaAns.Value = mathAnswer
Set captchaAns = Nothing
'Get the submit button element, remove "disabled" attribute to allow clicking
Dim submitBtn As Object
Set submitBtn = ieDoc.getElementsByName("checkEligibilityForm:j_idt79")(0)
submitBtn.removeAttribute "disabled"
submitBtn.Click
Set submitBtn = Nothing
End If
End If
With ie
Do While .Busy Or .readyState <> 4
DoEvents
Loop
Set ieDoc = .document
End With
Dim resultTbl As Object
Set resultTbl = ieDoc.getElementsByTagName("table")
If resultTbl.Length <> 0 Then
Dim resultRows As Object
Set resultRows = resultTbl(0).getElementsByTagName("tr")
If resultRows.Length > 1 Then
'Get the 2nd row (1st row is header so ignore)
Dim firstDose As Object
Set firstDose = resultRows(1).getElementsByTagName("td")
LogData.Cells(currentRow, 3).Value = firstDose(0).innerText
LogData.Cells(currentRow, 4).Value = firstDose(1).innerText
LogData.Cells(currentRow, 5).Value = firstDose(2).innerText
LogData.Cells(currentRow, 6).Value = firstDose(3).innerText
Set firstDose = Nothing
'If there are totals of 3 TR elements then there are 2nd dose
If resultRows.Length = 3 Then
Dim secondDose As Object
Set secondDose = resultRows(2).getElementsByTagName("td")
LogData.Cells(currentRow, 7).Value = secondDose(0).innerText
LogData.Cells(currentRow, 8).Value = secondDose(1).innerText
LogData.Cells(currentRow, 9).Value = secondDose(2).innerText
LogData.Cells(currentRow, 10).Value = secondDose(3).innerText
Set secondDose = Nothing
End If
'Else
'Do something here if there is only a header row i.e. no dose (assumption)
End If
Set resultRows = Nothing
End If
Set resultTbl = Nothing
Set ieDoc = Nothing
ie.Quit 'Remove if you don't want to close IE
Set ie = Nothing 'Remove if you don't want to close IE
Next currentRow
Set LogData = Nothing
End Sub
Related
I have this website and i have been trying to create an function which collects ID Number from Col"A" and its Date of initiation in Col"B".
then adds the Sum of two boxes into 3rd one like below image.
after that it will go for the result it will be like
If the individual gets first dose then insert in Col"C" "1st Dose Done" If the individual gets second dose then insert in Col"D" "second dose done" if the individual has not taken both or single dose result will be empty.
then go for next until Col"A" used range. I have tried to create function but could not, Your help will be appreciated in this regards.
Option Explicit
Sub Newfunction()
Const Url$ = ""
Dim IdNumber As String, openDate As Date, LogData As Worksheet
Set LogData = ThisWorkbook.Worksheets("Sheet1")
IdNumber = LogData.Cells(2, "A").Value
openDate = LogData.Cells(2, "B").Value
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
.navigate Url
ieBusy ie
.Visible = True
Dim IDdata As Object, puttdate As Object, submitbut As String
Set IDdata = .document.getElementsByName("checkEligibilityForm:cnic")(0)
Set puttdate = .document.getElementsByName("checkEligibilityForm:issueDate_input")(0)
Set submitbut = .document.getElementsByClassName("submit__generated")(0).innerText
IDdata.Value = IdNumber
puttdate.Value = Format(openDate, "dd/mm/yyyy")
submitbut.Value = .document.getElementsByClassName("submit__input")(0)
Debug.Print .document.getElementsByClassName("submit__input")(0)
End With
End Sub
Sub ieBusy(ie As Object)
Do While ie.Busy Or ie.readyState < 4
DoEvents
Loop
End Sub
Try this code below - This should do the following:
Enter the CNIC
Enter the date
Answer the Captcha question
Click the button and the page should load.
As I can't proceed to the result page, I can't guarantee that it will produce the result page so please test it out with a proper data to see if it works. I have also tried to explain what each block of codes is doing in the comment.
Option Explicit
Sub Newfunction()
Const Url As String = "https://nims.nadra.gov.pk/nims/certificate"
Dim LogData As Worksheet
Set LogData = ThisWorkbook.Worksheets("Sheet1")
Dim IdNumber As String
Dim openDate As Date
IdNumber = LogData.Cells(2, "A").Value
openDate = LogData.Cells(2, "B").Value
Set LogData = Nothing
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
.navigate Url
Do While .Busy Or .readyState <> 4
DoEvents
Loop
.Visible = True
Dim ieDoc As Object
Set ieDoc = .Document
End With
'Enter the CNIC
Dim IDdata As Object
Set IDdata = ieDoc.getElementById("checkEligibilityForm:cnic")
If Not IDdata Is Nothing Then IDdata.Value = IdNumber
Set IDdata = Nothing
'Enter Date
Dim puttdate As Object
Set puttdate = ieDoc.getElementById("checkEligibilityForm:issueDate_input")
If Not puttdate Is Nothing Then puttdate.Value = Format(openDate, "dd-mm-yyyy")
Set puttdate = Nothing
'Answering the captcha question
'Split the innerText to string array to determine the equation
Dim captchaQns As Object
Set captchaQns = ieDoc.getElementsByClassName("submit__generated")(0)
If Not captchaQns Is Nothing Then
Dim mathEq() As String
mathEq = Split(captchaQns.innerText, " ")
Set captchaQns = Nothing
'mathEq(0) = first number
'mathEq(1) = math operator
'mathEq(2) = second number
If IsNumeric(mathEq(0)) Then
Dim firstNum As Long
firstNum = CLng(mathEq(0))
If IsNumeric(mathEq(2)) Then
Dim secondNum As Long
secondNum = CLng(mathEq(2))
'Select Case statement used here in case you encounter other form of math question (e.g. - X /), expand cases to cater for other scenario
Dim mathAnswer As Long
Select Case mathEq(1)
Case "+": mathAnswer = firstNum + secondNum
End Select
End If
End If
If mathAnswer <> 0 Then
'Enter the answer to the box
Dim captchaAns As Object
Set captchaAns = ieDoc.getElementsByClassName("submit__input")(0)
If Not captchaAns Is Nothing Then captchaAns.Value = mathAnswer
Set captchaAns = Nothing
'Get the submit button element, remove "disabled" attribute to allow clicking
Dim submitBtn As Object
Set submitBtn = ieDoc.getElementsByName("checkEligibilityForm:j_idt79")(0)
submitBtn.removeAttribute "disabled"
submitBtn.Click
Set submitBtn = Nothing
End If
End If
End Sub
I am looking to follow a series of URL's that are found in column A (example: https://www.ebay.com/itm/Apple-iPhone-7-GSM-Unlocked-Verizon-AT-T-TMobile-Sprint-32GB-128GB-256GB/352381131997?epid=225303158&hash=item520b8d5cdd:m:mWgYDe4a79NeLuAlV-RmAQA:rk:7:pf:0) and pull the following information from them:
- Title
- Price
- Description
I think there are multiple issues with my code... For one, I can't get the program to follow specific URL's listed in the Excel (only if I specify one within the code). Also, pulling multiple fields has given me issues.
Option Explicit
Public Sub ListingInfo()
Dim ie As New InternetExplorer, ws As Worksheet, t As Date
Dim i As Integer
i = 0
Do While Worksheets("Sheet1").Cells(i, 1).Value <> ""
Const MAX_WAIT_SEC As Long = 10
Set ws = ThisWorkbook.Worksheets("Sheet1")
With ie
.Visible = True
.Navigate2 Worksheets("Sheet1").Cells(i, 1).Value
While .Busy Or .readyState < 4: DoEvents: Wend
Dim Links As Object, i As Long, count As Long
t = Timer
Do
On Error Resume Next
Set Title = .document.querySelectorAll("it-ttl")
Set price = .document.querySelectorAll("notranslate")
Set Description = .document.querySelectorAll("ds_div")
count = Links.Length
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While count = 0
For i = 0 To Title.Length - 1
ws.Cells(i + 1, 1) = Title.item(i)
ws.Cells(i + 1, 2) = price.item(i)
ws.Cells(i + 1, 3) = Description.item(i)
Next
.Quit
i = i + 1
Loop
End With
End Sub
I would use late binding for MSXML2.XMLHTTP and set a reference to the Microsoft HTML Object Library for the HTMLDocument.
Note: querySelector() references the first item it finds that matches its search string.
Here is the short version:
Public Sub ListingInfo()
Dim cell As Range
With ThisWorkbook.Worksheets("Sheet1")
For Each cell In .Range("A1", .Cells(.Rows.Count, 1).End(xlUp))
Dim Document As MSHTML.HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", cell.Value, False
.send
Set Document = New MSHTML.HTMLDocument
Document.body.innerHTML = .responseText
End With
cell.Offset(0, 1).Value = Document.getElementByID("itemTitle").innerText
cell.Offset(0, 2).Value = Document.getElementByID("prcIsum").innerText
If Not Document.querySelector(".viSNotesCnt") Is Nothing Then
cell.Offset(0, 3).Value = Document.querySelector(".viSNotesCnt").innerText
Else
'Try Something Else
End If
Next
End With
End Sub
A more elaborate solution would be to break the code up into smaller routines and load the data into an Array. The main advantage of this is that you can test each subroutine separately.
Option Explicit
Public Type tListingInfo
Description As String
Price As Currency
Title As String
End Type
Public Sub ListingInfo()
Dim source As Range
Dim data As Variant
With ThisWorkbook.Worksheets("Sheet1")
Set source = .Range("A1:D1", .Cells(.Rows.count, 1).End(xlUp))
data = source.Value
End With
Dim r As Long
Dim record As tListingInfo
Dim url As String
For r = 1 To UBound(data)
record = getListingInfo()
url = data(r, 1)
record = getListingInfo(url)
With record
data(r, 2) = .Description
data(r, 3) = .Price
data(r, 4) = .Title
End With
Next
source.Value = data
End Sub
Public Function getListingInfo(url As String) As tListingInfo
Dim ListingInfo As tListingInfo
Dim Document As MSHTML.HTMLDocument
Set Document = getHTMLDocument(url)
With ListingInfo
.Description = Document.getElementByID("itemTitle").innerText
.Price = Split(Document.getElementByID("prcIsum").innerText)(1)
.Title = Document.querySelectorAll(".viSNotesCnt")(0).innerText
Debug.Print .Description, .Price, .Title
End With
End Function
Public Function getHTMLDocument(url As String) As MSHTML.HTMLDocument
Const READYSTATE_COMPLETE As Long = 4
Dim Document As MSHTML.HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", url, False
.send
If .readyState = READYSTATE_COMPLETE And .Status = 200 Then
Set Document = New MSHTML.HTMLDocument
Document.body.innerHTML = .responseText
Set getHTMLDocument = Document
Else
MsgBox "URL: " & vbCrLf & "Ready state: " & .readyState & vbCrLf & "HTTP request status: " & .Status, vbInformation, "URL Not Responding"
End If
End With
End Function
There are a lot of things to fix in your code. It is late here so I will just give pointers (and update fully later) and working code below:
Declare all variables and use appropriate type
Review For Loops and how transpose can be used to create a 1d array of urls pulled from sheet to loop over
Review the difference between querySelector and querySelectorAll methods
Review CSS selectors (you are specifying everything as type selector when in fact you are not selecting by tag for the elements of interest; nor by your stated text)
Think about placement of your IE object creation and of your .Navigate2 to make use of existing object
Make sure to use distinct loop counters
Be sure not to overwrite values in sheet
Code:
Option Explicit
Public Sub ListingInfo()
Dim ie As New InternetExplorer, ws As Worksheet
Dim i As Long, urls(), rowCounter As Long
Dim title As Object, price As Object, description As Object
Set ws = ThisWorkbook.Worksheets("Sheet1")
urls = Application.Transpose(ws.Range("A1:A2").Value) '<= Adjust
With ie
.Visible = True
For i = LBound(urls) To UBound(urls)
If InStr(urls(i), "http") > 0 Then
rowCounter = rowCounter + 1
.Navigate2 urls(i)
While .Busy Or .readyState < 4: DoEvents: Wend
Set title = .document.querySelector(".it-ttl")
Set price = .document.querySelector("#prcIsum")
Set description = .document.querySelector("#viTabs_0_is")
ws.Cells(rowCounter, 3) = title.innerText
ws.Cells(rowCounter, 4) = price.innerText
ws.Cells(rowCounter, 5) = description.innerText
Set title = Nothing: Set price = Nothing: Set description = Nothing
End If
Next
.Quit
End With
End Sub
Here's an approach using Web Requests, using MSXML. It should be significantly faster than using IE, and I'd encourage you to strongly consider using this approach wherever possible.
You'll need references to Microsoft HTML Object Library and Microsoft XML v6.0 to get this working.
Option Explicit
Public Sub SubmitRequest()
Dim URLs As Excel.Range
Dim URL As Excel.Range
Dim LastRow As Long
Dim wb As Excel.Workbook: Set wb = ThisWorkbook
Dim ws As Excel.Worksheet: Set ws = wb.Worksheets(1)
Dim ListingDetail As Variant
Dim i As Long
Dim j As Long
Dim html As HTMLDocument
ReDim ListingDetail(0 To 2, 0 To 10000)
'Get URLs
With ws
LastRow = .Cells(.Rows.Count, 1).End(xlUp).Row
Set URLs = .Range(.Cells(1, 1), .Cells(LastRow, 1))
End With
'Update the ListingDetail
For Each URL In URLs
Set html = getHTML(URL.Value2)
ListingDetail(0, i) = html.getElementByID("itemTitle").innertext 'Title
ListingDetail(1, i) = html.getElementByID("prcIsum").innertext 'Price
ListingDetail(2, i) = html.getElementsByClassName("viSNotesCnt")(0).innertext 'Seller Notes
i = i + 1
Next
'Resize array
ReDim Preserve ListingDetail(0 To 2, 0 To i - 1)
'Dump in Column T,U,V of existing sheet
ws.Range("T1:V" & i).Value = WorksheetFunction.Transpose(ListingDetail)
End Sub
Private Function getHTML(ByVal URL As String) As HTMLDocument
'Add a reference to Microsoft HTML Object Library
Set getHTML = New HTMLDocument
With New MSXML2.XMLHTTP60
.Open "GET", URL
.send
getHTML.body.innerHTML = .responseText
End With
End Function
I have made the macros script which retrieves the data from the URL. What I need is that, I need to increase the date one by one and get the data for each. the URL is like this :
https://www.ukdogracing.net/racecards/01-05-2017/monmore
Ia m able to get the data with this script :
Sub GetData()
Dim IE As Object
Dim doc As Object
Dim strURL As String
Dim I As Integer
For I = 1 To 5
strURL = "https://www.ukdogracing.net/racecards/01-05-2017/monmore" + Trim(Str(I))
Set IE = CreateObject("InternetExplorer.Application")
With IE
.navigate strURL
Do Until .ReadyState = 4: DoEvents: Loop
Do While .Busy: DoEvents: Loop
Set doc = IE.Document
GetAllTables doc
.Quit
End With
Next I
End Sub
Sub GetAllTables(doc As Object)
Dim ws As Worksheet
Dim rng As Range
Dim tbl As Object
Dim rw As Object
Dim cl As Object
Dim tabno As Long
Dim nextrow As Long
Dim I As Long
Dim ThisLink As Object 'variable for <a> tags
Set ws = Worksheets.Add
For Each tbl In doc.getElementsByTagName("TABLE")
tabno = tabno + 1
nextrow = nextrow + 1
Set rng = ws.Range("B" & nextrow)
rng.Offset(, -1) = "Table " & tabno
For Each rw In tbl.Rows
For Each cl In rw.Cells
rng.Value = cl.outerText
Set rng = rng.Offset(, 1)
I = I + 1
Next cl
nextrow = nextrow + 1
Set rng = rng.Offset(1, -I)
I = 0
Next rw
Next tbl
I = Range("B" & Rows.Count).End(xlUp).Row 'last row with data
Do While Cells(I, 1).Value = "" 'will loop until first not blank found in column A (starting from last row of data, from end to start)
For Each ThisLink In doc.getElementsByTagName("a") 'we check all <a> tags
If ThisLink.innerText = Cells(I, 2).Value Then Cells(I, 1).Value = ThisLink.href 'If the innertext is the name of the race, in column A we add link
Next ThisLink
I = I - 1 'we decrease row position
Loop
End Sub
But I need the script takes the date part of the URL and add one day each time till today and get the data. for example :
https://www.ukdogracing.net/racecards/01-06-2017/monmore
https://www.ukdogracing.net/racecards/01-07-2017/monmore
etc... How can I make the script to get the data for each day adding one each time.
Thanks in advance.
Replace the first sub with this one and it will run for the specified dates. I couldn't see I having any purpose so i removed it.
Sub GetData()
Dim IE As Object, doc As Object
Dim strURL As String, myDate As Date
Set IE = CreateObject("InternetExplorer.Application")
With IE
For myDate = CDate("01-05-2017") To CDate("01-09-2017")
strURL = "https://www.ukdogracing.net/racecards/" & Format(myDate, "mm-dd-yyyy") & "/monmore" ' Trim(Str(I))
.navigate strURL
Do Until .ReadyState = 4: DoEvents: Loop
Do While .Busy: DoEvents: Loop
Set doc = IE.Document
GetAllTables doc
Next myDate
.Quit
End With
End Sub
Iwant to get the href link from the following code:
<div class="border-content">
<div class="main-address">
<h2 class="address">
Marcos paz 2500<span></span>
</h2>
i tried using getelementsbytagname("a") but i don't know how to do that for the specific class "address". Any ideas?
Thanks Kilian, here's how i handle everything. Quite complicated but it worked, although it takes for ever as i have plenty of nested loops:
Sub Propiedades()
'to refer to the running copy of Internet Explorer
Dim ie As InternetExplorer
'to refer to the HTML document returned
Dim html As HTMLDocument
'open Internet Explorer in memory, and go to website
Set ie = New InternetExplorer
ie.Visible = False
ie.Navigate "http://www.argenprop.com/Departamentos-tipo-casa-Venta-Almagro-Belgrano-Capital-Federal/piQ86000KpsQ115000KmQ2KrbQ1KpQ1KprQ2KpaQ135Kaf_816Kaf_100000001KvnQVistaResultadosKaf_500000001Kaf_801KvncQVistaGrillaKaf_800000002Kaf_800000005Kaf_800000010Kaf_800000041Kaf_800000011Kaf_800000020Kaf_800000030Kaf_800000035Kaf_800000039Kaf_900000001Kaf_900000002Kaf_900000006Kaf_900000008Kaf_900000009Kaf_900000007Kaf_900000010Kaf_900000033Kaf_900000034Kaf_900000036Kaf_900000038Kaf_900000037Kaf_900000035Kaf_900000039Kaf_900000041Kaf_900000042Kaf_900000043"
'Wait until IE is done loading page
Do While ie.ReadyState <> READYSTATE_COMPLETE
Application.StatusBar = "Trying to go to argenprop ..."
DoEvents
Loop
'show text of HTML document returned
Set html = ie.Document
'close down IE and reset status bar
Set ie = Nothing
Application.StatusBar = ""
'clear old data out and put titles in
Sheets(2).Select
Cells.ClearContents
'put heading across the top of row 3
Range("A3").Value = "Direccion"
Range("B3").Value = "Mts cuadrados"
Range("C3").Value = "Antiguedad"
Range("D3").Value = "Precio"
Range("E3").Value = "Dormitorios"
Range("F3").Value = "Descripcion"
Range("G3").Value = "Link"
Dim PropertyList As IHTMLElement
Dim Properties As IHTMLElementCollection
Dim Property As IHTMLElement
Dim RowNumber As Long
Dim PropertyFields As IHTMLElementCollection
Dim PropertyField As IHTMLElement
Dim PropertyFieldLinks As IHTMLElementCollection
Dim caracteristicasfields As IHTMLElementCollection
Dim caract As IHTMLElement
Dim caracteristicas As IHTMLElementCollection
Dim caractfield As IHTMLElement
Set PropertyList = html.getElementById("resultadoBusqueda")
Set Properties = PropertyList.Children
RowNumber = 4
For Each Property In Properties
If Property.className = "box-avisos-listado clearfix" Then
Set PropertiesFields = Property.all
For Each PropertyField In PropertiesFields
Fede = PropertyField.className
If PropertyField.className Like "avisoitem*" Then
Set caracteristicas = PropertyField.Children
For Each caract In caracteristicas
f = caract.className
If f = "border-content" Then
Set caracteristicasfields = caract.all
For Each caractfield In caracteristicasfields
test1 = caractfield.className
u = caractfield.innerText
If caractfield.className <> "" Then
Select Case caractfield.className
Case Is = "address"
Cells(RowNumber, "A") = caractfield.innerText
marray = Split(caractfield.outerHTML, Chr(34))
Cells(RowNumber, "G") = "www.argenprop.com" & marray(5)
Case Is = "list-price"
Cells(RowNumber, "D") = caractfield.innerText
Case Is = "subtitle"
Cells(RowNumber, "F") = caractfield.innerText 'descripcion
'Case is ="datoscomunes"
'Set myelements = caractfield.all
Case Is = "datocomun-valor-abbr"
Select Case counter
Case Is = 0
Cells(RowNumber, "B") = caractfield.innerText 'square mts
counter = counter + 1
Case Is = 1
Cells(RowNumber, "E") = caractfield.innerText 'DORMITORIOS
counter = counter + 1
Case Is = 2
Cells(RowNumber, "C") = caractfield.innerText ' antiguedad
counter = 0 ' reset counter
Set caracteristicasfields = Nothing
Exit For 'salgo del loop en caractfield
End Select 'cierro el select del counter
End Select 'cierro el select de caractfield.classname
End If ' cierro If caractfield.className <> "" Then
Next caractfield
End If ' cierro el border content
If caract = "border-content" Then Exit For 'salgo del loop dentro de aviso item (caract)
Next caract
RowNumber = RowNumber + 1
End If ' If PropertyField.className Like "avisoitem*"
Next PropertyField 'para ir al siguiente aviso
End If
Next Property
Set html = Nothing
MsgBox "done!"
End Sub
I am trying to scrape some data from a database, and I have it pretty much set. I look in IE for a tab that has me logged in into the database, and paste the query link there through vba. But how do I extract the data that it returns from the IE tab and put that into an excel cell or array.
This is the code I have for opening my query:
Sub import()
Dim row As Integer
Dim strTargetFile As String
Dim wb As Workbook
Dim test As String
Dim ie As Object
Call Fill_Array_Cultivar
For row = 3 To 4
Sheets.Add.Name = Cultivar_Array(row, 1)
strTargetFile = "https://www3.wipo.int/pluto/user/jsp/select.jsp?fl=app_date%2Cden_info%2Cden_final&hl=false&json.nl=map&wt=json&type=upov&start=0&qi=3-nNCXQ6etEVv184O9nnd5yg%3D%3D&q=cc%3AIT%20AND%20latin_name%3A(zea%20mays)%20AND%20den_info%3A" & Trim(Cultivar_Array(row, 1)) & "&facet=false"
Set ie = GetIE("https://www3.wipo.int" & "*")
If Not ie Is Nothing Then
ie.navigate (strTargetFile)
Else
MsgBox "IE not found!"
End If
Next row
End Sub
And this is the appropriate function:
'Find an IE window with a matching (partial) URL
'Assumes no frames.
Function GetIE(sAddress As String) As Object
Dim objShell As Object, objShellWindows As Object, o As Object
Dim retVal As Object, sURL As String
Set retVal = Nothing
Set objShell = CreateObject("Shell.Application")
Set objShellWindows = objShell.Windows
'see if IE is already open
For Each o In objShellWindows
sURL = ""
On Error Resume Next
sURL = o.document.Location
On Error GoTo 0
If sURL <> "" Then
If sURL Like sAddress & "*" Then
Set retVal = o
Exit For
End If
End If
Next o
Set GetIE = retVal
End Function
What the website returns to me is a white page with a line of text. Here is an example:
{"response":{"start":0,"docs":[{"den_final":"Abacus","app_date":"1998-01-13T22:59:59Z"}],"numFound":1},"qi":"3-nNCXQ6etEVv184O9nnd5yg==","sv":"bswa2.wipo.int","lastUpdated":1436333633993}
PS. I also tried using the importxml function, it will import the website, but only an error page, as it does not recognize me as logged in.
I found the solution, which was fairly simple but hard to find.
I can just grab the ie.Document.body.innertext which is all the text I need.
See the code I updated below:
Sub import()
Dim row As Integer
Dim strTargetFile As String
Dim wb As Workbook
Dim test As String
Dim ie As Object
Dim pageText As String
Call Fill_Array_Cultivar
For row = 3 To 4
Sheets.Add.Name = Cultivar_Array(row, 1)
strTargetFile = "https://www3.wipo.int/pluto/user/jsp/select.jsp?fl=app_date%2Cden_info%2Cden_final&hl=false&json.nl=map&wt=json&type=upov&start=0&qi=3-nNCXQ6etEVv184O9nnd5yg%3D%3D&q=cc%3AIT%20AND%20latin_name%3A(zea%20mays)%20AND%20den_info%3A" & Trim(Cultivar_Array(row, 1)) & "&facet=false"
Set ie = GetIE("https://www3.wipo.int" & "*")
If Not ie Is Nothing Then
ie.navigate (strTargetFile)
Do Until ie.ReadyState = 4: DoEvents: Loop
pageText = ie.Document.body.innertext
ActiveSheet.Cells(1, 1) = pageText
pageText = Empty
Else
MsgBox "IE not found!"
End If
Next row
End Sub