Extract link from website query selector all - excel

I have used the below vba code to extract website link from
https://www.bursamalaysia.com/market_information/announcements/company_announcement?keyword=&cat=FA%2CFRCO&sub_type=&company=&mkt=&alph=&sec=&subsec=&dt_ht=23%2F04%2F2020&dt_lt=07%2F05%2F2020#/?category=all
into excel spreadsheet.
But it seem like having some problems over here, there is nothing shown up in my spreadsheet. Appreciate if anybody can point out my mistake here.
Below is the list of website link that I wish to extract it into excel spreadsheet.
Sub ScrapLink()
Dim p As Integer
Application.ScreenUpdating = False
p = InputBox("Please insert page number")
Application.ScreenUpdating = False
On Error GoTo ErrorHandler:
Worksheets("results").Cells(1, 1).Value = Worksheets("sheet1").Cells(1, 1).Value
For u = 2 To p
Worksheets("results").Cells(u, 1).Value = Worksheets("sheet1").Cells(1, 1).Value & "&page=" & u
Application.DisplayAlerts = False
Application.DisplayAlerts = True
ErrorHandler:
Application.ScreenUpdating = True
Next u
Dim IE As New InternetExplorer, html As HTMLDocument
Dim x As Long
Application.ScreenUpdating = False
x = WorksheetFunction.CountA(Worksheets("results").Range("A1:A1000"))
With IE
For u = 1 To x
IE.Visible = True
IE.Navigate Worksheets("results").Cells(u, 1).Value
While .Busy Or .ReadyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 1)
Application.StatusBar = "Trying to go to website"
DoEvents
Dim links As Object, i As Long
Set links = .Document.querySelectorAll("#bm_ajax_container
[href^='/market_information/announcements/company_announcement/']")
For i = 1 To links.Length
With ThisWorkbook.Worksheets("Sheet1")
Range("A" & rows.count).End(xlUp).Offset(1).Value = links.item(i - 1)
End With
Next i
Next u
.Quit
End With
Worksheets("results").Range("a1:a1000").Clear
End Sub

Try
For i = 0 To links.Length -1
and
Range("A" & rows.count).End(xlUp).Offset(1).Value = links.item(i ).href
assuming correct selector. nodeLists are 0 based.

Related

detecting a captcha pop-up

Hello I'm trying to get multiple results from a website, depending on the value in column E, filling all cells in the column H.
I'm wondering if there is a way to pause the execution if a captcha pop-up is opened.
Please do not mention Selenium. Either I'm not interested in OCR, I only want the program stops and waits for solving the captcha puzzle.
Sub getPEC()
Dim MyHTML_Element As IHTMLElement
Dim HTMLDoc As HTMLDocument
Dim MyURL As String
Dim results As IHTMLElementCollection
Dim MyBrowser As Object
MyURL = "https://www.registroimprese.it/home"
Set MyBrowser = New InternetExplorer
MyBrowser.Silent = True
MyBrowser.navigate MyURL
MyBrowser.Visible = True
While Selection.Column = 5 And ActiveCell.Value <> ""
Dim r As VbMsgBoxResult
Do
DoEvents
Loop Until MyBrowser.readyState = READYSTATE_COMPLETE
Application.Wait Now + TimeValue("00:00:02")
Set HTMLDoc = MyBrowser.document
HTMLDoc.getElementById("inputSearchFieldMob").Value = Selection.Value
HTMLDoc.getElementById("selProvincia-Mob").Value = Range("P" & Selection.Row).Value
HTMLDoc.getElementById("btnCercaGratuitaMob").Click
Do
DoEvents
Loop Until MyBrowser.readyState = READYSTATE_COMPLETE
Application.Wait Now + TimeValue("00:00:03")
If a captcha pop-up is present Then
r = MsgBox("continue?", vbYesNo, "Captcha?")
If r = vbNo Then Exit Sub
End If
Set tbody = HTMLDoc.getElementsByClassName("table tableRisultatiGratuita")(0).getElementsByTagName("tbody")(0)
Set datarow = tbody.getElementsByTagName("tr")
If datarow.Length = 1 Then
Set datarowtdlist = datarow(0).getElementsByTagName("td")
Set datarowDIVlist = datarowtdlist(0).getElementsByTagName("DIV")
Set datarowAlist = datarowDIVlist(0).getElementsByTagName("A")
MyBrowser.navigate datarowAlist(0).href
Do
DoEvents
Loop Until MyBrowser.readyState = READYSTATE_COMPLETE
Else
r = MsgBox("Select one", vbOKCancel)
If r = vbCancel Then Exit Sub
End If
Set HTMLDoc = MyBrowser.document
Range("H" & Selection.Row).Value = HTMLDoc.getElementsByClassName("ddPec")(0).Value
MyBrowser.GoBack
Range("E" & ActiveCell.Row + 1).Select
i = i + 1
Wend
End Sub

Excel Macro to change page number mentioned in javascript:__doPostBack when called by IE.Navigate

I wrote excel macro to fetch data from multiple pages ( here around 25-40 pages ) . I have managed to change pages and scrape all pages from every page .
Sub Fetch_Data()
Dim IE As Object
Dim httpReq As Object
Dim HTMLdoc As Object
Dim resultsTable As Object
Dim tRow As Object, tCell As Object
Dim destCell As Range
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = False
'Application.ScreenUpdating = False
Application.StatusBar = "Data Fetching in progress, please wait..."
IE.Navigate "https://www.bseindia.com/markets/debt/TradenSettlement.aspx" 'load the Backshop Loan Locator page
Do
DoEvents
Loop Until IE.ReadyState = 4
Set HTMLdoc = IE.Document
'LR = Cells(Rows.Count, 1).End(xlUp).Row
With ActiveSheet
'.Cells.ClearContents
Set destCell = .Range("A1")
End With
Set resultsTable = HTMLdoc.getElementById("ContentPlaceHolder1_GridViewrcdsFC")
For Each tRow In resultsTable.Rows
For Each tCell In tRow.Cells
destCell.Offset(tRow.RowIndex, tCell.cellIndex).Value = tCell.innerText
Next
Next
'________________________________________________________________________________________________________________________
'Go to Next page
'IE.Navigate "javascript:__doPostBack('ctl00$ContentPlaceHolder1$GridViewrcdsFC','Page$2')"
i = 2
For i = 2 To 50
If i = 2 Then
Url = "javascript:__doPostBack('ctl00$ContentPlaceHolder1$GridViewrcdsFC','Page$2')"
On Error GoTo ErrorHandler
ElseIf i = 3 Then
Url = "javascript:__doPostBack('ctl00$ContentPlaceHolder1$GridViewrcdsFC','Page$3')"
On Error GoTo ErrorHandler
ElseIf i = 4 Then
Url = "javascript:__doPostBack('ctl00$ContentPlaceHolder1$GridViewrcdsFC','Page$4')"
On Error GoTo ErrorHandler
ElseIf i = 5 Then
Url = "javascript:__doPostBack('ctl00$ContentPlaceHolder1$GridViewrcdsFC','Page$5')"
On Error GoTo ErrorHandler
ErrorHandler:
GoTo XYZ
End If
IE.Navigate Url
Do
DoEvents
Loop Until IE.ReadyState = 4
Url = ""
LR = Cells(Rows.Count, 1).End(xlUp).Row - 1
With ActiveSheet
'.Cells.ClearContents
Set destCell = .Range("A" & LR)
End With
Set resultsTable = HTMLdoc.getElementById("ContentPlaceHolder1_GridViewrcdsFC")
For Each tRow In resultsTable.Rows
For Each tCell In tRow.Cells
destCell.Offset(tRow.RowIndex, tCell.cellIndex).Value = tCell.innerText
Next
Next
Next i
'________________________________________________________________________________________________________________________
XYZ: IE.Quit
Application.StatusBar = "Data Fetching Completed"
MsgBox ("Data Successfully Fetched")
Application.StatusBar = ""
Dim lrow As Long
Dim index As Long
Dim header As String
header = Range("A1").Value
lrow = Range("A" & Rows.Count).End(xlUp).Row
For index = 2 To lrow
If Range("A" & index).Value = header Then Rows(index).Delete
Next
End Sub
I want to change pages automatically without writing every page , I tried something like below , but pages are not getting changed , how to loop through pages :
For i = 2 To 4
x = "Page$" + CStr(i)
Url = "javascript:__doPostBack('ctl00$ContentPlaceHolder1$GridViewrcdsFC'," & x & ")"
On Error GoTo ErrorHandler
ErrorHandler:
GoTo XYZ
You have to look if there are url links to the other pages on the current page, find a tag and loop all the webpages. You can also look voor the url of each page and hardcode it.
Example with urls beneath tag "a":
Set AElements = HTMLDoc.getElementsByTagName("a")
For Each AElement In AElements
If AElement.id = "xxxxxxxxx" Then
Cells(Cell, 27) = AElement.src 'I write URL in the 27th column
'AElement.href
End If
Next AElement

There is no error but there is also no output on excel after the VBA execution

This is the code that i am using. No error on the macro but there is also no output on the excel sheet.
I am trying to get the data on the table for all the shares.
Sub sqylogin()
On Error Resume Next
Dim ie, objShell, Wnd As Object
Set objShell = CreateObject("Shell.Application")
Application.Calculation = xlManual
ieopen = True
For Each Wnd In objShell.Windows
If Right(Wnd.Name, 17) = "Internet Explorer" Then
Set ie = Wnd
ieopen = False
Exit For
End If
Next Wnd
If ieopen Then Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
p = "https://www.pse.com.ph/stockMarket/marketInfo-marketActivity.html?tab=1&indexName=All%20Shares"
ie.navigate (p): Application.Wait (Now + #12:00:59 AM#)
Set divelements = ie.Document.getElementsbytagname("div")
Cells(1, 1) = Now: c = 2
For Each divelement In divelements
If divelement.ID = "ext-gen291" Then
For j = 0 To 300
For i = 0 To 8
Cells(c + j, i + 1).Value = divelement.Children(j).Children(0).Children(0).Children(0).Children(i).innertext
Next i
Next j
End If
Next divelement
Set ie = Nothing
Application.Calculation = xlCalculationAutomatic
End Sub
The tables within the parent div are dynamically loaded via an ajax call so you need some sort of wait condition to ensure they are present. I show a wait condition based on the number of child tables present and additionally use the clipboard to copy paste the child tables to the sheet.
Option Explicit
Public Sub GetMarketActivity()
Dim ie As SHDocVw.InternetExplorer, clipboard As Object
Set ie = New SHDocVw.InternetExplorer
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Dim t As Date
Const MAX_WAIT_SEC As Long = 360
With ie
.Visible = True
.Navigate2 "https://www.pse.com.ph/stockMarket/marketInfo-marketActivity.html?tab=1&indexName=All%20Shares"
Do
DoEvents
Loop While .Busy Or .readyState <> READYSTATE_COMPLETE
t = Timer
Do
DoEvents
If Timer - t > MAX_WAIT_SEC Then Exit Sub
Loop Until .document.querySelectorAll(".x-grid3-row-table").Length > 1 '<wait for more than one record (Table)
Dim tables As Object, i As Long, headers()
Set tables = .document.querySelectorAll(".x-grid3-row-table")
headers = Array("Record", "Symbol", "Last trade date", "Last trade price", "Outstanding shares")
For i = 0 To tables.Length - 1
clipboard.SetText tables.item(i).outerHTML
clipboard.PutInClipboard
With ActiveSheet
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(i + 2, 1).PasteSpecial
End With
Next
.Quit
End With
End Sub

Extracting HTML code from websites by looping through list of URLs

I'm using Excel VBA to launch an IE browser tab based on the URL in each of the rows in column D. Then the relevant HTML code is extracted based on pre-defined classes and populated in columns A - C.
Pretty sure I missed a step. The process stops at D2 and does not proceed to extract HTML from the next URLs (in cells D3, D4, etc).
Thanks in advance for any suggestions!
Sub useClassnames()
Dim element As IHTMLElement
Dim elements As IHTMLElementCollection
Dim IE As InternetExplorer
Dim html As HTMLDocument
Dim shellWins As New ShellWindows
Dim IE_TabURL As String
Dim intRowPosition As Integer
Set IE = New InternetExplorer
IE.Visible = False
intRowPosition = 2
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.navigate Sheet1.Range("D" & intRowPosition)
While IE.Busy
DoEvents
Wend
intRowPosition = intRowPosition + 1
While Sheet1.Range("D" & intRowPosition) <> vbNullString
IE.navigate Sheet1.Range("D" & intRowPosition), CLng(2048)
While IE.Busy
DoEvents
Wend
intRowPosition = intRowPosition + 1
Wend
Do While IE.readyState <> READYSTATE_COMPLETE
Application.StatusBar = "Loading Web page…"
DoEvents
Loop
Set html = IE.document
Set elements = html.getElementsByClassName("container-bs")
Dim count As Long
Dim erow As Long
count = 0
For Each element In elements
If element.className = "container-bs" Then
erow = Sheet1.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
Cells(erow, 1) = html.getElementsByClassName("pull-left")(count + 1).innerHTML
Cells(erow, 2) = html.getElementsByClassName("description")(count).innerHTML
Cells(erow, 3) = html.getElementsByClassName("related-articles")(count).innerHTML
count = count + 1
End If
Next element
Range("A2:C2000").Select
Columns("A:A").EntireColumn.AutoFit
Columns("B:B").ColumnWidth = 36
End Sub
Your lines
Set html = IE.document
Set elements = html.getElementsByClassName("container-bs")
etc happen after the While loop. It needs to be inside.
Your If statement:
If element.className = "container-bs"
should be redundant as you are already looping over a collection of that classname; so I have removed this.
You are not working off element in the loop, so essentially you are using it to control your incremented counter variable. This suggests you can use a better coding strategy for retrieving the items of interest.
Always state the parent worksheet and don't rely on implicit Activesheet references - that is bug prone.
I would expect a structure more like as follows (I cannot account for refactoring to remove element)
Option Explicit
Public Sub UseClassnames()
Dim element As IHTMLElement, elements As IHTMLElementCollection, ie As InternetExplorer
Dim html As HTMLDocument, intRowPosition As Long
intRowPosition = 2
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
While Sheet1.Range("D" & intRowPosition) <> vbNullString
If intRowPosition = 2 Then
ie.navigate Sheet1.Range("D" & intRowPosition)
Else
ie.navigate Sheet1.Range("D" & intRowPosition), CLng(2048)
End If
While ie.Busy Or ie.readyState < 4: DoEvents: Wend
Set html = ie.document
Set elements = html.getElementsByClassName("container-bs")
Dim count As Long, erow As Long
count = 0
For Each element In elements
erow = Sheet1.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
With Sheet1
.Cells(erow, 1) = html.getElementsByClassName("pull-left")(count + 1).innerHTML
.Cells(erow, 2) = html.getElementsByClassName("description")(count).innerHTML
.Cells(erow, 3) = html.getElementsByClassName("related-articles")(count).innerHTML
End With
count = count + 1
Next element
intRowPosition = intRowPosition + 1
Wend
With Sheet1
.Range("A2:C2000").Select
.Columns("A:A").EntireColumn.AutoFit
.Columns("B:B").ColumnWidth = 36
End With
End Sub

Google Translate text only if it is not english

I am importing google alerts into my excel worksheet , what I am trying to do is only use the google translate code I have if the text is not in English. As the code is very slow. And I import upto 1000 rows of text. The majority of those are already in English. But at present my code translates every row.
Public Sub Translate()
Const MAX_WAIT_SEC As Long = 5
Dim IE As New InternetExplorer
Dim t As Date
Dim ws As Worksheet
Dim ftext As String
Dim x
Dim y As Long
Dim translation As Object
Dim translationText As String
y = Sheet1.Cells(Rows.Count, 1).End(xlUp).Row
Set ws = ThisWorkbook.Worksheets("Google_Notifications")
For x = 1 To y
With IE
.Visible = False
.Navigate "https://translate.google.com/#view=home&op=translate&sl=auto&tl=en"
While .Busy Or .ReadyState < 4: DoEvents: Wend
ftext = Sheet1.Range("C" & x).Value
.Document.querySelector("#source").Value = ftext
While .Busy Or .ReadyState < 4: DoEvents: Wend
t = Timer
Do
On Error Resume Next
Set translation = .Document.querySelector(".tlid-translation.translation")
translationText = translation.textContent
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While translationText = vbNullString
Sheet1.Range("C" & x).Value = translationText
Sheet1.Range("E" & x).Value = "Translated"
.Quit
Set IE = Nothing
Set translation = Nothing
translationText = ""
End With
Next x
End Sub

Resources