I am trying to download non-free shipping charges from eBay. I have the item numbers of the pages. The links should go to the right pages on eBay.
While trying to go to the page and download the data, Excel hangs.
I have similar working code that gets the eBay item numbers on many pages from eBay.
If this code can't be fixed how can I get the info I need into Excel?
itemNumberAlone = Range("a" & eachItem).Value
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;http://www.ebay.com/itm/" & itemNumberAlone & "?ru=http%3A%2F%2Fwww.ebay.com%2Fsch%2Fi.html%3F_from%3DR40%26_sacat%3D0%26_nkw%3D" & itemNumberAlone & "%26_rdc%3D1" _
, Destination:=Range("$bZ$1"))
.Name = "second ebay links"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = True
.BackgroundQuery = True
.RefreshStyle = xlOverwriteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlEntirePage
.WebFormatting = xlWebFormattingNone
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
Do While Not IsError(Application.Match("Shipping and handling", Range("bz1:bz1000"), 0))
If IsError(Application.Match("Shipping and handling", Range("bz1:bz1000"), 0)) Then Exit Do
If Not IsError(Application.Match("Shipping and handling", Range("bz1:bz1000"), 0)) Then
shippingRow = Application.Match("Shipping and handling", Range("bz1:bz1000"), 0) + 1
shippingCell = Range("bz" & shippingRow).Value
If Left(shippingCell, 2) <> "US" Then
Range("bz" & shippingRow - 1).ClearContents
Else
Range("c" & eachItem).Value = Right(shippingCell, Len(shippingCell) - 2)
End If
End If
Loop
End If
Next
I think you will have to learn DOM automation to do this cleanly. I took a look at the HTML on the ebay pages and it might be a little much for someone who hasn't used DOM automation before. I wasn't planning on writing this but it sounds like you are in a bit of a pinch, so here you go. You can use it to learn from. Just keep in mind that this will work in the short-term but when they change their HTML, it will fail.
Option Explicit
Sub Get_Ebay_Shipping_Charges()
Dim IE As Object, DOM_DOC As Object
Dim URL$, SHIPPING_CHARGES$
Dim SHIPPING_AMOUNT
Dim i&, x&
Dim EL, EL_COLLECTION, CHILD_NODES, TABLE_NODES, TABLE_ROW_NODES, TABLE_DATA_NODES, ITEM_NUMBER_ARRAY
Dim WS As Excel.Worksheet
Dim ITEM_NOT_FOUND As Boolean
''You should change this to the worksheet name you want to use
''ie Set WS = ThisWorkbook.Sheets("Ebay")
Set WS = ThisWorkbook.Sheets(1)
''Create an Internet Explorer Object
Set IE = CreateObject("InternetExplorer.Application")
''Make it visible
IE.Visible = True
''You can replace this with an array that is built from your spreadsheet, this is just for demo purposes
ITEM_NUMBER_ARRAY = Array("290941626676", "130942854921", "400035340501")
''In your code, you can start your loop here to handle the list of items
''This code is a little different for demo purposes
For x = 0 To UBound(ITEM_NUMBER_ARRAY)
''Here is your URL
URL = "http://www.ebay.com/itm/" & ITEM_NUMBER_ARRAY(x) & "?ru=http%3A%2F%2Fwww.ebay.com%2Fsch%2Fi.html%3F_from%3DR40%26_sacat%3D0%26_nkw%3D" & ITEM_NUMBER_ARRAY(x) & "%26_rdc%3D1"
''Navigate to your URL
IE.navigate URL
''This loop will wait until the page is received from the server - the page was hanging for me too so I added a counter to exit after a certain number of loops (this is the i variable)
Do Until IE.readystate = 4 Or i = 50000
i = i + 1
DoEvents
Loop
i = 0
''This sets the DOM document
Set DOM_DOC = IE.document
''First get a collection of table names
Set EL_COLLECTION = DOM_DOC.GetElementsByTagName("table")
If IsEmpty(EL_COLLECTION) Then ITEM_NOT_FOUND = True: GoTo ERR_EXIT
''Then look for the table classname that matches the one we want (in this case "sh-tbl") and set the childnodes to a new collection
For Each EL In EL_COLLECTION
If EL.ClassName = "sh-tbl" Then
Set CHILD_NODES = EL.ChildNodes
Exit For
End If
Next EL
If IsEmpty(CHILD_NODES) Then ITEM_NOT_FOUND = True: GoTo ERR_EXIT
''Next look for the TBODY element in the childnodes collection and set the childnodes of the TBODY element when found
For Each EL In CHILD_NODES
If Not TypeName(EL) = "DispHTMLDOMTextNode" Then
If EL.tagname = "TBODY" Then
Set TABLE_NODES = EL.ChildNodes
Exit For
End If
End If
Next EL
If IsEmpty(TABLE_NODES) Then ITEM_NOT_FOUND = True: GoTo ERR_EXIT
''Find the TR element and set its childnodes to another collection
For Each EL In TABLE_NODES
If Not TypeName(EL) = "DispHTMLDOMTextNode" Then
If EL.tagname = "TR" Then
Set TABLE_ROW_NODES = EL.ChildNodes
Exit For
End If
End If
Next EL
If IsEmpty(TABLE_ROW_NODES) Then ITEM_NOT_FOUND = True: GoTo ERR_EXIT
''Find the first TD element and get it's childnodes
For Each EL In TABLE_ROW_NODES
If Not TypeName(EL) = "DispHTMLDOMTextNode" Then
If EL.tagname = "TD" Then
Set TABLE_DATA_NODES = EL.ChildNodes
Exit For
End If
End If
Next EL
If IsEmpty(TABLE_DATA_NODES) Then ITEM_NOT_FOUND = True: GoTo ERR_EXIT
''The first DIV element holds the shipping information so when it is found, get the innertext of that element
For Each EL In TABLE_DATA_NODES
If Not TypeName(EL) = "DispHTMLDOMTextNode" Then
If EL.tagname = "DIV" Then
SHIPPING_CHARGES = EL.INNERTEXT
Exit For
End If
End If
Next EL
''Make sure a shipping charge was found
If SHIPPING_CHARGES = vbNullString Then MsgBox "No shipping charges found for item " & ITEM_NUMBER_ARRAY(x): GoTo ERR_EXIT
If IsNumeric(Right(SHIPPING_CHARGES, InStr(SHIPPING_CHARGES, Chr(36)))) Then
SHIPPING_AMOUNT = Right(SHIPPING_CHARGES, InStr(SHIPPING_CHARGES, Chr(36)))
Else
SHIPPING_AMOUNT = SHIPPING_CHARGES
End If
''You may have to change this to fit your spreadsheet
WS.Cells(x + 1, 3).Value = SHIPPING_AMOUNT
ERR_EXIT:
If ITEM_NOT_FOUND = True Then MsgBox "No Page Was Found For Item " & ITEM_NUMBER_ARRAY(x): ITEM_NOT_FOUND = False
Next x
IE.Quit
Set IE = Nothing
End Sub
If you are stuck on using your existing code, you can also try deleting the querytables after the query.
Dim QRY_TABLE As QueryTable
For Each QRY_TABLE In ThisWorkbook.Sheets(1).QueryTables
QRY_TABLE.Delete
Next
This method will not delete the querytable values on your spreadsheet but it will kill the querytable connection. If you have too many of these, it could create a crash.
One final suggestion, if your workbook contains a lot of vlookups then this is probably the true culprit. Good Luck!
You can use xmlHTTP object which will download the data easier and wont make the excel stuck.
Sub xmlHttp()
Dim xmlHttp As Object
Set xmlHttp = CreateObject("MSXML2.ServerXMLHTTP.6.0")
Dim ITEM_NUMBER_ARRAY As Variant
ITEM_NUMBER_ARRAY = Array("290941626676", "130942854921", "400035340501")
For x = 0 To UBound(ITEM_NUMBER_ARRAY)
''Here is your URL
URL = "http://www.ebay.com/itm/" & ITEM_NUMBER_ARRAY(x) & "?ru=http%3A%2F%2Fwww.ebay.com%2Fsch%2Fi.html%3F_from%3DR40%26_sacat%3D0%26_nkw%3D" & ITEM_NUMBER_ARRAY(x) & "%26_rdc%3D1"
xmlHttp.Open "GET", URL, False
xmlHttp.setRequestHeader "Content-Type", "text/xml"
xmlHttp.send
Dim html As Object
Set html = CreateObject("htmlfile")
html.body.innerHTML = xmlHttp.ResponseText
Set objShipping = html.getelementbyid("shippingSection").getElementsbytagname("td")(0)
If Not objShipping Is Nothing Then
Set divShip = objShipping.ChildNodes(1)
Debug.Print divShip.innerHTML
Else
Debug.Print "No Data"
End If
Next
End Sub
Immediate Window (Ctrl + G)
US $2.55
No Data
US $6.50
Related
I have to create several functions that get the status of the supplied cargo number from each different website.
Below is the code user Zwenn helped me with. However, I am not familiar with the RegEx and Replace methods of VBA.
I am trying to simplify this code so I can replicate it for other websites. I understand that each website will need a unique code, but if the base stays the same and I can then modify the exact element needed to be scraped would be ideal.
Function FlightStat_AF(cargoNo As Variant) As String
Const url = "https://www.afklcargo.com/mycargo/api/shipment/detail/057-"
Dim elem As Object
Dim Result As String
Dim askFor As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", url & cargoNo, False
.send
Result = .responseText
If .Status = 200 Then
If InStr(1, Result, "faultDescription") = 0 Then
askFor = """metaStatus"""
Else
askFor = """faultDescription"""
End If
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.Pattern = askFor & ":(.*?),"
Set elem = .Execute(Result)
End With
Result = Replace(elem(0).SubMatches(0), Chr(34), "")
Else
Result = "No cargoID"
End If
End With
FlightStat_AF = Result
End Function
I am trying to create a similar function for the below website.
URL = https://booking.unitedcargo.com/skychain/app?service=page/nwp:Trackshipmt&doc_typ=AWB&awb_pre=016&awb_no=
Sample CargoNo = 60848034
The element to scrape is highlighted in yellow
The following should fetch you the required status as long as it is available.
Sub PrintStatus()
MsgBox GetDeliveryStat("60848034")
End Sub
Function GetDeliveryStat(cargoNo As Variant) As String
Const Url = "https://booking.unitedcargo.com/skychain/app?service=page/nwp:Trackshipmt&doc_typ=AWB&awb_pre=016&awb_no="
Dim dStatCheck$, deliveryStat$, S$
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", Url & cargoNo, False
.send
S = .responseText
End With
With CreateObject("HTMLFile")
.write S
On Error Resume Next
dStatCheck = .getElementById("trackShiptablerowInner0").getElementsByTagName("b")(0).innerText
On Error GoTo 0
If dStatCheck <> "" Then
deliveryStat = dStatCheck
Else
deliveryStat = "Not Found"
End If
End With
GetDeliveryStat = deliveryStat
End Function
i wrote my VBA code in excel sheet as below but it is not scrape data for me and also i don't know why please any one help me. it gave me reullt as "click her to read more" onlyi want to scrape enitre data such as first name last name state zip code and so on
Sub extractTablesData()
Dim IE As Object, obj As Object
Dim myState As String
Dim r As Integer, c As Integer, t As Integer
Dim elemCollection As Object
Set IE = CreateObject("InternetExplorer.Application")
myState = InputBox("Enter the city where you wish to work")
With IE
.Visible = True
.navigate ("http://www.funeralhomes.com/go/listing/Search? name=&city=&state=&country=USA&zip=&radius=")
While IE.readyState <> 4
DoEvents
Wend
For Each obj In IE.document.all.item("state").Options
If obj.innerText = myState Then
obj.Selected = True
End If
Next obj
IE.document.getElementsByValue("Search").item.Click
Do While IE.Busy: DoEvents: Loop
ThisWorkbook.Sheets("Sheet1").Range("A1:K1500").ClearContents
Set elemCollection = IE.document.getElementsByTagName("TABLE")
For t = 0 To (elemCollection.Length - 1)
For r = 0 To (elemCollection(t).Rows.Length - 1)
For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
ThisWorkbook.Worksheets(1).Cells(r + 1, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
Next c
Next r
Next t
End With
Set IE = Nothing
End Sub
Using the same URL as the answer already given you could alternatively select with CSS selectors to get the elements of interest, and use split to get just the names and address parts from the text. We can also do away with the browser altogether to get faster results from first results page.
Business name:
You can get the name with the following selector (using paid listing example):
div.paid-listing .listing-title
This selects (sample view)
Try
Address info:
The associated descriptive information can be retrieved with the selector:
div.paid-listing .address-summary
And then using split we can parse this into just the address information.
Code:
Option Explicit
Public Sub GetTitleAndAddress()
Dim oHtml As HTMLDocument, nodeList1 As Object, nodeList2 As Object, i As Long
Const URL As String = "http://www.funeralhomes.com/go/listing/ShowListing/USA/New%20York/New%20York"
Set oHtml = New HTMLDocument
With CreateObject("WINHTTP.WinHTTPRequest.5.1")
.Open "GET", URL, False
.send
oHtml.body.innerHTML = .responseText
End With
Set nodeList1 = oHtml.querySelectorAll("div.paid-listing .listing-title")
Set nodeList2 = oHtml.querySelectorAll("div.paid-listing .address-summary")
With Worksheets("Sheet3")
.UsedRange.ClearContents
For i = 0 To nodeList1.Length - 1
.Range("A" & i + 1) = nodeList1.Item(i).innerText
.Range("B" & i + 1) = Split(nodeList2.Item(i).innerText, Chr$(10))(0)
Next i
End With
End Sub
Example output:
Yeah, without an API, this can be very tricky at best, and very inconsistent at worst. For now, you can try the script below.
Sub DumpData()
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
URL = "http://www.funeralhomes.com/go/listing/ShowListing/USA/New%20York/New%20York"
'Wait for site to fully load
IE.Navigate2 URL
Do While IE.Busy = True
DoEvents
Loop
RowCount = 1
With Sheets("Sheet1")
.Cells.ClearContents
RowCount = 1
For Each itm In IE.document.all
If itm.classname Like "*free-listing*" Or itm.classname Like "*paid-listing*" Then
.Range("A" & RowCount) = itm.classname
.Range("B" & RowCount) = Left(itm.innertext, 1024)
RowCount = RowCount + 1
End If
Next itm
End With
End Sub
You probably want some kind of input box to capture the city and state and radius from the user, or capture those variable in cells in your worksheet.
Notice, the '%20' is a space character.
I got this idea from a friend of mine, Joel, a long time ago. That guy is great!
First, I want to say that this is my first attempt at building vba code. I am trying to extract data from the web using a web query .Add(Connection,Destination,sql). What I want my code to do is to loop through the string 'str' containing stock tickers to be inserted into my url using a for loop and pasting the table data in the active sheet.
In addition, it would be an extra if I could create a new sheet for every url queried with the corresponding NYSE name.
Currently my code does not run because it is not extracting the data. I think the error is in how I am specifying the url using the loop index NYSE(i).
Thanks for any responses, advice, and suggestions.
Sub URL_Get_Query()
Dim NYSE(1 To 22) As String
NYSE(1) = "APC"
NYSE(2) = "APA"
NYSE(3) = "COG"
NYSE(4) = "CHK"
NYSE(5) = "XEC"
NYSE(6) = "CRK"
NYSE(7) = "CLR"
NYSE(8) = "DNR"
NYSE(9) = "DVN"
NYSE(10) = "ECA"
NYSE(11) = "EOG"
NYSE(12) = "XCO"
NYSE(13) = "MHR"
NYSE(14) = "NFX"
NYSE(15) = "NBL"
NYSE(16) = "PXD"
NYSE(17) = "RRC"
NYSE(18) = "ROSE"
NYSE(19) = "SD"
NYSE(20) = "SWN"
NYSE(21) = "SFY"
NYSE(22) = "WLL"
For i = 1 To 22
Debug.Print NYSE(i)
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;http://finance.yahoo.com/q/ks?s=NYSE(i)+Key+Statistics", _
Destination:=Range("a1"))
.BackgroundQuery = True
.TablesOnlyFromHTML = True
.Refresh BackgroundQuery:=False
.SaveData = True
End With
Next i
End Sub
See how this works for you:
Dim NYSE_List As String, i As Long
Dim NYSE
NYSE_List = "APC,APA,COG,CHK,XEC,CRK,CLR,DNR,DVN,ECA,EOG,XCO,MHR,NFX,NBL,PXD,RRC,ROSE,SD,SWN,SFY,WLL"
' this is easier to maintain. Split the list at the commas.
' No need to count absolute numbers, either.
NYSE = Split(NYSE_List, ",")
For i = 0 To UBound(NYSE)
Dim ws As Worksheet
' Insert a new worksheet after the last one (each time)
Set ws = Worksheets.Add(after:=Worksheets(Worksheets.Count))
ws.Name = NYSE(i)
Debug.Print NYSE(i)
' assemble the variable into the string:
With ws.QueryTables.Add(Connection:= _
"URL;http://finance.yahoo.com/q/ks?s=" & NYSE(i) & "+Key+Statistics", _
Destination:=ws.Range("a1"))
' note that the range must address the proper worksheet object
.BackgroundQuery = True
.TablesOnlyFromHTML = True
.Refresh BackgroundQuery:=False
.SaveData = True
End With
Next i
Thank you for taking the time to read my request. I have tried using a few answers on this site and I am not getting what I want. (I tried this: Word VBA how to select text between two substrings and assign to variable?)
I am trying to select a number that is ALWAYS between the two same words. It is between "Account No.:" and "IMPORTANT" (yes in all caps, unsure if caps/ no-caps matters for denoting it).
I am creating a macro where I open a word document with say 200 pages. I want to open and save EACH PAGE as it's own pdf with a specific name. I have gotten the code to run where I open and save as PDF. What I want to do, is with in that code, have something that finds the text between "Account No.:" and "IMPORTANT", selects it and copies it. This text is an account number.
Then, when I go to save the file, I want it to paste the account number as the file name. Or have a reference that when it finds the account number it assigns it to a variable. I am new to VBA, so if you can please be descriptive, and put instructions in laymans terms. THANK YOU!
My macro:
Sub CutePDFWriter()
Dim FName, FPath, username, LoanNo As String
Dim wordapp As Word.Application
Dim wordDoc As Word.Document
Dim i As Integer
Dim rngParagraphs As Range
'open doc and export as a pdf
Set wordapp = CreateObject("word.Application")
Set wordDoc = wordapp.Documents.Open("G:\test.doc")
For i = 1 To wordDoc.BuiltinDocumentProperties("Number of Pages")
**Here is where I want to add the “Find and Select” code**
'set variable strings
FPath = "G:\Excel Doc Tests\"
FName = "___**Here is where I want the acct nbr to go_______"** & i & ""
wordDoc.ExportAsFixedFormat FPath & FName & "-escrtax", ExportFormat:=wdExportFormatPDF, Range:=wdExportFromTo, From:=i, To:=i
Next i
'empty word doc objects
wordDoc.Close (False)
wordapp.Quit
End Sub
I added a comment to the question at that link which makes his code work. But I spent time on this: (tested with "blah blah Account No.:123-456IMPORTANT blah blah"):
Option Explicit
Sub Sub1()
Dim i&, FName$ ' I presume
Dim i1&, i2&, s1$, rngDoc As Range
Selection.HomeKey wdStory ' ?
i1 = getPoint("Account No.:", 1) ' get start
i2 = getPoint("IMPORTANT", 2) ' get end
Set rngDoc = ActiveDocument.Range(i1, i2)
s1 = rngDoc.Text
FName = "Prefix" & s1 & "Postfix" & Str$(i)
Stop ' and hover over FName
End Sub
Function getPoint&(sText$, iStart&) ' 1 for start, 2 for end
With Selection.Find
.Text = sText
.Forward = True
.Wrap = wdFindContinue
.Format = False
.MatchCase = True
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.MatchWildcards = False
.Execute
End With
If iStart = 1 Then
getPoint = Selection.End
Else
getPoint = Selection.Start
End If
End Function
I have this code that I have tweaked below. I use it to scrape other morningstar data, but I can't seem to make it work now for "expected return" for ETFs(Exchange Traded Funds). Everything on the code right now is set up to get the data that I need but I am having a problem getting it on the excel spreadsheet. When I do a msgBox tblTR under the code:
Set tblTR = Doc.getElementsByClassName("pr_text3")(4).innerText
I get the expected value on the message box.
However, when I take the msgbox code out, the value doesn't appear in the excel spreadsheet. I have been trying to work it out for hours now and need HELP!
Below is the entire code. under tab "Tickers2" is where I have all the tickers I would like to pull data. Examples JKE, JKF, JKD...which I have about 1000. under tab "ExpectedReturn" is where I want the data to be displayed. I think it has to do with me pulling elementsbyclassname versus when I used to pull the elementsbytagname. There wasn't in tagnames in the information i needed so I switched it to class name. Below is the entire code.
I will also mention that you have to be signed in to morningstar.com in order to get the actual data, but I am assuming that the forum can point me in the right direction without needing to be signed in.
The website is www.morningstar.com
Sub ExpectedReturn()
Dim IE As Object, Doc As Object, lastRow As Long, tblTR As Object, tblTD As Object, strCode As String
lastRow = Range("A65000").End(xlUp).Row
Set IE = CreateObject("internetexplorer.application")
IE.Visible = True
last_row = Sheets("Tickers2").Range("A1").End(xlDown).Row
ini_row_dest = 1
Sheets("ExpectedReturn").Select
Sheets("ExpectedReturn").Range("A1:H10000").ClearContents
Application.ScreenUpdating = True
For i = 1 To lastRow
Application.StatusBar = "Updating upDown" & i & "/" & last_row
row_dest = ini_row_dest + (i - 1)
strCode = "Tickers2" ' Range("A" & i).value
list_symbol = Sheets("Tickers2").Range("A" & i)
IE.navigate "http://etfs.morningstar.com/quote?t=" & list_symbol
Do While IE.readyState <> 4: DoEvents: Loop
Set Doc = CreateObject("htmlfile")
Set Doc = IE.document
tryAgain:
Set tblTR = Doc.getElementsByClassName("pr_text3")(4).innerText
MsgBox tblTR
If tblTR Is Nothing Then GoTo tryAgain
On Error Resume Next
j = 2
For Each tblTD In tblTR.getElementsByTagName("td")
tdVal = Split(tblTD.innerText, vbCrLf)
Cells(i, j) = tdVal(0)
Cells(i, j + 1) = tdVal(1)
j = j + 2
Next
Sheets("ExpectedReturn").Range("A" & row_dest).Value = list_symbol
Next i
Range("A3").Select
Application.StatusBar = False
Application.Calculation = xlAutomatic
End Sub
Thank you in advance.
-Eddie
By setting
Set tblTR = Doc.getElementsByClassName("pr_text3")(4).innerText
the variable tblTR is a string. You want a dom element, so remove the .innerText
Only then you can loop over its TD-children further down.
This was my fix
tblTR=Doc.ElementsByClassName("pr_text3)(4).innerText
Sheets("ExpectedReturn").Range("B"& row_dest).Value=tblTR