I recorded a macro and tried to adapt it using a for loop with the different links I want to scrape data from.
The problem is, that VBA doesn't recognize my variable as a link. When I type in a link directly in the code, it works. I do not only need data from one link, but from 500.
Here is my code fragment:
Dim Link As String
Link = "https://coinmarketcap.com/currencies/bitcoin/historical-data/"
For i = 1 To 5
Link = Cells(i, 1)
ActiveWorkbook.Queries.Add Name:="Table 0 (3)", Formula:= _
"let" & Chr(13) & "" & Chr(10) & " Quelle = Web.Page(Web.Contents(""https://coinmarketcap.com/currencies/ontology/historical-data/""))," & Chr(13) & "" & Chr(10) & " Data0 = Quelle{0}[Data]," & Chr(13) & "" & Chr(10) & " #""Geänderter Typ"" = Table.TransformColumnTypes(Data0,{{""Date"", type date}, {""Open*"", type number}, {""High"", type number}, {""Low"", type number}, {""Close**"", type number}, {""Volume"", type number}, {""Market Cap" & _
""", type number}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & " #""Geänderter Typ"""
With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 0 (3)"";Extended Properties=""""" _
, Destination:=Range("$D$1")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [Table 0 (3)]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.PreserveColumnInfo = True
.ListObject.DisplayName = "Table_0__3"
.Refresh BackgroundQuery:=False
End With
Next
As soon as I change the link (""https://coinmarketcap.comblabla"") for the variable "link", I get an application or object defined error. When I dig deeper and click on the array, Excel tells me that the Import "link" is not connected to an export.
You can get the main historic data table and the info above with the code below. It is a little tricky and somewhat fragile as a lot of this relies on the current page styling, which can change. The historic data bit, which is an actual table, is a more robust.
You can loop using new URLs picked from cells, for example, and simply have a Sheets.Add line in at the start of each loop so you have a new Activesheet to write data to.
Below, should be enough to get you started depending on your requirements.
I get the top bit:
using
.Cells(1, 1) = IE.document.querySelector(".col-xs-6.col-sm-8.col-md-4.text-left").innerText. This is not very robust. The document's styling could be changed. However, it is not an easy part of the page to access and obtaining it will likely be vulnerable which ever method you choose currently. I am using the element's classname (".") to retrieve the information using the .querySelector method of document to apply the CSS selector .col-xs-6.col-sm-8.col-md-4.text-left. That is the same as .getElementsByClassName(0).
I get the middle bit:
With
Set aNodeList = IE.document.querySelectorAll("[class*='coin-summary'] div")
This uses the CSS selector [class*='coin-summary'] div , which are the div tags within elements' with a className containing the string 'coin-summary'.
That CSS selector returns a list so the .querySelectorAll method is used to return a nodeLIst which is then traversed.
I get the end historic data (which is an actual table), using the table tag:
Set hTable = .document.getElementsByTagName("table")(0)
I then traverse the rows, and cells within rows, of the table.
VBA:
Option Explicit
Public Sub GetInfo()
Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
Application.ScreenUpdating = False
With IE
.Visible = True
.navigate "https://coinmarketcap.com/currencies/bitcoin/historical-data/"
While .Busy Or .readyState < 4: DoEvents: Wend '<== Loop until loaded
Dim hTable As HTMLTable
Set hTable = .document.getElementsByTagName("table")(0)
Dim tSection As Object, tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long, hBody As Object
Dim headers(), headers2()
headers = Array("Date", "Open*", "High", "Low", "Close**", "volume", "Market Cap")
headers2 = Array("Market Cap", "Volume (24h)", "Circulating Supply", "Max Supply")
With ActiveSheet
.Cells.ClearContents
.Cells(1, 1) = IE.document.querySelector(".col-xs-6.col-sm-8.col-md-4.text-left").innerText
Dim aNodeList As Object, i As Long, resumeRow As Long
Set aNodeList = IE.document.querySelectorAll("[class*='coin-summary'] div")
resumeRow = .Cells(.Rows.Count, "A").End(xlUp).Row + 2
.Range("A" & resumeRow).Resize(1, UBound(headers2) + 1) = headers2
For i = 0 To aNodeList.Length - 1
.Cells(resumeRow + 1, i + 1) = aNodeList.item(i).innerText
Next i
r = .Cells(.Rows.Count, "A").End(xlUp).Row + 2
.Cells(r, 1).Resize(1, UBound(headers) + 1) = headers
Set hBody = hTable.getElementsByTagName("tbody")
For Each tSection In hBody 'HTMLTableSection
Set tRow = tSection.getElementsByTagName("tr") 'HTMLTableRow
For Each tr In tRow
r = r + 1
Set tCell = tr.getElementsByTagName("td")
c = 1
For Each td In tCell 'DispHTMLElementCollection
.Cells(r, c).Value = td.innerText 'HTMLTableCell
c = c + 1
Next td
Next tr
Next tSection
End With
'Quit '<== Remember to quit application
Application.ScreenUpdating = True
End With
End Sub
Output in sheet (sample):
Some example data from page:
This will get the data from that table.
Option Explicit
Sub Web_Table_Option_One()
Dim xml As Object
Dim html As Object
Dim objTable As Object
Dim result As String
Dim lRow As Long
Dim lngTable As Long
Dim lngRow As Long
Dim lngCol As Long
Dim ActRw As Long
Set xml = CreateObject("MSXML2.XMLHTTP.6.0")
With xml
.Open "GET", "https://coinmarketcap.com/currencies/bitcoin/historical-data/", False
.send
End With
result = xml.responseText
Set html = CreateObject("htmlfile")
html.body.innerHTML = result
Set objTable = html.getElementsByTagName("Table")
For lngTable = 0 To objTable.Length - 1
For lngRow = 0 To objTable(lngTable).Rows.Length - 1
For lngCol = 0 To objTable(lngTable).Rows(lngRow).Cells.Length - 1
ThisWorkbook.Sheets("Sheet1").Cells(ActRw + lngRow + 1, lngCol + 1) = objTable(lngTable).Rows(lngRow).Cells(lngCol).innerText
Next lngCol
Next lngRow
ActRw = ActRw + objTable(lngTable).Rows.Length + 1
Next lngTable
End Sub
You can certainly loop through an array of URLs, and iterate through each one. Where are these 500 URLs? If they are not the same as the one you provided, you may have your work cut out for you. Normally, all web sites are very different , and screen scraping is a highly customized process.
Related
I have written a quite complex VBA script to run on a Word document to
transform it into an email body (together with a bunch of other documents),
combine it with other documents and save as pdf-attachment,
open email-distribution lists
create outlook items and put the distribution lists into a bcc field and body from earlier document
The script worked quite fine until it stopped for unclear to me reason. A "Run-time error '1001': Method 'Range' of object '_Global' failed." started to occur in the "Step 3 ...", specifically in the second line:
objLista.Worksheets(1).Activate
last_row = Range("A1").End(xlDown).Row
The script activates the worksheet just fine, but fails to do anything else with it. I tried to use explicit names of the workbook and worksheet in question, but it didn't help. The same lines in different yet similar script still work well. So I find it hard to find the source of the problem and correct it. I use MS Windows 10, and Office 365.
The whole script below:
Sub script()
' >>>>>>>>>>>>>>>>>>>>>>> Step 0. Declaration of variables and paths <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
Dim Disclaimer_Path As String
Dim Email_Path As String
Dim objOutlook As Object
Dim objMail As Object
Dim ExcelApp As Excel.Application
Dim objLista As Workbook
Dim Today As String
Today = Format(Date, "yyyymmdd")
Disclaimer_Path = ...
Email_Path = "!_email.docx"
Distribution_list_Path = "!_List.xlsm"
Distribution_list_Path = "!_List_2.xlsm"
Pdf_Path = ...
Set objOutlook = CreateObject("Outlook.Application")
Set ExcelApp = New Excel.Application
' >>>>>>>>>>>>>>>>>>>>>>> Step 1. Creating e-mail body <<<<<<<<<<<<<<<<<<<<<<<<<<<
' First creating an email-body
Documents.Open FileName:=ActiveDocument.Path & "\1_News.docx"
Documents.Open FileName:=ActiveDocument.Path & "\2_Essay.docx"
Documents.Open FileName:=ActiveDocument.Path & "\3_Comment.docx"
Documents.Open FileName:=Disclaimer_Path
Documents.Open FileName:=Email_Path
Documents("1_News.docx").Activate
Selection.WholeStory
Selection.Copy
Documents("!_Email.docx").Activate
With Selection
.MoveDown
.MoveDown
.PasteAndFormat wdPasteDefault
End With
Documents("2_Essay.docx").Activate
Selection.WholeStory
Selection.Copy
Documents("!_Email.docx").Activate
Selection.PasteAndFormat wdPasteDefault
Documents("3_Comment.docx").Activate
Selection.WholeStory
Selection.Copy
Documents("!_Email.docx").Activate
With Selection
.PasteAndFormat wdPasteDefault
.WholeStory
End With
' Cleaning
Documents("2_Essay.docx").Close SaveChanges:=wdDoNotSaveChanges
Documents("3_Comment.docx").Close SaveChanges:=wdDoNotSaveChanges
Documents("Disclaimer.docx").Close SaveChanges:=wdDoNotSaveChanges
Stop
' >>>>>>>>>>>>>>>>>>>>>>>>>>>>> Step 2. Creation of pdf <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
Documents("1_News.docx").Activate
Selection.WholeStory
Selection.Copy
ThisDocument.Activate
With Selection
'.Range.Text = vbNewLine
.MoveDown
.PasteAndFormat wdPasteDefault
.Range.Text = vbNewLine
.MoveDown
End With
ActiveDocument.ActiveWindow.View.Type = wdMasterView
With ActiveDocument.Subdocuments
.AddFromFile Name:=ActiveDocument.Path & "\2_Essay.docx"
.AddFromFile Name:=ActiveDocument.Path & "\3_Comment.docx"
.AddFromFile Name:=ActiveDocument.Path & "\4_Preview.docx"
.AddFromFile Name:=ActiveDocument.Path & "\5_Comment_2.docx"
.AddFromFile Name:=ActiveDocument.Path & "\8_Calendar.docx"
.AddFromFile Name:=Disclaimer_Path
End With
'Returns to standard view
ActiveDocument.ActiveWindow.View.Type = wdPrintView
Selection.HomeKey Unit:=wdStory
Stop
ActiveDocument.ExportAsFixedFormat Pdf_Path, wdExportFormatPDF
' >>>>>>>>>>>>>>>>>>>>>>>> Step 3. Creating Distribution lists <<<<<<<<<<<<<<<<<<<<<<<<<<<<
Set objLista = ExcelApp.Workbooks.Open(Distribution_list_Path)
ExcelApp.Visible = True
Dim lista_1 As String, lista_2 As String, lista_3 As String, lista_4 As String, lista_5 As String, lista_6 As String, lista_7 As String, lista_8 As String, lista_9 As String
lista_1 = ""
lista_2 = ""
lista_3 = ""
lista_4 = ""
lista_5 = ""
lista_6 = ""
lista_7 = ""
lista_8 = ""
lista_9 = ""
objLista.Worksheets(1).Activate
last_row = Range("A1").End(xlDown).Row
For i = 1 To last_row
lista_1 = lista_1 & "; " & Cells(i, 1).Value
Next i
objLista.Worksheets(2).Activate
last_row = Range("A1").End(xlDown).Row
For i = 1 To last_row
lista_2 = lista_2 & "; " & Cells(i, 1).Value
Next i
objLista.Worksheets(3).Activate
last_row = Range("A1").End(xlDown).Row
For i = 1 To last_row
lista_3 = lista_3 & "; " & Cells(i, 1).Value
Next i
objLista.Worksheets(4).Activate
last_row = Range("A1").End(xlDown).Row
For i = 1 To last_row
lista_4 = lista_4 & "; " & Cells(i, 1).Value
Next i
objLista.Worksheets(5).Activate
last_row = Range("A1").End(xlDown).Row
For i = 1 To last_row
lista_5 = lista_5 & "; " & Cells(i, 1).Value
Next i
objLista.Worksheets(6).Activate
last_row = Range("A1").End(xlDown).Row
For i = 1 To last_row
lista_6 = lista_6 & "; " & Cells(i, 1).Value
Next i
' Now differentr set of lists
Set objLista_2 = ExcelApp.Workbooks.Open(Distribution_list_Path_2)
objLista_2.Worksheets(1).Activate
last_row = Range("A1").End(xlDown).Row
For i = 1 To last_row
lista_7 = lista_7 & "; " & Cells(i, 1).Value
Next i
objLista_2.Worksheets(2).Activate
last_row = Range("A1").End(xlDown).Row
For i = 1 To last_row
lista_8 = lista_8 & "; " & Cells(i, 1).Value
Next i
objLista_2.Worksheets(3).Activate
last_row = Range("A1").End(xlDown).Row
For i = 1 To last_row
lista_9 = lista_9 & "; " & Cells(i, 1).Value
Next i
' >>>>>>>>>>>>>>>>>>>>>>>>>>>>> Step 4. Creates e-mails <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
Documents("!_Email.docx").Activate
Selection.Copy
For Each Item In Array(lista_1, lista_2, lista_3, lista_4, lista_5, lista_6, lista_7, lista_8, lista_9)
With objOutlook.CreateItem(0)
oAccount = ""
.bcc = Item
.Subject = "Weekly"
.Attachments.add Pdf_Path
Set Editor = .GetInspector.WordEditor
Editor.Content.Paste
.Display
End With
Next Item
'Cleaning
Documents("1_News.docx").Close SaveChanges:=wdDoNotSaveChanges
Documents("!_email.docx").Close SaveChanges:=wdDoNotSaveChanges
Workbooks("!_List.xlsm").Worksheets("List_1").Activate
Range("A1").Select 'To żeby uniknąć jednego z błędów, który się wcześniej wywalał, gdy arkusz był zamykany z kursorem w innym miejscu niż "A1"
Workbooks("!_List.xlsm").Close SaveChanges:=wdDoNotSaveChanges
Workbooks("!_List_2.xlsm").Worksheets(1).Activate
Range("A1").Select 'To żeby uniknąć jednego z błędów, który się wcześniej wywalał, gdy arkusz był zamykany z kursorem w innym miejscu niż "A1"
Workbooks("!_List_2.xlsm").Close SaveChanges:=wdDoNotSaveChanges
End Sub
So I'm doing this project where I have to download historical stock data from yahoo finance. Got this code. It's working fine, BUT it only downloads max 100 rows. I tried to scan the web for answers or a different code (this one is just recorded macro from excel) but I saw a few tutorials on YouTube that use his solution and it's just fine.
.. I don't understand it then
Sub Makro6()
' Dowload stock prices from Yahoo Finance based on input
Dim ws As Worksheet
Set ws = Sheets("Data")
'clear previous queries
For Each qr In ThisWorkbook.Queries
qr.Delete
Next qr
'clear Data sheet
ws.Select
Cells.Clear
'clear graphs
'ws.ChartObjects.Delete
'stock = Sheets("Main").Range("A2")
StartDate = toUnix(Sheets("Main").Range("A4"))
EndDate = toUnix(Sheets("Main").Range("A6"))
Application.CutCopyMode = False
ActiveWorkbook.Queries.Add Name:="Table 2 (3)", Formula:= _
"let" & Chr(13) & "" & Chr(10) & " Zdroj = Web.Page(Web.Contents(""https://finance.yahoo.com/quote/" & stock & "/history?period1=" & StartDate & "&period2=" & EndDate & "&interval=1d&filter=history&frequency=1d""))," & Chr(13) & "" & Chr(10) & " Data2 = Zdroj{2}[Data]," & Chr(13) & "" & Chr(10) & " #""Změněný typ"" = Table.TransformColumnTypes(Data2,{{""Date"", type date}, {""Open"", type text}, {""High"", type text}, {""Low"", type text}, {""Close*"", type tex" & _
"t}, {""Adj Close**"", type text}, {""Volume"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & " #""Změněný typ"""
With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 2 (3)"";Extended Properties=""""" _
, Destination:=Range("$A$1")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [Table 2 (3)]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.PreserveColumnInfo = True
.ListObject.DisplayName = "Table_2_3"
.Refresh BackgroundQuery:=False
End With
Sheets("Data").Select
'' Sort data by date from oldest to newest
ws.ListObjects("Table_2_3").Sort.SortFields. _
Clear
ws.ListObjects("Table_2_3").Sort.SortFields. _
Add2 Key:=Range("A1:A99"), SortOn:=xlSortOnValues, Order:=xlAscending, _
DataOption:=xlSortNormal
With ws.ListObjects("Table_2_3").Sort
.Header = xlYes
.MatchCase = False
.Orientation = xlTopToBottom
.SortMethod = xlPinYin
.Apply
End With
Call DeleteDividends
Call Stochastics
End Sub
The code works for other websites. I tried to download Wikipedia page list of total 120 and it loaded data no problem.
The problem is the data from Yahoo finance website is a project requirement
If you check against the page you will discover only 100 results are initially present within the HTMLTable rows (tbody to be precise).
Enter the css selector [data-test="historical-prices"] tbody tr in the browser elements tab search box (F12 to open dev tools)and you will see this:
The rest of the rows are fed dynamically from a data store as you scroll down the page. Of course, your current method doesn't pick up on these. You can in fact issue an xhr request, regex out the appropriate javascript object housing all the rows, and parse with a json parser.
Here is roughly what you should currently see in response:
I use jsonconverter.bas as my json parser. Download raw code from here and add to standard module called jsonConverter . You then need to go VBE > Tools > References > Add reference to Microsoft Scripting Runtime.
#TimWilliams wrote a better unix conversion function here but I thought I would have a play at writing something different. I would advise you to stick with his as it is safer and faster.
VBA:
Option Explicit
Public Sub GetYahooData()
'< VBE > Tools > References > Microsoft Scripting Runtime
Dim json As Object, re As Object, s As String, xhr As Object, ws As Worksheet
Dim startDate As String, endDate As String, stock As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set re = CreateObject("VBScript.RegExp")
Set xhr = CreateObject("MSXML2.XMLHTTP")
stock = "AAPL"
startDate = "1534809600"
endDate = "1566345600"
With xhr
.Open "GET", "https://finance.yahoo.com/quote/" & stock & "/history?period1=" & startDate & "&period2=" & endDate & "&interval=1d&filter=history&frequency=1d&_guc_consent_skip=" & GetCurrentUnix(Now()), False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responseText
End With
s = GetJsonString(re, s)
If s = "No match" Then Exit Sub
Set json = JsonConverter.ParseJson(s)
WriteOutResults ws, json
End Sub
Public Sub WriteOutResults(ByVal ws As Worksheet, ByVal json As Object)
Dim item As Object, key As Variant, headers(), results(), r As Long, c As Long
headers = json.item(1).keys
ReDim results(1 To json.Count, 1 To UBound(headers) + 1)
For Each item In json
r = r + 1: c = 1
For Each key In item.keys
results(r, c) = item(key)
c = c + 1
Next
Next
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetJsonString(ByVal re As Object, ByVal responseText As String) As String
With re
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "HistoricalPriceStore"":{""prices"":(.*?\])" 'regex pattern to get json string
If .test(responseText) Then
GetJsonString = .Execute(responseText)(0).SubMatches(0)
Else
GetJsonString = "No match"
End If
End With
End Function
Public Function GetCurrentUnix(ByVal t As Double) As String
With CreateObject("htmlfile")
.parentWindow.execScript "function GetTimeStamp(t){return new Date(t).getTime() / 1000}", "jscript"
GetCurrentUnix = .parentWindow.GetTimeStamp(Now)
End With
End Function
Regex:
Python:
I initially wrote as python if of interest:
import requests, re, json
from bs4 import BeautifulSoup as bs
p = re.compile('HistoricalPriceStore":{"prices":(.*?\])')
r = requests.get('https://finance.yahoo.com/quote/AAPL/history?period1=1534809600&period2=1566345600&interval=1d&filter=history&frequency=1d&_guc_consent_skip=1566859607')
data = json.loads(p.findall(r.text)[0])
I want to copy data from xml file to excel workbook
Every xml has got tag "Client name" but some of them also have got tag "Billing client name"
I want to achieve this with my code
If in xml tag "Billing client name" exists use it and ignore tag "Client name"
if in xml there is no tag "Billing client name" use data from tag "Client name"
My current code only paste data from tag "Client name"
My code:
Dim mainWorkBook As Workbook
Set mainWorkBook = ActiveWorkbook
mainWorkBook.Sheets("Sheet3").Activate
LR = Cells(Rows.Count, "A").End(xlUp).Row
'MsgBox LR
For x = 2 To LR
Set xmlDoc = CreateObject("Microsoft.XMLDOM")
xmlDoc.SetProperty "SelectionLanguage", "XPath"
xmlDoc.Async = False
xmlDoc.Load ("\\path\" & Range("A" & x) & ".xml")
Set nodeXML = xmlDoc.getElementsByTagName("BILL_TO_CLIENTCODE")
Set nodeXML2 = xmlDoc.getElementsByTagName("CLIENTCODE")
If nodeXML Is Nothing Then
Range("B" & x).Value = nodeXML(i).Text
Else
Range("B" & x).Value = nodeXML2(i).Text
End If
Next x
If I change last lines of code to:
If nodeXML Is Nothing Then
Range("B" & x).Value = nodeXML2(i).Text
Else
Range("B" & x).Value = nodeXML(i).Text
End If
I only get value from tag "Billing client name" and no value if this tag doesn't exists
sample xml
<InvoiceData xmlns="http://tempuri.org/InvoiceData.xsd">
<INVOICE_HEADER>
<BILL_TO_CLIENTCODE/>
<CLIENTCODE>61138259</CLIENTCODE>
second one has got value in tag Bill to clientcode and in clientcode but Bill to clientcode should be primary.
xmlDoc.getElementsByTagName always return a collection, but that collection may be empty if there are no matches, so you need to test for (eg)
If nodeXML.Length > 0 Then
and not check whether nodeXML is Nothing
So something like this should work (untested):
Sub Tester()
Dim mainWorkBook As Workbook, xmlDoc As Object
Dim sht As Worksheet
Set mainWorkBook = ActiveWorkbook
Set sht = mainWorkBook.Sheets("Sheet3")
For x = 2 To sht.Cells(sht.Rows.Count, "A").End(xlUp).Row
Set xmlDoc = CreateObject("Microsoft.XMLDOM")
xmlDoc.SetProperty "SelectionLanguage", "XPath"
xmlDoc.Async = False
xmlDoc.Load ("\\path\" & Range("A" & x) & ".xml")
sht.Range("B" & x).Value = PreferredValue(xmlDoc, _
Array("BILL_TO_CLIENTCODE", "CLIENTCODE"))
Next x
End Sub
Function PreferredValue(doc As Object, arrTags)
Dim t, col, rv
For Each t In arrTags
Set col = doc.getElementsByTagName(t)
If col.Length > 0 Then
rv = col(0).Text
If Len(rv) > 0 Then Exit For '<<<< EDIT
End If
Next t
PreferredValue = rv
End Function
I am using this code for my work but I got a problem, when I replace number it doesn't copy format. For example: In Excel 999.999 when replace in Word 999999. Could someone help me solving this ?
Sub test22()
Dim num_of_row As Long
Dim num_of_column As Long
Dim i As Long, j As Long
Dim template As Object
Dim t As Object
Dim name As String
num_of_column = 20
num_of_row = ActiveSheet.Cells(Rows.Count, "A").End(xlUp).Row - 1
With CreateObject("word.application") ' late binding
.Visible = False
For i = 1 To num_of_row
Set template = .documents.Open("D:\zzz LINH TINH\02. Test VBA\01. LRAMP\Template_KHQLMT.docx")
Set t = template.Content
For j = 1 To num_of_column
t.Find.Execute _
FindText:=ActiveSheet.Cells(1, j).Value, _
ReplaceWith:=ActiveSheet.Cells(i + 1, j).Value, _
Replace:=wdReplaceAll
Next
template.SaveAs Filename:=ThisWorkbook.Path & Application.PathSeparator & i & "-KHQLMT " & ".docx"
Next
.Quit
End With
Set t = Nothing
Set template = Nothing
End Sub
I decided to learn VBA two weeks ago, and it's gone rather smooth. Now, however, I've encountered a problem I can't seem to solve on my own.
I've set up an excel document containing various modules. One of these modules extracts comments from a word document over to the excel sheet - which works as intended.
The problem is, I haven't been able to extract the first numbered header above each comment, which I'd very much like. Currently I have to do this manually after extracting the comments. As an example, I would like to also extract the first header and number above each comment, such as '2.1.1 Title'. If the comment is highlighting the header itself, it should be that header which is extracted as well.
I've tried a variety of things based on what I could find online, but every time I'm met with a variety of bugs I can't seem to fix. I've yet to find something that even sorta works. I did try one method which apparently should work in Word VBA, but I couldn't get it working within Excel.
Does anyone know how I would go about extracting the numbered headers? Any hints or tips will be greatly appreciated.
This is the code I have for the module:
Sub ImportCommentsDOCX()
Dim wdDoc As Object
Dim wdFileName As Variant
Dim i As Integer
wdFileName = Application.GetOpenFilename("Word files (*.docx),*.docx", , _
"Browse for file to be imported")
If wdFileName = False Then Exit Sub '(user cancelled import file browser)
Set wdDoc = GetObject(wdFileName) 'open Word file
'1: if no comments'
With wdDoc
If wdDoc.Comments.Count = 0 Then
MsgBox ("No comments")
End If
'2; Set excel headers'
Range("B" & 1).Value = "Number"
Range("B" & 1).Font.Bold = True
Range("C" & 1).Value = "Comment"
Range("C" & 1).Font.Bold = True
Range("D" & 1).Value = "Highlighted text"
Range("D" & 1).Font.Bold = True
Range("E" & 1).Value = "Initials"
Range("B" & 1).Font.Bold = True
Range("F" & 1).Value = "Date (*Imprecise)"
Range("F" & 1).Font.Bold = True
'3: Extract comments and meta data'
For i = 1 To wdDoc.Comments.Count
Range("B" & 1 + i).Value = wdDoc.Comments(i).Index
Range("C" & 1 + i).Value = wdDoc.Comments(i).Range
Range("D" & 1 + i).Value = wdDoc.Comments(i).Scope.FormattedText
Range("E" & 1 + i).Value = wdDoc.Comments(i).Initial
Range("F" & 1 + i).Value = Format(wdDoc.Comments(i).Date, "dd/MM/yyyy") 'Unreliable: Sometimes gives wrong date'
'Range("G" & 3 + i).Value = wdDoc.Comments(i).Range.ListFormat.ListString 'Returns empty'
Next i
End With
Set wdDoc = Nothing
MsgBox ("Extraction has completed")
End Sub
Here is your code with some adjustments:
Sub ImportCommentsDOCX()
Dim wdDoc As Object
Dim wdFileName As Variant
Dim i As Integer
wdFileName = Application.GetOpenFilename("Word files (*.docx),*.docx", , _
"Browse for file to be imported")
If wdFileName = False Then Exit Sub '(user cancelled import file browser)
Set wdDoc = GetObject(wdFileName) 'open Word file
'1: if no comments'
With wdDoc
wdDoc.Activate ' Added
If wdDoc.Comments.Count = 0 Then
MsgBox ("No comments")
End If
'2; Set excel headers'
Range("B" & 1).Value = "Number"
Range("B" & 1).Font.Bold = True
Range("C" & 1).Value = "Comment"
Range("C" & 1).Font.Bold = True
Range("D" & 1).Value = "Highlighted text"
Range("D" & 1).Font.Bold = True
Range("E" & 1).Value = "Initials"
Range("E" & 1).Font.Bold = True ' Modified
Range("F" & 1).Value = "Date (*Imprecise)"
Range("F" & 1).Font.Bold = True
'3: Extract comments and meta data'
For i = 1 To wdDoc.Comments.Count
Range("B" & 1 + i).Value = wdDoc.Comments(i).Index
Range("C" & 1 + i).Value = wdDoc.Comments(i).Range
Range("D" & 1 + i).Value = wdDoc.Comments(i).Scope.FormattedText
Range("E" & 1 + i).Value = wdDoc.Comments(i).Initial
Range("F" & 1 + i).Value = Format(wdDoc.Comments(i).Date, "dd/MM/yyyy") 'Unreliable: Sometimes gives wrong date'
'Range("G" & 1 + i).Value = wdDoc.Comments(i).Scope.ListFormat.ListString 'Returns empty' ' Modified ' Updated
Dim wp As Word.Paragraph: Set wp = wdDoc.Comments(i).Scope.Paragraphs(1) ' Updated
Do While wp.Range.ListFormat.ListString = "" ' Updated
Set wp = wp.Previous ' Updated
Loop ' Updated
Range("G" & 1 + i).Value = wp.Range.ListFormat.ListString ' Updated
Next i
End With
Set wdDoc = Nothing
MsgBox ("Extraction has completed")
End Sub
Please note my comments: Added and Modified
wdDoc.Activate was required at least on my computer, otherwise the
Range property is empty.
After initials a wrong column was bolded
The original text is referred to by the Range property, not the Scope (which is the content of the comment), so its ListFormat property should be used
The row index was not correct (3 instead of 1)
Looks working for me:
This requires Microsoft VBScript Regular Expression 5.5
Sub commentaires()
Dim regexOne As Object
Set regexOne = New RegExp
regexOne.Pattern = "^\d+\."
Dim s As String, s1 As String
Dim cmt As Word.Comment
Dim doc As Word.Document
For Each cmt In ActiveDocument.Comments
Dim wp As Word.Paragraph
Set wp = cmt.Scope.Paragraphs(1) ' Updated
Do While Not regexOne.Test(wp.Range.ListFormat.ListString)
Set wp = wp.Previous ' Updated
Loop ' Updated
s = s & _
wp.Range.ListFormat.ListString & ";" & _
cmt.Reference.Information(wdActiveEndAdjustedPageNumber) & ";""" & _
cmt.Scope & """;""" & _
cmt.Range.Text & """ " & vbCr
Next
Dim f As Integer
f = FreeFile
Open "c:\comments.csv" For Output As #f
Print #f, s
Close #f
End Sub