Find and copy numbered paragraphs into individual cells in Excel - excel

I want to copy numbered paragraphs from Word to Excel.
I want to copy below paragraphs into individual cells in Excel
heading
this is paragraph
heading 2
this is paragraph
heading 3
this is paragraph
this is itself a paragraph
I am able to count the number of numbered items via below function but stuck in copy and break till another numbered item.
count_observations = ActiveDocument.Content.ListFormat.CountNumberedItems(Level:=1)

Thanks, I finaly figured it out by researching and searching the other posts. here's the below solution i've come up with.
Sub AutoDatabase()
excel_path = "D:/test.xlm"
Dim obs_range1 As Word.Range
Dim obs_range2 As Word.Range
Dim response_range1 As Word.Range
Dim response_range2 As Word.Range
Dim obs As String
Dim response As String
Dim Z As Integer
Set obs_range1 = ActiveDocument.Range
Set obs_range2 = ActiveDocument.Range
Set response_range1 = ActiveDocument.Range
Set response_range2 = ActiveDocument.Range
x = ActiveDocument.Content.ListFormat _
.CountNumberedItems(Level:=1)
For y = 1 To x
If obs_range1.Find.Execute(FindText:="CopyThis") Then
Set obs_range2 = ActiveDocument.Range(obs_range1.End, ActiveDocument.Range.End)
If obs_range2.Find.Execute(FindText:="CopyThis") Then
obs = ActiveDocument.Range(obs_range1.End, obs_range2.Start).Text
If response_range1.Find.Execute(FindText:="Branch Comments") Then
Set response_range2 = ActiveDocument.Range(response_range1.End, ActiveDocument.Range.End)
If response_range2.Find.Execute(FindText:="CopyThis") Then
response = ActiveDocument.Range(response_range1.End, response_range2.Start).Text
'MsgBox (response)
With ActiveDocument.Tables(1).Rows.Add
With ActiveDocument.Tables(1).Cell(y, 1).Range
.Text = obs
End With
With ActiveDocument.Tables(1).Cell(y, 2).Range
.Text = response
End With
End With
End If
End If
End If
End If
'MsgBox (obs)
Next y
' Remove ^p from table(1)
With ActiveDocument.Tables(1).Range
.Find.Execute FindText:="^p", ReplaceWith:="", Replace:=wdReplaceAll
End With
End Sub

Related

How to pull data as numbers from Word to Excel using VBA?

I'm trying to pull data from tables in a Word document to Excel. I'm able to pull it as text but I don't know how to pull the numbers as numbers.
Sub extractData()
Dim wd As New Word.Application
Dim doc As Word.Document
Dim sh As Worksheet
wd.Visible = True
Set doc = wd.Documents.Open(ActiveWorkbook.Path & "C:\Users\itays\Desktop\TTd.docx")
Set tbl = doc.Tables
Set sh = ActiveSheet
For i = 1 To 17
sh.Cells(i, 1).Value = tbl(5).Rows(i).Cells(1).Range.Text
Next
For i = 1 To 17
sh.Cells(i, 2).Value = tbl(5).Rows(i).Cells(2).Range.Text
Next
Range("a:e").Columns.AutoFit
doc.Close
End Sub
Basically, I need the second For command to pull the data as a number and not as a text.
Word handles text, not numbers. You have to make sure that the text in the second column comes out as a number by converting it to the correct data type. First you have to strip out the text you cannot convert, like linebreaks and table formatting. There are several ways to do this, the following is my example. Trim removes whitespace, Val keeps just the digits, CLng converts it to a Long.
sh.Cells(i, 2).Value = CLng(Val(Trim(tbl(1).Rows(i).Cells(2).Range.Text)))
By the way, the path when you open the Word document looks really weird?
EDIT
You need to clean the data before converting. Adding a Replace-command to change the commas to periods, then convert to a Double instead of a Long to handle the decimal value with CDbl:
sh.Cells(i, 2).Value = CDbl(Val(Trim(Replace(tbl(1).Rows(i).Cells(2).Range.Text, ",", "."))))
Try:
Sub ExtractData()
Application.ScreenUpdating = False
Dim wdApp As New Word.Application, wdDoc As Word.Document, r As Long
Set wdDoc = wdApp.Documents.Open(ActiveWorkbook.Path & "\TTd.docx")
With wdDoc.Tables(5)
For r = 1 To 17
ActiveSheet.Cells(r, 1).Value = Split(.Cell(r, 1).Range.Text, vbCr)(0)
ActiveSheet.Cells(r, 2).Value = Split(.Cell(r, 2).Range.Text, vbCr)(0)
Next
End With
wdDoc.Close False: wdApp.Quit
Range("a:e").Columns.AutoFit
Application.ScreenUpdating = True
End Sub

Extracting string values containing a certain key word from Word doc to Excel with the page number

I am new to VBA and trying to extract some string values that contain a certain key word from a Word document to Excel. For example, there are country codes such as USA.001.01.033592 and I want to extract all the string values look like the country code from the word doc and collect them into a Excel spreadsheet.
The country codes I'm looking at as a format of
USA.xxx.xx.xxxxxx
JPA.xxx.xx.xxxxxx
FRA.xxx.xx.xxxxxx
The Xs represent numbers and the problem is theses codes are in the main body paragraphs, tables within the paragraphs, and the footnotes. Also, when I retrieve the codes, I also want to extract the page number.
Is there any way I can extract the data I want from the main paragraphs, tables, and footnotes at once with their page number?
I have a rough draft of my code but it's not working at all. Could anyone please help?
Here's my code:
Option Explicit
Sub Footnotes()
Dim appExcel As Object
Dim objSheet As Object
Dim aRange As Range
Dim intRowCount As Integer
intRowCount = 1
Set aRange = ActiveDocument.Range
With aRange.Find
Do
.Text = "USA." or "JPA." or "FRA."
.Execute
If .Found Then
aRange.Expand Unit:=wdSentence
aRange.Copy
aRange.Collapse wdCollapseEnd
If objSheet Is Nothing Then
Set appExcel = CreateObject("Excel.Application")
Set objSheet = appExcel.workbooks.Open("C:\Users\Footnotes.xlsx").Sheets("Sheet1")
intRowCount = 1
End If
objSheet.Cells(intRowCount, 1).Select
objSheet.Paste
intRowCount = intRowCount + 1
End If
Loop While .Found
End With
If Not objSheet Is Nothing Then
appExcel.workbooks(1).Close True
appExcel.Quit
Set objSheet = Nothing
Set appExcel = Nothing
End If
Set aRange = Nothing
End Sub
currently, I'm trying to use VBA from the word document but if it's better to start from the excel file please let me know.
As you have a distinct pattern to search for you can use Word's Find with wildcards. There is a good reference for that at the Word MVP website. This will enable Find to return the entire string that you are looking for without needing to extend the found range.
Once you have the found range you can then retrieve the text to pass to Excel and obtain the page number using the Information property.
A Word document consists of a number of parts referred to as StoryRanges. Whilst tables are just part of the range that contains them, footnotes are contained in a separate StoryRange. The code below loops through the StoryRanges and checks which type the current one is. I have done it this way so that you can add other types if you need to.
It wasn't stated in your question what you wanted to do with the page number so you'll need to modify the code below for that.
Sub Footnotes()
Dim appExcel As Excel.Application
Dim objSheet As Excel.Worksheet
Dim findRange As Range
Dim intRowCount As Integer
Dim pageNum As Long
If objSheet Is Nothing Then
Set appExcel = CreateObject("Excel.Application")
Set objSheet = appExcel.workbooks.Open("C:\Users\Footnotes.xlsx").Sheets("Sheet1")
End If
intRowCount = 2
'Set findRange = ActiveDocument.Range
For Each findRange In ActiveDocument.StoryRanges
With findRange.Find
.Text = "[UJF][PRS]A.[0-9]{3}.[0-9]{2}.[0-9]{6}"
.MatchWildcards = True
Do While .Execute = True
pageNum = CLng(findRange.Information(wdActiveEndPageNumber))
objSheet.Cells(intRowCount, 1).Value = findRange.Text
objSheet.Cells(intRowCount, 2).Value = pageNum
intRowCount = intRowCount + 1
findRange.Collapse wdCollapseEnd
Loop
End With
Next findRange
If Not objSheet Is Nothing Then
appExcel.workbooks(1).Close True
appExcel.Quit
Set objSheet = Nothing
Set appExcel = Nothing
End If
Set findRange = Nothing
End Sub
EDIT:
The code above only finds the country codes listed in the question. To find any country code change Find.Text to "[A-Z]{3}.[0-9]{3}.[0-9]{2}.[0-9]{6}"

Word Document - Replace text in Header Table with Mergefields

I have a set of documents (lots) where the header in each has a table in the header with hardcoded address entries. I need to update all these documents to replace these hard-coded address with mergefields.
The code is in an excel spreadsheet where the user selects the folder containing the documents to update. Below is an extract of where the updating is done e.g. trying to replace hardcoded value of 1 Maple Road with {MERGEFIELD Address_Line1}. Not sure where i'm going wrong but message is usually wrong number of arguments or does not work at all
Thanks
Dim doc As Word.Document
Dim hf As Word.HeaderFooter
Dim lr As ListRow
Dim updated As Boolean
Dim tableCount As Integer
Dim t As Integer
Dim c As Cell
Set wd = New Word.Application
Set doc = wd.Documents.Open(Filename:="c:/......./example.docx", ReadOnly:=False)
For Each hf In doc.Sections(1).Headers()
tableCount = hf.Range.Tables.Count
For t = 1 To tableCount
For Each c In hf.Range.Tables(t).Range.Cells
If InStr(1, c.Range.Text, "1 Maple Road") > 0 Then
c.Range.Text = ""
c.Range.Select
doc.Fields.Add Range:=Selection.Range, Type:=wdFieldEmpty, PreserveFormatting:=True, Text:="MERGEFIELD Address_line1"
End If
Next c
Next t
Next hf
doc.Close False
wd.Quit False
Or tried
Dim doc As Word.Document
Dim hf As Word.HeaderFooter
Dim lr As ListRow
Dim updated As Boolean
Dim tableCount As Integer
Dim t As Integer
Dim c As Cell
Set wd = New Word.Application
Set doc = wd.Documents.Open(Filename:="c:/......./example.docx", ReadOnly:=False)
For Each hf In doc.Sections(1).Headers()
tableCount = hf.Range.Tables.Count
For t = 1 To tableCount
For Each c In hf.Range.Tables(t).Range.Cells
If InStr(1, c.Range.Text, "1 Maple Road") > 0 Then
c.Range.Text = ""
c.Range.Select
Selection.Fields.Add Range:=Selection.Range, Type:=wdFieldEmpty, PreserveFormatting:=True
Selection.TypeText Text:="MERGEFIELD Address_Line1"
End If
Next c
Next t
Next hf
doc.Close False
wd.Quit False
Instr is not reliable where tables, fields, and so on are involved. Furthermore, in your code, Selection.Range is referring to an Excel selection! To refer to a Word selection you'd need wd.Selection.Range. In any event there is no need to select anything. Try:
For Each hf In doc.Sections(1).Headers
With hf.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "1 Maple Road"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchWildcards = True
.Execute
End With
If .Find.Found = True Then
.Fields.Add .Range, wdFieldEmpty, "MERGEFIELD Address_line1", False
End If
End With
Next
Apologies making it clearer. In the document header there is a table (on right hand side) with 3 cells. The second on has hardcoded addresses e.g.
1 Maple Road
SomeTown
SomeCity SomePostCode
I need to replace the contents of this cell with mergefields e.g.
MERGEFIELD Address_Line1
MERGEFIELD Address_Line2
MERGEFIELD Address_City MERGEFIELD Address_PostCode
(so long as the hardcoded entries match the specified Road,Town, City and PostCode)
The is a batch job done in excel VBA that targets one folder at a time containing a number of documents to update.
The formatting also needs to be retained
Thanks

Finding heading of chapters in word file and copying individual paragraphs to new word file with VBA

Since nobody was able to help me with the problem I posted here before (link is below), I am now trying to solve the task through VBA.
Finding a heading in word file and copying entire paragraph thereafter to new word file with python
To briefly recap, I have a large amount of word files, which I would like to reduce to a more readable size each. In each file, there is one heading that appears several times, always formated as a 'Heading 2'. I look for this specific heading which occurs several times in a document and I want to copy all the text parts in just these chapters with the respective heading to a new word document.
I decided to create an excel file in which I list the files and the respective heading of the chapters that I want to copy (see picture below).
To do this now I have written the following code:
Sub SelectData()
Application.ScreenUpdating = False
Dim WdApp As Word.Application
Set WdApp = CreateObject("Word.Application")
Dim Doc As Word.Document
Dim NewDoc As Word.Document
Dim HeadingToFind As String
Dim ChapterToFind As String
Dim StartRange As Long
Dim EndRange As Long
Dim WkSht As Worksheet
Dim LRow As Long
Dim i As Long
Set WkSht = ThisWorkbook.Sheets("Sheet1")
LRow = WkSht.UsedRange.Cells.SpecialCells(xlCellTypeLastCell).Row
With WkSht
For i = 1 To LRow
If Dir(.Cells(i, 1).Text, vbNormal) = "" Then
.Cells(i, 3).Value = "Please check File Location"
Else
Set Doc = WdApp.Documents.Open(Filename:=.Cells(i, 1).Text, _
AddToRecentFiles:=False, Visible:=False, ReadOnly:=False)
Set NewDoc = Documents.Add
ChapterToFind = LCase(.Cells(i, 2).Text)
With Doc
Selection.HomeKey Unit:=wdStory
With Selection
With .Find
.ClearFormatting
.Text = ChapterToFind
.MatchWildcards = False
.MatchCase = True
.Execute
End With
If .Find.Found Then
.Collapse wdCollapseStart
With .Find
.Text = ""
.Style = "Heading 2"
.Forward = False
.Execute
End With
.MoveDown Count:=1
.HomeKey Unit:=wdLine
StartRange = .Start
.Find.Forward = True
.Find.Execute
.Collapse wdCollapseStart
.MoveUp Count:=1
.EndKey Unit:=wdLine
EndRange = .End
Doc.Range(StartRange, EndRange).Copy
NewDoc.Content.Paste
NewDoc.SaveAs2 Doc.Path & "Clean" & ".docx", wdFormatFlatXML
Else
WkSht.Cells(i, 4).Value = "Error Chapter Not Found"
End If
End With
End With
WdApp.Quit
Set Doc = Nothing: Set NewDoc = Nothing: Set WdApp = Nothing: Set WkSht = Nothing
Application.ScreenUpdating = True
End If
Next
End With
End Sub
However I am really struggling. It seems to not work as I constantly get an error with the command (RunTimeError 438):
Selection.HomeKey Unit:=wdStory
I am aware that I have to activate the Microsoft Word 15.0 Object Library in the references to be able to get word commands. Nevertheless it is not working.
I would greatly appreciate any help, I am also open to other suggestions of course.
The word files look something like in the picture below, however the chapter that I want to extract can occur several times within one word document. As a result my code would probably need a loop or something, I was not able to get this done.
Also I have considered the following links to the topic:
Use VBA with Powerpoint to Search titles in a Word Doc and Copy Text into another Word Document
VBA: open word from excel
word vba: select text between headings
Did I understand this correctly? The following code is the core of what I think you're trying to do. It finds the first Heading 2, then finds all the paragraphs after it until it finds another header of any type or the end of the document. startCopyRange and endCopyRange is the range of those paragraphs. You'll have to piece this into your Excel routine.
A few notes. Always save the active document to a variable and work from that; the user is then free to change active documents while this routine is running. Never use Selection, always use ranges. Never use relative movements like Move, always use API calls.
Sub SelectData()
Dim Doc As Word.Document
Set Doc = ActiveDocument
Dim findRange As Range
Set findRange = Doc.Range
ChapterToFind = "My Chapter"
findRange.Find.Text = ChapterToFind
findRange.Find.Style = "Heading 2"
findRange.Find.MatchCase = True
Dim startCopyRange As Long
Dim endCopyRange As Long
Do While findRange.Find.Execute() = True
startCopyRange = findRange.End + 1
endCopyRange = -1
'findRange.Select
Dim myParagraph As Paragraph
Set myParagraph = findRange.Paragraphs(1).Next
Do While Not myParagraph Is Nothing
myParagraph.Range.Select 'Debug only
If InStr(myParagraph.Style, "Heading") > 0 Then
endCopyRange = myParagraph.Range.Start - 0
End If
If myParagraph.Next Is Nothing Then
endCopyRange = myParagraph.Range.End - 0
End If
If endCopyRange <> -1 Then
Doc.Range(startCopyRange, endCopyRange).Select 'Debug only
DoEvents
Exit Do
End If
Set myParagraph = myParagraph.Next
DoEvents
Loop
Loop
End Sub

Word Macro To Delete Pages With Specific Text

I'm trying to delete any page that contains a specific text such as the sentence below (strSearch =). But I get a 5904 Error when I try running my macro... Any clue?
Sub DeletePages()
Dim strSearch As String
Dim rgeStart As Range
Set rgeStart = Selection.Range
strSearch = "Report the content of the ""StatusBar"" status bar message to the results."
With ActiveDocument.Range.Find
.Text = strSearch
Do While .Execute
With .Parent
.Select With Selection
.Bookmarks("\Page").Range.Delete
End With
End With
Loop
End With
rgeStart.Select
Application.Browser.Target = wdBrowsePage
End Sub
I get a syntax error on .Select With Selection, and there is no intellisense with this line...it seems that objects are not able to access properties and methods because with statements are embedded in one another.
The following worked for me...I had to delete the last page differently
Sub DeletePages()
'source1 http://stackoverflow.com/questions/13465709/repeating-microsoft-word-vba-until-no-search-results-found
'source2 https://msdn.microsoft.com/en-us/library/bb208876(v=office.12).aspx
Dim strSearch As String
strSearch = "GoodBye"
With Selection.Find
.Forward = True
.Wrap = wdFindStop
.Text = strSearch
.Execute
End With
Do While Selection.Find.Found = True And iCount < 1000
iCount = iCount + 1
Selection.HomeKey Unit:=wdStory
Selection.Find.Execute
If Selection.Find.Found Then
'Get Current page
CurPage = Selection.Information(wdActiveEndAdjustedPageNumber)
'Check if current page is the last page
If CurPage = FindLastPage Then
ActiveDocument.Range(Selection.Start, ActiveDocument.Range.End).Delete
Else
ActiveDocument.Bookmarks("\Page").Range.Delete
End If
End If
Loop
End Sub
Function FindLastPage()
'Source https://support.microsoft.com/en-us/kb/293861
'Iterate each section in the document to retrieve the end page of the
'document and compute the page count in that section. The results are
'displayed in the Immediate window.
Dim oSec As Object
Dim nStartPg As Integer, nEndPg As Integer, nSecPages As Integer
Dim NumSections As Integer
NumSections = ActiveDocument.Sections.Count
nStartPg = 1
For Each oSec In ActiveDocument.Sections
nEndPg = oSec.Range.Information(3) - 1 'wdActiveEndPageNumber=3
'Account for the last page.
If oSec.Index = NumSections Then nEndPg = nEndPg + 1
nSecPages = nEndPg - nStartPg + 1
FindLastPage = nSecPages
Next
End Function

Resources