How to finda text and get the page no. for acrobat using vba - excel

I want to find the text and get the page number of text found in acrobat using VBA, I am able to find the text but not able to get the page number. for that
Sub Main()
Dim acrApp, acrAVDoc
Set acrApp = CreateObject("AcroExch.app")
Set acrAVDoc = CreateObject("AcroExch.AVDoc")
acrApp.Show
If acrAVDoc.Open("FileName", "") Then
Ok = acrAVDoc.FindText("Text to search", 0, 1, 1)
MsgBox (Ok)
End If
Set acrAVDoc = Nothing
Set acrApp = Nothing
End Sub
I am not able to set the object for
Set acrPDDoc = CreateObject("Acrobat.AV_PAGE_VIEW")

I know this is an old question, but it was one of the top search results when I was looking for the same info. I never found anything that truly met my needs so I made something up by combining several different resources.
The function below is acceptably fast, even on very large documents. It searches page by page, not word by word, so it will find multi-word matches and words with dashes (case insensitive). It returns the matches for all pages separated by commas.
Hope this is helpful to someone in the future.
Sub Demo()
Dim SearchResult As String
SearchResult = AdobePdfSearch("my search string", "C:\Demo\Demo.pdf")
MsgBox SearchResult
End Sub
Function AdobePdfSearch(SearchString As String, strFileName As String) As String
'Note: A Reference to the Adobe Library must be set in Tools|References!
'Note! This only works with Acrobat Pro installed on your PC, will not work with Reader
Dim AcroApp As CAcroApp, AcroAVDoc As CAcroAVDoc, AcroPDDoc As CAcroPDDoc
Dim AcroHiliteList As CAcroHiliteList, AcroTextSelect As CAcroPDTextSelect
Dim PageNumber, PageContent, Content, i, j, iNumPages
Dim strResult As String
Set AcroApp = CreateObject("AcroExch.App")
Set AcroAVDoc = CreateObject("AcroExch.AVDoc")
If AcroAVDoc.Open(strFileName, vbNull) <> True Then Exit Function
Set AcroPDDoc = AcroAVDoc.GetPDDoc
iNumPages = AcroPDDoc.GetNumPages
For i = 0 To iNumPages - 1
Set PageNumber = AcroPDDoc.AcquirePage(i)
Set PageContent = CreateObject("AcroExch.HiliteList")
If PageContent.Add(0, 9000) <> True Then Exit Function
Set AcroTextSelect = PageNumber.CreatePageHilite(PageContent)
' The next line is needed to avoid errors with protected PDFs that can't be read
On Error Resume Next
For j = 0 To AcroTextSelect.GetNumText - 1
Content = Content & AcroTextSelect.GetText(j)
Next j
If InStr(1, LCase(Content), LCase(SearchString)) > 0 Then
strResult = IIf(strResult = "", i + 1, strResult & "," & i + 1)
End If
Content = ""
Next i
AdobePdfSearch = strResult
'Uncomment the lines below if you want to close the PDF when done.
'AcroAVDoc.Close True
'AcroApp.Exit
'Set AcroAVDoc = Nothing: Set AcroApp = Nothing
End Function

sub checks each page of pdf, word by word
Sub FindtextandPageNumber()
Dim FindWord 'Word you want to search
Dim acroAppObj As Object
Dim PDFDocObj As Object
Dim myPDFPageHiliteObj As Object
Dim iword As Integer, iTotalWords As Integer
Dim numOfPage As Integer, Nthpage As Integer
Dim word As String, sPath As String
Set acroAppObj = CreateObject("AcroExch.App")
Set PDFDocObj = CreateObject("AcroExch.PDDoc")
Set myPDFPageHiliteObj = CreateObject("AcroExch.HiliteList")
Check3 = myPDFPageHiliteObj.Add(0, 32767)
FindWord = "Hello"
acroAppObj.Show
sPath = "Test.pdf" 'Path of pdf where you want to search
PDFDocObj.Open (sPath)
numOfPage = PDFDocObj.GetNumPages
word = vbNullString
Set PDFJScriptObj = Nothing
For Nthpage = 0 To numOfPage - 1
Set pAcroPDPage = PDFDocObj.AcquirePage(Nthpage)
Set wordHilite = pAcroPDPage.CreateWordHilite(myPDFPageHiliteObj)
Set PDFJScriptObj = PDFDocObj.GetJSObject
iTotalWords = wordHilite.GetNumText
iTotalWords = PDFJScriptObj.getPageNumWords(Nthpage)
''check the each word
For iword = 0 To iTotalWords - 1
word = Trim(CStr(PDFJScriptObj.getPageNthWord(Nthpage, iword)))
If word <> "" Then
If word = FindWord Then
PageNumber = Nthpage
msgbox PageNumber
End If
word = ""
End If
Next iword
Next Nthpage
End Sub

Related

Copy and paste picture from excel to word

I am trying to try another method that is not to export the images from excel and then import them to word.
This method makes use of copy and paste, however I have encountered a problem using different versions of Office. In some it pastes it as InlineShape and in another as Shape.
I don't know how to correctly reference a variable in the pasted image. I thought I could use something like set object = selection after pasting the image but it doesn't work.
The purpose of referencing it is to add a text that allows me to delete it if I insert an update of the same image.
For the inlineshape I have solved it using the InlineShape.Range.BookmarkID property but if it is a Shape object I don't know the way.
Could anyone help me?
Code:
Sub Copy_Paste_Image_Bookmark(sBookmark As String, sImage As String, Optional sSheet As String, Optional sWorkbook As String)
Dim xlApp As Excel.Application, xlWrk As Excel.Workbook, xlSht As Excel.Worksheet
Dim oShp As Excel.Shape
Set xlApp = GetObject(, "Excel.Application")
Set xlWrk = xlApp.Workbooks(sWorkbook)
Set xlSht = xlWrk.Worksheets(sSheet)
xlSht.Shapes(sImage).Copy
'Control for word
Dim docWord As Word.Document
Dim oBookmark As Bookmark, rBookmark As Word.Range, oInLiShp As Word.InlineShape
Dim lInLiShapes As Long, idx As Long, lInLiShapes_old As Long
Dim lShapes As Long, lShapes_old As Long, bIsInlineShape As Boolean, bIsShape As Boolean
Dim oShape As Word.Shape, oShapes As Word.Shapes
Set docWord = ThisDocument
'If exists bookmark
If docWord.Bookmarks.Exists(sBookmark) Then
Set oBookmark = docWord.Bookmarks(sBookmark)
Set rBookmark = oBookmark.Range
'Delete previous text
'rBookmark.MoveEndUntil Chr(46), wdForward 'chr(12) jump page
rBookmark.Expand Unit:=wdParagraph
rBookmark.MoveEnd Unit:=wdCharacter, Count:=-1
If StrComp(rBookmark.Text, "Text test") = 0 Then rBookmark.Delete
'Delete previous image
idx = GetIndex_Inlishape_BookmarkID(oBookmark.Range.BookmarkID)
If idx > 0 Then docWord.InlineShapes(idx).Delete
'Recover count of shapes
lInLiShapes_old = docWord.InlineShapes.Count
lShapes_old = docWord.Shapes.Count
'Paste image
rBookmark.PasteAndFormat wdFormatOriginalFormatting
'Recover new count shapes
lInLiShapes = docWord.InlineShapes.Count
lShapes = docWord.Shapes.Count
'Determine type pasted shape
bIsInlineShape = lInLiShapes > lInLiShapes_old
bIsShape = lShapes > lShapes_old
'If is inlineshape
If bIsInlineShape And bIsShape = False Then
idx = GetIndex_Inlishape_BookmarkID(oBookmark.Range.BookmarkID)
Set oInLiShp = docWord.InlineShapes(idx)
ElseIf bIsShape And bIsInlineShape = False Then
Set oShape = docWord.Shapes(lShapes)
'Convert to inlineshape
Set oInLiShp = oShape.ConvertToInlineShape
Else
Exit Sub
End If
'Change some options
oInLiShp.Title = sImage
oInLiShp.Range.Paragraphs.Alignment = wdAlignParagraphCenter
Else
MsgBox "The bookmark " & sBookmark & " doesn't exist in the document.", vbOKOnly + vbCritical, "Not exists bookmark"
End If
End Sub
Function GetIndex_Inlishape_BookmarkID(bkm_ID As Long) As Long
Dim o As InlineShape, i As Long
For Each o In ThisDocument.InlineShapes
i = i + 1
If o.Range.BookmarkID = bkm_ID Then
Select Case o.Type
Case wdInlineShapePicture
GetIndex_Inlishape_BookmarkID = i
Exit Function
End Select
End If
Next
GetIndex_Inlishape_BookmarkID = 0
End Function
Solved with Set oShape = docWord.Shapes(sImage) because image pasted keep the name of shape from Excel although with .count of the collection Shapes run fine.
However with .count of the collection inlineshapes not run fine because Word orders the elements, first the shapepictures and after shapecharts.
Thanks.

Unpredictable errors VBA microsoft word copying comments and text to excel

I tried to make a macro that takes all the comments in a word document, filters based on the comment text and then inserts them in excel with the associated text in a note.
I tried each step iteratively and I managed to copy the comments and pasting the wanted results in the same word document. Then I managed to manipulate excel by adding columns and notes.
Everything broke when I integrated the excel part with the comment extraction part. The errors were invalid procedure call for the line with rightParPos = InStr(leftParPos, comment, ")") which I hadn't touched in a while, so I tried outputting the parameters... That lead to a completely different error - an indexing error for the categories array when categoryCount was 0, which also was very strange. After that I tried removing a strange character in a string and then I suddenly got some kind of "can't connect to excel" at Set xlWB = xlApp.Workbooks.Open(FileName:=xlPath).
It seems completely random to me. I'm thinking that it might be some kind of limit or bug in the Microsoft Word environment that is causing these problems. Anyone knowing what could be a cause of these strange errors?
I couldn't find anything out of the ordinary with my code, but maybe someone on S.O. sees something that immediately looks strange. Sorry for the very messy code.
Sub Test()
Dim comment, text As String
Dim pageNr As Integer
Dim codePrefix, fileName As String
Dim newLinePos, leftParPos, rightParPos As Integer
Dim commentNr As Integer
Dim codeWorksheetIndex As Integer
Dim xlFile, xlDir, xlPath As String
'Excel'
Dim xlApp As Object
Dim xlWB As Object
On Error Resume Next
Set xlApp = GetObject(, "Excel.Application")
If Err Then
Set xlApp = CreateObject("Excel.Application")
End If
On Error GoTo 0
xlFile = "TEST"
xlDir = "My\Directory\path\" 'censored
xlPath = xlDir & xlFile
Set xlWB = xlApp.Workbooks.Open(FileName:=xlPath)
codePrefix = "a-code" 'censored
fileName = Left(ActiveDocument.Name, Len(ActiveDocument.Name)-5)
'insert a column as second column in each spreadsheet'
For sheet_index = 1 to 3
With xlWB.Worksheets(sheet_index)
.Range("B:B").Insert
.Cells(1, 2).Formula = fileName
End With
Next sheet_index
For commentNr = 1 To ActiveDocument.Comments.Count
Dim category As String
Dim categories(1 to 2) As String
Dim categoryCount As Integer
Dim numLeft, numRight as Integer
'Dim j As Integer
comment = LCase(ActiveDocument.Comments(commentNr).Range)
text = ActiveDocument.Comments(commentNr).Scope
pageNr = ActiveDocument.Comments(commentNr).Scope.Information(wdActiveEndPageNumber)
'find newline'
newLinePos = InStr(comment, vbCr)
If newLinePos = 0 Then
newLinePos = InStr(comment, vbLf)
If newLinePos = 0 Then
newLinePos = InStr(comment, vbCrLf)
if newLinePos = 0 then
newLinePos = InStr(comment, Chr(10))
if newLinePos = 0 then
ActiveDocument.Content.InsertAfter Text:="ERROR: comment " & commentNr & " misses newline!" & vbNewLine
End If
End If
End If
End If
'set to initial index for leftpar instr'
rightParPos = 1
categoryCount = 0
Do
leftParPos = InStr(rightParPos, comment, "(")
rightParPos = InStr(leftParPos, comment, ")")
If leftParPos > 0 and rightParPos > 0 Then
numLeft = rightParPos-1
numRight = numLeft - leftParPos
category = Trim(Right(Left(comment, numLeft), numRight))
categories(categoryCount) = category
categoryCount = categoryCount + 1
End If
Loop While leftParPos > 0 And rightParPos > 0
comment = fileName & " (s. " & pageNr & ")" & vbNewLine & Trim(Right(comment, Len(comment)-newLinePos))
If Instr(LCase(comment), codePrefix) = 1 Then
For categoryIndex = 0 To categoryCount-1
category = categories(categoryIndex)
If category = "category1" Then
codeWorksheetIndex = 1
ElseIf category = "category2" Then
codeWorksheetIndex = 2
ElseIf category = "category3" Then
codeWorksheetIndex = 3
End If
With xlWB.Worksheets(codeWorksheetIndex)
.Cells(commentNr+1, 2).Formula = text
.Cells(commentNr+1, 2).NoteText comment 'this only worked without =
End With
Next categoryIndex
End If
Next commentNr
End Sub
There are two critical problems with the code that were overlooked and then there was one third problem that wasn't due to the code but which also resulted in errors.
As #TimWilliams mentioned, one case where leftParPos = 0 was unhandled.
The indexing of categories was entirely wrong and faulty in the code.
The strangest error was due to having the excel file on an external harddrive that disconnected and therefore making excel not responding.

Finding a phrase in a string variable

I am trying to find a phrase inside of a variable that contains html. I then want to replace it with a new phrase.
I tried InStr but the phrase is not found. I also tried using wildcards at tht beginning and end of the phrase. I also tried doing an if like also with and without wildcards.
The purpose is to allow a tester to do a batch change on expected results and/or test steps by entering a sentence or phrase they want changed.
The only problem I'm having is being able to programmatically find the sentence within the variable.
Problem area is in bold
Dim qcURL As String
Dim qcID As String
Dim qcPWD As String
Dim qcDomain As String
Dim qcProject As String
Dim preActVal As String
Dim postActVal As String
Dim FindSt As String
Dim currentString As String
Dim thisSheet As Worksheet
'Toggle debugging mode'
Dim isDebugOn As Boolean
isDebugOn = True 'set to true to turn off Active X
''On Error GoTo ErrHandler:
FOLDER_PATH = "BAT\PC2P - Claims - Med"
Set thisSheet = ThisWorkbook.Sheets("ShellUpdater")
TestID = thisSheet.Range("B1").Value
stField = thisSheet.Range("B2").Value
**FindSt = thisSheet.Range("B3").Value**
ReplaceSt = thisSheet.Range("B4").Value
testLocation = thisSheet.Range("B5").Value
'ActiveX Forms
If isDebugOn = False Then
'qcURL = GetOptionMetric("qcURL", 1, "Enter ALM URL") ''popup to get url from user
qcURL = "<<URL>>"
'qcDomain = GetOptionMetric("qcDomain", 1, "Enter your ALM Domain")
''popup to get domain from user
qcDomain = "<<DOMAIN>>"
'qcProject = GetOptionMetric("qcProject", 1, "Enter your ALM Project")
''popup to get project from user
qcProject = "<<PROJECT>>"
qcID = GetOptionMetric("qcID", 1, "Enter your ALM MSID")
qcPWD = GetOptionPassword("qcPWD", 1, "Enter your ALM Password")
Else
qcURL = "<<URL>>"
qcID = "<<USERNAME>>"
qcDomain = "<<DOMAIN>>"
qcProject = "<<PROJECT>>"
qcPWD = InputBox("THIS IS IN DEEBUG MODE")
If qcPWD = vbNullString Then Exit Sub
If qcPWD = "" Then Exit Sub
End If
'END ActiveX Forms
'Connect to ALM
Set tdConnection = CreateObject("TDApiOle80.TDConnection")
tdConnection.InitConnectionEx qcURL
tdConnection.Login qcID, qcPWD
tdConnection.Connect qcDomain, qcProject
''Check if batch updating or single case
Dim testObject As ITest
If InStr(TestID, "All") > 0 Then
Dim TestFact As testFactory
Set tMng = tdConnection.TreeManager
Set srcFolder = tMng.NodeByPath("Subject\" & testLocation)
Set tstFact = srcFolder.testFactory
Set tstList = tstFact.NewList("")
For Each shellTest In tstList
Set DSFact = shellTest.DesignStepFactory.NewList("")
For Each dStep In DSFact
Select Case stField
Case "StepExpectedResult"
**currentString = dStep.StepExpectedResult**
**stposition = InStr(currentString, FindSt)**
If stposition > 0 Then
dStep.StepExpectedResult = preActVal & Replace(currentString, FindSt, ReplaceSt) & postActVal
End If
Case "StepDescription"
currentString = dStep.StepDescription
stposition = InStr(currentString, FindSt)
If stposition > 0 Then
dStep.StepDescription = preActVal & Replace(currentString, FindSt, ReplaceSt) & postActVal
End If
End Select
dStep.Post
Next dStep
Next shellTest
Else
Dim myTest
'Find the Test in test plan
Set thisTest = GetTest(Trim(TestID), testLocation, "\")
Set myTest = tdConnection.testFactory.Item(TestID)
End If
Try this:
Sub replace_string()
FindSt = "but also need rendering provider first name"
ReplaceString = "I LIKE BANANAS"
currentString = "All fields populate from the correctly populated provider<<<\!Renderingproviderlastname>>>, but also need rendering provider first name"
stPosition = Replace(currentString, FindSt, ReplaceString)
MsgBox stPosition
End Sub

Not able to get out of the loop after getfirstitem in lotus script

Sub Initialize
On Error GoTo ErrorOut
Dim sess As NotesSession
Dim db As NotesDatabase
Dim doc, searchDoc, reqNumDoc As NotesDocument
Dim body As NotesMIMEEntity
Dim header As NotesMIMEHeader
Dim stream As NotesStream
Dim vwSearchRequests As NotesView
Dim reqNum, totalNotify, totalAccepted, totalRejected, totalOOO, totalNoRes As Integer
Dim reqSer, reqJRSS, reqSPOC, reqNumStr As String
Dim reqDate As String
Dim reqNumColl As NotesDocumentCollection
Dim reqPanelRes As NotesItem
Dim reqPanelResValue As Variant
Set sess = New NotesSession
Set db = sess.CurrentDatabase
Set vwSearchRequests = db.GetView("RequestDocReport")
vwSearchRequests.Autoupdate = False
Set searchDoc = vwSearchRequests.GetFirstDocument
While Not searchDoc Is Nothing
reqSer = "Service"
reqJRSS = searchDoc.PS_JRSS(0)
reqSPOC = "Hiring SPOC"
totalAccepted = 0
totalRejected = 0
totalOOO = 0
totalNoRes = 0
totalNotify = 0
reqNum = searchDoc.PS_RequestNo(0)
reqNumStr = {PS_RequestNo = "} & reqNum & {"}
Set reqNumColl = vwSearchRequests.GetAllDocumentsByKey(reqNumStr)
Set reqNumDoc = reqNumColl.GetFirstDocument
While Not reqNumColl Is Nothing
If Not reqNumDoc.GetFirstItem("PanelResponse") Is Nothing Then
reqPanelResValue = reqNumDoc.GetItemValue("PanelResponse")
MsgBox CStr(reqPanelResValue(0))
'Exit Sub
If CStr(reqPanelResValue(0)) = "Accepted" Then
totalAccepted = totalAccepted + 1
End If
If CStr(reqPanelResValue(0)) = "Rejected" Then
totalRejected = totalRejected + 1
End If
If CStr(reqPanelResValue(0)) = "OOO" Then
totalOOO = totalOOO + 1
End If
Else
If CStr(reqPanelResValue(0)) = "" Then
totalNoRes = totalNoRes + 1
End If
End If
totalNotify = totalNotify + 1
Set reqNumDoc = reqNumColl.GetNextDocument(reqNumDoc)
Wend
what is the error in code? The code is getting stuck after
If Not reqNumDoc.GetFirstItem("PanelResponse") Is Nothing Then
reqPanelResValue = reqNumDoc.GetItemValue("PanelResponse")
Instead of line
While Not reqNumColl Is Nothing
write
While Not reqNumDoc Is Nothing
You got an infinitive loop because the collection reqNumColl is not nothing all the time even when you reached the last document in collection. Instead you have to test the document reqNumDoc.
Another issue might be your code for collection calculation:
reqNumStr = {PS_RequestNo = "} & reqNum & {"}
Set reqNumColl = vwSearchRequests.GetAllDocumentsByKey(reqNumStr)
The way you coded it the first sorted column in view should contain
PS_RequestNo = "12345"
Probably, your view contains in first sorted column just the request number. If so, your code would be just:
Set reqNumColl = vwSearchRequests.GetAllDocumentsByKey(reqNum)
if column contains a numeric value or
Set reqNumColl = vwSearchRequests.GetAllDocumentsByKey(cStr(reqNum))
if it contains a string.
Apart from any other problems you might have in your code (and #Knut is correct about the cause of your infinite loop), this is not a good pattern:
If Not reqNumDoc.GetFirstItem("PanelResponse") Is Nothing Then
reqPanelResValue = reqNumDoc.GetItemValue("PanelResponse")
You're retrieving the item twice when you don't actually have to.
This woould be much better:
If reqNumDoc.HasItem"PanelResponse") Then
reqPanelResValue = reqNumDoc.GetItemValue("PanelResponse")

Word Macro To Delete Pages With Specific Text

I'm trying to delete any page that contains a specific text such as the sentence below (strSearch =). But I get a 5904 Error when I try running my macro... Any clue?
Sub DeletePages()
Dim strSearch As String
Dim rgeStart As Range
Set rgeStart = Selection.Range
strSearch = "Report the content of the ""StatusBar"" status bar message to the results."
With ActiveDocument.Range.Find
.Text = strSearch
Do While .Execute
With .Parent
.Select With Selection
.Bookmarks("\Page").Range.Delete
End With
End With
Loop
End With
rgeStart.Select
Application.Browser.Target = wdBrowsePage
End Sub
I get a syntax error on .Select With Selection, and there is no intellisense with this line...it seems that objects are not able to access properties and methods because with statements are embedded in one another.
The following worked for me...I had to delete the last page differently
Sub DeletePages()
'source1 http://stackoverflow.com/questions/13465709/repeating-microsoft-word-vba-until-no-search-results-found
'source2 https://msdn.microsoft.com/en-us/library/bb208876(v=office.12).aspx
Dim strSearch As String
strSearch = "GoodBye"
With Selection.Find
.Forward = True
.Wrap = wdFindStop
.Text = strSearch
.Execute
End With
Do While Selection.Find.Found = True And iCount < 1000
iCount = iCount + 1
Selection.HomeKey Unit:=wdStory
Selection.Find.Execute
If Selection.Find.Found Then
'Get Current page
CurPage = Selection.Information(wdActiveEndAdjustedPageNumber)
'Check if current page is the last page
If CurPage = FindLastPage Then
ActiveDocument.Range(Selection.Start, ActiveDocument.Range.End).Delete
Else
ActiveDocument.Bookmarks("\Page").Range.Delete
End If
End If
Loop
End Sub
Function FindLastPage()
'Source https://support.microsoft.com/en-us/kb/293861
'Iterate each section in the document to retrieve the end page of the
'document and compute the page count in that section. The results are
'displayed in the Immediate window.
Dim oSec As Object
Dim nStartPg As Integer, nEndPg As Integer, nSecPages As Integer
Dim NumSections As Integer
NumSections = ActiveDocument.Sections.Count
nStartPg = 1
For Each oSec In ActiveDocument.Sections
nEndPg = oSec.Range.Information(3) - 1 'wdActiveEndPageNumber=3
'Account for the last page.
If oSec.Index = NumSections Then nEndPg = nEndPg + 1
nSecPages = nEndPg - nStartPg + 1
FindLastPage = nSecPages
Next
End Function

Resources