Check if file downloaded to resume the code - excel

I have a code that enables me to login a website with credentials. I have replaced those with hidden1, hidden2 and hidden3 in the code for privacy.
The code login to the website then navigate to specific page that has two drop down lists.
From the first list that has four options (this is fixed to four options), I select an option and after that the second drop down is updated according to list1
But List2 is changeable as the options are not known
The next step is to click a button that goes to another page with two elements that I clicked so as to export a pdf file.
Everything is fine till now.
The problem is that I have put waiting line for 15 seconds so as to wait for the download and I am searching for more efficient way to check if the file downloaded or not using selenium vba.
Here's the code
Private Sub Test()
Dim bot As New Selenium.WebDriver
Dim sList1 As SelectElement
Dim sList2 As SelectElement
Dim By As New By
Dim fso As Object
Dim myFolder As Object
Dim objFile As Object
Dim fil As String
Dim fn As String
Dim dteFile As Date
Dim n As Integer
Dim x As Integer
Dim j As Integer
Const DOWNLOAD_DIRECTORY As String = "C:\Users\Future\Desktop\Files"
Set fso = CreateObject("Scripting.FileSystemObject")
If Len(Dir(DOWNLOAD_DIRECTORY, vbDirectory)) = 0 Then MkDir DOWNLOAD_DIRECTORY
With bot
.SetPreference "download.default_directory", DOWNLOAD_DIRECTORY
.SetPreference "download.directory_upgrade", True
.SetPreference "download.prompt_for_download", False
.Start "chrome", "http://primprep.emis.gov.eg"
.Get "/"
.FindElementById("ContentPlaceHolder1_TextBox1").SendKeys "hidden1"
.FindElementById("ContentPlaceHolder1_TextBox3").SendKeys "hidden2"
.FindElementById("ContentPlaceHolder1_TextBox2").SendKeys "hidden3"
.FindElementById("ContentPlaceHolder1_Button2").Click
.FindElementById("Button1").Click
.Wait 1000
mLoop:
n = n + 1
If n = 5 Then Stop
x = 0
sPoint:
Set sList1 = .FindElementById("ContentPlaceHolder1_Dedara").AsSelect
sList1.SelectByIndex n
.Wait 2000
Set sList2 = .FindElementById("ContentPlaceHolder1_Dschool").AsSelect
For j = x + 1 To sList2.Options.Count
If x + 1 >= sList2.Options.Count Then GoTo mLoop
fil = Format(n, "00") & "-" & Format(j, "00") & "-" & Application.Trim(sList2.Options(j + 1).Text) & ".pdf"
sList2.SelectByIndex j
.FindElementById("ContentPlaceHolder1_Button1").Click
.Wait 2000
If .IsElementPresent(By.ID("ContentPlaceHolder1_Label2")) Then
If .FindElementById("ContentPlaceHolder1_Label2").Text = "لا يوجد بيانات لعرضها" Then
Debug.Print "No Data For This School >> " & Application.Trim(Replace(fil, ".pdf", ""))
x = x + 1
GoTo sPoint
End If
End If
Do
Loop While .FindElementsById("IconImg_CrystalReportViewer1_toptoolbar_print").Count = 0
.FindElementById("IconImg_CrystalReportViewer1_toptoolbar_print").Click
Do
Loop While .FindElementsByCss("[id^='theBttnbobjid']").Count = 0
.FindElementByCss("[id^='theBttnbobjid']").Click
Application.Wait Now + TimeSerial(0, 0, 15)
'I need a way to check if the file downloaded or not instead of waiting for 15 seconds
'as sometimes the file took no time and sometimes the file may took over 15 seconds
Set myFolder = fso.GetFolder(DOWNLOAD_DIRECTORY)
dteFile = DateSerial(1900, 1, 1)
For Each objFile In myFolder.Files
If objFile.DateLastModified > dteFile And fso.GetExtensionName(objFile.Path) = "pdf" Then
dteFile = objFile.DateLastModified
fn = objFile.name
End If
Next objFile
If fn <> vbNullString And Not fso.FileExists(DOWNLOAD_DIRECTORY & "\" & fil) Then
fso.MoveFile DOWNLOAD_DIRECTORY & "\" & fn, DOWNLOAD_DIRECTORY & "\" & fil
End If
.FindElementById("ContentPlaceHolder1_Button2").Click
.Wait 2000
x = x + 1
GoTo sPoint
Next j
GoTo mLoop
End With
End Sub
I have searched for such a topic and found a link but it was for Java selenium. And I need to deal with VBA selenium.
Regards

you can check for files count in DOWNLOAD_DIRECTORY before download procedure code and then check again after it. If the count increased by one, so it is downloaded.
just an IDEA
i found a code that i used to count files for attendance . you can take an advantage of it
Sub Attfiles()
Dim AttFolder As String, path As String, count As Integer
AttFolder = "D:\attdata"
path = AttFolder & "\*.xls" 'ممكن تغير الامتداد هنا
Filename = Dir(path)
Do While Filename <> ""
count = count + 1
Filename = Dir()
Loop
Range("A5").Value = count
End Sub

Related

page sizing & handling on acrobar pro dc via vba excel

I have managed convert a list of images onto pdf, then gathered them in a single file and then print them as multiple pages 10 columns x 14 rows so I can print in a single sheet 140 original images.
All of these with sendkeys method which was absolutely madness and frustrating but at the end it works pretty fine, the only handicap is that I have to do this almost everyday and once I run the sendkeys macro I can't do nothing with my computer until it ends which could probably be hours
I'm trying to do this in a less "messy" way
I have managed to convert the images in pdf easely with this code I modified from a search on internet (just in case someone find it usefull for him/her)
Sub png_to_pdf()
Dim Acroapp As New Acrobat.Acroapp
Dim pddoc As New Acrobat.AcroPDDoc
Set Acroapp = CreateObject("AcroExch.App")
Set pddoc = CreateObject("AcroExch.pddoc")
aux_pngtopdf "F:\ES-VAL\PURCH-U\CARLOS\qr", pddoc
End Sub
Private Sub aux_pngtopdf(ByVal xFolderName As String, ByVal pddoc As Object)
Dim xFileSystemObject As Object
Dim xFolder As Object
Dim xSubFolder As Object
Dim xFile As Object
Dim xfilepdf As String
Set xFileSystemObject = CreateObject("Scripting.FileSystemObject")
Set xFolder = xFileSystemObject.GetFolder(xFolderName)
For Each xFile In xFolder.Files
If Right(xFile, 3) = "png" And Application.CountIf(Columns(10), Mid(xFolderName, 29, 9)) = 0 And Application.CountIf(Columns(11), Mid(xFolderName, 29, 9)) = 0 Then
pddoc.Open xFile
xfilepdf = Left(xFile, Len(xFile) - 3) & "pdf"
pddoc.Save PDSaveFull, xfilepdf
End If
Next xFile
For Each xSubFolder In xFolder.subfolders
If Len(xSubFolder) < 250 Then
aux_pngtopdf xSubFolder.Path, pddoc
End If
Next xSubFolder
Set xFile = Nothing
Set xFolder = Nothing
Set xFileSystemObject = Nothing
End Sub
I'm changing the code I found (I don't really remember if here or if in any other site) to merge all the pdf into a single one and it seems it would be fine
Sub merge_pdf()
Const DestFile As String = "MergedFile.pdf" ' <-- change to suit
Dim MyPath As String, MyFiles As String
Dim a() As String, i As Long, f As String
Dim j As Integer
j = 4
' Choose the folder or just replace that part by: MyPath = Range("E3")
With Application.FileDialog(msoFileDialogFolderPicker)
'.InitialFileName = "C:\Temp\"
.AllowMultiSelect = False
If .Show = False Then Exit Sub
MyPath = .SelectedItems(1) & "\" & Cells(j, 3).Value
DoEvents
End With
' Populate the array a() by PDF file names
If Right(MyPath, 1) <> "\" Then MyPath = MyPath & "\"
ReDim a(1 To 2 ^ 14)
f = Dir(MyPath & "*.pdf")
While Len(f)
If StrComp(f, DestFile, vbTextCompare) Then
i = i + 1
a(i) = f
End If
f = Dir()
Wend
' Merge PDFs
If i Then
ReDim Preserve a(1 To i)
MyFiles = Join(a, ",")
Application.StatusBar = "Merging, please wait ..."
Call aux_MergePDFs(MyPath, MyFiles, DestFile)
Application.StatusBar = False
Else
MsgBox "No PDF files found in" & vbLf & MyPath, vbExclamation, "Canceled"
End If
End Sub
Private Sub aux_MergePDFs(MyPath As String, MyFiles As String, Optional DestFile As String = "MergedFile.pdf")
Dim a As Variant, i As Long, n As Long, ni As Long, p As String
Dim Acroapp As New Acrobat.Acroapp, PartDocs() As Acrobat.CAcroPDDoc
If Right(MyPath, 1) = "\" Then p = MyPath Else p = MyPath & "\"
a = Split(MyFiles, ",")
ReDim PartDocs(0 To UBound(a))
On Error GoTo exit_
If Len(Dir(p & DestFile)) Then Kill p & DestFile
For i = 0 To UBound(a)
' Check PDF file presence
If Dir(p & Trim(a(i))) = "" Then
MsgBox "File not found" & vbLf & p & a(i), vbExclamation, "Canceled"
Exit For
End If
' Open PDF document
Set PartDocs(i) = CreateObject("AcroExch.PDDoc")
PartDocs(i).Open p & Trim(a(i))
If i Then
' Merge PDF to PartDocs(0) document
ni = PartDocs(i).GetNumPages()
If Not PartDocs(0).InsertPages(n - 1, PartDocs(i), 0, ni, True) Then
MsgBox "Cannot insert pages of" & vbLf & p & a(i), vbExclamation, "Canceled"
End If
' Calc the number of pages in the merged document
n = n + ni
' Release the memory
PartDocs(i).Close
Set PartDocs(i) = Nothing
Else
' Calc the number of pages in PartDocs(0) document
n = PartDocs(0).GetNumPages()
End If
Next
If i > UBound(a) Then
' Save the merged document to DestFile
If Not PartDocs(0).Save(PDSaveFull, p & DestFile) Then
MsgBox "Cannot save the resulting document" & vbLf & p & DestFile, vbExclamation, "Canceled"
End If
End If
exit_:
' Inform about error/success
If Err Then
MsgBox Err.Description, vbCritical, "Error #" & Err.Number
ElseIf i > UBound(a) Then
MsgBox "The resulting file is created:" & vbLf & p & DestFile, vbInformation, "Done"
End If
' Quit Acrobat application
Acroapp.Exit
Set Acroapp = Nothing
End Sub
But I don't have any clue on how to print several pages of the pdf into a single one. Not interested in only 16 pages per sheet (since the images I try to print are QR codes 12mmx12mm so it fits pretty fine 140 of them in a single sheet) which could be more or less easy if you set adobe pdf as your default printer and setup it to print 16 pages per sheet (I also found part of a code that could fit to this purpose)
Any clue will be apreciated
Thanks

VBA codes not running after re-installed windows10

I have already run these codes successfully more than 200 times as I download files on daily basis.
Also having another copies of same code in different drives as reference in case of any issue.
Last night I re-installed my Win10 and office 2019 pro plus both are updated to latest.
And now the codes are not running.
I am not a hard core programmer, not able to understand what's wrong with them. Do I need to install any reference library or .net framework or something else.
The codes stopped at
"Name oldFullName As newfullname"
2 files are there do be downloaded and renamed
The codes get 1st file successfully but, do not rename it so not going to 2nd loop to download 2nd file and to further rename that.
Public IsExit As Boolean
Global Path As String
Private Sub Download_AllZip()
Path = ThisWorkbook.Worksheets("Downloads").Range("C3").Value
Application.ScreenUpdating = FALSE
Application.EnableEvents = FALSE
Dim LR As Long
Dim Fileurl As String, Filename As String, y As String, z As String 'msg as string
Dim r As Long
LR = Sheets("Downloads").Range("C6").Row
For r = 5 To LR
Fileurl = Sheets("Downloads").Range("C" & r).Value
If InStr(1, Fileurl, ".zip") <> 0 Then
filepath = Path
End If
Dim Obj1 As Object
Set Obj1 = CreateObject("Microsoft.XMLHTTP")
Obj1.Open "GET", Fileurl, FALSE
Obj1.send
If Obj1.Status = 200 Then
Set Obj2 = CreateObject("ADODB.Stream")
Obj2.Open
Obj2.Type = 1
Obj2.Write Obj1.responseBody
Obj2.SaveToFile (filepath & getfilename(Fileurl)), 2 ' 1 = no overwrite, 2 = overwrite
Call UnzipFileRename(filepath & getfilename(Fileurl), filepath, Sheets("Downloads").Range("D" & r).Value)
Obj2.Close
y = (y & vbCr & Sheets("Downloads").Range("D" & r).Value & " = Downloaded & Converted To .CSV in " & filepath)
ThisWorkbook.Sheets("Downloads").Range("E" & r).Value = "Downloaded" '/STATUS
Else
z = (z & vbCr & Sheets("Downloads").Range("D" & r).Value & " = Failed To Download")
ThisWorkbook.Sheets("Downloads").Range("E" & r).Value = "Failed" '/STATUS
End If
Next r
End Sub
Function getfilename(filepath As String)
Dim v_string() As String
v_string = Split(filepath, "/")
getfilename = v_string(UBound(v_string))
End Function
Private Sub UnzipFileRename(zipFullName As Variant, unzipPath As Variant, newName As String)
Dim ShellApp As Object, oldFullName As String, newfullname As String, n As Variant
Set ShellApp = CreateObject("Shell.Application")
'/get file name
For Each n In ShellApp.Namespace(zipFullName).Items
a = a + 1
oldFullName = unzipPath & n.Name
newfullname = unzipPath & newName
If a = 1 Then Exit For
Next n
'/delete previous version to avoid any issues
DeleteFile oldFullName
DeleteFile newfullname
'/unzip the file
ShellApp.Namespace(unzipPath).CopyHere ShellApp.Namespace(zipFullName).Items
'/rename the file
Name oldFullName As newfullname
'/delete zip file
DeleteFile CStr(zipFullName)
End Sub
Private Sub DeleteFile(PathAndName As String)
On Error Resume Next
Kill PathAndName
On Error GoTo 0
End Sub

How to copy 100 files to a folder based on first and last file name and display in listbox vba

Im trying to come up with a piece of script that will allow me to copy 100 files from one folder and create a new folder based on the first file and last file name and then move those 100 files to that folder.
After moving those files, i want it to display the folders in a userform listbox as clickable items.
For example, each item in the listbox will be a folder, if i double click on a folders name it will display all the contents of the file (of each of 100 files) in a sheet i've set up.
I haven't been able to test this code yet, all i've done for the past week was research and rewrite the code over and over until i could understand it properly before adding it to the program. So there's bound to be some or more errors along the way.
What i did notice was the "objFile.CopyFile Folderpath & FCount & "_" & LCount" piece of code that doesnt specify which files could be copied specifically. For example, i want it to start at the first file and start coping the first 100 files, when the code is executed again, it will start at file 101 and copy the next 100 files. If there's way to ensure that it wouldnt keep copying the first 100 files, that would be awesome!
Sub Main()
'====CHECK IF THERE'S 100 FILES====
Dim filename, folderpath, path As String
Dim count As Integer
Dim FCount, LCount, FlagCount, IntCount As Integer
Dim objFSO As Object
Dim obj As Object
FCount = 0 ' First File name
LCount = 0 'Last file name
count = 0 'file count
FlagCount = Sheets("Flag Sheet").Range("A2").Value
folderpath = "Work\Big Book\" '==================Location Of The Book
path = folderpath & "*.xls"
filename = Dir(path)
Do While filename <> ""
count = count + 1
filename = Dir(path)
Loop
If count < 100 Then
'====CREATE A FOLDER FOR THE FILES====
If FlagCount <> "" Then '====If there is a flag count, it will create a folder based on the last number it was used
FCount = FlagCount + 1
LCount = FlagCount + 101
MkDir folderpath & FCount & "_" & LCount
Else '=======================else if there isnt one, it will use the first file name to create the folder
FCount = IntCount + 1
LCount = IntCount + 100
MkDir folderpath & FCount & "_" & LCount
End If
'====MOVE 100 FILES TO FOLDER====
For Each objFile In objFSO.GetFolder(path)
If FlagCount <> "" Then '====================if theres a flag count it will move the files starting after the flag count + 101
objFile.CopyFile folderpath & FCount & "_" & LCount
IntCount = FlagCount + 1
If IntCount = FlagCount + 100 Then Exit For
Else '======================================else it will just move the first 100 files
objFile.CopyFile folderpath & FCount & "_" & LCount
IntCount = IntCount + 1
If IntCount = IntCount + 100 Then Exit For
End If
Next
End If
Else
'===Do Nothing===
End If
End Sub
'=====Display Folders In Listbox=====
'====Display Folder Items In Book====
'Call the function
DisplayFoldersInListBox folderpath & FCount & "_" & LCount, Me.Listbox1
Sub Button_Click()
For Each File in Folderpath & FCount & "_" & LCount & "\" & Listbox.value
'[INSERT BIG BOOK CODE]
Next
End Sub
Private Sub DisplayFoldersInListBox(ByVal strRootFolder As String, ByRef lbxDisplay As MSForms.ListBox)
Dim fso As Object
Dim fsoRoot As Object
Dim fsoFolder As Object
'Make sure that root folder contains trailing backslash
If Right$(strRootFolder, 1) <> "\" Then strRootFolder = strRootFolder & "\"
'Get reference to the FileSystemObject
Set fso = CreateObject("Scripting.FileSystemObject")
'Get the root folder
Set fsoRoot = fso.GetFolder(strRootFolder)
'Clear the listbox
lbxDisplay.Clear
'Populate the listbox with subfolders of Root
For Each fsoFolder In fsoRoot.SubFolders
lbxDisplay.AddItem fsoFolder.Name
Next fsoFolder
'Clean up
Set fsoRoot = Nothing
Set fso = Nothing
End Sub
This link: Copy only the first file of a folder VBA
Seems to be the answer for the coping of the files, but im not entirely sure how to add it to my script. Can anyone help me out?
Back to the basics:
CopyXNumberOfFiles:Sub
Sub CopyXNumberOfFiles(SourceFolder As String, TargetFolder As String, Optional MaxNumFiles As Long = 100)
Dim fso As Object, objFile As Object
Dim count As Long
Dim Path As String
Set fso = CreateObject("Scripting.FileSystemObject")
If Not Right(SourceFolder, 1) = "\" Then SourceFolder = SourceFolder & "\"
If Not Right(TargetFolder, 1) = "\" Then TargetFolder = TargetFolder & "\"
For Each objFile In fso.GetFolder(SourceFolder).Files
If objFile.Path Like "*.xls?" Then
Path = TargetFolder & objFile.Name
If Len(Dir(Path)) = 0 Then
FileCopy objFile.Path, Path
count = count + 1
If count >= MaxNumFiles Then Exit For
End If
End If
Next
End Sub
Usage
CopyXNumberOfFiles "C:\","C:\Data"
Addendum
This function will copy the files over and return an array of the new file paths.
Function getCopyXNumberOfFiles(SourceFolder As String, TargetFolder As String, Optional MaxNumFiles As Long = 100) As String()
Dim fso As Object, objFile As Object
Dim count As Long, n As Long
Dim Path As String
Dim data() As String, results() As String
ReDim data(1 To 2, 1 To MaxNumFiles)
Set fso = CreateObject("Scripting.FileSystemObject")
If Not Right(SourceFolder, 1) = "\" Then SourceFolder = SourceFolder & "\"
If Not Right(TargetFolder, 1) = "\" Then TargetFolder = TargetFolder & "\"
For Each objFile In fso.GetFolder(SourceFolder).Files
If objFile.Path Like "*.xls?" Then
Path = TargetFolder & objFile.Name
If Len(Dir(Path)) = 0 Then
FileCopy objFile.Path, Path
count = count + 1
data(1, count) = objFile.Path
data(2, count) = Path
If count >= MaxNumFiles Then Exit For
End If
End If
Next
ReDim Preserve results(1 To count, 1 To 2)
For n = 1 To count
results(n, 1) = data(1, n)
results(n, 2) = data(2, n)
Next
getCopyXNumberOfFiles = results
End Function
Usage
Column 1 has the original paths and column 2 has the new paths.
Dim Files() as String, firstFilePath as String, lastFilePath as String
Files = getCopyXNumberOfFiles("C:\", "C:\New Folder\", 100)
Original Paths
firstFilePath = Files(1, 1)
lastFilePath = Files(Ubound(Files), 1)
New Paths
firstFilePath = Files(1, 2)
lastFilePath = Files(Ubound(Files), 2)

Why does my VBA macro stop after opening and closing a few hundred CSV files?

I've written a macro that downloads zip files containing CSVs from a website. The downloading and unzipping is going perfectly, however when I try to loop through the CSVs searching for the occurrence of a specific string, the macro simply quits after opening about a thousand. There is no error message, it simply stops working, leaving the last CSV it was working on open.
Here is my code:
Sub OpenSearch()
Dim ROW, j As Integer
Workbooks.Open Filename:=FileNameFolder & FileListCSV(i)
For j = 1 To 7
ROW = 3
Do Until IsEmpty(Cells(ROW, 6))
If Cells(ROW, 6) = WantedID(j, 1) Then
MsgBox "WE HAVE A MATCH!"
End If
ROW = ROW + 1
Loop
Next j
Workbooks(FileListCSV(i)).Close False
Kill FileNameFolder & FileListCSV(i)
End Sub
I did not include the main module that calls the this sub and downloads and unzips the files, because on its own, that works perfectly. It only stops working when the sub I copied here is being called.
The Filename comes from a public variable defined in the main module, WantedID contains the strings I need to find in the CSVs.
I've tried to put Application.Wait in the first line, but it did not solve the problem. Also how far the macro gets is completely random. It never stops after the same number of CSVs opened and closed.
UPDATE: Here is the code (parent sub) for the downloading and unzipping. I did not come up with this on my own, but copied it from an online source I cannot recall:
Public FileListCSV(1 To 288) As String
Public i As Integer
Public FileNameFolder As Variant
Public WantedID As Variant
Sub DownloadandUnpackFile()
Dim myURL As String
Dim YearNUM As Integer
Dim MonthNUM As Integer
Dim StarMonth, EndMonth As Integer
Dim DayNUM As Integer
Dim YearSTR As String
Dim MonthSTR As String
Dim DaySTR As String
Dim FixURL As String
Dim TargetFileName As String
Dim FSO As Object
Dim oApp As Object
Dim Fname As Variant
Dim DefPath As String
Dim strDate As String
Dim StrFile As String
Dim FileList(1 To 288) As String
Application.ScreenUpdating = False
FixURL = "http://www.nemweb.com.au/REPORTS/ARCHIVE/Dispatch_SCADA PUBLIC_DISPATCHSCADA_"
WantedID = Range(Cells(2, 1), Cells(8, 1))
YearNUM = 2016
StarMonth = 12
EndMonth = 12
For YearNUM = 2015 To 2016
For MonthNUM = StarMonth To EndMonth
For DayNUM = 1 To 31
YearSTR = CStr(YearNUM)
If MonthNUM < 10 Then
MonthSTR = "0" & CStr(MonthNUM)
Else:
MonthSTR = CStr(MonthNUM)
End If
If DayNUM < 10 Then
DaySTR = "0" & CStr(DayNUM)
Else:
DaySTR = CStr(DayNUM)
End If
myURL = FixURL & YearSTR & MonthSTR & DaySTR & ".zip"
Cells(1, 1) = myURL
Dim WinHttpReq As Object
Set WinHttpReq = CreateObject("Microsoft.XMLHTTP")
WinHttpReq.Open "GET", myURL, False
WinHttpReq.Send
myURL = WinHttpReq.ResponseBody
If WinHttpReq.Status = 200 Then
Set oStream = CreateObject("ADODB.Stream")
oStream.Open
oStream.Type = 1
oStream.Write WinHttpReq.ResponseBody
TargetFileName = "C:\Users\istvan.szabo\Documents \Basslink\AEMO RAW DATA\RAWRAW\" & YearSTR & MonthSTR & DaySTR & ".zip"
oStream.SaveToFile (TargetFileName)
oStream.Close
End If
'try unzippin'
Fname = TargetFileName
If Fname = False Then
'Do nothing
Else
'Root folder for the new folder.
'You can also use DefPath = "C:\Users\Ron\test\"
DefPath = Application.DefaultFilePath
If Right(DefPath, 1) <> "\" Then
DefPath = DefPath & "\"
End If
FileNameFolder = "C:\Users\istvan.szabo\Documents\Basslink\AEMO RAW DATA\RAWRAW\" & YearSTR & MonthSTR & DaySTR
'Make the normal folder in DefPath
MkDir FileNameFolder
'Extract the files into the newly created folder
Set oApp = CreateObject("Shell.Application")
oApp.Namespace(FileNameFolder).CopyHere oApp.Namespace(Fname).items
On Error Resume Next
Set FSO = CreateObject("scripting.filesystemobject")
FSO.deletefolder Environ("Temp") & "\Temporary Directory*", True
i = 1
StrFile = Dir(FileNameFolder & "\")
Do While Len(StrFile) > 0
FileList(i) = FileNameFolder & "\" & StrFile
FileListCSV(i) = Left(StrFile, Len(StrFile) - 3) & "csv"
StrFile = Dir
i = i + 1
Loop
'unzip the unzipped
For i = 1 To 288
Fname = FileList(i)
If Fname = False Then
'Do nothing
Else:
DefPath = Application.DefaultFilePath
If Right(DefPath, 1) <> "\" Then
DefPath = DefPath & "\"
End If
FileNameFolder = "C:\Users\istvan.szabo\Documents\Basslink\AEMO RAW DATA\"
Set oApp = CreateObject("Shell.Application")
oApp.Namespace(FileNameFolder).CopyHere oApp.Namespace(Fname).items
On Error Resume Next
Set FSO = CreateObject("scripting.filesystemobject")
FSO.deletefolder Environ("Temp") & "\Temporary Directory*", True
Call OpenSearch
End If
Next i
End If
Next DayNUM
Next MonthNUM
StarMonth = 1
EndMonth = 5
Next YearNUM
Application.ScreenUpdating = True
End Sub
You could check the file without opening it. That would save you time and resources. Here is a quick draw of the code I would use:
Sub OpenSearch()
Dim ROW, j As Integer
Dim fileID
Dim buf As String
Dim tmp As Variant
Open FileNameFolder & FileListCSV(i) For Input As #1
For j = 1 To 7
ROW = 3
Do Until EOF(1)
Line Input #1, buf
'Remove double quotes
buf = Replace(buf, """", "")
'Split line to a array
tmp = Split(buf, ",")
'5 is the 6th column in excel tmp index starts with 0
fileID = tmp(5)
If fileID = WantedID(j, 1) Then
MsgBox "WE HAVE A MATCH!"
End If
ROW = ROW + 1
Loop
Next j
Close #1
Kill FileNameFolder & FileListCSV(i)
End Sub
EDIT: Also try to add a resource cleanup code, for example: Set WinHttpReq = Nothing, Set oStream = Nothing etc.
In line with other advice in the comments: -
You should close of resources when you are done with them using Set WinHttpReq = Nothing for example. This can avoid memory problems that are similar to the issue you are seeing.
It is also advisable to remove On Error Resume Next. This is hiding errors and you may well be missing results that you need. It would also allow for more information during errors.
I took your two code blocks and wrote them into one that I believe will be stable during running and make it to the end, Run this and let us know if it did resolve the issue. I did it this way as there was a lot of small changes that went towards what I suspect will be more stable and make it to the end.
Sub DownloadandUnpackFile()
Dim FSO As New FileSystemObject
Dim DteDate As Date
Dim Fl As File
Dim Fl_Root As File
Dim Fldr As Folder
Dim Fldr_Root As Folder
Dim LngCounter As Long
Dim LngCounter2 As Long
Dim oApp As Object
Dim oStream As Object
Dim oWinHttpReq As Object
Dim RngIDs As Range
Dim StrURL As String
Dim StrRootURL As String
Dim VntFile As Variant
Dim VntFolder As Variant
Dim VntRootFile As Variant
Dim VntRootFolder As Variant
Dim WkBk As Workbook
Dim WkSht As Worksheet
'This will speed up processing, but you might not see progress while it is working
Application.ScreenUpdating = False
'Set variables
StrRootURL = "http://www.nemweb.com.au/REPORTS/ARCHIVE/Dispatch_SCADA/PUBLIC_DISPATCHSCADA_"
'You should be a little more explicit here for clarity, refernce a worksheet
'E.g. StrID = ThisWorkbook.Worksheets("Sheet1").Range(Cells(2, 1), Cells(8, 1))
Set RngIDs = Range(Cells(2, 1), Cells(8, 1))
Set oWinHttpReq = CreateObject("Microsoft.XMLHTTP")
Set oApp = CreateObject("Shell.Application")
'Loop from 21/Feb/2015 to today
For DteDate = CDate("21/Feb/2015") To Date
StrURL = StrRootURL & Format(DteDate, "YYYYMMDD") & ".zip"
Debug.Print StrURL
oWinHttpReq.Open "GET", StrURL, False
oWinHttpReq.Send
StrURL = oWinHttpReq.ResponseBody
If oWinHttpReq.Status = 200 Then
Set oStream = CreateObject("ADODB.Stream")
oStream.Open
oStream.Type = 1
oStream.Write oWinHttpReq.ResponseBody
VntRootFile = Environ("UserProfile") & "\Documents\Basslink\AEMO RAW DATA\RAWRAW\" & Format(DteDate, "YYYYMMDD") & ".zip"
oStream.SaveToFile VntRootFile
oStream.Close
Set oStream = Nothing
VntRootFolder = Environ("UserProfile") & "\Documents\Basslink\AEMO RAW DATA\RAWRAW\" & Format(DteDate, "YYYYMMDD") & "\"
FSO.CreateFolder VntRootFolder
oApp.Namespace(VntRootFolder).CopyHere oApp.Namespace(VntRootFile).Items
Set Fldr_Root = FSO.GetFolder(VntRootFolder)
'Unzip the zipped zips
For Each Fl_Root In Fldr_Root.Files
If Right(LCase(Fl_Root.Name), 4) = ".zip" Then
VntFolder = Fl_Root.ParentFolder & "\" & Left(Fl_Root.Name, Len(Fl_Root.Name) - 4) & "\"
FSO.CreateFolder VntFolder
VntFile = Fl_Root.Path
oApp.Namespace(VntFolder).CopyHere oApp.Namespace(VntFile).Items
Set Fldr = FSO.GetFolder(VntFolder)
For Each Fl In Fldr.Files
If Right(LCase(Fl.Name), 4) = ".csv" Then
Set WkBk = Application.Workbooks.Open(Fl.Path)
Set WkSht = WkBk.Worksheets(1)
For LngCounter = 1 To RngIDs.Rows.Count
LngCounter2 = 1
Do Until WkSht.Cells(LngCounter2, 6) = ""
If WkSht.Cells(LngCounter2, 6) = RngIDs.Cells(LngCounter, 1) Then
Debug.Print "FOUND: " & Fl.Name & ": " & WkSht.Cells(LngCounter2, 6).Address
End If
LngCounter2 = LngCounter2 + 1
Loop
Next
Set WkSht = Nothing
WkBk.Close 0
Set WkBk = Nothing
End If
DoEvents
Next
Set Fldr = Nothing
End If
Next
Fldr_Root.Delete True
Set Fldr_Root = Nothing
FSO.DeleteFile VntRootFile, True
End If
DoEvents
Next
Set oApp = Nothing
Set oWinHttpReq = Nothing
Set RngIDs = Nothing
Application.ScreenUpdating = True
End Sub
Changes I have made: -
I used early binding to FileSystemObject simply to make it easier
to write up. You will need the 'Windows Scripting Runtime' reference
added (Tools > References > tick 'Windows Scripting Runtime')
I iterated through dates as a single loop rather then three loops of
strings working as a date
I set IDs to be a range and note a variant
I opened references once, reuse them (i.e. oApp), and then close
them
I added DoEvents to give time back to the computer to run anything it
may need to, this maintains a health system.
I used Debug.Print to add information to the immediate window instead
of msgbox, but you should really list the finds out in a separate
worksheet, (debug.print has a size limit so you may end up only
seeing X number of results as others are truncated off.

Excel VBA to Search for Text in PDF and Extract and Name Pages

I have the following code, which looks at each cell in column A of my spreadsheet, searches for the text it finds there in the specified PDF and then extracts the page where it finds the text as a PDF, naming it with the value in the cell of the spreadsheet. The code works but is rather slow, I may need to search for as many as 200 words in a PDF which could be as long as 600 pages. Is there a way to make the code faster? Currently it loops through each cell searches through each page looping through each word until it finds the word in the cell.
Sub test_with_PDF()
Dim objApp As Object
Dim objPDDoc As Object
Dim objjso As Object
Dim wordsCount As Long
Dim page As Long
Dim i As Long
Dim strData As String
Dim strFileName As String
Dim lastrow As Long, c As Range
Dim PageNos As Integer
Dim newPDF As Acrobat.CAcroPDDoc
Dim NewName As String
Dim Folder As String
lastrow = Sheets("Sheet1").Cells(Rows.Count, "A").End(xlUp).Row
strFileName = selectFile()
Folder = GetFolder()
Set objApp = CreateObject("AcroExch.App")
Set objPDDoc = CreateObject("AcroExch.PDDoc")
'AD.1 open file, if =false file is damage
If objPDDoc.Open(strFileName) Then
Set objjso = objPDDoc.GetJSObject
PageNos = 0
For Each c In Sheets("Sheet1").Range("A2:A" & lastrow)
For page = 0 To objPDDoc.GetNumPages - 1
wordsCount = objjso.GetPageNumWords(page)
For i = 0 To wordsCount
If InStr(1, c.Value, ", ") = 0 Then
If objjso.getPageNthWord(page, i) = c.Value Then
PageNos = PageNos + 1
If FileExist(Folder & "\" & c.Offset(0, 4) & ".pdf") Then
Set newPDF = CreateObject("AcroExch.pdDoc")
NewName = Folder & "\" & c.Offset(0, 4) & ".pdf"
newPDF.Open (NewName)
newPDF.InsertPages lngPages, objPDDoc, page, 1, 0
newPDF.Save 1, NewName
newPDF.Close
Set newPDF = Nothing
Exit For
Else
Set newPDF = CreateObject("AcroExch.PDDoc")
newPDF.Create
NewName = Folder & "\" & c.Offset(0, 4) & ".pdf"
newPDF.InsertPages -1, objPDDoc, page, 1, 0
newPDF.Save 1, NewName
newPDF.Close
Set newPDF = Nothing
Exit For
End If
End If
Else
If objjso.getPageNthWord(page, i) = c.Offset(0, 1).Value Then
If objjso.getPageNthWord(page, i + 1) = c.Offset(0, 2).Value Then
PageNos = PageNos + 1
If FileExist(Folder & "\" & c.Offset(0, 4) & ".pdf") Then
Set newPDF = CreateObject("AcroExch.pdDoc")
NewName = Folder & "\" & c.Offset(0, 4) & ".pdf"
newPDF.Open (NewName)
newPDF.InsertPages lngPages, objPDDoc, page, 1, 0
newPDF.Save 1, NewName
newPDF.Close
Set newPDF = Nothing
Exit For
Else
Set newPDF = CreateObject("AcroExch.PDDoc")
newPDF.Create
NewName = Folder & "\" & c.Offset(0, 4) & ".pdf"
newPDF.InsertPages -1, objPDDoc, page, 1, 0
newPDF.Save 1, NewName
newPDF.Close
Set newPDF = Nothing
Exit For
End If
Exit For
End If
End If
End If
Next i
Next page
c.Offset(0, 3).Value = PageNos
PageNos = 0
Next c
MsgBox "Done"
Else
MsgBox "error!"
End If
End Sub
Function FileExist(path As String) As Boolean
If Dir(path) <> vbNullString Then FileExist = True
End Function
Function selectFile()
Dim fd As FileDialog, fileName As String
On Error GoTo ErrorHandler
Set fd = Application.FileDialog(msoFileDialogFilePicker)
fd.AllowMultiSelect = False
If fd.Show = True Then
If fd.SelectedItems(1) <> vbNullString Then
fileName = fd.SelectedItems(1)
End If
Else
'Exit code if no file is selected
End
End If
'Return Selected FileName
selectFile = fileName
Set fd = Nothing
Exit Function
ErrorHandler:
Set fd = Nothing
MsgBox "Error " & Err & ": " & Error(Err)
End Function
Function GetFolder() As String
Dim fldr As FileDialog
Dim sItem As String
Set fldr = Application.FileDialog(msoFileDialogFolderPicker)
With fldr
.Title = "Select the Folder where you want you new PDFs to go"
.AllowMultiSelect = False
.InitialFileName = Application.DefaultFilePath
If .Show <> -1 Then GoTo NextCode
sItem = .SelectedItems(1)
End With
NextCode:
GetFolder = sItem
Set fldr = Nothing
End Function
Many thanks in advance.
Loops are definitely excellent for some things, but can tie down processing with these higher queries. Recently, a colleague and I were doing a similar task (not pdf-related though), and we had much success with using a range.find method instead of a loop executing instr on each cell.
Some points of interest:
-To mimic the “loop cells” functionality when using the .find method, we ended our range statement with .cells, as seen below:
activesheet.usedrange.cells.find( )
Where the desired string goes within the ( ).
-The return value: “A Range object that represents the first cell where that information is found.”
Once the .find method returns a range, a subsequent subroutine can extract the page number and document name.
-If you need to find the nth instance of an occurrence, “You can use the FindNext andFindPrevious methods to repeat the search.” (Microsoft)
Microsoft overview of range.find:
https://msdn.microsoft.com/en-us/vba/excel-vba/articles/range-find-method-excel
So with this approach, the user can use a loop based on a count of cells in your list to execute the .find method for each string.
Downside is (I assume) that this must be done on text within the excel application; also, I’ve not tested it to determine if the string has to inhabit the cell by itself (I don’t think this is a concern).
‘===================
Another suggestion that might be beneficial is to first bulk-rip all text from the .pdf with as little looping as possible (direct actions at the document object level). Then your find/return approach can be applied to the bulk text.
I did a similar activity when creating study notes from a professor’s PowerPoints; I grabbed all the text into a .txt file, then returned every sentence containing the instance of a list of strings.
‘=====================
A few caveats: I admit that I have not executed parsing at the sheer size of your project, so my suggestions might not be advantageous in practice.
Also, I have not done much work parsing .pdf documents, as I try to opt for anything that is .txt/excel app first, and engage it instead.
Good luck in your endeavors; I hope I was able to at least provide food for thought!
Sorry to post a quick, incomplete answer, but I think I can point you in a good direction.
Instead of making the system look up the two terms hundreds of billions of times, then make hundreds of billions of comparisons, put your search terms into an array, and the text of each page into a long string.Then it only has to do one look up and 200 comparisons per page.
'Dim your Clipboard functions
Public Declare PtrSafe Function OpenClipboard Lib "user32" (ByVal hwnd As Long) As Long
Public Declare PtrSafe Function EmptyClipboard Lib "user32" () As Long
Public Declare PtrSafe Function CloseClipboard Lib "user32" () As Long
'...
Dim objData As New MSForms.DataObject
Dim arrSearch() As String
Dim strTxt As String
'...
'Create array of search terms
For i = 2 To lastrow
arrSearch(i - 2) = Sheets("Sheet1").Cells(1, i)
Next i
For page = 0 To objPDDoc.GetNumPages - 1
'[Move each page into a new document. You already have that code]
'Clear clipboard
OpenClipboard (0&)
EmptyClipboard
CloseClipboard
'Copy page to clipboard
objApp.MenuItemExecute ("SelectAll")
objApp.MenuItemExecute ("Copy")
'You can also do this with the JavaScript object: objjso.ExecMenuItem("Item Name")
'You may have to insert a waiting function like sleep() here to wait for the action to complete
'Put data from clipboard into a string.
objData.GetFromClipboard
strTxt = objData.GetText 'Now you can search the entire content of the page at once, within memory
'Compare each element of the array to the string
For i = LBound(arrSearch) To UBound(arrSearch)
If InStr(1, strTxt, arrSearch(i)) > 0 Then
'[You found a match. Your code here]
End If
Next i
Next page
This is still cumbersome because you have to open each page in a new document. If there is a good way to determine which page you're on purely by text (such as the page number at the bottom of page a, followed immediately by the header at the top of page b) then you might look at copying the entire text of the document into one string, then using the clues from the text to decide which page to extract once you find a match. That would be a lot faster I believe.
Sub BatchRenameCS()
Dim objApp As Object
Dim objPDDoc As Object
Dim objjso As Object
Dim newPDF As Acrobat.CAcroPDDoc
Dim lastrow2 As Long
Dim strFileName As String
Dim Folder As String
Dim Page As Long
Dim Cell As Long
Dim PDFCharacterCount() As Long
Dim CharacterCount As Long
Dim i As Integer
Dim c As Integer
Dim x As Integer
Dim strSource As String
Dim strResult As String
Dim PDFCharacters As String
Dim PDFCharacters2 As String
Dim PDFPasteData() As String
Dim PasteDataPage As Integer
Dim LastRow As Long
Dim NewName As String
Dim NewNamePageNum As Integer
Dim Check()
Sheets("Sheet1").Range("C:D").ClearContents
strFileName = selectFile()
Folder = GetFolder()
'create array with pdf word count
Set objApp = CreateObject("AcroExch.App")
Set objPDDoc = CreateObject("AcroExch.PDDoc")
'AD.1 open file, if =false file is damage
If objPDDoc.Open(strFileName) Then
Set objjso = objPDDoc.GetJSObject
ReDim PDFCharacterCount(1 To objPDDoc.GetNumPages) As Long
For Page = 1 To objPDDoc.GetNumPages
PDFCharacters = ""
PDFCharacters2 = ""
For c = 0 To objjso.GetPageNumWords(Page - 1)
PDFCharacters = PDFCharacters & objjso.getPageNthWord(Page - 1, c)
Next c
For i = 1 To Len(PDFCharacters)
Select Case Asc(Mid(PDFCharacters, i, 1))
Case 48 To 57, 65 To 90, 97 To 122:
PDFCharacters2 = PDFCharacters2 & (Mid(PDFCharacters, i, 1))
Case Else
PDFCharacters2 = PDFCharacters2 & ""
End Select
Next
PDFCharacterCount(Page) = Len(PDFCharacters2)
Next Page
lastrow2 = Sheets("Sheet2").Cells(Rows.Count, "A").End(xlUp).Row
Page = 1
ReDim PDFPasteData(1 To objPDDoc.GetNumPages) As String
For Cell = 1 To lastrow2
strResult = ""
strSource = Sheets("Sheet2").Cells(Cell, 1).Text
PDFPasteData(Page) = PDFPasteData(Page) & " " & strSource
For i = 1 To Len(strSource)
Select Case Asc(Mid(strSource, i, 1))
Case 48 To 57, 65 To 90, 97 To 122:
strResult = strResult & (Mid(strSource, i, 1))
Case Else
strResult = strResult & ""
End Select
Next
CharacterCount = CharacterCount + Len(strResult)
If CharacterCount = PDFCharacterCount(Page) Then
CharacterCount = 0
Page = Page + 1
End If
Next Cell
ReDim Check(2, objPDDoc.GetNumPages)
LastRow = Sheets("Sheet1").Cells(Rows.Count, "A").End(xlUp).Row
For Each LookUpCell In Worksheets("Sheet1").Range("A2:A" & LastRow)
For PasteDataPage = 1 To objPDDoc.GetNumPages
If InStr(PDFPasteData(PasteDataPage), LookUpCell.Value) Then
Check(1, PasteDataPage) = Check(1, PasteDataPage) + 1
Check(2, PasteDataPage) = Check(2, PasteDataPage) & LookUpCell.Offset(0, 1).Value & Chr(10)
If FileExist(Folder & "\" & LookUpCell.Offset(0, 1) & ".pdf") Then
Set newPDF = CreateObject("AcroExch.pdDoc")
NewName = Folder & "\" & LookUpCell.Offset(0, 1) & ".pdf"
newPDF.Open (NewName)
newPDF.InsertPages newPDF.GetNumPages - 1, objPDDoc, PasteDataPage - 1, 1, 0
newPDF.Save 1, NewName
newPDF.Close
Set newPDF = Nothing
Else
Set newPDF = CreateObject("AcroExch.PDDoc")
newPDF.Create
NewName = Folder & "\" & LookUpCell.Offset(0, 1) & ".pdf"
newPDF.InsertPages -1, objPDDoc, PasteDataPage - 1, 1, 0
newPDF.Save 1, NewName
newPDF.Close
Set newPDF = Nothing
End If
End If
Next PasteDataPage
Next LookUpCell
x = 1
For PasteDataPage = 1 To objPDDoc.GetNumPages
If Check(1, PasteDataPage) <> 1 Then
Sheets("Sheet1").Cells(x, 3) = PasteDataPage
Sheets("Sheet1").Cells(x, 4) = Check(2, PasteDataPage)
x = x + 1
End If
Next PasteDataPage
End If
MsgBox "Done"
End Sub
Function FileExist(path As String) As Boolean
If Dir(path) <> vbNullString Then FileExist = True
End Function
Function selectFile()
Dim fd As FileDialog, fileName As String
On Error GoTo ErrorHandler
Set fd = Application.FileDialog(msoFileDialogFilePicker)
fd.AllowMultiSelect = False
If fd.Show = True Then
If fd.SelectedItems(1) <> vbNullString Then
fileName = fd.SelectedItems(1)
End If
Else
'Exit code if no file is selected
End
End If
'Return Selected FileName
selectFile = fileName
Set fd = Nothing
Exit Function
ErrorHandler:
Set fd = Nothing
MsgBox "Error " & Err & ": " & Error(Err)
End Function
Function GetFolder() As String
Dim fldr As FileDialog
Dim sItem As String
Set fldr = Application.FileDialog(msoFileDialogFolderPicker)
With fldr
.Title = "Select the Folder where you want you new PDFs to go"
.AllowMultiSelect = False
.InitialFileName = Application.DefaultFilePath
If .Show <> -1 Then GoTo NextCode
sItem = .SelectedItems(1)
End With
NextCode:
GetFolder = sItem
Set fldr = Nothing
End Function

Resources