Automation of reading data from multiple XML files - excel

I've been trying to improve my code for a while now, but I can't get any further on my own.
I have a function that is executed via button press.
As it is, it only works with one file.
In the best case I could click a folder and the function would loop through the subfolders and read all XML files from all subfolders and would then enter the desired words in a table.
It would help me if I could read multiple XML files from a subfolder and not just one. Maybe then I can get further and get the other part right by myself.
This is my code so far:
Private Sub CommandButtonImport_Click()
Dim fd As Office.FileDialog
Set fd = Application.FileDialog(msoFileDialogFilePicker)
With fd
.Filters.Clear
.Title = "Select a XML File"
.AllowMultiSelect = True
If .Show = True Then
xmlFileName = .SelectedItems(1)
Dim xDoc As Object
Set xDoc = CreateObject("MSXML2.DOMDocument")
xDoc.async = False: xDoc.ValidateOnParse = False
xDoc.Load (xmlFileName)
Set Products = xDoc.DocumentElement
row_number = 1
Rows("11:11").Select
Selection.Insert Shift:=xlDown, CopyOrigin:=xlFormatFromRightOrBelow
For Each Product In Products.ChildNodes
Range("C11").Value = Products.ChildNodes(0).ChildNodes(0).Attributes.Item(21).Value
Range("F11").Value = Products.ChildNodes(0).ChildNodes(0).Attributes.Item(0).Value
Range("G11").Value = Products.ChildNodes(0).ChildNodes(0).ChildNodes(1).ChildNodes(0).Attributes.Item(1).Value
Range("C:C").Columns.AutoFit
row_number = row_number + 1
Next Product
End If
End With
Add_Row_Number
End Sub
I am not sure but this might Help
Any input can help and I would be very grateful thanks in advance RomanWASD

Use the getFolder method of a FileSystemObject to create a folder object. Then use Subfolders property and Files property in a recursive manner.
Option Explicit
Private Sub CommandButtonImport_Click()
Dim fd As Office.FileDialog, folder As String, n As Long
Set fd = Application.FileDialog(msoFileDialogFolderPicker)
With fd
.Filters.Clear
.Title = "Select a Folder"
.AllowMultiSelect = True
If .Show = True Then
folder = .SelectedItems(1)
Else
Exit Sub
End If
End With
Dim fso As Object, ws As Worksheet, t0 As Single: t0 = Timer
Set ws = ActiveSheet ' or better as Thisworkbook.Sheets("Name")
Set fso = CreateObject("Scripting.FileSystemObject")
' recurse down folder tree
n = n + ScanFolder(ws, fso.GetFolder(folder))
ws.Range("C:C").Columns.AutoFit
MsgBox n & " files scanned", vbInformation, Format(Timer - t0, "0.0 secs")
End Sub
Function ScanFolder(ws As Worksheet, folder As Object) As Long
Dim subfolder As Object, file As Object, n As Long
For Each subfolder In folder.SubFolders
n = n + ScanFolder(ws, subfolder) ' recurse
Next
For Each file In folder.Files
If file.Type = "XML Document" Then
ParseFile ws, file
n = n + 1
End If
Next
ScanFolder = n ' number of files
End Function
Function ParseFile(ws As Worksheet, file As Object)
Dim xDoc As Object, Products As Object
Set xDoc = CreateObject("MSXML2.DOMDocument")
With xDoc
.async = False
.ValidateOnParse = False
.Load file.Path 'folder and filename
Set Products = .DocumentElement
End With
If Products Is Nothing Then
Else
ws.Rows("11:11").Insert Shift:=xlDown, CopyOrigin:=xlFormatFromRightOrBelow
With Products.ChildNodes(0).ChildNodes(0)
ws.Range("C11").Value = .Attributes(21).NodeValue
ws.Range("F11").Value = .Attributes(0).NodeValue
ws.Range("G11").Value = .ChildNodes(1).ChildNodes(0).Attributes(1).NodeValue
End With
End If
End Function

I was recently dealing with a similar problem. The fastest solution I tried was to use import XML in VBA, import it as table and load table into array.
Sub xmlintoarray()
Dim FSO As Object
Dim FSOfile As Object
Dim wb As Workbook
Dim path As String
path = "C:\Documents\Studypool"
Set FSO = CreateObject("Scripting.FileSystemObject")
Set FSOfile = FSO.GetFolder(path)
Set FSOfile = FSOfile.Files
Set wb = ThisWorkbook
For Each FSOfile In FSOfile
wb.Sheets.Add.Name = FSOfile.Name
wb.XmlImport FSOfile.path, Importmap:=Nothing, overwrite:=True, _
Destination:=ThisWorkbook.Sheets(FSOfile.Name).Range("$A$1")
Next
'here insert code to merge tables
'create array from merged table
'or create merge arrays together.
End Sub

Related

To count tags in XML from folders and subfolders

The below mention code can successfully count the required tags in an XML files and also provides name of file and tag count in excel sheet. I have just one query that currently it only reads the folder individually. However if there are 300 folders in a parent folder i need to select each folder every time. Is there anyway if anyone can amend the code so that if there are 300 folders in a parent folder in read each and every file (XML) in all subfolders. This will be very helpful for me.
I have tried to do it my own but this is beyond my capacity.
Option Explicit
Sub process_folder()
Dim iRow As Long, wb As Workbook, ws As Worksheet, ar
Set wb = ThisWorkbook
Set ws = wb.Sheets(1)
ws.UsedRange.Clear
ws.Range("A1:C1") = Array("Source", "<Headline> Tag Count")
iRow = 1
' create FSO and regular expression pattern
Dim FSO As Object, ts As Object, regEx As Object, txt As String
Set FSO = CreateObject("Scripting.FileSystemObject")
Set regEx = CreateObject("VBScript.RegExp")
With regEx
.Global = True
.MultiLine = True
.IgnoreCase = True
.pattern = "<Headline>(.*)</Headline>"
End With
'Opens the folder picker dialog to allow user selection
Dim myfolder As String, myfile As String, n As Long
With Application.FileDialog(msoFileDialogFolderPicker)
.Title = "Please select a folder"
.Show
.AllowMultiSelect = False
If .SelectedItems.Count = 0 Then 'If no folder is selected, abort
MsgBox "You did not select a folder"
Exit Sub
End If
myfolder = .SelectedItems(1) & "\" 'Assign selected folder to MyFolder
End With
'Loop through all files in a folder until DIR cannot find anymore
Application.ScreenUpdating = False
myfile = Dir(myfolder & "*.xml")
Do While myfile <> ""
iRow = iRow + 1
ws.Cells(iRow, 1) = myfile
' open file and read all lines
Set ts = FSO.OpenTextFile(myfolder & myfile)
txt = ts.ReadAll
ts.Close
' count pattern matches
Dim m As Object
If regEx.test(txt) Then
Set m = regEx.Execute(txt)
ws.Cells(iRow, 2) = m(0).SubMatches(0) ' get date from first match
ws.Cells(iRow, 3) = m.Count
Else
ws.Cells(iRow, 2) = "No tags"
ws.Cells(iRow, 3) = 0
End If
myfile = Dir 'DIR gets the next file in the folder
Loop
' results
ws.UsedRange.Columns.AutoFit
Application.ScreenUpdating = True
End Sub
Use Subfolders property of the parent folder object.
Option Explicit
Sub process_folder()
Dim iRow As Long, wb As Workbook, ws As Worksheet, ar
Set wb = ThisWorkbook
Set ws = wb.Sheets(1)
ws.UsedRange.Clear
ws.Range("A1:B1") = Array("Source", "<Headline> Tag Count")
iRow = 1
' create FSO and regular expression pattern
Dim fso As Object, ts As Object, regEx As Object, txt As String
Set fso = CreateObject("Scripting.FileSystemObject")
Set regEx = CreateObject("VBScript.RegExp")
With regEx
.Global = True
.MultiLine = True
.IgnoreCase = True
.Pattern = "<Headline>(.*)</Headline>"
End With
'Opens the folder picker dialog to allow user selection
Dim myfolder, myfile As String, n As Long
Dim parentfolder As String, oParent
With Application.FileDialog(msoFileDialogFolderPicker)
.Title = "Please select a folder"
.Show
.AllowMultiSelect = False
If .SelectedItems.Count = 0 Then 'If no folder is selected, abort
MsgBox "You did not select a folder"
Exit Sub
End If
parentfolder = .SelectedItems(1) 'Assign selected folder to ParentFolder
End With
Set oParent = fso.getFolder(parentfolder)
' build collection or files
Dim colFiles As Collection
Set colFiles = New Collection
Call GetFiles(oParent, "xml", colFiles)
'Loop through all files in collection
Application.ScreenUpdating = False
For n = 1 To colFiles.Count
myfile = colFiles(n)
iRow = iRow + 1
ws.Cells(iRow, 1) = myfile
' open file and read all lines
Set ts = fso.OpenTextFile(myfile)
txt = ts.ReadAll
ts.Close
' count pattern matches
Dim m As Object
If regEx.test(txt) Then
Set m = regEx.Execute(txt)
ws.Cells(iRow, 2) = m(0).SubMatches(0) ' get date from first match
ws.Cells(iRow, 3) = m.Count
Else
ws.Cells(iRow, 2) = "No tags"
ws.Cells(iRow, 3) = 0
End If
' results
ws.UsedRange.Columns.AutoFit
Next
Application.ScreenUpdating = True
MsgBox colFiles.Count & " Files process", vbInformation
End Sub
Sub GetFiles(oFolder, ext, ByRef colFiles)
Dim f As Object
For Each f In oFolder.Files
If f.Name Like "*." & ext Then
colFiles.Add oFolder.Path & "\" & f.Name
End If
Next
' call recursively
For Each f In oFolder.subfolders
Call GetFiles(f, ext, colFiles)
Next
End Sub
Loop Through All Folders and Subfolders
In this post under the title Subfolder Paths to Collection, you can find the CollSubfolderPaths function, which will return the paths of all folders and their subfolders in a collection.
In your code, you could utilize it in the following way.
Sub process_folder()
' Preceding code...
Application.ScreenUpdating = False
' Return the paths of all folders and subfolders in a collection.
Dim MyFolders As Collection: Set MyFolders = CollSubfolderPaths(myfolder)
Dim Item As Variant
' Loop through the items in the collection.
For Each Item In MyFolders
' Get the first file.
myfile = Dir(Item & "\" & "*.xml")
'Loop through all files in a folder until DIR cannot find anymore
Do While myfile <> ""
' The same code...
Loop
Next Item
' results
ws.UsedRange.Columns.AutoFit
Application.ScreenUpdating = True
End Sub

Initial Folder and Multiple Select (FileDialog)

I have 2 problems regarding FileDialog.
The below code is to copy a file from another folder into another. But if it couldn't locate that file, it would open the FileDialog to select the file.
Problems:
When the FileDialog is opened, it would instead default to Documents and not the AltPath.
Is it possible to select 2 or more files with FileDialog without resorting to loop?
Dim fso As Object
Dim ws As Worksheet
Dim targetFile As Object
Dim S_Line As Long
Dim BasePath As String
Dim AltPath As String
Dim AltPath2 As String
Dim MainPath As String
Dim NewPath As String
Dim Position As String
Set fso = CreateObject("Scripting.FileSystemObject")
Set ws = ActiveSheet
BasePath = "Z:\Test\Folder\"
AltPath = "B:\Test\Folder\"
MainPath = BasePath & "File.xlsm"
NewPath = "D:\Folder\"
S_Line = 0
Position = UCase(Trim(ws.Cells(R_Line, 8).Value2))
If Position = "OK" Then
If Right(MainPath, 1) = "\" Then
MainPath = Left(MainPath, Len(MainPath) - 1)
End If
If fso.FileExists(MainPath) = True Then
fso.CopyFile Source:=MainPath, Destination:=NewPath
Else
Do While S_Line < 2
Set targetFile = Application.FileDialog(msoFileDialogFilePicker)
With targetFile
.Title = "Select a File"
.AllowMultiSelect = True
.InitialFolderName = AltPath
If .Show <> -1 Then
MsgBox "You didn't select anything"
Exit Sub
End If
AltPath2 = .SelectedItems(1)
End With
fso.CopyFile Source:=AltPath2, Destination:=NewPath
S_Line = S_Line + 1
Loop
End If
You did not answer my clarification question and your question is not so clear. Please, test the next code. It will open the dialog in the folder you need, and copy the selected items in the folder you want. I commented the lines being strictly connected to your environment (Position, S_Line), since I cannot deduce which are they and how to be used:
Sub copyFileSourceDest()
Dim fso As Object
Dim ws As Worksheet
Dim AltPath2 As String
Dim MainPath As String
Dim NewPath As String
Dim Position As String
Const AltPath As String = "B:\Test\Folder\"
Const BasePath As String = "Z:\Test\Folder\"
Set fso = CreateObject("Scripting.FileSystemObject")
Set ws = ActiveSheet
MainPath = BasePath & "File.xlsm"
NewPath = ThisWorkbook.path & "\NewFold\" ' "D:\Folder\"
'Position = UCase(Trim(ws.cells(R_Line, 8).Value2))
'If Position = "OK" Then
'the following sequence looks useless, since it is a FILE path:
'If Right(MainPath, 1) = "\" Then
' MainPath = left(MainPath, Len(MainPath) - 1)
'End If
If fso.FileExists(MainPath) = True Then
fso.CopyFile Source:=MainPath, Destination:=NewPath
Else
Dim item As Variant
With Application.FileDialog(msoFileDialogFilePicker)
.Title = "Select a File"
.AllowMultiSelect = True
'.InitialFolderName = AltPath 'it does not exist in this Dialog type
.InitialFileName = AltPath
If .Show <> -1 Then
MsgBox "You didn't select anything"
Exit Sub
End If
For Each item In .SelectedItems
AltPath2 = item
fso.CopyFile Source:=AltPath2, Destination:=NewPath
Next
End With
End If
'End If
End Sub
It will simple copy (all) files you select in the Dialog. Not understanding why necessary a loop as your code tried...

Create new excel file using VBA in a Word Doc

I'm trying to create a new excel workbook where I'll put in data from an array I generated.
I keep getting this error, "Run-time error '424': Object Required"
I'm using code that I found online at this link.
Set NewBook = app.Workbooks.Add
With NewBook
.Title = "Cable Numbers"
.Subject = "Documentation"
'parentFld is a file path where a previous document is being used
.SaveAs FileName:=parentFld & "CableNumbers.xlsx"
End With
The workbook isn't even being created. How can I create a new workbook if my macro is in a word document?
Here's some more of my code for reference:
Function SelectFile()
With Application.FileDialog(msoFileDialogFilePicker)
' show the file picker dialog box
If .Show <> 0 Then
SelectFile = .SelectedItems(1)
End If
End With
End Function
Function PrintValue(PrintRow As Long, PrintCol As Long, ArrayName As ArrayList, ArrayIndex As Long)
'Probably doesn't work. I want to print the value from the array into a spreadsheet's first column
app.ActiveSheet.Cells(PrintRow, PrintCol) = ArrayName(ArrayIndex)
End Function
Sub getFirstColumn()
' Gets the first column of all tables in a given file
'Declare variables
Dim wbName As String
Dim TableCount, tableNum As Integer 'This is the number of tables in thedocument minus 1 since loop starts at 0'
Dim dataCell As Variant
Dim printRowCounter, indexCounter As Long
Dim docO As Document
Dim app As Object
Dim fileObj, fldObj As New FileSystemObject
Set fileObj = CreateObject("Scripting.FileSystemObject")
Set fldObj = CreateObject("Scripting.FileSystemObject")
Set app = CreateObject("Excel.Application")
printRowCounter = 1
indexCounter = 0
'Choose a file to get data from. This is a ms Word doc
MsgBox "Choose a workbook file. This file must have a table in it."
wbPath = SelectFile()
wbName = CStr(fileObj.GetFileName(wbPath))
Documents(wbName).Activate
parentFld = fldObj.GetParentFolderName(wbPath)
TableCount = ActiveDocument.Tables.Count
Dim firstColArray As ArrayList
Set firstColArray = New ArrayList
For tableNum = 1 To TableCount
RowCount = ActiveDocument.Tables(tableNum).Range.Rows.Count
For rowNum = 1 To RowCount
ActiveDocument.Tables(tableNum).Cell(Row:=rowNum, Column:=1).Select 'For testing purposes
cellContent = ActiveDocument.Tables(tableNum).Cell(Row:=rowNum, Column:=1)
If cellContent = "" Then
GoTo Skip
Else
dataCell = ActiveDocument.Tables(tableNum).Cell(Row:=rowNum, Column:=1)
firstColArray.Add dataCell
End If
Skip:
Next rowNum
Next tableNum
MsgBox "All data has been collected." & vbNewLine & "Creating an excel file..."
'Creating an excel workbook and inputing data
Set NewBook = app.Workbooks.Add
With NewBook
.Title = "Numbers"
.Subject = "Documentation"
.SaveAs.Close FileName:=parentFld & "\Numbers.xlsx"
End With
app.Workbooks("Numbers.xlsx").Activate
For Each thing In firstColArray
app.ActiveSheet.Cells(printRowCounter, 1) = firstColArray(indexCounter)
printRowCounter = printRowCounter + 1
indexCounter = indexCounter + 1
Next thing
End Sub
Function FileExists(FName As String) As Boolean
' Returns True if the file FName exists, else False
Dim fs
Set fs = CreateObject("Scripting.FileSystemObject")
FileExists = fs.FileExists(FName)
Set fs = Nothing
End Function
Set oxlApp = CreateObject("Excel.Application")
excelStatus = FileExists(CStr(newExcelPath))
If excelStatus = True Then 'If the file exists, append to it
excelStatus = "append"
ElseIf excelStatus = False Then 'If the file doesn't exist, create a new file
excelStatus = "new"
Set NewBook = oxlApp.Workbooks.Add
With NewBook
.Title = "Cable Numbers"
.Subject = "Documentation"
.SaveAs fileName:=newExcelPath
.Close
End With
End If
Set oxlWbk = oxlApp.Workbooks.Open(fileName:=newExcelPath) 'Open
If excelStatus = "new" Then 'If the excel is new, select the first sheet
'oxlWbk.ActiveWorkbook.Sheets(1).Select
ElseIf excelStatus = "append" Then 'If the excel is old, add a sheet to the end and select it
oxlApp.ActiveWorkbook.Sheets.Add After:=oxlWbk.Sheets(oxlWbk.Sheets.Count)
oxlApp.ActiveWorkbook.Sheets(oxlWbk.Sheets.Count).Select
End If

Merge excel files into a new excel file based on filename

I have a folder containing about 500-600 excel files from a script I have made where the file names end up like this
101a12345.xlsx
101a67899.xlsx
102a12345.xlsx
102a78999.xlsx
The file names follow that patern, 101a, 102a etc. What i want to do is merge those based on that paternt into 1 excel file. Therefore, the 101a12345.xlsx and 101a67899.xlsx should merge into an 101aMaster.xlsx. All excel files are single sheet.
I have found a sample code here which i am trying to implement: How to merge multiple workbooks into one based on workbooks names
Taken from the link above:
Sub test(sourceFolder As String, destinationFolder As String)
Const TO_DELETE_SHEET_NAME As String = "toBeDeleted"
'------------------------------------------------------------------
Dim settingSheetsNumber As Integer
Dim settingDisplayAlerts As Boolean
Dim dict As Object
Dim wkbSource As Excel.Workbook
Dim wks As Excel.Worksheet
Dim filepath As String
Dim code As String * 4
Dim wkbDestination As Excel.Workbook
Dim varKey As Variant
'------------------------------------------------------------------
'Change [SheetsInNewWorkbook] setting of Excel.Application object to
'create new workbooks with a single sheet only.
With Excel.Application
settingDisplayAlerts = .DisplayAlerts
settingSheetsNumber = .SheetsInNewWorkbook
.SheetsInNewWorkbook = 1
.DisplayAlerts = False
End With
Set dict = VBA.CreateObject("Scripting.Dictionary")
filepath = Dir(sourceFolder)
'Loop through each Excel file in folder
Do While filepath <> ""
If VBA.Right$(filepath, 5) = ".xlsx" Then
Set wkbSource = Excel.Workbooks.Open(sourceFolder & filepath)
Set wks = wkbSource.Worksheets(1)
code = VBA.Left$(wkbSource.Name, 4)
'If this code doesn't exist in the dictionary yet, add it.
If Not dict.exists(code) Then
Set wkbDestination = Excel.Workbooks.Add
wkbDestination.Worksheets(1).Name = TO_DELETE_SHEET_NAME
Call dict.Add(code, wkbDestination)
Else
Set wkbDestination = dict.Item(code)
End If
Call wks.Copy(Before:=wkbDestination.Worksheets(1))
wkbDestination.Worksheets(1).Name = VBA.Mid$(filepath, 6)
Call wkbSource.Close(False)
End If
filepath = Dir
Loop
'Save newly created files.
For Each varKey In dict.keys
Set wkbDestination = dict.Item(varKey)
'Remove empty sheet.
Set wks = Nothing
On Error Resume Next
Set wks = wkbDestination.Worksheets(TO_DELETE_SHEET_NAME)
On Error GoTo 0
If Not wks Is Nothing Then wks.Delete
Call wkbDestination.SaveAs(Filename:=destinationFolder & varKey & ".xlsx")
Next varKey
'Restore Excel.Application settings.
With Excel.Application
.DisplayAlerts = settingDisplayAlerts
.SheetsInNewWorkbook = settingSheetsNumber
End With
End Sub
However, this code opens all workbooks and at about 60-70 open excel files i receive an error: Run-time Error '1004' - Method 'Open' of object 'Workbooks' failed.
is there a way to make this code work?
Excel version is pro plus 2016.
Merge Workbooks
It will open the first of each files starting with the unique first four characters, and copy the first worksheet of each next opened file to the first opened file and finally save it as a new file.
There need not be only 2 files (starting with the same four characters) and there can only be one.
Adjust the values in the constants section.
Option Explicit
Sub mergeWorkbooks()
Const sPath As String = "F:\Test\2021\67077087\"
Const sPattern As String = "*.xlsx"
Const dPath As String = "F:\Test\2021\67077087\Destination\"
Const dName As String = "Master.xlsx"
Const KeyLen As Long = 4
Dim PatLen As Long: PatLen = Len(sPattern)
Dim fName As String: fName = Dir(sPath & sPattern)
Dim dict As Object: Set dict = CreateObject("Scripting.Dictionary")
Do While Len(fName) > 0
dict(Left(fName, KeyLen)) = Empty
fName = Dir
Loop
Application.ScreenUpdating = False
On Error Resume Next
MkDir dPath
On Error GoTo 0
Dim wb As Workbook
Dim Key As Variant
Dim wsLen As Long
For Each Key In dict.Keys
Set wb = Nothing
fName = Dir(sPath & Key & sPattern)
Do While Len(fName) > 0
wsLen = Len(fName) - PatLen - KeyLen + 2
If wb Is Nothing Then
Set wb = Workbooks.Open(sPath & fName)
wb.Worksheets(1).Name = Mid(fName, KeyLen, wsLen)
'Debug.Print wb.Name
Else
With Workbooks.Open(sPath & fName)
'Debug.Print .Name
.Worksheets(1).Name = Mid(fName, KeyLen, wsLen)
.Worksheets(1).Copy After:=wb.Sheets(wb.Sheets.Count)
.Close False
End With
End If
fName = Dir
Loop
Application.DisplayAlerts = False
wb.SaveAs dPath & Key & dName ', xlOpenXMLWorkbook
Application.DisplayAlerts = True
wb.Close False
Next Key
Application.ScreenUpdating = True
End Sub
Test for Names
Use the following to print all names in the active workbook to the VBE Immediate window (CTRL+G).
Sub listNames()
Dim nm As Name
For Each nm In ActiveWorkbook.Names
Debug.Print nm.Name
Next nm
End Sub
First, check if the names (if any) are used in some formulas.
Use the following to delete all names in the active workbook.
Sub deleteNames()
Dim nm As Name
For Each nm In ActiveWorkbook.Names
nm.Delete
Next nm
End Sub
Untested but here's one approach where you don't have multiple files open at the same time:
Sub test(sourceFolder As String, destinationFolder As String)
Dim dict As Object, code As String
Dim colFiles As Collection, f, k, wbNew As Workbook, wb As Workbook
Set dict = VBA.CreateObject("Scripting.Dictionary")
'ensure trailing "\"
EnsureSlash sourceFolder
EnsureSlash destinationFolder
'get a collection of all xlsx files in the source folder
Set colFiles = allFiles(sourceFolder, "*.xlsx")
If colFiles.Count = 0 Then Exit Sub 'no files
'organize the files into groups according to first four characters of the filename
For Each f In colFiles
code = Left(f.Name, 4)
If Not dict.exists(code) Then Set dict(code) = New Collection 'need new group?
dict(code).Add f 'add the file to the collection for this code
Next f
'loop over the groups
For Each k In dict
Set colFiles = dict(k) 'the files for this code
Set wbNew = Workbooks.Add(Template:=xlWBATWorksheet) 'one sheet
For Each f In colFiles
With Workbooks.Open(f.Path)
.Worksheets(1).Copy after:=wbNew.Sheets(wbNew.Sheets.Count)
wbNew.Sheets(wbNew.Sheets.Count).Name = Replace(f.Name, ".xlsx", "")
.Close False
End With
Next f
Application.DisplayAlerts = False
wbNew.Sheets(1).Delete 'remove the empty sheet
Application.DisplayAlerts = True
wbNew.SaveAs destinationFolder & k & ".xlsx"
wbNew.Close
Next k
End Sub
'Return all files in `sourceFolder` which match `pattern`
' as a collection of file objects
Function allFiles(sourceFolder As String, pattern As String) As Collection
Dim col As New Collection, f
For Each f In CreateObject("scripting.filesystemobject").getfolder(sourceFolder).Files
If f.Name Like pattern Then col.Add f
Next f
Set allFiles = col
End Function
'Utility - check a path ends in a backslash
' use Application.PathSeparator if needs to be cross-platform
Sub EnsureSlash(ByRef f As String)
If Right(f, 1) <> "\" Then f = f & "\"
End Sub

Get all files in a folder and subfolders

I want to do the following:
Prompt user to choose a folder
Loop through folder (and subfolders if they exist)
Get all .xlsx files
Get specific column from those files (all have the same structure) and combine data from that column
I get all subfolders and all files but I get 5 times as much as I should.
L column is where I get all my data and Insert into Identical Master File (into L column).
I have 5 files - I should get 5 items in the last column, I simply add new folder in it, and same files(copied), so now I should get 10 items in the last column, instead I get 50.
Sub LoopThroughFolder()
Dim MyFile As String, Str As String, MyDir As String, Wb As Workbook
Dim Rws As Long, Rng As Range, r As Range
Set Wb = ThisWorkbook: Wb.Sheets(2).Range("L:L").ClearContents
Dim FSO As Object, fld As Object, Fil As Object
Dim wbkCS As Workbook
Dim FolderPath As String
Dim fsoFile As Object
Dim fsoFol As Object
Dim fileName As String
Dim sWb As Workbook
Dim MatchingColumn As Range
Dim MatchingRowNb As Long
MsgBox "Choose a folder: "
Application.DisplayAlerts = False
With Application.FileDialog(msoFileDialogFolderPicker)
.InitialFileName = "C:\Users\"
.AllowMultiSelect = False
If .Show <> -1 Then
MsgBox "No folder selected! Exiting script."
Exit Sub
End If
FolderPath = .SelectedItems(1)
End With
If Right(FolderPath, 1) <> "\" Then
FolderPath = FolderPath + "\"
End If
Set FSO = CreateObject("Scripting.FileSystemObject")
Set fld = FSO.GetFolder(FolderPath)
If FSO.FolderExists(fld) Then
For Each fsoFol In FSO.GetFolder(FolderPath).SubFolders
For Each fsoFile In fsoFol.Files
If Mid(fsoFile.Name, InStrRev(fsoFile.Name, ".") + 1) = "xlsx" Then
fileName = fsoFile.Name
Application.ScreenUpdating = False
MyDir = FolderPath 'fld
fileName = Dir(MyDir & "*.xlsx")
ChDir MyDir
Application.ScreenUpdating = False
Application.DisplayAlerts = False
Do While fileName <> ""
Set sWb = Workbooks.Open(fileName)
With sWb.Worksheets(2)
Rws = .Cells(Rows.Count, 12).End(xlUp).Row
Set Rng = Range(.Cells(5, 1), .Cells(Rws, 12))
End With
With Wb.Worksheets(2)
Set MatchingColumn = .Range(.Cells(5, 1), .Cells(.Rows.Count, 1).End(xlUp))
For Each r In Rng.Rows
If r.Cells(1, 1).Value2 <> vbNullString Then 'Ignoring empty rows
If r.Rows.Hidden = False Then
'We find the row where the Ids matche
MatchingRowNb = Application.Match(r.Cells(1, 1).Value2, MatchingColumn, False)
'We add the current value in the cell with the new value comming from the other file
.Cells(4 + MatchingRowNb, 12).Value2 = .Cells(4 + MatchingRowNb, 12).Value2 + r.Cells(1, 12).Value2
End If
End If
Next
End With
sWb.Close SaveChanges:=True
Application.DisplayAlerts = True
fileName = Dir()
Loop
End If
Next
Next
End If
End Sub
You're using both FSO and Dir() to loop over the files, so that's why you're getting the same files over and over.
When your sub ends up doing a bunch of things (particularly when one thing is nested in another, and so on) then it's best to consider splitting it up, so you can concentrate on the one thing that's giving you problems, without all the other things "getting in the way".
Here's a stripped-down version to show what I mean. It works but for clarity doesn't have your file processing code.
Option Explicit
Sub LoopThroughFolder()
Dim Wb As Workbook, sWb As Workbook
Dim FolderPath As String
Dim colFiles As Collection, f
'get a folder
FolderPath = ChooseFolder()
If Len(FolderPath) = 0 Then
MsgBox "No folder selected: exiting"
Exit Sub
End If
'find all excel files in subfolders of that folder
Set colFiles = FileMatches(FolderPath, "*.xlsx")
If colFiles.Count = 0 Then
MsgBox "No xlsx files found"
Exit Sub
End If
Set Wb = ThisWorkbook
Wb.Sheets(2).Range("L:L").ClearContents
'loop over the files we found
For Each f In colFiles
Set sWb = Workbooks.Open(f.Path)
'process the file here
sWb.Close SaveChanges:=True
Next f
End Sub
Function ChooseFolder() As String
With Application.FileDialog(msoFileDialogFolderPicker)
.Title = "Choose a folder"
.InitialFileName = "C:\Users\"
.AllowMultiSelect = False
If .Show = -1 Then
ChooseFolder = .SelectedItems(1)
If Right(ChooseFolder, 1) <> "\" Then _
ChooseFolder = ChooseFolder + "\"
End If
End With
End Function
'Return a collection of file objects given a starting folder and a file pattern
' e.g. "*.txt"
'Pass False for last parameter if don't want to check subfolders
Function FileMatches(startFolder As String, filePattern As String, _
Optional subFolders As Boolean = True) As Collection
Dim fso, fldr, f, subFldr
Dim colFiles As New Collection
Dim colSub As New Collection
Set fso = CreateObject("scripting.filesystemobject")
colSub.Add startFolder
Do While colSub.Count > 0
Set fldr = fso.getfolder(colSub(1))
colSub.Remove 1
For Each f In fldr.Files 'get files in folder
If UCase(f.Name) Like UCase(filePattern) Then colFiles.Add f
Next f
If subFolders Then 'get subfolders for processing?
For Each subFldr In fldr.subFolders
colSub.Add subFldr.Path
Next subFldr
End If
Loop
Set FileMatches = colFiles
End Function

Resources