Replace and save remaining string in an array - excel

I want to remove predefined parts of the strings in the following table and save the values in an array. For some reason I get an error stating that I'm outside of the index. The lengths of the strings in the table can vary.
Sub New_1()
Dim i, j, k As Integer
Dim Endings As Variant
k = 0
Endings = Array("/A", "/BB", "/CCC", "/DDDD", "/EEEEE")
Dim ArrayValues() As Variant
With Worksheets("table1")
Dim lastRow As Long: lastRow = .Cells(.Rows.Count, 1).End(xlUp).Row
ReDim ArrayValues(lastRow)
For i = lastRow To 1 Step -1
For j = 0 To UBound(Endings)
ArrayValues(k) = Replace(.Range("A" & i), Endings(j), "")
k = k + 1
Next j
Next i
End With
End Sub

You're getting out of bounds because your ArrValues is filled up after not even 3 iterations of your "i" since you're adding up your k every j iterations
If you want an array of the cleaned up cells do this instead:
Sub New_1()
Dim i As Integer, j As Integer, k As Integer
Dim Endings As Variant
Dim ArrayValues() As Variant
Dim lastRow As Long
Endings = Array("/A", "/BB", "/CCC", "/DDDD", "/EEEEE")
With Worksheets("Blad6")
lastRow = .Cells(.Rows.Count, 1).End(xlUp).Row
ReDim ArrayValues(1 To lastRow) 'Then you don't have an empty ArrayValues(0)
For i = lastRow To 1 Step -1
For j = 0 To UBound(Endings)
If j = 0 Then
ArrayValues(i) = Replace(.Range("A" & i), Endings(j), "")
Else
ArrayValues(i) = Replace(ArrayValues(i), Endings(j), "")
End If
Next j
Next i
'Use Array here
End With
End Sub

If your intent is to create an array in which everything after the / is removed, this might be simpler, using the Split function; and also faster by storing the data to be split in a VBA array, in iterating through that array instead of the worksheet cells.
Option Explicit
Sub New_1()
'in VBA, Long is marginally more efficient than Integer
Dim k As Long, v As Variant
Dim dataArr As Variant
Dim ArrayValues() As Variant
With Worksheets("SHEET7")
'faster to loop through VBA array than worksheet cells
'Note that this will be a 2D array with dimensions starting at 1, not 0
dataArr = Range(.Cells(1, 1), .Cells(.Rows.Count, 1).End(xlUp))
End With
'This might be simpler
ReDim ArrayValues(1 To UBound(dataArr, 1))
k = 0
For Each v In dataArr
k = k + 1
ArrayValues(k) = Split(v, "/")(0)
Next v
End Sub

Related

Getting the maximum value of a specific column in a 2d array [duplicate]

I use the code hereunder to calculate max values as described in this post (vba max value of group of values). The code works great but once I have more than 65k lines I get a data type mismatch when trying to pase the array:
sht.Range(Cells(1, lColumn), Cells(last.Row, lColumn)).Value = Application.Index(groupsArray, , lColumn)
Could somebody help me to slice the array in chunks. I have tried to get it working myself but without any luck.
Sub FillGroupsMax()
Dim lColumn As Long
Dim sht As Worksheet
Dim groupsArray As Variant 'array with all group infomation
Dim groupsSeen As Variant 'array with group infomation already seen
Application.ScreenUpdating = False 'stop screen updating makes vba perform better
Set sht = ThisWorkbook.Worksheets("import")
Set last = sht.Range("A:A").Find("*", Cells(1, 1), searchdirection:=xlPrevious) 'last cell with value in column A
lColumn = sht.Cells(1, Columns.Count).End(xlToLeft).Column
groupsArray = sht.Range(Cells(1, 1), Cells(last.Row, lColumn))
'collect all the information on the Sheet into an array
'Improves performance by not visiting the sheet
For dRow = 2 To last.Row 'for each of the rows skipping header
'check if group as already been seen
If inArrayValue(Cells(dRow, 1).Value, groupsSeen) > 0 Then
'if it has been seen/calculated attribute value
'Cells(dRow, 4).Value = inArrayValue(Cells(dRow, 1).Value, groupsSeen)
groupsArray(dRow, lColumn) = inArrayValue(Cells(dRow, 1).Value, groupsSeen)
Else
'if it hasn't been seen then find max
'Cells(dRow, 4).Value = getMax(Cells(dRow, 1).Value, groupsArray)
groupsArray(dRow, lColumn) = getMax(Cells(dRow, 1).Value, groupsArray, lColumn)
'array construction from empty
If IsEmpty(groupsSeen) Then
ReDim groupsSeen(0)
'groupsSeen(0) = Array(Cells(dRow, 1).Value, Cells(dRow, 4).Value)
groupsSeen(0) = Array(groupsArray(dRow, 1), groupsArray(dRow, lColumn))
'attribute value to array
Else
ReDim Preserve groupsSeen(0 To UBound(groupsSeen) + 1)
groupsSeen(UBound(groupsSeen)) = Array(groupsArray(dRow, 1), groupsArray(dRow, lColumn))
End If
End If
Next
sht.Range(Cells(1, lColumn), Cells(last.Row, lColumn)).Value = Application.Index(groupsArray, , lColumn)
'reactivate Screen updating
Application.ScreenUpdating = True
End Sub
Function getMax(group As String, groupsArray As Variant, lColumn As Long) As Double
'for each in array
For n = 1 To UBound(groupsArray)
'if its the same group the Max we seen so far the record
If groupsArray(n, 1) = group And groupsArray(n, lColumn - 1) > maxSoFar Then
maxSoFar = groupsArray(n, lColumn - 1)
End If
Next
'set function value
getMax = maxSoFar
End Function
Function inArrayValue(group As String, groupsSeen As Variant) As Double
'set function value
inArrayValue = 0
'if array is empty then exit
If IsEmpty(groupsSeen) Then Exit Function
'for each in array
For n = 0 To UBound(groupsSeen)
'if we find the group
If groupsSeen(n)(0) = group Then
'set function value to the Max value already seen
inArrayValue = groupsSeen(n)(1)
'exit function earlier
Exit Function
End If
Next
End Function
You can write a helper function to use instead of Application.Index
Bonus - it will be much faster than using Index (>5x)
Sub Tester()
Dim arr, arrCol
arr = Range("A2:J80000").Value
arrCol = GetColumn(arr, 5) '<< get the fifth column
Range("L2").Resize(UBound(arrCol, 1), 1).Value = arrCol
End Sub
'extract a single column from a 1-based 2-D array
Function GetColumn(arr, colNumber)
Dim arrRet, i As Long
ReDim arrRet(1 To UBound(arr, 1), 1 To 1)
For i = 1 To UBound(arr, 1)
arrRet(i, 1) = arr(i, colNumber)
Next i
GetColumn = arrRet
End Function
EDIT - since QHarr asked about timing here's a basic example
Sub Tester()
Dim arr, arrCol, t, i as long
arr = Range("A2:J80000").Value
t = Timer
For i = 1 to 100
arrCol = GetColumn(arr, 5) '<< get the fifth column
Next i
Debug.print Timer - t '<<# of seconds for execution
End Sub
Below, whilst not as tidy as could be, is a way to process an array in chunks and Index to access a column and write out to the sheet.
I populated two columns (A:B) with data. Both had 132,000 rows, populated incrementally, with values from 1 to 132,000 in each column for my test run.
You can fiddle with cutOff to get the chunk size just below the point where the fail happens.
The code below is simply to demonstrate the principle of looping in batches, upto the set cutoff in each batch, until all rows have been processed.
Option Explicit
Public Sub WriteArrayToSheet()
Dim wb As Workbook
Dim ws As Worksheet
Set wb = ThisWorkbook
Set sht = wb.Worksheets("Sheet1") 'change as appropriate
Dim myArr() 'dynamic array
myArr = sht.Range("A1").CurrentRegion.Value 'you may want a more robust method
Dim cutOff As Long 'the max value - what ever it is before error occurs
cutOff = 1000
Dim totalRows As Long 'total rows in array read in from sheet
totalRows = UBound(myArr, 1)
Dim totalArraysNeeded As Long
'Determine how many lots of cutOff chunks there are in the total number of array rows
totalArraysNeeded = Application.WorksheetFunction.Ceiling(totalRows / cutOff, 1)
Dim rotations As Long 'number of times to loop original array to handle all rows
Dim rowCountTotal As Long
Dim rowCount As Long
Dim tempArr() 'this will hold the chunk of the original array
Dim rowCounter As Long
Dim lastRow As Long
Dim nextRow As Long
Dim i As Long
Dim j As Long
Dim numRows As Long
rotations = 1
Do While rotations < totalArraysNeeded
If rotations < totalArraysNeeded - 1 Then
ReDim tempArr(1 To cutOff, 1 To UBound(myArr, 2)) 'size chunk array
numRows = cutOff
Else
numRows = totalRows - rowCountTotal
ReDim tempArr(1 To numRows, 1 To UBound(myArr, 2)) 'size chunk array
End If
For i = 1 To numRows
rowCount = 1 'rows in this chunk looped
rowCountTotal = rowCountTotal + 1 'rows in original array looped
For j = LBound(myArr, 2) To UBound(myArr, 2)
tempArr(i, j) = myArr(rowCountTotal, j)
Next j
rowCount = rowCount + 1
Next i
With sht
lastRow = .Cells(.Rows.Count, "E").End(xlUp).Row 'Column where I am writing the sliced column out to
End With
If lastRow = 1 Then
nextRow = 1
Else
nextRow = lastRow + 1
End If
sht.Range("E" & nextRow).Resize(UBound(tempArr, 1), 1) = Application.Index(tempArr, , 1) 'write out to sheet
rotations = rotations + 1
Loop
End Sub
As #Tim suggested, the best way to slice a large array is use a loop to copy the column.
Though in your case, most of the processing time is spent on computing the maximum since your code is using a nested loop.
If you want to reduce significantly the processing time, then use a dictionary:
Sub Usage
GetMaxByGroupTo _
sourceGroups := ThisWorkbook.Range("Sheet1!A2:A100"), _
sourceValues := ThisWorkbook.Range("Sheet1!B2:B100"), _
target := ThisWorkbook.Range("Sheet1!C2")
End Sub
Sub GetMaxByGroupTo(sourceGroups As Range, sourceValues As Range, target As Range)
Dim dict As Object, groups(), values(), r As Long, max
Set dict = CreateObject("Scripting.Dictionary")
groups = sourceGroups.Value2
values = sourceValues.Value2
' store the maximum value of each group in a dictionary for an efficient lookup '
For r = Lbound(groups) to Ubound(groups)
max = dict(groups(r, 1))
If VarType(max) And values(r, 1) <= max Then Else dict(groups(r, 1)) = values(r, 1)
Next
' build and copy the result array to the sheet '
For r = Lbound(groups) to Ubound(groups)
values(r, 1) = dict(groups(r, 1))
Next
target.Resize(Ubound(groups), 1).Value2 = values
End Sub

Compare two sheets and highlight unmatched rows using unique ID only

I want to match rows from two different sheets and highlight only in the first column of the unmatched row or better still copy the unmatched rows into a new sheet. The code should compare the rows of the two Sheets and color the new rows in the second sheet. Sheet2 (say Jan 2020) contains more rows than Sheet1 (Dec 2019) as its the recently updated sheet and they both contain rows of over 22k with both having unique ID as the first column.
My below code tries to highlight all the unmatching cells and takes longer time to finish. What I wish is for the code to just color the unmatched in column A (the vb.Red) only(since its the unique ID) while ignoring the rest of the column/cells (vb.Yellow) and or if possible copy the highlighted rows into a new sheet.
Sub RunCompare()
Call compareSheets("Sheet1", "Sheet2") 'compareSheets("2019-01 Database", "2019-02 Database")
End Sub
Sub compareSheets(shtSheet1 As String, shtSheet2 As String)
Dim c As Integer, j As Integer, i As Integer, mydiffs As Integer, cnt1 As Integer, cnt2 As Integer
Dim noexist As Integer
cnt2 = Worksheets("Sheet2").Cells.SpecialCells(xlCellTypeLastCell).Row
cnt1 = Worksheets("Sheet1").Cells.SpecialCells(xlCellTypeLastCell).Row
'For each cell in sheet2 that is not the same in Sheet1, color it yellow
For i = 1 To cnt2
For j = 1 To cnt1
If ActiveWorkbook.Worksheets(shtSheet2).Cells(i, 1).Value = ActiveWorkbook.Worksheets(shtSheet1).Cells(j, 1).Value Then
For c = 2 To 22
If Not ActiveWorkbook.Worksheets(shtSheet2).Cells(i, c).Value = ActiveWorkbook.Worksheets(shtSheet1).Cells(j, c).Value Then
ActiveWorkbook.Worksheets(shtSheet2).Cells(i, c).Interior.Color = vbYellow
mydiffs = mydiffs + 1
End If
Next
Exit For
End If
If j = cnt1 Then
ActiveWorkbook.Worksheets(shtSheet2).Cells(i, 1).Interior.Color = vbRed
End If
Next
Next
'Display a message box to demonstrate the differences and if there is a new entry on the second sheet
'MsgBox mydiffs & ":differences found, " & noexist & ":no exist", vbInformation
ActiveWorkbook.Sheets(shtSheet2).Select
End Sub
Let's simplify the task and do it step by step.
This is how the input in the two sheets can look like:
Then, we may consider reading these and saving them to an array:
Set rangeA = ThisWorkbook.Worksheets(1).Range("A1:Z1")
Set rangeB = ThisWorkbook.Worksheets(2).Range("A1:ZZ1")
Dim arrayA As Variant
Dim arrayB As Variant
With Application
arrayA = .Transpose(.Transpose(rangeA))
arrayB = .Transpose(.Transpose(rangeB))
End With
Looping between the data in the two arrays is quite fast in vba. The writing to the third worksheet is done only once the two values from the two arrays match:
Dim myValA As Variant
Dim myValB As Variant
Dim currentRow As Long: currentRow = 1
For Each myValA In arrayA
For Each myValB In arrayB
If myValA = myValB Then
ThisWorkbook.Worksheets(3).Cells(currentRow, 1) = myValA
currentRow = currentRow + 1
End If
Next
Next
This is the result in the third worksheet, all matching values are in a single row:
This is how the whole code looks like:
Sub CompareTwoRanges()
Dim rangeA As Range
Dim rangeB As Range
Set rangeA = ThisWorkbook.Worksheets(1).Range("A1:Z1")
Set rangeB = ThisWorkbook.Worksheets(2).Range("A1:ZZ1")
Dim arrayA As Variant
Dim arrayB As Variant
With Application
arrayA = .Transpose(.Transpose(rangeA))
arrayB = .Transpose(.Transpose(rangeB))
End With
Dim myValA As Variant
Dim myValB As Variant
Dim currentRow As Long: currentRow = 1
For Each myValA In arrayA
For Each myValB In arrayB
If myValA = myValB Then
ThisWorkbook.Worksheets(3).Cells(currentRow, 1) = myValA
currentRow = currentRow + 1
End If
Next
Next
End Sub
Note - there will be another performance bonus, if the results are written to an array and then written from the array to the worksheet. Thus the writing would happen only once. This is the change, that needs to be implemented in the code, after the array declarations:
Dim myValA As Variant
Dim myValB As Variant
Dim resultArray() As Variant
ReDim Preserve resultArray(2 ^ 20)
Dim i As Long: i = 0
For Each myValA In arrayA
For Each myValB In arrayB
If myValA = myValB Then
resultArray(i) = myValA
i = i + 1
End If
Next
Next
ReDim Preserve resultArray(i)
ThisWorkbook.Worksheets(3).Cells(1, 1).Resize(UBound(resultArray)) = Application.Transpose(resultArray)
when you get cell value, it spends time.
so, you can target Range transfer 2d Variant
Dim endRow AS Long
Dim olderRange AS Range
Dim olderVariant AS Variant
endRow = olderSheet.cells(rows.count,1).end(xlup).row
Set olderRange = olderSheet.Range(olderSheet.Cells(startRow, startCol), olderSheet.Cells(endRow, endCol))
'Transfer
olderVariant = olderRange
For currentRow = 1 to UBound(olderVariant, 1)
'Loop
'if you want change real Cell value Or interior
'add row Or Col weight
if olderVariant(currentRow, currentCol) = newerVariant(currentRow, currentCol) THen
newerSheet.Cells(currentRow+10,currentCol+10).interior.colorIndex = 3
End if
Next currentRow
In case anyone has the same kind of problem, I have found an easier way to do it. Providing your sheet2 is the comparison sheet:
Dim Ary1 As Variant, Ary2 As Variant
Dim r As Long
Ary1 = Sheets("Sheet1").UsedRange.Value2
Ary2 = Sheets("Sheet2").UsedRange.Value2
With CreateObject("scripting.dictionary")
For r = 1 To UBound(Ary1)
.Item(Ary1(r, 1)) = Empty
Next r
For r = 1 To UBound(Ary2)
If Not .Exists(Ary2(r, 1)) Then Sheets("Sheet2").Cells(r, 1).Interior.Color = vbRed
Next r
End With

Remove rows from a 2d array if value in column is empty

I have a large table of lab measurement logs, which I work with using arrays.
(Im a chemist, a lab technician and Ive started to learn VBA only last week, please bear with me.)
Im trying to figure out, how to load the table into an array and then remove rows with an empty value in the 5th column so that I can "export" the table without blanks in the 5th column via an array into a different sheet.
I first tested this with some code I found for a 1D array, where I would make 2 arrays, one placeholder array which Id loop through adding only non-blanks to a second array.
For Counter = LBound(TestArr) To UBound(TestArr)
If TestArr(Counter, 1) <> "" Then
NoBlankSize = NoBlankSize + 1
NoBlanksArr(UBound(NoBlanksArr)) = TestArr(Counter, 1)
ReDim Preserve NoBlanksArr(0 To UBound(NoBlanksArr) + 1)
End If
Next Counter
It works in 1D, but I cant seem to get it two work with 2 dimensions.
Heres the array Im using for reading and outputting the data
Sub ArrayTest()
Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
Dim TestArray() As Variant
Dim Dimension1 As Long, Dimension2 As Long
Sheets("Tracker").Activate
Dimension1 = Range("A3", Range("A2").End(xlDown)).Cells.Count - 1
Dimension2 = Range("A2", Range("A2").End(xlToRight)).Cells.Count - 1
ReDim TestArray(0 To Dimension1, 0 To Dimension2)
'load into array
For Dimension1 = LBound(TestArray, 1) To UBound(TestArray, 1)
For Dimension2 = LBound(TestArray, 2) To UBound(TestArray, 2)
TestArray(Dimension1, Dimension2) = Range("A4").Offset(Dimension1, Dimension2).Value
Next Dimension2
Next Dimension1
Sheets("Output").Activate
ActiveSheet.Range("A2").Select
'read from array
For Dimension1 = LBound(TestArray, 1) To UBound(TestArray, 1)
For Dimension2 = LBound(TestArray, 2) To UBound(TestArray, 2)
ActiveCell.Offset(Dimension1, Dimension2).Value = TestArray(Dimension1, Dimension2)
Next Dimension2
Next Dimension1
Erase TestArray
Application.ScreenUpdating = True
Application.Calculation = xlCalculationAutomatic
End Sub
Thank you for any help in advance.
The Redim Preserve statement does not work for two-dimensional arrays if you want to change the number of records (rows).
You could load the range into an array, and then when you want to export the array to another range, loop through that array while skipping blank records.
An example:
Option Explicit
Sub ArrayTest()
Dim wb As Workbook, wsInput As Worksheet, wsOutput As Worksheet
Dim myArr As Variant
Dim i As Long, k As Long, LRow As Long
Set wb = ThisWorkbook
Set wsInput = wb.Sheets("Tracker")
Set wsOutput = wb.Sheets("Output")
LRow = wsOutput.Cells(wsOutput.Rows.Count, "A").End(xlUp).Row + 1
'Load a range into the array (example range)
myArr = wsInput.Range("A1:Z100")
'Fill another range with the array
For i = LBound(myArr) To UBound(myArr)
'Check if the first field of the current record is empty
If Not Len(myArr(i, 1)) = 0 Then
'Loop through the record and fill the row
For k = LBound(myArr, 2) To UBound(myArr, 2)
wsOutput.Cells(LRow, k) = myArr(i, k)
Next k
LRow = LRow + 1
End If
Next i
End Sub
From your code, it appears you want to
test a column of data on a worksheet to see if there are blanks.
if there are blanks in the particular column, exclude that row
copy the data with the excluded rows to a new area
You can probably do that easier (and quicker) with a filter: code below checking for blanks in column2
Option Explicit
Sub removeCol2BlankRows()
Dim wsSrc As Worksheet, wsRes As Worksheet
Dim rSrc As Range, rRes As Range
Set wsSrc = ThisWorkbook.Worksheets("sheet1")
Set rSrc = wsSrc.Cells(1, 1).CurrentRegion 'many ways to do this
Set wsRes = ThisWorkbook.Worksheets("sheet1")
Set rRes = wsRes.Cells(1, 10)
If wsSrc.AutoFilterMode = True Then wsSrc.AutoFilterMode = False
rSrc.AutoFilter field:=2, Criteria1:="<>"
rSrc.SpecialCells(xlCellTypeVisible).Copy rRes
wsRes.AutoFilterMode = False
End Sub
If you really just want to filter the VBA arrays in code, I'd store the non-blank rows in a dictionary, and then write it back to the new array:
Option Explicit
Sub removeCol2BlankRows()
Dim testArr As Variant
Dim noBlanksArr As Variant
Dim myDict As Object
Dim I As Long, J As Long, V
Dim rwData(1 To 4) As Variant
With ThisWorkbook.Worksheets("sheet1")
testArr = .Range(.Cells(1, 1), .Cells(.Rows.Count, 1).End(xlUp)).Resize(columnsize:=4)
End With
Set myDict = CreateObject("Scripting.Dictionary")
For I = 1 To UBound(testArr, 1)
If testArr(I, 2) <> "" Then
For J = 1 To UBound(testArr, 2)
rwData(J) = testArr(I, J)
Next J
myDict.Add Key:=I, Item:=rwData
End If
Next I
ReDim noBlanksArr(1 To myDict.Count, 1 To 4)
I = 0
For Each V In myDict.keys
I = I + 1
For J = 1 To 4
noBlanksArr(I, J) = myDict(V)(J)
Next J
Next V
End Sub

Out of Memory Error - On Sorting Data

I have the following code. Which was working fine but now I'm getting an error. "Run-time error'7': Out of Memory" Below is my code:
Sub SpiltData()
Dim tmpArr As Variant, rCell As Range, v, i As Long, v2(), J As Long, K As Long
Dim ws1 As Worksheet, ws2 As Worksheet, n As Long
ThisWorkbook.Sheets("Data").Activate
Set ws1 = ActiveSheet
Set ws2 = Sheets("Hours Sorted") 'You need to specify a sheet here
v = ws1.Range("J6").CurrentRegion.Value
ReDim v2(1 To UBound(v, 1) * 100, 1 To UBound(v, 2))
n = 1
For i = LBound(v, 1) To UBound(v, 1)
tmpArr = Split(v(i, 10), Chr(10))
For K = 0 To UBound(tmpArr)
For J = LBound(v, 2) To UBound(v, 2)
v2(n, J) = v(i, J)
Next J
v2(n, 10) = tmpArr(K)
n = n + 1
Next K
Next i
ws2.Range("A1").Resize(n, UBound(v2, 2)) = v2
End Sub
Is there something I can do differently?
If you are starting with a huge selection, and your carriage return delimited data in position 10 of your array is large then you are resulting in "Huge x Large" number of results, all held in memory until it's dumped back out into ws2.
Alternatively, you can iterate through your range and write out as you encounter values. Keeping your variables nice and small and, presumably, your code easier to understand and debug. To make this work quicker, you can shut off the application.screenupdating toggle.
Sub SpiltData()
Dim rngRead As Range
Dim rngReadRow As Range
Dim rngReadCell As Range
Dim arrSplit As Variant
Dim intWriteRow As Integer
'initialize intWriteRow
intWriteRow = 1
'get that range
Set rngReadRow = Sheets("Data").Range("J6").CurrentRegion
'loop through each row
For Each rngReadRow In rngReadRow.Rows
'split your carriage return delimited cell into an array
arrSplit = Split(rngReadRow.Cells(1, 11), Chr(10))
'iterate the array and write out
For Each arrItem In arrSplit
'by iterating each cell in the readrow
For Each rngReadCell In rngReadRow.Cells
'and writing it to the other worksheet
Sheets("Hours Sorted").Cells(intWriteRow, rngReadCell.Column).Value = rngReadCell
'increment the row to which we are writing
intWriteRow = intWriteRow + 1
Next rngReadCell
'and now write the split out value in the array
Sheets("Hours Sorted").Cells(intWriteRow, 11) = arrItem
Next arrItem
Next rngReadRow
End Sub
This will probably have to be tweaked a small bit since I converted from arrays to ranges and made some assumptions, but it will get you in the ballpark and excel will crank on this all day long if it needs to and your main limit is the number of rows in excel instead of your ram.

Remove duplicates from array using VBA

Assume I have a block of data in Excel 2010, 100 rows by 3 columns.
Column C contains some duplicates, say it starts off as
1, 1, 1, 2, 3, 4, 5, ..... , 97, 98
Using VBA, I would like to remove the duplicate rows so I am left with 98 rows and 3 columns.
1, 2, 3, ..... , 97, 98
I know there is a button in Excel 2010 to do that but it inteferes with the rest of my code subsequently and gives incorrect results.
Furthermore, I would like to do it in arrays, then paste the results on the worksheet, rather than methods such as Application.Worksheetfunction.countif(.....
So something like:
Dim myarray() as Variant
myarray=cells(1,1).Currentregion.value
Dim a as Long
For a=1 to Ubound(myarray,1)
'something here to
Next a
I answered a similar question. Here is the code I used:
Dim dict As Object
Dim rowCount As Long
Dim strVal As String
Set dict = CreateObject("Scripting.Dictionary")
rowCount = Sheet1.Range("A1").CurrentRegion.Rows.Count
'you can change the loop condition to iterate through the array rows instead
Do While rowCount > 1
strVal = Sheet1.Cells(rowCount, 1).Value2
If dict.exists(strVal) Then
Sheet1.Rows(rowCount).EntireRow.Delete
Else
'if doing this with an array, then add code in the Else block
' to assign values from this row to the array of unique values
dict.Add strVal, 0
End If
rowCount = rowCount - 1
Loop
Set dict = Nothing
If you want to use an array, then loop through the elements with the same conditional (if/else) statements. If the item doesn't exist in the dictionary, then you can add it to the dictionary and add the row values to another array.
Honestly, I think the most efficient way is to adapt code you'd get from the macro recorder. You can perform the above function in one line:
Sheet1.UsedRange.RemoveDuplicates Columns:=3, Header:=xlYes
Function eliminateDuplicate(poArr As Variant) As Variant
Dim poArrNoDup()
dupArrIndex = -1
For i = LBound(poArr) To UBound(poArr)
dupBool = False
For j = LBound(poArr) To i
If poArr(i) = poArr(j) And Not i = j Then
dupBool = True
End If
Next j
If dupBool = False Then
dupArrIndex = dupArrIndex + 1
ReDim Preserve poArrNoDup(dupArrIndex)
poArrNoDup(dupArrIndex) = poArr(i)
End If
Next i
eliminateDuplicate = poArrNoDup
End Function
Simple function to remove duplicates from a 1D array
Private Function DeDupeArray(vArray As Variant) As Variant
Dim oDict As Object, i As Long
Set oDict = CreateObject("Scripting.Dictionary")
For i = LBound(vArray) To UBound(vArray)
oDict(vArray(i)) = True
Next
DeDupeArray = oDict.keys()
End Function
Edit:
With stdVBA (a library largely maintained by myself) you can use:
uniqueValues = stdEnumerator.CreateFromArray(myArray).Unique().AsArray()
An improvement on #RBILLC and #radoslav006 answers, this version searches the array with the duplicates removed for existing values so it searchs less values to find a duplicate.
Public Function RemoveDuplicatesFromArray(sourceArray As Variant)
Dim duplicateFound As Boolean
Dim arrayIndex As Integer, i As Integer, j As Integer
Dim deduplicatedArray() As Variant
arrayIndex = -1
deduplicatedArray = Array(1)
For i = LBound(sourceArray) To UBound(sourceArray)
duplicateFound = False
For j = LBound(deduplicatedArray) To UBound(deduplicatedArray)
If sourceArray(i) = deduplicatedArray(j) Then
duplicateFound = True
Exit For
End If
Next j
If duplicateFound = False Then
arrayIndex = arrayIndex + 1
ReDim Preserve deduplicatedArray(arrayIndex)
deduplicatedArray(arrayIndex) = sourceArray(i)
End If
Next i
RemoveDuplicatesFromArray = deduplicatedArray
End Function
Here's another approach for working with an array:
Sub tester()
Dim arr, arrout
arr = Range("A1").CurrentRegion.Value 'collect the input array
arrout = UniqueRows(arr) 'get only unique rows
Range("H1").Resize(UBound(arrout, 1), UBound(arrout, 2)).Value = arrout
End Sub
Function UniqueRows(arrIn As Variant) As Variant
Dim keys, rw As Long, col As Long, k, sep, arrout
Dim dict As Object, lbr As Long, lbc As Long, ubr As Long, ubc As Long, rwOut As Long
Set dict = CreateObject("scripting.dictionary")
'input array bounds
lbr = LBound(arrIn, 1)
ubr = UBound(arrIn, 1)
lbc = LBound(arrIn, 2)
ubc = UBound(arrIn, 2)
ReDim keys(lbr To ubr)
'First pass:collect all the row "keys" in an array
' and unique keys in a dictionary
For rw = lbr To ubr
k = "": sep = ""
For col = lbc To ubc
k = k & sep & arrIn(rw, col)
sep = Chr(0)
Next col
keys(rw) = k 'collect key for this row
dict(k) = True 'just collecting unique keys
Next rw
'Resize output array to # of unique rows
ReDim arrout(lbr To dict.Count + (lbr - 1), lbc To ubc)
rwOut = lbr
'Second pass: copy each unique row to the output array
For rw = lbr To ubr
If dict(keys(rw)) Then 'not yet output?
For col = lbc To ubc 'copying this row over to output...
arrout(rwOut, col) = arrIn(rw, col)
Next col
rwOut = rwOut + 1 'increment output "row"
dict(keys(rw)) = False 'flag this key as copied
End If
Next rw
UniqueRows = arrout
End Function
Answer from #RBILLC could be easily improved by adding an Exit For inside internal loop:
Function eliminateDuplicate(poArr As Variant) As Variant
Dim poArrNoDup()
dupArrIndex = -1
For i = LBound(poArr) To UBound(poArr)
dupBool = False
For j = LBound(poArr) To i
If poArr(i) = poArr(j) And Not i = j Then
dupBool = True
Exit For
End If
Next j
If dupBool = False Then
dupArrIndex = dupArrIndex + 1
ReDim Preserve poArrNoDup(dupArrIndex)
poArrNoDup(dupArrIndex) = poArr(i)
End If
Next i
eliminateDuplicate = poArrNoDup
End Function
I think this is really a case for using excel's native functions, at least for the initial array acquisition, and I don't think there's any simpler way to do it. This sub will output the unique values starting in column 5. I assumed that the target range was empty, so if it's not, change r and c.
Sub testUniques()
Dim arr, r As Long, c As Long, h As Long, w As Long
Dim this As Worksheet: Set this = ActiveSheet
arr = Application.Unique(this.Cells(1, 1).CurrentRegion)
r = 1
c = 5
h = UBound(arr, 1) - 1
w = UBound(arr, 2) - 1
this.Range(this.Cells(r, c), this.Cells(r + h, c + w)) = arr
End Sub
I know this is old, but here's something I used to copy duplicate values to another range so that I could see them quickly to establish data integrity for a database I was standing up from various spreadsheets. To make the procedure delete the duplicates it would be as simple as replacing the dupRng lines with Cell.Delete Shift:=xlToLeft or something to that effect.
I haven't tested that personally, but it should work.
Sub PartCompare()
Dim partRng As Range, partArr() As Variant, i As Integer
Dim Cell As Range, lrow As Integer
lrow = ThisWorkbook.Worksheets("Sheet1").Cells(Rows.Count, 1).End(xlUp).Row
i = 0
Set partRng = ThisWorkbook.Worksheets("Sheet1").Range(Cells(1, 1), Cells(lrow, 1))
For Each Cell In partRng.Cells
ReDim Preserve partArr(i)
partArr(i) = Cell.Value
i = i + 1
Next
Dim dupRng As Range, j As Integer, x As Integer, c As Integer
Set dupRng = ThisWorkbook.Worksheets("Sheet1").Range("D1")
x = 0
c = 1
For Each Cell In partRng.Cells
For j = c To UBound(partArr)
If partArr(j) = Cell.Value Then
dupRng.Offset(x, 0).Value = Cell.Value
dupRng.Offset(x, 1).Value = Cell.Address()
x = x + 1
Exit For
End If
Next j
c = c + 1
Next Cell
End Sub
Remove duplicates (plus related row items) from array
As OP wanted a VBA solution close to RemoveDuplicates, I demonstrate an array approach using a â–ºdictionary to get not the unique items per se (dict.keys), but the related row indices of first occurrencies (dict.items).
These are used to retain the whole row data via procedure LeaveUniques profiting from the advanced possibilities of the â–ºApplication.Index() function - c.f. Some peculiarities of the the Application.Index function
Example Call
Sub ExampleCall()
'[0]define range and assign data to 1-based 2-dim datafield
With Sheet1 ' << reference to your project's sheet Code(Name)
Dim lastRow: lastRow = .Cells(.Rows.Count, "C").End(xlUp).Row
Dim rng: Set rng = .Range("C2:E" & lastRow)
End With
Dim data: data = rng ' assign data to 2-dim datafield
'[1]get uniques (column 1) and remove duplicate rows
LeaveUniques data ' << call procedure LeaveUniques (c.f. RemoveDuplicates)
'[2]overwrite original range
rng.Clear
rng.Resize(UBound(data), UBound(data, 2)) = data
End Sub
Procedure LeaveUniques
Sub LeaveUniques(ByRef data As Variant, Optional ByVal colNum As Long = 1)
'Purpose: procedure removes duplicates of given column number in entire array
data = Application.Index(data, uniqueRowIndices(data, colNum), nColIndices(UBound(data, 2)))
End Sub
Help functions to LeaveUniques
Function uniqueRowIndices(data, Optional ByVal colNum As Long = 1)
'Purpose: return data index numbers referring to uniques
'a) set late bound dictionary to memory
Dim dict As Object
Set dict = CreateObject("Scripting.Dictionary")
'b) slice e.g. first data column (colNum = 1)
Dim colData
colData = Application.Index(data, 0, colNum)
'c) fill dictionary with uniques referring to first occurencies
Dim i As Long
For i = 1 To UBound(colData)
If Not dict.exists(dict(colData(i, 1))) Then dict(colData(i, 1)) = i
Next
'd) return 2-dim array of valid unique 1-based index numbers
uniqueRowIndices = Application.Transpose(dict.items)
End Function
Function nColIndices(ByVal n As Long)
'Purpose: return "flat" array of n column indices, e.g. for n = 3 ~> Array(1, 2, 3)
nColIndices = Application.Transpose(Evaluate("row(1:" & n & ")"))
End Function

Resources