find the same words into different cells - excel

find the common words in title of the books in Excel, the output like this :
'book common user_id
physics physics 1
Principles of plasma physics physics,plasma 2
Fundamentals of plasma physics fundamentals,plasma,physics 3
Fundamentals of thermodynamics fundamentals 4
'

So here's my shot at this problem. I am aware that the code is rather messy: I've been very sloppy with variable names, error handling and so on, but it gives you an idea of how it can be done. I've created a UDF Common() which takes 4 arguments:
rngText: a reference to a single cell containing the text (in your case book) you want to comare
compareList: a range of cells with which to compare the first argument
minOccurences (optional): this is the definition of the minimum number of occurences a word should have to be considered "common". The default vanue is 2
exclusionList (optional): a range of cells containing text that should be excluded (e.g. words like "a", "of", ...)
So for example, if you have your titles in A2:A7 and your exclusion list in E2:E3, you could use the formula = Common( A2, $A$2:$A$7, , $E$2:$E$3 )in cell B2 and copy down to B7.
Option Explicit
Function Common(rngText As Range, compareList As Range, _
Optional minOccurences As Integer = 2, Optional exclusionList As Range) As Variant
'Check if an exclusion list is provided
Dim exclusionListProvided As Boolean
If Not (exclusionList Is Nothing) Then
exclusionListProvided = True
Else
exclusionListProvided = False
End If
'Check the argments
Dim returnError As Boolean
If IsDate(rngText.Value) Or IsNumeric(rngText.Value) Or IsError(rngText.Value) Then 'first argument should refer to a cell containing text
returnError = True
ElseIf minOccurences < 2 Then 'Function should check for at least 2 occurences
returnError = True
ElseIf (compareList.Columns.Count > 1 And compareList.Rows.Count > 1) Then 'compareList should be one-dimensional
returnError = True
ElseIf exclusionListProvided Then
If (exclusionList.Columns.Count > 1 And exclusionList.Rows.Count > 1) Then 'exclusionList should be one-dimensional
returnError = True
End If
Else
returnError = False
End If
'Return an error if one of the arguments is unexpected
If returnError Then
Common = CVErr(xlErrValue)
Else
Dim text As String
text = rngText.Value
'split text into an array of words
Dim words() As String
words = fullSplit(text)
'convert exclusionlist and compareList to arrays
Dim arrExclude()
If exclusionListProvided Then
arrExclude() = rangeToStringArray(exclusionList)
End If
Dim arrCompare()
arrCompare() = rangeToStringArray(compareList)
Dim strCommon As String
'loop through words in text
Dim i As Integer
Dim j As Integer
Dim k As Integer
Dim nOccurences As Integer
Dim excluded As Boolean
Dim compareWords() As String
For i = LBound(words) To UBound(words)
'check if word is in exclusion list
excluded = False
If exclusionListProvided Then
For j = LBound(arrExclude) To UBound(arrExclude)
compareWords = fullSplit(arrExclude(j))
For k = LBound(compareWords) To UBound(compareWords)
If compareWords(k) = words(i) Then
excluded = True
Exit For
End If
Next k
If excluded Then Exit For
Next j
End If
'count the number of occurences of the word in the compare list
If Not excluded Then
nOccurences = 0
For j = LBound(arrCompare) To UBound(arrCompare)
compareWords = fullSplit(arrCompare(j))
For k = LBound(compareWords) To UBound(compareWords)
If LCase(compareWords(k)) = LCase(words(i)) Then
nOccurences = nOccurences + 1
Exit For
End If
Next k
Next j
If nOccurences >= minOccurences Then
If Not strCommon = "" Then
strCommon = strCommon & ", "
End If
strCommon = strCommon & LCase(words(i))
End If
End If
Next i
Common = strCommon
End If
End Function
'split text by using a list of delimiters
Function fullSplit(text As Variant)
'define list of delimiters
Dim delimiters()
delimiters = Array(" ", ",", ".", ";", "?", "!")
'unique delimiter is the first one from the list
Dim uniqueDelimiter As String
uniqueDelimiter = delimiters(0)
'replace all delimiters in the text by the unique delimiter
Dim i As Integer
For i = LBound(delimiters) + 1 To UBound(delimiters)
Replace text, delimiters(i), uniqueDelimiter
Next i
'split the text by using the unique delimiter
fullSplit = SplitText(text, uniqueDelimiter)
End Function
'split text by using a single delimiter
Function SplitText(text As Variant, delimiter As String)
'split the text in substrings on each occurence of the delimiter
Dim tempArray() As String
tempArray = Split(text, delimiter)
'remove empty substrings
Dim LastNonEmpty As Integer
LastNonEmpty = -1
Dim i As Integer
For i = LBound(tempArray) To UBound(tempArray)
If tempArray(i) <> "" Then
LastNonEmpty = LastNonEmpty + 1
tempArray(LastNonEmpty) = tempArray(i)
End If
Next
ReDim Preserve tempArray(0 To LastNonEmpty)
SplitText = tempArray
End Function
'check if two arrays share a least one element
Function sharedElements(array1() As Variant, array2() As Variant) As Boolean
Dim found As Boolean
found = False
Dim i As Integer
Dim j As Integer
For i = LBound(array1) To UBound(array1)
For j = LBound(array2) To UBound(array2)
If array1(i) = array2(j) Then
found = True
Exit For
End If
Next j
If found = True Then Exit For
Next i
sharedElements = found
End Function
'converts a range to an array of strings, omitting all non-text cells
Function rangeToStringArray(myRange As Range)
Dim myArray()
Dim arraySize As Integer
arraySize = 0
Dim c As Object
For Each c In myRange
If IsDate(c.Value) = False And IsNumeric(c.Value) = False And IsError(c.Value) = False Then
ReDim Preserve myArray(arraySize)
myArray(arraySize) = c.Value
arraySize = arraySize + 1
End If
Next
rangeToStringArray = myArray
End Function

Related

Populate 1d array to rows or column with unique items

In my sheet, I have a column with some data and in some of the cells there are more than one item separated by comma
For example:
Ahmed
Reda, Salah
Yasser, Nader, Hany
Kamal
Nader, Ali, Ahmed
Here's the udf that extracts the unique items and populate it to a row not a column.
Function UniqueItems(ByVal rng As Range, ByVal delim As String, ByVal f As Boolean)
Dim strPart, ky, c As Range, dic As Object, temp As String
If f = True Then
Dim strArr() As String
Else
'how to make it 2d array
Dim strArr() As String
End If
Set dic = CreateObject("Scripting.Dictionary")
For Each c In rng
If c.Value <> "" Then
strArr = Split(c.Value, delim)
For Each strPart In strArr
On Error Resume Next
dic.Add Trim(strPart), Trim(strPart)
On Error GoTo 0
Next strPart
temp = ""
For Each ky In dic
temp = temp & ky & delim
Next ky
End If
Next c
Dim v
v = Split(Left(temp, Len(temp) - Len(delim)), delim)
If f = True Then
UniqueItems = Split(Left(temp, Len(temp) - Len(delim)), delim)
Else
''how to make it for column
End If
End Function
How can I add another parameter in the udf arguments to decide if the user want that list of results in a row or a column?
Another point if possible, I wanted to sort the results and I used another udf
Function SortArray(myArray As Variant, bOrder As Boolean)
Dim temp, i As Long, j As Long
For i = LBound(myArray) To UBound(myArray) - 1
For j = i + 1 To UBound(myArray)
If IIf(bOrder, UCase(Trim(Replace(myArray(i), "/", ""))) > UCase(Trim(Replace(myArray(j), "/", ""))), UCase(Trim(Replace(myArray(i), "/", ""))) < UCase(Trim(Replace(myArray(j), "/", "")))) Then
temp = myArray(j)
myArray(j) = myArray(i)
myArray(i) = temp
End If
Next j
Next i
SortArray = myArray
End Function
But the udf doesn't sort as expected
the data:
the expected:
I have amended your function so that the code is more concise. You'll notice that the last parameter is optional and determines whether the function returns a horizontal or vertical array.
So if no argument is passed to the function or if the argument passed is FALSE, the function returns a horizontal array. If TRUE is passed, it returns a vertical array.
Function UniqueItems(ByVal rng As Range, ByVal delim As String, Optional ByVal transpose As Boolean = False)
Dim dic As Object
Dim c As Range
Dim strArr() As String
Dim strPart As Variant
Set dic = CreateObject("Scripting.Dictionary")
For Each c In rng
If c.Value <> "" Then
strArr() = Split(c.Value, delim)
For Each strPart In strArr
dic(Trim(strPart)) = ""
Next strPart
End If
Next c
If transpose = True Then
UniqueItems = Application.transpose(dic.keys())
Else
UniqueItems = dic.keys()
End If
End Function
To return a horizontal array . . .
=UniqueItems(A1:A100, ",")
or
=UniqueItems(A1:A100, ",", FALSE)
To return a vertical array . . .
=UniqueItems(A1:A100, ",", TRUE)

Check for similarities within a single cell

Does anyone knows how to higlight similar words within a single cell?
What I want to achieve is to find duplicates/similar words between < ; > in a cell.
example cell:
home;music;car;window;musician
desired result:
music in a word musician is hilglighted or the output is 'TRUE' in the next column if similar word like this has been found.
I was able to compare words between two columns but not within single cell.
So far I was able to create a formula to separate the words in the cell to have each range separate to work with.
Function IdDuplicates(rng As Range) As String
Dim StringtoAnalyze As Variant
Dim I As Integer
Dim J As Integer
Const minWordLen As Integer = 4
StringtoAnalyze = Split(UCase(rng.Value), ";")
For I = UBound(StringtoAnalyze) To 0 Step -1
If Len(StringtoAnalyze(I)) < minWordLen Then GoTo SkipA
For J = 0 To I - 1
If StringtoAnalyze(J) = StringtoAnalyze(I) Then
IdDuplicates = "TRUE"
GoTo SkipB
End If
Next J
SkipA:
Next I
IdDuplicates = "FALSE"
SkipB:
End Function
Any ideas?
This function returns either FALSE or a list of the similar words
Option Explicit
Function IdDuplicates(rng As Range) As String
Dim s As String, word, m As Object, i As Long
With CreateObject("VBScript.RegExp")
.IgnoreCase = True
.Global = True
s = Trim(rng.Value2)
For Each word In Split(s, ";")
.Pattern = word
Set m = .Execute(s)
If m.Count > 1 Then
IdDuplicates = IdDuplicates & "," & word
End If
Next
End With
If IdDuplicates = "" Then
IdDuplicates = "FALSE"
Else
IdDuplicates = Mid(IdDuplicates, 2) ' remove lead ,
End If
End Function
Match Partially in Delimited String
Option Explicit
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' Purpose: Returns a boolean indicating whether any of the substrings
' in a delimited string is contained in another substring.
' Example: 'music;car;musician' - 'music' is found in 'musician' - TRUE
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Function MatchPart( _
ByVal DelimitedString As Variant, _
Optional ByVal StringDelimiter As String = ";") _
As Boolean
Dim S As String: S = CStr(DelimitedString)
Dim Substrings() As String
Substrings = Split(DelimitedString, StringDelimiter)
Dim sUpper As Long: sUpper = UBound(Substrings)
If sUpper < 1 Then Exit Function
Dim i As Long, j As Long, iPos As Long
Dim iSub As String, jSub As String
For i = 0 To sUpper - 1
iSub = Substrings(i)
For j = i + 1 To sUpper
jSub = Substrings(j)
'Debug.Print i, iSub, j, jSub
If Len(iSub) <= Len(jSub) Then
If InStr(1, jSub, iSub, vbTextCompare) > 0 Then
MatchPart = True
Exit Function
End If
Else
If InStr(1, iSub, jSub, vbTextCompare) > 0 Then
MatchPart = True
Exit Function
End If
End If
Next j
Next i
End Function

How can I write a function in VBA to extract cell references from a formula?

I currently have a function which can extract the cell reference using vbscript.regexp which is not something I am particularly familiar with, however for the most part it works. The idea is to populate a list with each of the references from a formula, containing the workbook name (if an external workbook), sheet name (if a different sheet to the one where the input cell is) and finally the cell being referenced.
For example, if my input cell has a formula of simply "=AA111" then my function when applied to this cell will output "AA111" (which I can then use to determine the page number of where this cell is. In a more complicated example, if my input cell has a formula of "=[WorkbookName]SheetNameA!AB222 + SheetNameB!AC333" then my function would return "[WorkbookName]SheetNameA!AB222" and "SheetNameB!AC333". I am then able to 'read' these strings by looking at the positions of the ']' symbol which denotes the end of the workbook reference (if any) or the '!' which denotes the end of the sheet reference (if any).
The code I am currently using is as follows;
Public Function CellRef(CellInput As Range) As String
CellRefDetector = "'?([a-zA-Z0-9\s\[\]\.])*'?!?\$?[A-Z]+\$?[0-9]+(:\$?[A-Z]+\$?[0-9]+)?"
With CreateObject("vbscript.regexp")
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = CellRefDetector
Set Results = .Execute(CellInput.Formula)
End With
I am using the regex string as developed here: https://www.get-digital-help.com/extract-cell-references-from-a-formula/
This works perfectly with the following issue; there are some characters (particularly operators) which I would like to be found and included within a workbook name or sheet name, but that I would like to not be found such that I can separate sums of different cells as different references. These include; + - _ / \ all of which may be found in the name of a workbook or sheet but I would also want to split up the reference in between different references.
For example, in my above code, a '+' operator in the workbook name will cause the workbook name to only be taken past that point. For example, "=[WorkbookName+Example]SheetNameC!AD222" would return "Example]Sheet_name_C!AD444" However, if I include a '+' operator in the regex line (i.e. included within my search of the regex), then the function will include something like "=AA111+AB222+AC333" all as one cell reference and not distinguish between them.
I hope I have given enough information for someone to provide assistance! Thanks in advance.
I don't know if this is any use to you, but a teacher friend of mine asked me for some code that would do more or less the same thing as you are asking.
The trick to those Workbook and Worksheet names which contain operator characters is that they'll always be bound by an inverted comma. So we can temporarily remove those items before identifying genuine operators.
There'll doubtless be an easier and quicker way to do it, but in the shortness of time, this is what I came up with for my friend:
Option Explicit
Public Function RangesInFormula(target As Range) As Variant
Dim str As String
Dim item As Variant, items As Variant
Dim addr As String, result As String
'Sense checking
If target.Cells.Count <> 1 Then
RangesInFormula = CVErr(xlErrRef)
Exit Function
End If
If Not target.HasFormula Then Exit Function
'Split formula into array.
items = SplitFormulaByOperators(target)
'Try to grab an address from each formula item.
For Each item In items
If Len(CStr(item)) > 0 Then
addr = RangeToString(CStr(item))
If Len(addr) > 0 Then
If Len(result) > 0 Then result = result & "|"
result = result & addr
End If
End If
Next
'Return the addresses if there are any.
If Len(result) > 0 Then
RangesInFormula = result
End If
End Function
Private Function RemoveTextBetween(startChar As String, endChar As String, sourceText As String) As String
Dim startPt As Long, endPt As Long
Dim tmp As String
tmp = sourceText
Do While True
endPt = InStr(tmp, endChar)
If startChar = endChar Then
endPt = InStr(endPt + 1, tmp, endChar)
End If
If endPt = 0 Then Exit Do
startPt = InStrRev(tmp, startChar, endPt - 1)
If startPt = 0 Then Exit Do
tmp = Left(tmp, startPt - 1) & Right(tmp, Len(tmp) - endPt)
Loop
RemoveTextBetween = tmp
End Function
Private Function SplitFormulaByOperators(target As Range) As Variant
Const OPERATORS = "+|-|*|/|%|^|>=|<=|<>|=|>|<|&|(|)"
Dim ws As Worksheet
Dim opr As Variant, ex As Variant
Dim tmp As String
Dim extractedList As Collection
Dim extracted(1) As String
Dim oprs() As String, result() As String
Dim i As Long
'Convert constant to array.
oprs = Split(OPERATORS, "|")
'Remove text between quotes
tmp = target.formula
tmp = RemoveTextBetween("""", """", tmp)
'Temporarily extract workbook and sheet names which have operators in them.
'They can be identified by bounding inverted commas.
Set extractedList = New Collection
Do While True
extracted(0) = ExtractTextBetween("'", "'", tmp, excludeMarkers:=False)
If Len(extracted(0)) = 0 Then Exit Do
extracted(1) = "|ex" & extractedList.Count & "|"
ex = Empty: On Error Resume Next
ex = extractedList(extracted(0)): On Error GoTo 0
If IsEmpty(ex) Then
extractedList.Add extracted, extracted(0)
tmp = Replace(tmp, extracted(0), extracted(1))
End If
Loop
'Replace the operators
For Each opr In oprs
tmp = Replace(tmp, opr, "|op|")
Next
result = Split(tmp, "|op|")
'Trim whitespace and commas.
For i = LBound(result) To UBound(result)
result(i) = TrimWhitespace(result(i), &H2C)
'Convert back to the extracted values.
For Each ex In extractedList
result(i) = Replace(result(i), ex(1), ex(0))
Next
Next
'Return the array.
SplitFormulaByOperators = result
End Function
Private Function ExtractTextBetween( _
startChar As String, _
endChar As String, _
sourceText As String, _
Optional acceptStartOfStringToEndChar As Boolean = False, _
Optional acceptStartCharToEndOfString As Boolean = False, _
Optional excludeMarkers As Boolean = True) As String
Dim i As Long, startPt As Long, endPt As Long
Dim tmp As String
For i = 1 To Len(sourceText)
tmp = Mid(sourceText, i, Len(startChar))
If startPt = 0 Then
If tmp = startChar Then
startPt = i
End If
Else
If tmp = endChar Then
endPt = i + Len(endChar) - 1
If excludeMarkers Then
startPt = startPt + Len(startChar)
endPt = endPt - Len(endChar)
End If
tmp = Mid(sourceText, startPt, endPt - startPt + 1)
ExtractTextBetween = tmp
Exit Function
End If
End If
Next
If startPt > 0 And endPt = 0 And acceptStartCharToEndOfString Then
If excludeMarkers Then
startPt = startPt + Len(startChar)
End If
tmp = Right(sourceText, Len(sourceText) - startPt + 1)
ExtractTextBetween = tmp
Exit Function
End If
If endPt > 0 And startPt = 0 And acceptStartOfStringToEndChar Then
If excludeMarkers Then
endPt = endPt - Len(endChar)
End If
tmp = Left(sourceText, endPt)
ExtractTextBetween = tmp
Exit Function
End If
End Function
Private Function RangeToString(addr As String) As String
Dim rng As Range
On Error Resume Next
Set rng = Evaluate(addr)
On Error GoTo 0
If rng Is Nothing Then Exit Function
'We have a range, so the address can be returned as valid reference.
RangeToString = addr
End Function
Private Function TrimWhitespace(txt As String, ParamArray additionals() As Variant) As String
Dim i As Long, j As Long, c As Long
Dim startPos As Long, endPos As Long
Dim whitespaces As Variant
Dim isWhitespace As Boolean
' List of whitespace characters.
whitespaces = Array( _
&H9, &HA, &HB, &HC, &HD, &H20, &H85, &HA0, _
&H1680, &H2000, &H2001, &H2002, &H2003, &H2004, &H2005, &H2006, _
&H2007, &H2008, &H2009, &H200A, &H2028, &H2029, &H202F, &H205F, _
&H3000, &H180E, &H200B, &H200C, &H200D, &H2060, &HFEFF)
' Find the first non-whitespace.
For i = 1 To Len(txt)
c = Asc(Mid(txt, i, 1))
isWhitespace = False
For j = LBound(whitespaces) To UBound(whitespaces)
If c = whitespaces(j) Then
isWhitespace = True
Exit For
End If
Next
For j = LBound(additionals) To UBound(additionals)
If c = additionals(j) Then
isWhitespace = True
Exit For
End If
Next
If Not isWhitespace Then
startPos = i
Exit For
End If
Next
' If there's no start position, return an empty string.
If startPos = 0 Then Exit Function
' Find the last non-whitespace.
For i = Len(txt) To startPos Step -1
c = Asc(Mid(txt, i, 1))
isWhitespace = False
For j = LBound(whitespaces) To UBound(whitespaces)
If c = whitespaces(j) Then
isWhitespace = True
Exit For
End If
Next
For j = LBound(additionals) To UBound(additionals)
If c = additionals(j) Then
isWhitespace = True
Exit For
End If
Next
If Not isWhitespace Then
endPos = i
Exit For
End If
Next
TrimWhitespace = Mid(txt, startPos, endPos - startPos + 1)
End Function

How to count the total number of specific words in a cell and do the same for other cells as well using VBA?

How do I count the total number of "alt" and "first" that appeared in a cell and do the same for other cells as well while ignoring empty cells in the process? For instance, if a cell has first, first, alt, first, first, first, it should give me firstcounter = 5 (where firstcounter is the total count for first) and altcounter= 1(altcounter is the total count for alt). After that I can use the value of firstcounter and altcounter found to concatenate them into a string as shown in column B in the form of "first-" & firstcounter, "alt-"& altcounter.
Dim ia As Long
Dim lastrow2 As Long
Dim ws1 As Worksheet
Set ws1 = ThisWorkbook.Sheets(1)
lastrow2 = ws1.Range("A" & ws1.Rows.count).End(xlUp).Row
For ia = 2 To lastrow2
Dim arr() As Variant
' Split the string to an array
arr = Split(ws1.Cells(ia, "A"), ",").Value
'what should i do after split
Enter the following into a code module...
Function CountWords$(r)
Dim a&, f&, w
For Each w In Split(r, ",")
If w = "alt" Then a = a + 1
If w = "first" Then f = f + 1
Next
If (a + f) Then CountWords = "first-" & f & ",alt-" & a
End Function
Then in cell B2 enter this formula:
=CountWords(A2)
Now copy it downwards as far as you need.
Update
To use the above function from VBA without entering formulas in the worksheet you can do it like this...
Sub Cena()
Dim i&, v
With [a2:a8]
v = .Value2
For i = 1 To UBound(v)
v(i, 1) = CountWords(v(i, 1))
Next
.Offset(, 1) = v
End With
End Sub
Function CountWords$(r)
Dim a&, f&, w
For Each w In Split(r, ",")
If w = "alt" Then a = a + 1
If w = "first" Then f = f + 1
Next
If (a + f) Then CountWords = "first-" & f & ",alt-" & a
End Function
Update #2
In response to your questions in the comments, you can use this variation instead...
Sub Cena()
Dim i&, v
With [a2].Resize(Cells(Rows.Count, 1).End(xlUp).Row - 1)
v = .Value2
For i = 1 To UBound(v)
v(i, 1) = CountWords(v(i, 1))
Next
.Cells = v
End With
End Sub
Function CountWords$(r)
Dim a&, f&, w
For Each w In Split(r, ",")
If w = "alt" Then a = a + 1
If w = "first" Then f = f + 1
Next
If (a + f) Then CountWords = "first-" & f & ",alt-" & a
End Function
In order to make this independent from the words alt and first and whitespaces in the string I would use the following functions
Option Explicit
'Add a reference to Microsoft VBScript Regular Expressions 5.5
Public Function RemoveWhiteSpace(target As String) As String
With New RegExp
.Pattern = "\s"
.MultiLine = True
.Global = True
RemoveWhiteSpace = .Replace(target, vbNullString)
End With
End Function
'Add a reference to Microsoft Scripting Runtime
Function CountWordsA(rg As Range) As String
On Error GoTo EH
Dim dict As Dictionary
Set dict = New Dictionary
Dim vDat As Variant
vDat = RemoveWhiteSpace(rg.Value)
vDat = Split(vDat, ",")
Dim i As Long
For i = LBound(vDat) To UBound(vDat)
If dict.Exists(vDat(i)) Then
dict(vDat(i)) = dict(vDat(i)) + 1
Else
dict.Add vDat(i), 1
End If
Next i
Dim vKey As Variant
ReDim vDat(1 To dict.Count)
i = 1
For Each vKey In dict.Keys
vDat(i) = vKey & "-" & dict(vKey)
i = i + 1
Next vKey
CountWordsA = Join(vDat, ",")
Exit Function
EH:
CountWordsA = ""
End Function
Sub TestIt()
Dim rg As Range
Set rg = Range("A2:A8")
Dim sngCell As Range
For Each sngCell In rg
sngCell.Offset(, 1) = CountWordsA(sngCell)
Next sngCell
End Sub
More about dictionaries and regular expressions
Alternative using Filter() function
This demonstrates the use of the Filter() function to count words via function UBound():
Function CountTerms() (usable also in formulae)
Function CountTerms(ByVal WordList As String, Optional TermList As String = "first,alt", Optional DELIM As String = ",") As String
'Purpose: count found terms in wordlist and return result as list
'[1] assign lists to arrays
Dim words, terms
words = Split(WordList, DELIM): terms = Split(TermList, DELIM)
'[2] count filtered search terms
Dim i As Long
For i = 0 To UBound(terms)
terms(i) = terms(i) & "-" & UBound(Filter(words, terms(i), True, vbTextCompare)) + 1
Next i
'[3] return terms as joined list, e.g. "first-5,alt-1"
CountTerms = Join(terms, ",")
End Function
Example call (due to comment) & help function getRange()
In order to loop over the entire range and replace the original data with the results list:
Sub ExampleCall()
'[1] get range data assigning them to variant temporary array
Dim rng As Range, tmp
Set rng = getRange(Sheet1, tmp) ' << change to sheet's Code(Name)
'[2] loop through array values and get counts
Dim i As Long
For i = 1 To UBound(tmp)
tmp(i, 1) = CountTerms(tmp(i, 1))
Next i
'[3] write to target (here: overwriting due to comment)
rng.Offset(ColumnOffset:=0) = tmp
End Sub
Function getRange(mySheet As Worksheet, tmp) As Range
'Purpose: assign current column A:A data to referenced tmp array
With mySheet
Set getRange = .Range("A2:A" & .Cells(.Rows.Count, 1).End(xlUp).Row)
tmp = getRange ' assign range data to referenced tmp array
End With
End Function

Finding Patterns: Identify the string portion that is common to a group of cells

I don't know how to search for this or how to explain without an example.
I'm looking for an excel function that compares cell strings and identifies the portion they have in common.
Conditions
Compares 2 or more cells.
The common string is identified as long as 2 cells share it. *(ie: if comparing more than 2, it's enough to have 2 cells with that string. Not all the compared cells need to have it.) *
The string has at least 3 or more chars to avoid single characters and pairs being flagged.
Example
----------------------------------------------------------------------
| Pattern | Page URL 1 | Page URL 2 | Page URL 3 |
----------------------------------------------------------------------
| test | example.net/test/ | www.test.com | www.notest.com |
----------------------------------------------------------------------
| q=age | another.com?q=age | test.com/q=age | test.com/q=lol |
----------------------------------------------------------------------
Probably obvious by now, but what I'm trying to achieve/analyze is if there are string patterns that are common to large sets of URLs.
(forgive my poor attempt trying to draw a table)
This doesn't fully answer the question but I think it will give you what you need to get it. Give it a try. Place the following code in a new moudule:
Public Sub FindStrings()
Dim rng1 As Excel.Range
Dim rng2 As Excel.Range
Set rng1 = ActiveSheet.Range("A1")
Set rng2 = ActiveSheet.Range("A2")
Dim i As Integer
Dim j As Integer
Dim searchVal As String
For i = 3 To Len(rng2)
For j = 1 To Len(rng1)
searchVal = Mid(rng1, j, i)
If Len(searchVal) < i Then Exit For
If InStr(1, rng2, searchVal) Then Debug.Print searchVal
Next j
Next i
End Sub
In cell A1 put example.net/test
In cell A2 put www.test.com
Result
tes
est
test
UPDATE
I updated the code to search for a minimum of 4 characters instead of 3 (as you mentioned above). Furthermore, I guessed you wouldn't want strings such as www. and .com returned, nor strings with the / or . character. So the code pulls those out as well. Also, it compares every column combination.
Option Explicit
Public Sub CompareStrings()
Dim Arr As Variant
Dim i As Integer
Dim j As Integer
Dim StartRange As Excel.Range
Dim SearchRange As Excel.Range
Dim Counter As Integer
Dim ComparableRange As Variant
Dim Comparable As Integer
Dim Compared As Integer
Dim SearchVal As String
Set StartRange = ActiveSheet.Range("A1")
Counter = 0
For Each ComparableRange In ActiveSheet.Range("A1:A2")
Set SearchRange = Range(StartRange.Offset(Counter), Cells(StartRange.Offset(Counter).Row, Columns.Count).End(xlToLeft))
Arr = Application.Transpose(Application.Transpose(SearchRange.Value))
Debug.Print "Row " & SearchRange.Row & ":"
For j = LBound(Arr) To UBound(Arr)
For i = j + 1 To UBound(Arr)
For Comparable = 4 To Len(Arr(j))
For Compared = 1 To Len(Arr(i))
SearchVal = Mid(Arr(j), Compared, Comparable)
If InStr(1, SearchVal, ".") = 0 Then
If InStr(1, SearchVal, "/") = 0 Then
If Len(SearchVal) < Comparable Then Exit For
If InStr(1, Arr(i), SearchVal) > 0 Then Debug.Print vbTab & SearchVal
End If
End If
Next Compared
Next Comparable
Next i
Next j
Counter = Counter + 1
Next ComparableRange
End Sub
When comparing test.com/q=age with another.com?q=age You will still get results such as:
q=ag
=age
q=age
...though I suspect you only want the third one. The longer the matching strings are the more results you will get. The last results are the ones you will probably want.
Copy the following code into a module. Read the comments at the top of CommonString for usage.
Option Explicit
Public Function CommonString(rng As Range, iMinLen As Integer, Optional strDelimiter As String = ",") As String
'Finds the maximum number of cells (iMax) in "rng" that have a common substring of length at least "iMinLen".
'The function returns a string with the format "iMax: substring1,substring2,substring3..."
' where substring1, substring2, etc. are unique substrings found in exactly iMax cells.
'The output does not include any substrings of the unique substrings.
'The delimter between substrings can be specified by the optional parameter "strDelimiter".
'If no common substrings of length at least "iMinLen" are found, "CommonString" will return an empty string.
Dim blnRemove() As Boolean
Dim dicSubStrings As Object 'records the number of times substrings are found in pairwise string comparisons
Dim iCandidates As Integer
Dim iCol As Integer
Dim iCurrCommon As Integer
Dim iCurrLen As Integer
Dim iMax As Integer
Dim iMaxCommon As Integer
Dim iNumStrings As Integer
Dim iOutCount As Integer
Dim iRow As Integer
Dim iString1 As Integer
Dim iString2 As Integer
Dim iSubStr1 As Integer
Dim iSubStr2 As Integer
Dim lngSumLen As Long
Dim str1D() As String
Dim strCandidates() As String
Dim strOut() As String
Dim strSim() As String
Dim strSub As String
Dim vKey As Variant
Dim vStringsIn() As Variant
Set dicSubStrings = CreateObject("Scripting.Dictionary")
vStringsIn = rng.Value
iNumStrings = Application.CountA(rng)
ReDim str1D(1 To iNumStrings)
' pull the strings into a 1-D array
For iRow = 1 To UBound(vStringsIn, 1)
For iCol = 1 To UBound(vStringsIn, 2)
iCurrLen = Len(vStringsIn(iRow, iCol))
If iCurrLen > 0 Then
iString1 = iString1 + 1
str1D(iString1) = vStringsIn(iRow, iCol)
lngSumLen = lngSumLen + iCurrLen
End If
Next iCol
Next iRow
'initialize the array that will hold the substrings to output
ReDim strOut(1 To lngSumLen - iNumStrings * (iMinLen - 1))
'find common substrings from all pairwise combination of strings
For iString1 = 1 To iNumStrings - 1
For iString2 = iString1 + 1 To iNumStrings
strSim = Sim2Strings(str1D(iString1), str1D(iString2), iMinLen)
'loop through all common substrings
For iSubStr1 = 1 To UBound(strSim)
If dicSubStrings.Exists(strSim(iSubStr1)) Then
iCurrCommon = dicSubStrings(strSim(iSubStr1)) + 1
dicSubStrings(strSim(iSubStr1)) = iCurrCommon
If iCurrCommon > iMaxCommon Then iMaxCommon = iCurrCommon
Else 'add common substrings to the "dicSubStrings" dictionary
dicSubStrings.Add strSim(iSubStr1), 1
If iMaxCommon = 0 Then iMaxCommon = 1
End If
Next iSubStr1
Next iString2
Next iString1
If dicSubStrings.Count = 0 Then Exit Function
ReDim strCandidates(1 To dicSubStrings.Count)
'add the candidate substrings to the "strCandidates" array
'candidate substrings are those found in exactly "iMaxCommon" pairwise comparisons
For Each vKey In dicSubStrings.keys
If dicSubStrings(vKey) = iMaxCommon Then
iCandidates = iCandidates + 1
strCandidates(iCandidates) = CStr(vKey)
End If
Next vKey
ReDim blnRemove(1 To iCandidates)
iOutCount = iCandidates
'keep only the candidate substrings that are not a substring within another candidate substring
For iSubStr1 = 1 To iCandidates - 1
If Not blnRemove(iSubStr1) Then
For iSubStr2 = 1 To iCandidates - 1
If Not blnRemove(iSubStr2) Then
If Len(strCandidates(iSubStr1)) <> Len(strCandidates(iSubStr2)) Then
If Len(strCandidates(iSubStr1)) > Len(strCandidates(iSubStr2)) Then
If InStr(strCandidates(iSubStr1), strCandidates(iSubStr2)) > 0 Then
blnRemove(iSubStr2) = True
iOutCount = iOutCount - 1
End If
Else
If InStr(strCandidates(iSubStr2), strCandidates(iSubStr1)) > 0 Then
blnRemove(iSubStr1) = True
iOutCount = iOutCount - 1
End If
End If
End If
End If
Next iSubStr2
End If
Next iSubStr1
ReDim strOut(1 To iOutCount)
iOutCount = 0
'add the successful candidates to "strOut"
For iSubStr1 = 1 To iCandidates
If Not blnRemove(iSubStr1) Then
iOutCount = iOutCount + 1
strOut(iOutCount) = strCandidates(iSubStr1)
End If
Next iSubStr1
'convert "iMaxCommon" (pairwise counts) to number of cells (iMax) by solving the formula:
'(iMax ^ 2 - iMax) / 2 = iMaxCommon
iMax = ((8 * iMaxCommon + 1) ^ 0.5 + 1) / 2
CommonString = iMax & ": " & Join(strOut, strDelimiter)
End Function
Private Function Sim2Strings(str1 As String, str2 As String, iMinLen As Integer) As String()
'Returns a list of unique substrings common to both "str1" and "str2" that
' have a length of at least "iMinLen".
Dim dicInList As Object
Dim iCharFrom As Integer
Dim iLen1 As Integer
Dim iSearchLen As Integer
Dim iSubStr As Integer
Dim strCurr As String
Dim strList() As String
Dim vKey As Variant
iLen1 = Len(str1)
Set dicInList = CreateObject("Scripting.Dictionary")
'add common substrings to the "dicInList" dictionary
For iCharFrom = 1 To iLen1 - iMinLen + 1
For iSearchLen = iMinLen To iLen1 - iCharFrom + 1
strCurr = Mid(str1, iCharFrom, iSearchLen)
If InStr(str2, strCurr) = 0 Then
Exit For
Else
If Not dicInList.Exists(strCurr) Then
dicInList.Add strCurr, 0
End If
End If
Next iSearchLen
Next iCharFrom
If dicInList.Count = 0 Then
ReDim strList(0)
Else
ReDim Preserve strList(1 To dicInList.Count)
'output the keys in the "dicInList" dictionary to the "strList" array
For Each vKey In dicInList.keys
iSubStr = iSubStr + 1
strList(iSubStr) = vKey
Next vKey
End If
Sim2Strings = strList
End Function

Resources