I have two Strings in different cells for example
ADSGPINDTDANPR
RGTELDDGIQADSGPINDTDANPRY VPGYY ESQSDDPHFHEK
Also character sequences can have gaps for similar sequences like RGX in following example
LADNS TFDDDLDDLTPSKMKPANFKGD
RSLA FDDDLDDLTPSRGXKMKPANFKGDYG
What I want to do is Highlight both the sequences as shown in above example in Bold And Italic but in color using VBA code.
Edit :
As per the first answer of #milo5m
Sequence is highlighted as follows
MNTVEEVDSEEDEESAP GSV GSMPSTGSAKYYTNRVPFDMIA
EQPMNTVEEVDSEEDEESAPA
But desired result should be like this:
MNTVEEVDSEEDEESAP GSV GSMPSTGSAKYYTNRVPFDMIA
EQPMNTVEEVDSEEDEESAPA
Edit 2 :
Currently answer of #milo5m highlights single characters as shown in below examples
SKPERYSG
TAPGEQAQD
SKPERYSG
AQD QKLAPSE
In above examples no sequence should have been highlighted...
In other words, It should only highlight the Single characters when there is space before the Character, otherwise it should not highlight single characters between sequences.
It's really nice that we have these two as a reference
https://www.sciencedirect.com/science/article/pii/S0890540114000765
https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
Here is the main function which is returning all subsequences forming the LCS (if you join all the keys you get the LCS in reverse subsequence form).
Function returns a dictionary where keys are subsequences, and values are arrays with 2 elements (position of subsequence in seqA and position of subsequence in seqB).
Function GetLCSSubSequenceDict(seqA As String, seqB As String) As Object
Set GetLCSSubSequenceDict = Nothing
Dim i As Long, n As Long
n = Len(seqA)
If n = 0 Then: Exit Function
Dim j As Long, m As Long
m = Len(seqB)
If m = 0 Then: Exit Function
Dim T() As Long
ReDim T(0 To n, 0 To m)
'Building up table
For i = 1 To n
For j = 1 To m
If Mid$(seqA, i, 1) <> Mid$(seqB, j, 1) Then
'bitwise max
T(i, j) = T(i - 1, j) Xor ((T(i - 1, j) Xor T(i, j - 1)) And --(T(i - 1, j) < T(i, j - 1)))
Else
T(i, j) = T(i - 1, j - 1) + 1
End If
Next j
Next i
Dim subseqKey As String
Dim subseqABDict As Object
Set subseqABDict = CreateObject("Scripting.Dictionary")
'Backtracking and building up dict of subsequences
'key = subsequence
'value = array(starting pos of the key in seqA,starting pos of the key in seqB)
i = n
j = m
Do While (i > 0 And j > 0)
If Not Mid$(seqA, i, 1) <> Mid$(seqB, j, 1) Then
subseqKey = Mid$(seqA, i, 1) & subseqKey
i = i - 1
j = j - 1
ElseIf T(i - 1, j) > T(i, j - 1) Then
If subseqKey <> vbNullString Then
subseqABDict(subseqKey) = Array(i + 1, j + 1)
subseqKey = vbNullString
End If
i = i - 1
Else
If subseqKey <> vbNullString Then
subseqABDict(subseqKey) = Array(i + 1, j + 1)
subseqKey = vbNullString
End If
j = j - 1
End If
Loop
If subseqKey <> vbNullString Then
subseqABDict(subseqKey) = Array(i + 1, j + 1)
End If
Set GetLCSSubSequenceDict = subseqABDict
Set subseqABDict = Nothing
End Function
Time complexity of the function is O ( Len(seqA) * Len(seqB) ), for those who are interested.
Following is a show case of setting font properties on 2 ranges seqA and seqB.
Sub test()
Dim seqA As Range
Dim seqB As Range
Set seqA = Range("A4")
Set seqB = Range("B4")
Dim fontColor As Long
fontColor = RGB(84, 84, 84)
Dim subseqKey As Variant
Dim lcsSubSequenceDict As Object
Set lcsSubSequenceDict = GetLCSSubSequenceDict(seqA.Value2, seqB.Value2)
'gives subsequences in reversed order, since we used backtracking
'MsgBox Join(lcsSubSequenceDict.keys())
If lcsSubSequenceDict Is Nothing Then: Exit Sub
For Each subseqKey In lcsSubSequenceDict
With seqA.Characters(lcsSubSequenceDict(subseqKey)(0), Len(subseqKey)).Font
.color = fontColor
.Bold = True
.Italic = True
End With
With seqB.Characters(lcsSubSequenceDict(subseqKey)(1), Len(subseqKey)).Font
.color = fontColor
.Bold = True
.Italic = True
End With
Next subseqKey
Set lcsSubSequenceDict = Nothing
Set seqA = Nothing
Set seqB = Nothing
End Sub
Edit:
(1) Fixed - Backtracking showing positions in sequences further to the right for multiple candidates
(2) Main Function returns Collection now (was returning Dictionary)
Function GetLCSSubSequenceCollection(seqA As String, seqB As String) As Collection
Set GetLCSSubSequenceCollection = Nothing
Dim i As Long, n As Long
n = Len(seqA)
If n = 0 Then: Exit Function
Dim j As Long, m As Long
m = Len(seqB)
If m = 0 Then: Exit Function
Dim T() As Long
ReDim T(0 To n, 0 To m)
'Building up table
For i = 1 To n
For j = 1 To m
If Mid$(seqA, i, 1) <> Mid$(seqB, j, 1) Then
'bitwise max
T(i, j) = T(i - 1, j) Xor ((T(i - 1, j) Xor T(i, j - 1)) And --(T(i - 1, j) < T(i, j - 1)))
Else
T(i, j) = T(i - 1, j - 1) + 1
End If
Next j
Next i
Dim subseqKey As String
Dim subseqABCollection As Collection
Set subseqABCollection = New Collection
'Backtracking and building up collection of subsequences
'value = array(subsequence, starting pos of the key in seqA,starting pos of the key in seqB)
i = n
j = m
Do While (i > 0 And j > 0)
If Not Mid$(seqA, i, 1) <> Mid$(seqB, j, 1) Then
subseqKey = Mid$(seqA, i, 1) & subseqKey
i = i - 1
j = j - 1
ElseIf T(i - 1, j) > T(i, j - 1) Then
If subseqKey <> vbNullString Then
subseqABCollection.Add Array(subseqKey, i + 1, j + 1)
subseqKey = vbNullString
End If
i = i - 1
Else
If subseqKey <> vbNullString Then
subseqABCollection.Add Array(subseqKey, i + 1, j + 1)
subseqKey = vbNullString
End If
j = j - 1
End If
Loop
If subseqKey <> vbNullString Then
subseqABCollection.Add Array(subseqKey, i + 1, j + 1)
End If
If subseqABCollection.Count = 0 Then: Exit Function
'fix backtracking showing positions in arrays further to the right for multiple candidates
'using inStr to chack starting from prev position etc
Dim prevSubseqPosA As Long, prevSubseqPosB As Long
prevSubseqPosA = 1
prevSubseqPosB = 1
Set GetLCSSubSequenceCollection = New Collection
For i = subseqABCollection.Count To 1 Step -1
subseqKey = subseqABCollection.Item(i)(0)
prevSubseqPosA = InStr(prevSubseqPosA, seqA, subseqKey)
prevSubseqPosB = InStr(prevSubseqPosB, seqB, subseqKey)
GetLCSSubSequenceCollection.Add Array(subseqKey, prevSubseqPosA, prevSubseqPosB)
prevSubseqPosA = prevSubseqPosA + Len(subseqKey)
prevSubseqPosB = prevSubseqPosB + Len(subseqKey)
Next
Set subseqABCollection = Nothing
End Function
Sub testCollection()
Dim lcs As String
Dim seqA As Range
Dim seqB As Range
Set seqA = Range("A4")
Set seqB = Range("B4")
Dim fontColor As Long
fontColor = RGB(84, 84, 84)
Dim lcsSubSequenceItem As Variant
Dim lcsSubSequenceCollection As Collection
Set lcsSubSequenceCollection = GetLCSSubSequenceCollection(seqA.Value2, seqB.Value2)
If lcsSubSequenceCollection Is Nothing Then: Exit Sub
For Each lcsSubSequenceItem In lcsSubSequenceCollection
With seqA.Characters(lcsSubSequenceItem(1), Len(lcsSubSequenceItem(0))).Font
.color = fontColor
.Bold = True
.Italic = True
End With
With seqB.Characters(lcsSubSequenceItem(2), Len(lcsSubSequenceItem(0))).Font
.color = fontColor
.Bold = True
.Italic = True
End With
lcs = lcs & lcsSubSequenceItem(0)
Next lcsSubSequenceItem
MsgBox lcs & " [ LEN = " & Len(lcs) & " ]"
Set lcsSubSequenceCollection = Nothing
Set seqA = Nothing
Set seqB = Nothing
End Sub
Tough challenge, and I'm not sure if it's that feasible with Excel alone. Assuming that:
You will not allow the 1st entry to have gaps;
You allow for 0+ gaps in between in the 2nd entry;
You are looking for the longest match between both entries;
You have ms365;
You may try the below answer that I based of on a formula first, see the below screenshot:
Formula in C1:
=LET(x,SCAN(,UNIQUE(TOCOL(MID(A1,SEQUENCE(LEN(A1)),SEQUENCE(1,LEN(A1))))),LAMBDA(a,b,TEXTJOIN("*",,MID(b,SEQUENCE(1,LEN(b)),1)))),y,SEARCH(x,B1),z,SORTBY(HSTACK(x,y),LEN(x)*(ISNUMBER(y)),-1),SUBSTITUTE(TAKE(FILTER(z,ISNUMBER(INDEX(z,,2))),1,1),"*",))
The above will identify the longest substring that has a match with 0+ gaps in between. This is going to be the input to the below macro:
Sub Test()
Dim ws As Worksheet, lr As Long, x As Long, y As Long, z As Long, a As Long, arr As Variant, s As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
lr = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row
arr = ws.Range("A1:C" & lr)
For x = LBound(arr) To UBound(arr)
s = arr(x, 3)
'Format column A:A
ws.Cells(x, 1).Characters(InStr(1, ws.Cells(x, 1).Value, s), Len(s)).Font.Bold = True
ws.Cells(x, 1).Characters(InStr(1, ws.Cells(x, 1).Value, s), Len(s)).Font.Italic = True
'Format column B:B
z = 0
For y = 1 To Len(s)
z = InStr(z + 1, ws.Cells(x, 2).Value, Mid(s, y, 1))
ws.Cells(x, 2).Characters(z, 1).Font.Bold = True
ws.Cells(x, 2).Characters(z, 1).Font.Italic = True
Next
Next
End Sub
The results look like:
In Excel 365, I have data in this format:
Or, in text:
1,2,3,7 A
4 B
5 C
6, 8 D
And I'm trying to split the data so it becomes this:
Or, in text
1 A
2 A
3 A
4 B
5 C
6 D
7 A
8 D
The leftmost row is always composed by numbers separated by comma or a single number. The right row can be any data.
The following VBA code will do most of what you want:
Sub ExpandRows()
Dim R As Range
Dim Rw As Range
Dim I As Integer
Dim J As Integer
Dim K As Integer
Dim S As String
Dim Tokens(1 To 1000) As String
Dim NTokens As Integer
Const Delim As String = ","
Dim StartSize As Integer
Dim TopCell As Range
Dim BotCell As Range
Set R = Selection
Set TopCell = R.Cells(1, 1)
Set BotCell = R.Cells(R.Rows.Count, 1)
StartSize = R.Rows.Count
For I = StartSize To 1 Step -1
S = R(I, 1)
If (S <> "") Then
J = 0
NTokens = 0
Do
K = InStr(J + 1, S, Delim)
If (K = 0) Then
NTokens = NTokens + 1
Tokens(NTokens) = Mid$(S, J + 1, Len(S) - J)
Else
NTokens = NTokens + 1
Tokens(NTokens) = Mid$(S, J + 1, (K - J - 1))
J = K
End If
Loop Until (K = 0)
End If
If (NTokens > 1) Then
For J = NTokens To 2 Step -1
If (Tokens(J) <> "") Then
Set Rw = R.Cells(I, 1).EntireRow
Call Rw.Select
Call Rw.Copy
Call R.Cells(I + 1, 1).EntireRow.Select
Call Rw.Insert(xlDown)
If (I = 1) Then
Set TopCell = TopCell.Cells(0, 1)
Set R = Range(TopCell, BotCell)
End If
Call R.Select
Call R.Cells(I + 1, 1).Select
R(I + 1, 1) = Tokens(J)
End If
Next J
R(I, 1) = Tokens(1)
End If
Next I
End Sub
This code will split the cells and create new rows with a single entry.
To use it, select the first column and execute the method.
After that, all you have to do is sort on the first column.
I have this code that looks at column A and loops through to create an array to paste to another destination, but I want to manipulate it to loop through sets of 250 cells and create a concatenated array and print it to cells B1. After that set of 250, I go cells a251-a501, and so forth until I reach the end of the list and have each set of 250 concatenated ID's (separated by a ";") to print to the next destination row (B1>B2>B3, etc..)
Sub JC_Fill()
Dim varArray() As Variant
Dim x As Long, i As Long
i = 0
x = 1
With ThisWorkbook.Worksheets("Sheet1").UsedRange.Rows.Count
ReDim varArray(1) 'resize array
Do Until Cells(x, 1).Value = ""
If Cells(x, 1) <> "" Then
varArray(i) = Cells(x, 1).Value
i = i + 1
ReDim Preserve varArray(i)
End If
x = x + 1
Loop
ReDim Preserve varArray(i - 1)
End With
ThisWorkbook.Worksheets("Sheet1").Range("B1").Value = varArray
End Sub
How could I edit my Do While/Loop to repeat the process every 250 cells and then concatenate the array to one cell separated by ; and then offset the next batch until I have no more ID's to cycle through?
Try changing your code this way:
Sub JC_Fill()
Dim OutString
Dim x As Long, i As Long
Dim out_row As Long
i = 0
x = 1
out_row = 1
With ThisWorkbook.Worksheets("Sheet1").UsedRange.Rows.Count
OutString = ""
Do Until Cells(x, 1).Value = ""
If Cells(x, 1) <> "" Then
If (x > 1) Then OutString = OutString & ";"
OutString = OutString & Cells(x, 1).Value
End If
If (x Mod 250) = 0 Then
ThisWorkbook.Worksheets("Sheet1").Range("B" & out_row).Value = OutString
OutString = ""
out_row = out_row + 1
End If
x = x + 1
Loop
End With
ThisWorkbook.Worksheets("Sheet1").Range("B" & out_row).Value = OutString
End Sub
For interest, you can do this without looping each of the 250 cells.
Sub x()
Dim n As Long, v As Variant, r As Range, n2 As Long
n = 5 '250 for you
n2 = Range("A" & Rows.Count).End(xlUp).Row
Set r = Range("A1").Resize(n)
Do While Len(r(1)) > 0
If n2 - r(1).Row < n Then Set r = r.Resize(n2 - r(1).Row + 1)
If r.Count = 1 Then
v = r.Value
Else
v = Join(Application.Transpose(r), ";")
End If
Range("B" & Rows.Count).End(xlUp)(2).Value = v
Set r = r.Offset(n)
Loop
End Sub
Column A contains the labels or outcome value, Columns B-N contain varying lengths of comma separated values, but range for each column is the same (i.e., 1-64). The goal is to covert to a new table with Column A representing the value range (1-64) and Columns B-N the labels/outcome from the original table.
A semi-related solution was sought here, but without use of macros.
I will let you to modify this code,
Sub splitThem()
Dim i As Long, j As Long, k As Long, x As Long
x = 1
Sheets.Add.Name = "newsheet"
For j = 2 To Cells(1, Columns.Count).End(xlToLeft).Column
For i = 1 To Cells(Rows.Count, 2).End(xlUp).Row
If Cells(i, j) <> "" Then
For k = 1 To Len(Cells(i, j)) - Len(Replace(Cells(i, j), ",", "")) + 1
Sheets("newsheet").Cells(x, j) = Cells(i, 1)
x = x + 1
Next k
End If
Next i
x = 1
Next j
End Sub
Try this code.
Sub test()
Dim vDB, vR()
Dim vSplit, v As Variant
Dim Ws As Worksheet
Dim i As Long, n As Long, j As Integer, c As Integer
vDB = Range("a2").CurrentRegion
n = UBound(vDB, 1)
c = UBound(vDB, 2)
ReDim vR(1 To 64, 1 To c)
For i = 1 To 64
vR(i, 1) = i
Next i
For i = 2 To n
For j = 2 To c
vSplit = Split(vDB(i, j), ",")
For Each v In vSplit
vR(v, j) = vDB(i, 1)
Next v
Next j
Next i
Set Ws = Sheets.Add '<~~ replace your sheet : Sheets(2)
With Ws
For i = 1 To c
.Range("b1")(1, i) = "COND" & i
Next i
.Range("a2").Resize(64, c) = vR
End With
End Sub
I want this code to search the column name called((attribute value1,attribute value2..N)
If that column contains fraction values, it should convert it to decimal. I'm using this macros(VBA).
The code is working but it is converting only one column(attribute value1).
It will take more time because I have multiple columns(attribute value2...N) that have fraction values.
Please help me out I am struck here.
Sub deci()
Dim LR As Long
Dim Dash As Long
Dim Whole As Double
Dim lngDataColumn As Long
Dim pi
lngDataColumn = 4
Sheets("Sheet3").Select
LR = Cells(Rows.Count, lngDataColumn).End(xlUp).row
For r = 2 To LR
s = Cells(r, lngDataColumn)
arr = Split(s, ",")
For i = LBound(arr) To UBound(arr)
Whole = 0
P = InStr(arr(i), " IN")
If P > 0 Then
Worksheet = (Left((arr(i)), P - 1))
Else
Worksheet = arr(i)
End If
Dash = InStr(Worksheet, "-")
If Dash > 0 Then
Whole = Frac(Left(Worksheet, Dash - 1))
Worksheet = Mid(Worksheet, Dash + 1)
End If
af = Right(arr(i), Len(arr(i)) - P + 1)
evfrac = Whole + Left(CStr(Evaluate(Worksheet)), 5)
' evfrac = Whole + Format(Evaluate(frac), "0.###")
ss = ss & evfrac & af & ", "
Next i
Cells(r, lngDataColumn) = Left(ss, Len(ss) - 2)
ss = ""
Next r
End Sub
Function Frac(ByVal X As String) As Double
Dim P As Integer, N As Double, Num As Double, Den As Double
X = Trim$(X)
P = InStr(X, "/")
If P = 0 Then
N = Val(X)
Else
Den = Val(Mid$(X, P + 1))
If Den = 0 Then Err.Raise 11 ' Divide by zero
X = Trim$(Left$(X, P - 1))
P = InStr(X, " ")
If P = 0 Then
Num = Val(X)
Else
Num = Val(Mid$(X, P + 1))
N = Val(Left$(X, P - 1))
End If
End If
If Den <> 0 Then
N = N + Num / Den
End If
Frac = N
End Function
The reason it's only doing one column is because that's exactly what your telling it to do with this section of the code:
lngDataColumn = 4
Sheets("Sheet3").Select
LR = Cells(Rows.Count, lngDataColumn).End(xlUp).row
Because your setting lngDataColumn as a fixed figure, your code is only executed on column 4. If you want to do more columns as a loop, you need to increment this value in the same maner you are incrementing r in your for loop.
For example:
lngDataColumn = 10
Sheets("Sheet3").Select
For 4 To lngDataColumn
LR = Cells(Rows.Count, lngDataColumn).End(xlUp).row
'Rest of code
Next lngDataColumn