I have a list containing sub-lists. If the difference between two sub-lists (the sum of the absolute element-wise differences after sorting) is at most 0.1, I want to group those sub-lists together.
import numpy as np

def difference(A, B):
    difference = []
    zip_object = zip(sorted(A), sorted(B))
    for l1, l2 in zip_object:
        difference.append(abs(l1 - l2))
    sum_ = 0
    for i in difference:
        sum_ += i
    if round(sum_, 5) <= 0.1:
        return True
    return False

aaa = [[1.001, 2, 5, 3, 5, 4, 6, 9, 10],
       [2, 5, 3, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 3, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 3, 5, 4, 6.001, 9, 10.2, 1],
       [2, 5, 2.999, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 2.999, 5, 4, 6.001, 9, 10.2, 1]]

AAA = []
for i in range(len(aaa)):
    a = [i]
    for j in range(len(aaa)):
        if i < j and difference(aaa[i], aaa[j]) == True:
            a.append(j)
    AAA.append(a)
print(AAA)
My code yields:
[[0, 1, 4], [1, 4], [2], [3, 5], [4], [5]]
But I want the result to look like this:
[[0, 1, 4], [3, 5]]
Since you can import numpy, here is a short solution that uses it:
import numpy as np

aaa = [[1.001, 2, 5, 3, 5, 4, 6, 9, 10],
       [2, 5, 3, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 3, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 3, 5, 4, 6.001, 9, 10.2, 1],
       [2, 5, 2.999, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 2.999, 5, 4, 6.001, 9, 10.2, 1]]

# sort each row for calculating differences
aaa = np.sort(aaa, axis=1)
# calculate the pairwise difference: broadcasting (6,1,9) against (6,9)
# gives a (6,6,9) array, and summing over the last axis yields a 6x6 distance matrix
diff = np.sum(np.abs(aaa[:, None, :] - aaa), axis=2)
# don't need an item with itself
np.fill_diagonal(diff, 1e5)
# find the pairs of rows that have a small difference
locs = np.unique(np.sort(np.where(diff <= .1), axis=0), axis=1).T
# get the results
res = {}
redundant = []
flattened_list = []
for pair in locs:
    if pair[0] not in res.keys() and pair[0] not in redundant:
        res[pair[0]] = [pair[1]]
        redundant.extend(pair)
    elif pair[0] in res.keys():
        if pair[1] not in res[pair[0]]:
            res[pair[0]].append(pair[1])
flattened_list = [[key, *val] for key, val in res.items()]
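For the sample data, printing the flattened result reproduces the grouping you asked for. Depending on the numpy version the indices come out as numpy integers, so a minimal check that casts them to plain ints (just a sketch, not part of the solution above) could look like this:

# cast the numpy integer indices to plain ints before printing
print([[int(idx) for idx in group] for group in flattened_list])
# expected for the sample data: [[0, 1, 4], [3, 5]]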
I've created a verbose program that you can adjust to your needs.
def new_difference(list1, list2):
    list_length = len(list1)
    list1 = sorted(list1)
    list2 = sorted(list2)
    total = 0
    for i in range(list_length):
        total += abs(list1[i] - list2[i])
    return round(total, 5) <= 0.1

def add_good_diff():
    # uses i and j from the enclosing loop below
    # if 0 matches with 1, create dictionary 0: [1]
    # if 0 matches with 4, add to the list like so 0: [1, 4]
    if i not in good_diff:
        good_diff[i] = [j]
    else:
        good_diff[i].append(j)

def proceed_with_diff(i, j):
    # let's say the 0'th list matches with 1 and 4
    # when we get to the next list, we don't want to compare 1 and 4
    # so, check each list in good_diff. If found, return False
    # which means, skip matching i and j
    for item in good_diff:
        if i in good_diff[item] and j in good_diff[item]:
            print(f"{i} and {j} already diff'ed successfully")
            return False
    return True
aaa = [[1.001, 2, 5, 3, 5, 4, 6, 9, 10],
       [2, 5, 3, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 3, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 3, 5, 4, 6.001, 9, 10.2, 1],
       [2, 5, 2.999, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 2.999, 5, 4, 6.001, 9, 10.2, 1]]

sets = len(aaa)
good_diff = {}   # stores {0: [1, 4], 3: [5]} - successful matches
final_list = []  # is used to flatten the output to [[0, 1, 4], [3, 5]]

# starts with the 0'th item
for i in range(0, sets):
    # compares the i'th item with items i+1..5
    for j in range(i+1, sets):
        print(f'Matching {i} and {j}')
        # if i and j have been compared already, don't compare them again;
        # proceed_with_diff returns True if a match has not been done before
        if proceed_with_diff(i, j):
            # if the diff is within the accepted value, add it to the dictionary
            if new_difference(aaa[i], aaa[j]):
                print(f'{i} matches {j}. Adding to good_diff')
                add_good_diff()

# flatten the dictionary
# {0: [1, 4]} will become [0, 1, 4]
for item in good_diff:
    final_list.append([item] + good_diff[item])

print(final_list)
When you run that, you will see the result:
[[0, 1, 4], [3, 5]]
Give it a shot.
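If you want something shorter once the logic is clear, the same pairing idea can be written with itertools.combinations. This is only a sketch that reuses new_difference and aaa from above and applies the same greedy grouping, so adjust it to your needs:

from itertools import combinations

def group_close_lists(rows):
    groups = {}      # leader index -> indices grouped under it
    grouped = set()  # indices that already belong to some group
    for i, j in combinations(range(len(rows)), 2):
        if i in grouped or j in grouped:
            continue  # skip rows that are already placed
        if new_difference(rows[i], rows[j]):
            groups.setdefault(i, []).append(j)
            grouped.add(j)
    return [[leader, *members] for leader, members in groups.items()]

print(group_close_lists(aaa))  # should also print [[0, 1, 4], [3, 5]]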
I want to split a list (float or integer) according to the following conditions:
Splitting the list into all possible subsamples.
No duplication.
A unit sample cannot be a subsample.
I have code that splits a list into equal-sized chunks when given the number of subsamples.
The code I found works, but it does not give me what I want:
import numpy as np
x = [1,2,3,4,5,6,7,8,9,10]
l = np.array_split(x,3)
Output:
[array([1, 2, 3, 4]), array([5, 6, 7]), array([8, 9, 10])]
I want a list of all possible subsamples without duplication, that is, all unique chunks that appear when the list is split into 2, 3, 4, etc. parts (within a single split, no two subsamples share an element).
I do not want to specify the number of chunks, so the result is not limited to any particular number of splits.
Here is what I did manually
Starting from the series [1,2,3,4,5,6,7,8,9,10], I sliced it into all possible blocks as follows:
when splitting into 2
[1][2,3,4,5,6,7,8,9,10]
[1,2][3,4,5,6,7,8,9,10]
[1,2,3][4,5,6,7,8,9,10]
[1,2,3,4][5,6,7,8,9,10]
[1,2,3,4,5][6,7,8,9,10]
[1,2,3,4,5,6][7,8,9,10]
[1,2,3,4,5,6,7,8][9,10]
[1,2,3,4,5,6,7,8,9][10]
when splitting into 3
[1][2][3,4,5,6,7,8,9,10]
[1][2,3][4,5,6,7,8,9,10]
[1][2,3,4][5,6,7,8,9,10]
[1][2,3,4,5][6,7,8,9,10]
[1][2,3,4,5,6][7,8,9,10]
[1][2,3,4,5,6,7][8,9,10]
[1][2,3,4,5,6,7,8][9,10]
[1][2,3,4,5,6,7,8,9][10]
[1,2][3][4,5,6,7,8,9,10]
[1,2][3,4][5,6,7,8,9,10]
[1,2][3,4,5][6,7,8,9,10]
[1,2][3,4,5,6][7,8,9,10]
[1,2][3,4,5,6,7][8,9,10]
[1,2][3,4,5,6,7,8][9,10]
[1,2][3,4,5,6,7,8,9][10]
[1,2,3][4][5,6,7,8,9,10]
[1,2,3][4,5][6,7,8,9,10]
[1,2,3][4,5,6][7,8,9,10]
[1,2,3][4,5,6,7][8,9,10]
[1,2,3][4,5,6,7,8][9,10]
[1,2,3][4,5,6,7,8,9][10]
[1,2,3,4][5][6,7,8,9,10]
[1,2,3,4][5,6][7,8,9,10]
[1,2,3,4][5,6,7][8,9,10]
[1,2,3,4][5,6,7,8][9,10]
[1,2,3,4][5,6,7,8,9][10]
[1,2,3,4,5][6][7,8,9,10]
[1,2,3,4,5][6,7][8,9,10]
[1,2,3,4,5][6,7,8][9,10]
[1,2,3,4,5][6,7,8,9][10]
[1,2,3,4,5,6][7][8,9,10]
[1,2,3,4,5,6][7,8][9,10]
[1,2,3,4,5,6][7,8,9][10]
when splitting into 4
[1,2,3,4,5,6,7][8][9,10]
[1,2,3,4,5,6,7][8,9][10]
[1,2,3,4,5,6,7,8][9][10]
After producing all the possible splits into blocks, I removed all the single-element blocks and all duplicate blocks.
[2,3,4,5,6,7,8,9,10]
[1,2,3,4,5,6,7,8,9]
[3,4,5,6,7,8,9,10]
[2,3]
[2,3,4]
[2,3,4,5]
[2,3,4,5,6]
[2,3,4,5,6,7]
[2,3,4,5,6,7,8]
[2,3,4,5,6,7,8,9]
[4,5,6,7,8,9,10]
[3,4]
[3,4,5]
[3,4,5,6]
[3,4,5,6,7]
[3,4,5,6,7,8]
[1,2][3,4,5,6,7,8,9]
[5,6,7,8,9,10]
[4,5]
[4,5,6]
[4,5,6,7]
[4,5,6,7,8]
[1,2,3][4,5,6,7,8,9]
[6,7,8,9,10]
[5,6]
[5,6,7]
[5,6,7,8]
[1,2,3,4][5,6,7,8,9]
[1,2,3,4,5][7,8,9,10]
[6,7]
[6,7,8]
[6,7,8,9]
[8,9,10]
[7,8]
[1,2,3,4,5,6][7,8,9]
[9,10]
[1,2,3,4,5,6,7][8,9]
[1,2,3,4,5,6,7,8]
Here, I gather all the possible chunks together.
This is what I desire as an output.
[[2,3,4,5,6,7,8,9,10], [1,2,3,4,5,6,7,8,9], [3,4,5,6,7,8,9,10], [2,3], [2,3,4], [2,3,4,5], [2,3,4,5,6], [2,3,4,5,6,7], [2,3,4,5,6,7,8], [2,3,4,5,6,7,8,9], [4,5,6,7,8,9,10],[3,4], [3,4,5], [3,4,5,6], [3,4,5,6,7], [3,4,5,6,7,8], [1,2], [3,4,5,6,7,8,9], [5,6,7,8,9,10], [4,5], [4,5,6], [4,5,6,7], [4,5,6,7,8], [1,2,3], [4,5,6,7,8,9], [6,7,8,9,10], [5,6], [5,6,7], [5,6,7,8], [1,2,3,4], [5,6,7,8,9], [1,2,3,4,5], [7,8,9,10], [6,7], [6,7,8], [6,7,8,9], [8,9,10], [7,8], [1,2,3,4,5,6], [7,8,9], [9,10], [1,2,3,4,5,6,7], [8,9], [1,2,3,4,5,6,7,8]]
It looks to me like the problem reduces to finding all contiguous sublists of length 2 or greater that are shorter than the full list. In other words, you won't have to enumerate every partition to find them.
def parts(thing):
    result = []
    for i in range(len(thing)):
        for j in range(i + 1, len(thing) + 1):
            if 1 < len(thing[i:j]) < len(thing):
                result.append(thing[i:j])
    return result
res = parts([*range(1,11)])
# res
# [[1, 2], [1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5], [1, 2, 3, 4, 5, 6],
# [1, 2, 3, 4, 5, 6, 7], [1, 2, 3, 4, 5, 6, 7, 8],
# [1, 2, 3, 4, 5, 6, 7, 8, 9], [2, 3], [2, 3, 4], [2, 3, 4, 5],
# [2, 3, 4, 5, 6], [2, 3, 4, 5, 6, 7], [2, 3, 4, 5, 6, 7, 8],
# [2, 3, 4, 5, 6, 7, 8, 9], [2, 3, 4, 5, 6, 7, 8, 9, 10], [3, 4], [3, 4, 5],
# [3, 4, 5, 6], [3, 4, 5, 6, 7], [3, 4, 5, 6, 7, 8], [3, 4, 5, 6, 7, 8, 9],
# [3, 4, 5, 6, 7, 8, 9, 10], [4, 5], [4, 5, 6], [4, 5, 6, 7], [4, 5, 6, 7, 8],
# [4, 5, 6, 7, 8, 9], [4, 5, 6, 7, 8, 9, 10], [5, 6], [5, 6, 7], [5, 6, 7, 8],
# [5, 6, 7, 8, 9], [5, 6, 7, 8, 9, 10], [6, 7], [6, 7, 8], [6, 7, 8, 9],
# [6, 7, 8, 9, 10], [7, 8], [7, 8, 9], [7, 8, 9, 10], [8, 9], [8, 9, 10],
# [9, 10]]
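For what it's worth, the same result can be produced with a single comprehension. This is just an equivalent sketch of the parts function above, with the length conditions expressed directly on the slice indices:

def parts_compact(thing):
    # every contiguous slice with at least 2 elements but fewer than len(thing)
    n = len(thing)
    return [thing[i:j] for i in range(n) for j in range(i + 2, n + 1) if j - i < n]

# sanity check against the original: both return the same 44 sublists for range(1, 11)
assert parts_compact([*range(1, 11)]) == parts([*range(1, 11)])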