How to find a column in a ndarray - python-3.x

Say I have an array
x = array([[ 0, 1, 2, 5],
[ 3, 4, 5, 5],
[ 6, 7, 8, 5],
[ 9, 10, 11, 5]])
I need to find the position/index of [3, 4, 5, 5]. In this case, it should return 1.

Create an array y that has all rows equal to the one you are looking for. Then, do an elementwise comparison x == y and find the rows where you get all True.
import numpy as np
# Haystack: every row of x1 is a candidate to compare against the target row.
x1 = np.array([
    [0, 1, 2, 5],
    [3, 4, 5, 5],
    [6, 7, 8, 5],
    [9, 10, 11, 5],
])
# Repeat the target row once per row of x1 (instead of a hard-coded 4) so the
# element-wise comparison below lines up for any number of rows.
y1 = np.array([[3, 4, 5, 5]] * len(x1))
# A row matches only when every element in that row compares equal.
print(np.where(np.all(x1 == y1, axis=1))[0])  # => [1]
This approach returns an array of the indices where the desired row appears.
# A row that never occurs in x1 produces an empty index array.
# The repeat count follows x1 instead of a hard-coded 4.
y2 = np.array([[1, 1, 1, 1]] * len(x1))
print(np.where(np.all(x1 == y2, axis=1))[0])  # => []
# A row that occurs more than once produces every matching index.
x2 = np.array([
    [3, 4, 5, 5],
    [3, 4, 5, 5],
    [6, 7, 8, 5],
    [9, 10, 11, 5],
])
print(np.where(np.all(x2 == y1, axis=1))[0])  # => [0 1]

Related

Select on second dimension on a 3D pytorch tensor with an array of indexes

I am kind of new with numpy and torch and I am struggling to understand what to me seems the most basic operations.
For instance, given this tensor:
A = tensor([[[6, 3, 8, 3],
[1, 0, 9, 9]],
[[4, 9, 4, 1],
[8, 1, 3, 5]],
[[9, 7, 5, 6],
[3, 7, 8, 1]]])
And this other tensor:
B = tensor([1, 0, 1])
I would like to use B as indexes for A so that I get a 3 by 4 tensor that looks like this:
[[1, 0, 9, 9],
[4, 9, 4, 1],
[3, 7, 8, 1]]
Thanks!
Ok, my mistake was to assume this:
A[:, B]
is equal to this:
A[[0, 1, 2], B]
Or more generally the solution I wanted is:
A[range(B.shape[0]), B]
Alternatively, you can use torch.gather:
>>> indexer = B.view(-1, 1, 1).expand(-1, -1, 4)
tensor([[[1, 1, 1, 1]],
[[0, 0, 0, 0]],
[[1, 1, 1, 1]]])
>>> A.gather(1, indexer).view(len(B), -1)
tensor([[1, 0, 9, 9],
[4, 9, 4, 1],
[3, 7, 8, 1]])

Transpose a list of lists from the 2nd element in Python

list_of_lists = [[1, 2, 3, 4], [1, 5, 6, 7], [1, 8, 9, 10]]
I would like to get to:
transposed_list = [[1, 2, 5, 8], [1, 3, 6, 9], [1, 4, 7, 10]]
In other words, only transpose from the 2nd element in the list, keeping the first element in place.
Try:
list_of_lists = [[1, 2, 3, 4], [1, 5, 6, 7], [1, 8, 9, 10]]

# Drop the leading element of every row, then transpose what is left.
tails = [row[1:] for row in list_of_lists]
columns = zip(*tails)

# The idx-th transposed column is prefixed with the first element of the
# idx-th original row, so the leading element stays in place.
out = []
for idx, column in enumerate(columns):
    out.append([list_of_lists[idx][0], *column])
print(out)
Prints:
[[1, 2, 5, 8], [1, 3, 6, 9], [1, 4, 7, 10]]

How to classify sub-lists of a list under a specific condition

I have a list containing sub-lists. If the differences between sub-lists are less than 0.1, I want to group these sub-lists.
import numpy as np
def difference(A, B):
    """Return True when the sorted values of A and B differ by at most 0.1 in total.

    Both inputs are sorted, paired position by position, and the absolute
    differences of the pairs are summed. The sum is rounded to 5 decimal
    places before it is compared with the 0.1 threshold.

    Args:
        A: Iterable of numbers.
        B: Iterable of numbers. If the lengths differ, pairing stops at the
            shorter sorted input (``zip`` semantics), so surplus values of
            the longer input are ignored.

    Returns:
        True if the rounded total difference is <= 0.1, otherwise False.
    """
    # Sorting first makes the comparison independent of element order.
    total = sum(abs(l1 - l2) for l1, l2 in zip(sorted(A), sorted(B)))
    return round(total, 5) <= 0.1
aaa = [[1.001, 2, 5, 3, 5, 4, 6, 9, 10],
       [2, 5, 3, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 3, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 3, 5, 4, 6.001, 9, 10.2, 1],
       [2, 5, 2.999, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 2.999, 5, 4, 6.001, 9, 10.2, 1]]

# For every sub-list i, collect i itself plus each later index j for which
# difference() reports a match. One entry is appended per i, including
# singletons and rows already captured by an earlier entry, which is why the
# printed result contains more groups than the desired output.
AAA = []
for i in range(len(aaa)):
    a = [i]
    # Only later indices are considered, so each pair is tested once.
    for j in range(i + 1, len(aaa)):
        if difference(aaa[i], aaa[j]):
            a.append(j)
    AAA.append(a)
print(AAA)
My code yields:
[[0, 1, 4], [1, 4], [2], [3, 5], [4], [5]]
But I want the result like this
[[0, 1, 4], [3, 5]]
As long as you can import numpy, here is a short solution that uses numpy
import numpy as np
aaa = [[1.001, 2, 5, 3, 5, 4, 6, 9, 10],
       [2, 5, 3, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 3, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 3, 5, 4, 6.001, 9, 10.2, 1],
       [2, 5, 2.999, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 2.999, 5, 4, 6.001, 9, 10.2, 1]]
# sort each row so values are compared position by position
aaa = np.sort(aaa, axis=1)
# diff[r, c] is the summed absolute difference between sorted rows r and c
diff = np.sum(np.abs(aaa[:, None, :] - aaa), axis=2)
# a row trivially matches itself; overwrite the diagonal with a large value
np.fill_diagonal(diff, 1e5)
# find the pairs of rows that have a small difference; sorting each pair and
# de-duplicating collapses (r, c) and (c, r) into a single (smaller, larger)
locs = np.unique(np.sort(np.where(diff <= .1), axis=0), axis=1).T
# get the results
res = {}        # group leader -> list of rows matched to that leader
redundant = []  # rows already placed in some group (leaders and members)
# tolist() yields plain Python ints, so the result prints as [[0, 1, 4], [3, 5]]
# rather than as NumPy scalar objects.
for first, second in locs.tolist():
    if first in res:
        if second not in res[first]:
            res[first].append(second)
            # A row added to an existing group is marked as grouped as well,
            # so it cannot start a second group of its own later.
            redundant.append(second)
    elif first not in redundant:
        res[first] = [second]
        redundant.extend((first, second))
flattened_list = [[key, *val] for key, val in res.items()]
I've created a verbose program that you can adjust to your needs.
def new_difference(list1, list2):
    """Return True when the sorted lists differ by at most 0.1 in total.

    Both lists are sorted and compared index by index; the absolute
    differences are summed, rounded to 5 decimal places and compared with
    the 0.1 threshold.

    Args:
        list1: Iterable of numbers; its length decides how many positions
            are compared.
        list2: Iterable of numbers with at least as many elements as list1.

    Returns:
        True if the rounded total difference is <= 0.1, otherwise False.

    Raises:
        IndexError: If list2 has fewer elements than list1.
    """
    first = sorted(list1)
    second = sorted(list2)
    # Index into `second` (rather than zip) so a too-short list2 still raises.
    total = sum(abs(value - second[pos]) for pos, value in enumerate(first))
    return round(total, 5) <= 0.1
def add_good_diff():
    """Record in ``good_diff`` that item ``j`` matches item ``i``.

    Takes no arguments: it reads the module-level names ``i`` and ``j`` and
    mutates the module-level dict ``good_diff``, so it must be called while
    those names hold the pair that has just been compared.
    """
    # if 0 matches with 1, create dictionary entry 0: [1]
    # if 0 matches with 4, add to the list like so 0: [1, 4]
    good_diff.setdefault(i, []).append(j)
def proceed_with_diff(i, j):
    """Tell whether items ``i`` and ``j`` still need to be compared.

    Looks through every list stored in the module-level dict ``good_diff``.
    If one of those lists contains both ``i`` and ``j``, a message is printed
    and False is returned, meaning the pair should be skipped.

    Args:
        i: Index of the first item.
        j: Index of the second item.

    Returns:
        False if both indices already appear together in one ``good_diff``
        list, otherwise True.
    """
    # let's say 0'th list matches with 1 and 4
    # when we get to the next list, we don't want to compare 1 and 4
    for matched in good_diff.values():
        if i in matched and j in matched:
            print(f"{i} and {j} already diff'ed successfully")
            return False
    return True
aaa = [[1.001, 2, 5, 3, 5, 4, 6, 9, 10],
       [2, 5, 3, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 3, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 3, 5, 4, 6.001, 9, 10.2, 1],
       [2, 5, 2.999, 5, 4, 6.001, 9, 10, 1],
       [2, 5.5, 2.999, 5, 4, 6.001, 9, 10.2, 1]]
sets = len(aaa)
good_diff = {}  # stores {0: [1, 4], 3: [5]} - successful matches
final_list = []  # is used to flatten output to [[0, 1, 4], [3, 5]]

# starts with 0'th item
# NOTE: the loop variables must stay named i and j, because add_good_diff()
# reads them as module-level names.
for i in range(sets):
    # compares the i'th item with every later item
    for j in range(i + 1, sets):
        print(f'Matching {i} and {j}')
        # proceed_with_diff returns True when i and j are not already
        # recorded together in one good_diff list
        if proceed_with_diff(i, j):
            # if diff is within accepted value, add it to the dictionary
            if new_difference(aaa[i], aaa[j]):
                print(f'{i} matches {j}. Adding to good_diff')
                add_good_diff()

# flatten the dictionary
# {0: [1, 4]} will become [0, 1, 4]
for item in good_diff:
    final_list.append([item] + good_diff[item])
print(final_list)
When you run that, you will see the result:
[[0, 1, 4], [3, 5]]
Give it a shot.

PyTorch unfold vs as_strided

It seems PyTorch's unfold and as_strided do the same thing, except that with the former you cannot control the size of the output tensor.
import torch
import torch.nn as nn
x = torch.arange(10)
# Sliding windows of length 3 along dim 0, advancing one element per window.
x1 = x.unfold(0, 3, 1)
# The same windows requested as an explicit (8, 3) view with strides (1, 1).
x2 = torch.as_strided(x, size=(8, 3), stride=(1, 1))
print(f'x1 = {x1}')
print(f'x2 = {x2}')
output:
x1 = tensor([[0, 1, 2],
[1, 2, 3],
[2, 3, 4],
[3, 4, 5],
[4, 5, 6],
[5, 6, 7],
[6, 7, 8],
[7, 8, 9]])
x2 = tensor([[0, 1, 2],
[1, 2, 3],
[2, 3, 4],
[3, 4, 5],
[4, 5, 6],
[5, 6, 7],
[6, 7, 8],
[7, 8, 9]])
So is there any situation in which you should use unfold instead of as_strided, or vice versa?

numpy remove column by different value in batch

I want to ask how to remove columns from a NumPy array in batch, using a list of indices.
The indices to remove are different for each row of the batch.
I know this problem can be solved with a for loop, but that is too slow ...
Can anyone give me some ideas to speed it up?
array (batch size = 3):
[[0, 1, 2, 3, 4, 5, 6], [0, 1, 2, 3, 4, 5, 6], [0, 1, 2, 3, 4, 5, 6]]
remove index in the list (batch size = 3)
[[2, 3, 4], [1, 2, 6], [0, 1, 5]]
output:
[[0, 1, 5, 6], [0, 3, 4, 5], [2, 3, 4, 6]]
Assuming the array is 2d, and the indexing removes equal number of elements per row, we can remove items with a boolean mask:
In [289]: arr = np.array([[0, 1, 2, 3, 4, 5, 6], [0, 1, 2, 3, 4, 5, 6], [0, 1, 2, 3, 4, 5, 6]]
...: )
In [290]: idx = np.array([[2, 3, 4], [1, 2, 6], [0, 1, 5]])
In [291]: mask = np.ones_like(arr, dtype=bool)
In [292]: mask[np.arange(3)[:,None], idx] = False
In [293]: arr[mask]
Out[293]: array([0, 1, 5, 6, 0, 3, 4, 5, 2, 3, 4, 6])
In [294]: arr[mask].reshape(3,-1)
Out[294]:
array([[0, 1, 5, 6],
[0, 3, 4, 5],
[2, 3, 4, 6]])

Resources