Related
Hi is there any method for apply trasnformation for certain batch?
It means, I want apply trasnformation for just last batch in every epochs.
What I tried is here
import torch
class test(torch.utils.data.Dataset):
def __init__(self):
self.source = [i for i in range(10)]
def __len__(self):
return len(self.source)
def __getitem__(self, idx):
print(idx)
return self.source[idx]
ds = test()
dl = torch.utils.data.DataLoader(dataset = ds, batch_size = 3,
shuffle = False, num_workers = 5)
for i in dl:
print(i)
because I thought that if I could get idx number, it would be possible to apply for certain batchs.
However If using num_workers outputs are
0
1
2
3
964
57
8
tensor([0, 1, 2])
tensor([3, 4, 5])
tensor([6, 7, 8])
tensor([9])
which are not I thought
without num_worker
0
1
2
tensor([0, 1, 2])
3
4
5
tensor([3, 4, 5])
6
7
8
tensor([6, 7, 8])
9
tensor([9])
So the question is
Why idx works so with num_workers?
How can I apply trasnform for certain batchs (or certain idx)?
When you have num_workers > 1, you have multiple subprocesses doing data loading in parallel. So what is likely happening is that there is a race condition for the print step, and the order you see in the output depends on which subprocess goes first each time.
For most transforms, you can apply them on a specific batch simply by calling the transform after the batch has been loaded. To do this just for the last batch, you could do something like:
for batch_idx, batch_data in dl:
# check if batch is the last batch
if ((batch_idx+1) * batch_size) >= len(ds):
batch_data = transform(batch_data)
I found that
class test_dataset(torch.utils.data.Dataset):
def __init__(self):
self.a = [i for i in range(100)]
def __len__(self):
return len(self.a)
def __getitem__(self, idx):
a = torch.tensor(self.a[idx])
#print(idx)
return idx
a = torch.utils.data.DataLoader(
test_dataset(), batch_size = 10, shuffle = False,
num_workers = 10, pin_memory = True)
for i in a:
print(i)
tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
tensor([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
tensor([20, 21, 22, 23, 24, 25, 26, 27, 28, 29])
tensor([30, 31, 32, 33, 34, 35, 36, 37, 38, 39])
tensor([40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
tensor([50, 51, 52, 53, 54, 55, 56, 57, 58, 59])
tensor([60, 61, 62, 63, 64, 65, 66, 67, 68, 69])
tensor([70, 71, 72, 73, 74, 75, 76, 77, 78, 79])
tensor([80, 81, 82, 83, 84, 85, 86, 87, 88, 89])
tensor([90, 91, 92, 93, 94, 95, 96, 97, 98, 99])
I need to get this output in my python code:
but I am getting index out of range error like this:
here's my code:
marks = [['john',80, 90, 76, 82],['katy', 50, 55, 70, 65],['sydney',80,
72, 88, 90]]
marks_c = {}
for i in range(len(marks)):
name = marks[i][0]
l = []
for j in range(2,len(marks[i])):
print(marks[j][i])
print(marks_c)
print(marks_c)
what am I doing wrong?
Does this code deliver what you are looking for:
Code:
marks = [['john', 80, 90, 76, 82], ['katy', 50, 55, 70, 65], ['sydney', 80, 72, 88, 90]]
marks_c = {}
for entry in marks:
marks_c[entry.pop(0)] = entry
print(marks_c)
# Upon request of the questioner:
#
# for i in range(len(marks)):
# name = marks[i][0]
# l = []
# for j in range(1,len(marks[i])):
# l.append(marks[i][j])
# marks_c[name] = l
Output:
{'john': [80, 90, 76, 82], 'katy': [50, 55, 70, 65], 'sydney': [80, 72, 88, 90]}
I am trying to generate 2 list with different size consisting with random numbers. I can generate 2 list with random numbers, but how to achieve 2 different length of lists?
import random
list1 = random.sample(xrange(100), 10)
list2 = random.sample(xrange(100), 10)
print(list1)
print(list2)
Need to generate the lists with 2 random different sizes as well, as if both the lists are completely random.
Try the below code. Hope this would help.
If you want to create random number list of two different sizes. Then you can explicitly, pass the size of the list as a second argument, as given below.
import random
list1 = random.sample(xrange(100), 100)
list2 = random.sample(xrange(100), 10)
print(list1)
print(list2)
Ouput will be :
[46, 73, 13, 89, 44, 23, 74, 8, 19, 79, 36, 80, 85, 42, 82, 39, 61, 15, 27, 68, 67, 30, 11, 21, 86, 16, 63, 95, 17, 90, 37, 81, 20, 71, 93, 99, 40, 6, 47, 92, 58, 35, 12, 2, 10, 98, 87, 50, 51, 97, 70, 65, 78, 22, 72, 45, 59, 0, 52, 14, 1, 84, 43, 24, 54, 31, 18, 69, 7, 75, 53, 25, 57, 94, 83, 66, 3, 5, 88, 32, 4, 28, 29, 55, 9, 77, 60, 62, 41, 76, 48, 56, 34, 91, 33, 96, 49, 38, 26, 64]
[82, 58, 74, 61, 21, 77, 53, 35, 44, 59]
Now if you want to randomly decide the size of the list, the pass a random number as a second argument, by using randint function
import random
list1 = random.sample(range(100), random.randint(1,101))
list2 = random.sample(range(100), random.randint(1,101))
print(list1)
print(list2)
Output would be:
[93, 60, 82, 53, 16, 42, 0, 68, 88, 11, 89, 62, 38, 14, 27, 8, 45, 25, 83, 97, 94]
[30, 5, 19, 11, 14, 6, 7, 86, 16, 53, 71, 12, 90, 32]
You can try something like this, which would randomly generate the size between 1 and 10.
import random
list1 = random.sample(range(100), random.randint(1,10))
list2 = random.sample(range(100), random.randint(1,10))
print(list1)
print(list2)
This will generate random length of the lists. Hope it helps !
You need to randomize the second Parameter as well to become lists of random size:
import random
list1 = random.sample(range(100), random.randint(1,10))
list2 = random.sample(range(100), random.randint(1,10))
print(list1)
print(list2)
I receive "ValueError: setting an array element with a sequence" when running. I have tried to turn everything into a numpy array to no avail.
import matplotlib
import numpy as np
from matplotlib import pyplot
X=np.array([
np.array([1,2,3,4,5,6,7]),
np.array([1,2,3,4,5,6,7]),
np.array([1,2,3,4,5,6,6.5,7.5]),
np.array([1,2,3,4,5,6,7,8]),
np.array([1,2,3,4,5,6,7,8,8.5]),
np.array([1,2,3,4,5,6,7,8]),
np.array([1,2,3,4,5,6,7])])
Y=np.array([
np.array([1,1,1,1,1,1,1]),
np.array([2,2,2,2,2,2,2]),
np.array([3,3,3,3,3,3,2.5,3]),
np.array([4,4,4,4,4,4,4,4]),
np.array([5,5,5,5,5,5,5,5,5]),
np.array([6,6,6,6,6,6,6,6]),
np.array([7,7,7,7,7,7,7])])
Z= np.array([
np.array([4190, 4290, 4200, 4095, 4181, 4965, 4995]),
np.array([4321, 4389, 4311, 4212, 4894, 4999, 5001]),
np.array([4412, 4442, 4389, 4693, 4899, 5010, 5008, 4921]),
np.array([4552, 4651, 4900, 4921, 4932, 5020, 4935, 4735]),
np.array([4791, 4941, 4925, 5000, 4890, 4925, 4882, 4764, 4850]),
np.array([4732, 4795, 4791, 4852, 4911, 4865, 4919, 4862]),
np.array([4520, 4662, 4735,4794,4836,4852,4790])])
matplotlib.pyplot.contour(X, Y, Z)
EDIT
I sort of solved this problem by removing values from my sub-arrays in order to make the lengths equal, however I would still like to know how it is possible to feed an array containing sub-arrays of different lengths into contour plot.
The answer is to make X, Y and Z inputs all 1D arrays and to use tricontour instead of contour.
X=np.array([1,2,3,4,5,6,7,
1,2,3,4,5,6,7,
1,2,3,4,5,6,6.5,7.5,
1,2,3,4,5,6,7,8,
1,2,3,4,5,6,7,8,9,
1,2,3,4,5,6,7,8,
1,2,3,4,5,6,7])
Y=np.array([1,1,1,1,1,1,1,
2,2,2,2,2,2,2,
3,3,3,3,3,3,2.5,3,
4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7])
Z= np.array([80, 73, 65, 57, 61, 55, 60,
78, 73, 71, 55, 55, 60, 90,
65, 62, 61, 61, 51, 60, 71, 78,
70, 58, 58, 65, 80, 81, 90, 81,
80, 59, 51, 58, 70, 70, 90, 89, 78,
90, 63, 55, 58, 65, 78, 79, 70,
100, 68, 54,52,60,72,71])
Y=np.flip(Y,0)
asdf=matplotlib.pyplot.tricontour(X, Y, Z,11)
matplotlib.pyplot.xlim([1,8])
matplotlib.pyplot.ylim([1,7])
matplotlib.pyplot.clabel(asdf, fontsize=6, inline=0)
matplotlib.pyplot.show()
Suppose I clustered a data set using sklearn's K-means.
I can see the centroids easily using KMeans.cluster_centers_ but I need to get the clusters as I get centroids.
How can I do that?
You probably look for the attribute labels_.
You need to do the following (see comments in my code):
import numpy as np
from sklearn.cluster import KMeans
from sklearn import datasets
np.random.seed(0)
# Use Iris data
iris = datasets.load_iris()
X = iris.data
y = iris.target
# KMeans with 3 clusters
clf = KMeans(n_clusters=3)
clf.fit(X,y)
#Coordinates of cluster centers with shape [n_clusters, n_features]
clf.cluster_centers_
#Labels of each point
clf.labels_
# !! Get the indices of the points for each corresponding cluster
mydict = {i: np.where(clf.labels_ == i)[0] for i in range(clf.n_clusters)}
# Transform the dictionary into list
dictlist = []
for key, value in mydict.iteritems():
temp = [key,value]
dictlist.append(temp)
RESULTS
{0: array([ 50, 51, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 101, 106, 113, 114,
119, 121, 123, 126, 127, 133, 138, 142, 146, 149]),
1: array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),
2: array([ 52, 77, 100, 102, 103, 104, 105, 107, 108, 109, 110, 111, 112,
115, 116, 117, 118, 120, 122, 124, 125, 128, 129, 130, 131, 132,
134, 135, 136, 137, 139, 140, 141, 143, 144, 145, 147, 148])}
[[0, array([ 50, 51, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 101, 106, 113, 114,
119, 121, 123, 126, 127, 133, 138, 142, 146, 149])],
[1, array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])],
[2, array([ 52, 77, 100, 102, 103, 104, 105, 107, 108, 109, 110, 111, 112,
115, 116, 117, 118, 120, 122, 124, 125, 128, 129, 130, 131, 132,
134, 135, 136, 137, 139, 140, 141, 143, 144, 145, 147, 148])]]
It's been very long asked question so I think you already have the answer but let me post as someone can be benefited from it. We can get cluster points by just using its centroid. Scikit-learn has an attribute called cluster_centers_ which returns n_clusters and n_features. The very simple code that you can see it below that to describe the cluster center and please go through all the comments in the code.
import numpy as np
from sklearn.cluster import KMeans
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
# Iris data
iris = datasets.load_iris()
X = iris.data
# Standardization
std_data = StandardScaler().fit_transform(X)
# KMeans clustering with 3 clusters
clf = KMeans(n_clusters = 3)
clf.fit(std_data)
# Coordinates of cluster centers with shape [n_clusters, n_features]
# As we have 3 cluster with 4 features
print("Shape of cluster:", clf.cluster_centers_.shape)
# Scatter plot to see each cluster points visually
plt.scatter(std_data[:,0], std_data[:,1], c = clf.labels_, cmap = "rainbow")
plt.title("K-means Clustering of iris data flower")
plt.show()
# Putting ndarray cluster center into pandas DataFrame
coef_df = pd.DataFrame(clf.cluster_centers_, columns = ["Sepal length", "Sepal width", "Petal length", "Petal width"])
print("\nDataFrame containg each cluster points with feature names:\n", coef_df)
# converting ndarray to a nested list
ndarray2list = clf.cluster_centers_.tolist()
print("\nList of clusterd points:\n")
print(ndarray2list)
OUTPUTS:
This is the output of the above code.