Repeat some sequences in a Dataset in Pytorch - pytorch

I have this dataloader inspired in this example:
import torch
from torch.utils.data import Dataset, DataLoader
class CustomTextDataset(Dataset):
def __init__(self, X, y):
self.X = X
self.y = y
def __len__(self):
return len(self.y)
def __getitem__(self, idx):
data = self.X[idx]
label = self.y[idx]
return data, label
# define data and class labels
X = [1, 2, 3, 4, 5]
y = [0, 0, 1, 0, 1]
# define data set object
TD = CustomTextDataset(X, y)
# define dataloader
ddl = DataLoader(TD, batch_size=2)
for sample, target in ddl:
print(sample)
In each batch, in this case of size 2, the print outputs 1, 2; then 3, 4; and then 5. But how can I get the first sample to be 1, 2; then 2, 3; then 3, 4; and last 4, 5? (That is, I repeat the last element).
Can it be generalized to use in the next batch the last n elements of the previous batch and as label not the position of idx but idx+m? Thx.

I don't think this is possible using a pytorch DataLoader. However you could create a custom function in the CustomTextDataset that retrieves a custom batch? Something like:
def get_custom_batch(self, k, n=1, batch_size=2):
tmp_data = tuple([self[k - n + ii] for ii in range(batch_size)])
X = torch.cat([x[0][None, ...] for x in tmp_data], dim=0)
y = torch.cat([x[1][None, ...] for x in tmp_data], dim=0)
return (X, y)

Related

When ı compile yolov3 ı get take warnings

"""YOLO v3 output
"""
import numpy as np
import keras.backend as K
from keras.models import load_model
import os
class YOLO:
def __init__(self, obj_threshold, nms_threshold):
"""Init.
# Arguments
obj_threshold: Integer, threshold for object.
nms_threshold: Integer, threshold for box.
"""
self._t1 = obj_threshold
self._t2 = nms_threshold
self._yolo = load_model('data/yolo.h5')
def _process_feats(self, out, anchors, mask):
"""process output features.
# Arguments
out: Tensor (N, N, 3, 4 + 1 +80), output feature map of yolo.
anchors: List, anchors for box.
mask: List, mask for anchors.
# Returns
boxes: ndarray (N, N, 3, 4), x,y,w,h for per box.
box_confidence: ndarray (N, N, 3, 1), confidence for per box.
box_class_probs: ndarray (N, N, 3, 80), class probs for per box.
"""
grid_h, grid_w, num_boxes = map(int, out.shape[1: 4])
anchors = [anchors[i] for i in mask]
# Reshape to batch, height, width, num_anchors, box_params.
anchors_tensor = K.reshape(K.variable(anchors),
[1, 1,len(anchors), 2])
out = out[0]
box_xy = K.get_value(K.sigmoid(out[..., :2]))
box_wh = K.get_value(K.exp(out[..., 2:4]) * anchors_tensor)
box_confidence = K.get_value(K.sigmoid(out[..., 4]))
box_confidence = np.expand_dims(box_confidence, axis=-1)
box_class_probs = K.get_value(K.sigmoid(out[..., 5:]))
col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)
col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
grid = np.concatenate((col, row), axis=-1)
box_xy += grid
box_xy /= (grid_w, grid_h)
box_wh /= (416, 416)
box_xy -= (box_wh / 2.)
boxes = np.concatenate((box_xy, box_wh), axis=-1)
return boxes, box_confidence, box_class_probs
def _filter_boxes(self, boxes, box_confidences, box_class_probs):
"""Filter boxes with object threshold.
# Arguments
boxes: ndarray, boxes of objects.
box_confidences: ndarray, confidences of objects.
box_class_probs: ndarray, class_probs of objects.
# Returns
boxes: ndarray, filtered boxes.
classes: ndarray, classes for boxes.
scores: ndarray, scores for boxes.
"""
box_scores = box_confidences * box_class_probs
box_classes = np.argmax(box_scores, axis=-1)
box_class_scores = np.max(box_scores, axis=-1)
pos = np.where(box_class_scores >= self._t1)
boxes = boxes[pos]
classes = box_classes[pos]
scores = box_class_scores[pos]
return boxes, classes, scores
def _nms_boxes(self, boxes, scores):
"""Suppress non-maximal boxes.
# Arguments
boxes: ndarray, boxes of objects.
scores: ndarray, scores of objects.
# Returns
keep: ndarray, index of effective boxes.
"""
x = boxes[:, 0]
y = boxes[:, 1]
w = boxes[:, 2]
h = boxes[:, 3]
areas = w * h
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x[i], x[order[1:]])
yy1 = np.maximum(y[i], y[order[1:]])
xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])
w1 = np.maximum(0.0, xx2 - xx1 + 1)
h1 = np.maximum(0.0, yy2 - yy1 + 1)
inter = w1 * h1
ovr = inter / (areas[i] + areas[order[1:]] - inter)
inds = np.where(ovr <= self._t2)[0]
order = order[inds + 1]
keep = np.array(keep)
return keep
def _yolo_out(self, outs, shape):
"""Process output of yolo base net.
# Argument:
outs: output of yolo base net.
shape: shape of original image.
# Returns:
boxes: ndarray, boxes of objects.
classes: ndarray, classes of objects.
scores: ndarray, scores of objects.
"""
masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
[59, 119], [116, 90], [156, 198], [373, 326]]
boxes, classes, scores = [], [], []
for out, mask in zip(outs, masks):
b, c, s = self._process_feats(out, anchors, mask)
b, c, s = self._filter_boxes(b, c, s)
boxes.append(b)
classes.append(c)
scores.append(s)
boxes = np.concatenate(boxes)
classes = np.concatenate(classes)
scores = np.concatenate(scores)
# Scale boxes back to original image shape.
width, height = shape[1], shape[0]
image_dims = [width, height, width, height]
boxes = boxes * image_dims
nboxes, nclasses, nscores = [], [], []
for c in set(classes):
inds = np.where(classes == c)
b = boxes[inds]
c = classes[inds]
s = scores[inds]
keep = self._nms_boxes(b, s)
nboxes.append(b[keep])
nclasses.append(c[keep])
nscores.append(s[keep])
if not nclasses and not nscores:
return None, None, None
boxes = np.concatenate(nboxes)
classes = np.concatenate(nclasses)
scores = np.concatenate(nscores)
return boxes, classes, scores
def predict(self, image, shape):
"""Detect the objects with yolo.
# Arguments
image: ndarray, processed input image.
shape: shape of original image.
# Returns
boxes: ndarray, boxes of objects.
classes: ndarray, classes of objects.
scores: ndarray, scores of objects.
"""
outs = self._yolo.predict(image)
boxes, classes, scores = self._yolo_out(outs, shape)
return boxes, classes, scores
This is the yolo v3 code and when ı work main program ı take this error
InvalidArgumentError: Incompatible shapes: [13,13,2] vs. [1,1,3,2] [Op:Mul]
Main part is
import cv2
import numpy as np
from yolo_model import YOLO
yolo = YOLO(0.6, 0.5)
file = "data/coco_classes.txt"
with open(file) as f:
class_name = f.readlines()
all_classes = [c.strip() for c in class_name]
print("A")
f = "dog_cat.jpg"
path = "images/"+f
image = cv2.imread(path)
cv2.imshow("image",image)
pimage = cv2.resize(image, (416,416))
pimage = np.array(pimage, dtype = "float32")
pimage /= 255.0
pimage = np.expand_dims(pimage, axis = 0)
# yolo
boxes, classes, scores = yolo.predict(pimage, image.shape)
for box, score, cl in zip(boxes, scores, classes):
x,y,w,h = box
top = max(0, np.floor(x + 0.5).astype(int))
left = max(0, np.floor(y + 0.5).astype(int))
right = max(0, np.floor(x + w + 0.5).astype(int))
bottom = max(0, np.floor(y + h + 0.5).astype(int))
cv2.rectangle(image, (top,left), (right, bottom),(255,0,0),2)
cv2.putText(image, "{} {}".format(all_classes[cl],score),(top,left-6),cv2.FONT_HERSHEY_SIMPLEX,0.6, (0,0,255),1,cv2.LINE_AA)
cv2.imshow("yolo",image)
I take problem in box_wh = K.get_value(K.exp(out[..., 2:4]) * anchors_tensor). Is multiply necessary? And what do box_wh?

Mayavi : setting pipeline.tube radius

I'm plotting a 3D network using Mayavi,
edge_size = 0.2
pts = mlab.points3d(x, y, z,
scale_mode='none',
scale_factor=0.1)
pts.mlab_source.dataset.lines = np.array(graph.edges())
tube = mlab.pipeline.tube(pts, tube_radius=edge_size)
I want to change edge/tube radius. So I tried
tube = mlab.pipeline.tube(pts, tube_radius=listofedgeradius)
I get an error that says,
traits.trait_errors.TraitError: The 'tube_radius' trait of a TubeFactory instance must be a float
From the error, I understand a list cannot be assigned to tube_radius. In this case, I am not sure how to assign a different radius to each edge.
Any suggestions on how to assign edge weights/edge radius will be helpful.
EDIT: Complete working example
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from mayavi import mlab
def main(edge_color=(0.8, 0.8, 0.8), edge_size=0.02):
t = [1, 2, 3, 4, 5]
h = [2, 3, 4, 5, 6]
ed_ls = [(x, y) for x, y in zip(t, h)]
G = nx.OrderedGraph()
G.add_edges_from(ed_ls)
nx.draw(G)
plt.show()
graph_pos = nx.spring_layout(G, dim=3)
# numpy array of x,y,z positions in sorted node order
xyz = np.array([graph_pos[v] for v in sorted(G)])
mlab.figure(1)
mlab.clf()
pts = mlab.points3d(xyz[:, 0], xyz[:, 1], xyz[:, 2])
pts.mlab_source.dataset.lines = np.array(G.edges())
tube = mlab.pipeline.tube(pts, tube_radius=edge_size)
mlab.pipeline.surface(tube, color=edge_color)
mlab.show() # interactive window
main()
New edge weights to be added in the expected output:
listofedgeradius = [1, 2, 3, 4, 5]
tube = mlab.pipeline.tube(pts, tube_radius=listofedgeradius)
Is seems to me that you can't plot multiple tubes with different diameter at once.
So one solution is to plot them one after another:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from mayavi import mlab
def main(edge_color=(0.8, 0.8, 0.8)):
t = [1, 2, 4, 4, 5, 3, 5]
h = [2, 3, 6, 5, 6, 4, 1]
ed_ls = [(x, y) for x, y in zip(t, h)]
G = nx.OrderedGraph()
G.add_edges_from(ed_ls)
graph_pos = nx.spring_layout(G, dim=3)
print(graph_pos)
# numpy array of x,y,z positions in sorted node order
xyz = np.array([graph_pos[v] for v in sorted(G)])
listofedgeradius = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) * 0.1
for i, e in enumerate(G.edges()):
# node number of the edge
i1, i2 = e
# graph_pos is a dictionary
c1 = graph_pos[i1]
c2 = graph_pos[i2]
edge_xyz = np.vstack((c1, c2))
pts = mlab.points3d(edge_xyz[:, 0], edge_xyz[:, 1], edge_xyz[:, 2])
#pts.mlab_source.dataset.lines = np.array(G.edges())
# always first and second point
pts.mlab_source.dataset.lines = np.array([[0, 1]])
tube = mlab.pipeline.tube(pts, tube_radius=listofedgeradius[i])
mlab.pipeline.surface(tube, color=edge_color)
mlab.gcf().scene.parallel_projection = True
mlab.show() # interactive window
main()
Here is a larger example with 100 edges (image below) and one caveat of this solution becomes obvious: the for loop is slow.
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from mayavi import mlab
def main(edge_color=(0.8, 0.8, 0.8)):
n = 100
t = np.random.randint(100, size=n)
h = np.random.randint(100, size=n)
ed_ls = [(x, y) for x, y in zip(t, h)]
G = nx.OrderedGraph()
G.add_edges_from(ed_ls)
graph_pos = nx.spring_layout(G, dim=3)
print(graph_pos)
# numpy array of x,y,z positions in sorted node order
xyz = np.array([graph_pos[v] for v in sorted(G)])
listofedgeradius = np.random.rand(n) * 0.01
for i, e in enumerate(G.edges()):
print(i)
# node number of the edge
i1, i2 = e
# graph_pos is a dictionary
c1 = graph_pos[i1]
c2 = graph_pos[i2]
edge_xyz = np.vstack((c1, c2))
pts = mlab.points3d(edge_xyz[:, 0], edge_xyz[:, 1], edge_xyz[:, 2])
#pts.mlab_source.dataset.lines = np.array(G.edges())
# always first and second point
pts.mlab_source.dataset.lines = np.array([[0, 1]])
tube = mlab.pipeline.tube(pts, tube_radius=listofedgeradius[i])
mlab.pipeline.surface(tube, color=edge_color)
mlab.gcf().scene.parallel_projection = True
mlab.show() # interactive window
main()
Inspired by this, this and this I put together a first example that works well for large graphs (I tried up to 5000 edges). There is still a for loop, but it is not used for plotting, only for gathering the data in numpy arrays, so it's not that bad.
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from mayavi import mlab
def main(edge_color=(0.8, 0.8, 0.8)):
n = 5000
t = np.random.randint(100, size=n)
h = np.random.randint(100, size=n)
ed_ls = [(x, y) for x, y in zip(t, h)]
G = nx.OrderedGraph()
G.add_edges_from(ed_ls)
graph_pos = nx.spring_layout(G, dim=3)
print(graph_pos)
listofedgeradius = np.random.rand(n) * 0.01
# We create a list of positions and connections, each describing a line.
# We will collapse them in one array before plotting.
x = list()
y = list()
z = list()
s = list()
connections = list()
N = 2 # every edge brings two nodes
# The index of the current point in the total amount of points
index = 0
for i, e in enumerate(G.edges()):
# node number of the edge
i1, i2 = e
# graph_pos is a dictionary
c1 = graph_pos[i1]
c2 = graph_pos[i2]
edge_xyz = np.vstack((c1, c2))
x.append(edge_xyz[:, 0])
y.append(edge_xyz[:, 1])
z.append(edge_xyz[:, 2])
s.append(listofedgeradius[i])
s.append(listofedgeradius[i])
# This is the tricky part: in a line, each point is connected
# to the one following it. We have to express this with the indices
# of the final set of points once all lines have been combined
# together, this is why we need to keep track of the total number of
# points already created (index)
ics = np.vstack(
[np.arange(index, index + N - 1.5),
np.arange(index + 1, index + N - .5)]
).T
#print(ics)
connections.append(ics)
index += N
# Now collapse all positions, scalars and connections in big arrays
x = np.hstack(x)
y = np.hstack(y)
z = np.hstack(z)
s = np.hstack(s)
# print(x.shape)
# print(y.shape)
# print(z.shape)
# print(s.shape)
connections = np.vstack(connections)
# # graph_pos is a dictionary
# c1 = graph_pos[i1]
# c2 = graph_pos[i2]
# edge_xyz = np.vstack((c1, c2))
#src = mlab.points3d(x, y, z, s)
#src = mlab.pipeline.scalar_scatter(x, y, z, s)
src = mlab.plot3d(x, y, z, s)
print(src)
print(src.parent)
print(src.parent.parent)
#src.parent.parent.filter.vary_radius = 'vary_radius_by_scalar'
src.parent.parent.filter.vary_radius = 'vary_radius_by_absolute_scalar'
# Connect them
src.mlab_source.dataset.lines = connections
#src.update()
# The stripper filter cleans up connected lines
lines = mlab.pipeline.stripper(src)
# Finally, display the set of lines
#mlab.pipeline.surface(lines, colormap='Accent', line_width=1, opacity=.4)
#tube = mlab.pipeline.tube(src, tube_radius=0.01)
#tube.filter.radius_factor = 1
#tube.filter.vary_radius = 'vary_radius_by_scalar'
#surf = mlab.pipeline.surface(tube, opacity=0.6, color=(0.8,0.8,0))
#t = mlab.plot3d(x, y, z, s, tube_radius=10)
#t.parent.parent.filter.vary_radius = 'vary_radius_by_scalar'
#pts.mlab_source.dataset.lines = np.array(G.edges())
# always first and second point
#pts.mlab_source.dataset.lines = np.array([[0, 1]])
#tube = mlab.pipeline.tube(src, tube_radius=listofedgeradius[i])
#mlab.pipeline.surface(tube, color=edge_color)
# pts = self.scene.mlab.quiver3d(x, y, z, atomsScales, v, w,
# scalars=scalars, mode='sphere', vmin=0.0, vmax=1.0, figure = scene)
# pts.mlab_source.dataset.lines = bonds
# tube = scene.mlab.pipeline.tube(pts, tube_radius=0.01)
# tube.filter.radius_factor = 1
# tube.filter.vary_radius = 'vary_radius_by_scalar'
# surf = scene.mlab.pipeline.surface(tube, opacity=0.6, color=(0.8,0.8,0))
# t = mlab.plot3d(x, y, z, s, tube_radius=10)
#t.parent.parent.filter.vary_radius = 'vary_radius_by_scalar'
# self.plot = self.scene.mlab.plot3d(x, y, z, t,
# tube_radius=self.radius, colormap='Spectral')
# else:
# self.plot.parent.parent.filter.radius = self.radius
mlab.gcf().scene.parallel_projection = True
# And choose a nice view
mlab.view(33.6, 106, 5.5, [0, 0, .05])
mlab.roll(125)
mlab.show()
main()

numpy.dot() gives TypeError: can't multiply sequence by non-int of type 'float'

I have just begun learning Machine Learning using Python. I have written the following class which gives an error:
TypeError: can't multiply sequence by non-int of type 'float'
class Perceptron(object):
def __init__(self, eta=0.01, n_iter=10):
self.eta = eta # Learning Rate
self.n_iter = n_iter # Number of iteration over the training dataset
def fit(self, x, y):
self.w_ = np.zeros(1 + x.shape[1]) # Initialize Weights to zero initially # x = {array-like} : shape[no_of_samples, no_of_features]
self.errors_ = [] # No errors in the beginning of the computation
for _ in range(self.n_iter):
errors = 0
for xi, target in zip(x, y):
update = self.eta * (target - self.predict(xi))
self.w_[1:] += update * xi
self.w_[0] += update
errors += int(update != 0.0)
self.errors_.append(errors)
return self
def net_input(self, x):
return np.dot(x, self.w_[1:]) + self.w_[0]
def predict(self, x):
return np.where(self.net_input(x) >= 0.0, 1, -1)
I am getting an error in the net_input() method at np.dot().
I am using the following dataset :
https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv
If you're reading the training data (data and predictions) from the file iris.csv
sepal_length,sepal_width,petal_length,petal_width,species
5.1,3.5,1.4,0.2,setosa
4.9,3,1.4,0.2,setosa
with:
data = pd.read_csv("iris.csv")
make sure that you define x as the first four columns, otherwise it will contain the strings from the last column:
X = data.iloc[:,0:4]
And prediction values:
y = data.iloc[:,5]
y = y.values.reshape(150,1)
Following changes would help.
def fit(self, x, y):
...
for xi, target in zip(x, y):
update = self.eta * (target - self.predict(xi.reshape(1, x.shape[1]))
...
# Here if you want to implement perceptron, use matmul not dot product
def net_input(self, x):
return np.matmul(x, self.w_[1:]) + self.w_[0]
Check the shape of x.
If it is (a, 1) where a is a number, use this:
def net_input(self, x):
return np.dot(x.T, self.w_[1:]) + self.w_[0]
If it is (1, a) use this:
def net_input(self, x):
return np.dot(x, self.w_[1:].T) + self.w_[0]
My guess is that x is an object dtype array of lists.
If I define an object dtype array:
In [45]: x=np.empty((2,),object)
In [46]: x[:]=[[1,2],[3,4]]
In [49]: x
Out[49]: array([list([1, 2]), list([3, 4])], dtype=object)
I get the same error with a list (or array) of floats:
In [50]: np.dot(x, [1.2,4.5])
...
TypeError: can't multiply sequence by non-int of type 'float'
If instead I give it integers, it works - sort of
In [51]: np.dot(x, [1,2])
Out[51]: [1, 2, 3, 4, 3, 4]
What it has actually done is [1,2]*1 and [3,4]*2, list replication. This is not numeric multiplication.
That's the only combination of variables that makes sense of the error message.
So you need to figure out why x is an object array. Often that's the result of building an array from lists that differ in length
In [54]: x = np.array([[1,2],[3,4,5]])
In [55]: x
Out[55]: array([list([1, 2]), list([3, 4, 5])], dtype=object)
So the basic question when faced with an error like this, what's the shape and dtype of the variables.

TensorFlow, losses after training the model are different than losses printed during the last Epoch of Stochastic Gradient Descent.

I'm trying to do binary classification on two spirals. For testing, I am feeding my neural network the exact spiral data with no noise, and the model seems to work as the losses near 0 during SGD. However, after using my model to infer the exact same data points after SGD has completed, I get completely different losses than what was printed during the last epoch of SGD.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions(threshold=np.nan)
# get the spiral points
t_p = np.linspace(0, 4, 1000)
x1_p = t_p * np.cos(t_p*2*np.pi)
y1_p = t_p * np.sin(t_p*2*np.pi)
x2_p = t_p * np.cos(t_p*2*np.pi + np.pi)
y2_p = t_p * np.sin(t_p*2*np.pi + np.pi)
plt.plot(x1_p, y1_p, x2_p, y2_p)
# generate data points
x1_dat = x1_p
y1_dat = y1_p
x2_dat = x2_p
y2_dat = y2_p
def model_variable(shape, name, initializer):
variable = tf.get_variable(name=name,
dtype=tf.float32,
shape=shape,
initializer=initializer
)
tf.add_to_collection('model_variables', variable)
return variable
class Model():
#layer specifications includes bias nodes
def __init__(self, sess, data, nEpochs, learning_rate, layer_specifications):
self.sess = sess
self.data = data
self.nEpochs = nEpochs
self.learning_rate = learning_rate
if layer_specifications[0] != 2 or layer_specifications[-1] != 1:
raise ValueError('First layer only two nodes, last layer only 1 node')
else:
self.layer_specifications = layer_specifications
self.build_model()
def build_model(self):
# x is the two nodes that will be layer one, will input an x, y coordinate
# and need to classify which spiral is it on, the non phase shifted or the phase
# shifted one.
# y is the output of the model
self.x = tf.placeholder(tf.float32, shape=[2, 1])
self.y = tf.placeholder(tf.float32, shape=[])
self.thetas = []
self.biases = []
for i in range(1, len(self.layer_specifications)):
self.thetas.append(model_variable([self.layer_specifications[i], self.layer_specifications[i-1]], 'theta'+str(i), tf.random_normal_initializer(stddev=0.1)))
self.biases.append(model_variable([self.layer_specifications[i], 1], 'bias'+str(i), tf.constant_initializer()))
#forward propagation
intermediate = self.x
for i in range(0, len(self.layer_specifications)-1):
if i != (len(self.layer_specifications) - 2):
intermediate = tf.nn.elu(tf.add(tf.matmul(self.thetas[i], intermediate), self.biases[i]))
else:
intermediate = tf.add(tf.matmul(self.thetas[i], intermediate), self.biases[i])
self.yhat = tf.squeeze(intermediate)
self.loss = tf.nn.sigmoid_cross_entropy_with_logits(self.yhat, self.y);
def train_init(self):
model_variables = tf.get_collection('model_variables')
self.optim = (
tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
.minimize(self.loss, var_list=model_variables)
)
self.check = tf.add_check_numerics_ops()
self.sess.run(tf.initialize_all_variables())
# here is where x and y combine to get just x in tf with shape [2, 1] and where label becomes y in tf
def train_iter(self, x, y):
loss, _, _ = sess.run([self.loss, self.optim, self.check],
feed_dict = {self.x: x, self.y: y})
print('loss: {0} on:{1}'.format(loss, x))
# here x and y are still x and y coordinates, label is separate
def train(self):
for _ in range(self.nEpochs):
for x, y, label in self.data():
print(label)
self.train_iter([[x], [y]], label)
print("NEW ONE:\n")
# here x and y are still x and y coordinates, label is separate
def infer(self, x, y, label):
return self.sess.run((tf.sigmoid(self.yhat), self.loss), feed_dict={self.x : [[x], [y]], self.y : label})
def data():
#so first spiral is label 0, second is label 1
for _ in range(len(x1_dat)-1, -1, -1):
for dat in range(2):
if dat == 0:
yield x1_dat[_], y1_dat[_], 0
else:
yield x2_dat[_], y2_dat[_], 1
layer_specifications = [2, 100, 100, 100, 1]
sess = tf.Session()
model = Model(sess, data, nEpochs=10, learning_rate=1.1e-2, layer_specifications=layer_specifications)
model.train_init()
model.train()
inferrences_1 = []
inferrences_2 = []
losses = 0
for i in range(len(t_p)-1, -1, -1):
infer, loss = model.infer(x1_p[i], y1_p[i], 0)
if infer >= 0.5:
print('loss: {0} on point {1}, {2}'.format(loss, x1_p[i], y1_p[i]))
losses = losses + 1
inferrences_1.append('r')
else:
inferrences_1.append('g')
for i in range(len(t_p)-1, -1, -1):
infer, loss = model.infer(x2_p[i], y2_p[i], 1)
if infer >= 0.5:
inferrences_2.append('r')
else:
print('loss: {0} on point {1}, {2}'.format(loss, x2_p[i], y2_p[i]))
losses = losses + 1
inferrences_2.append('g')
print('total losses: {}'.format(losses))
plt.scatter(x1_p, y1_p, c=inferrences_1)
plt.scatter(x2_p, y2_p, c=inferrences_2)
plt.show()

How to modify my K-Means clustering algorithm to increase the dimensions upto 8?

I have created my k means algorithm for 2 dimensions. I want to modify it for 8 dimensions i.e. the datapoints can take 8-dimensional values and finally return 8-dimensional centroid values.
The code is following :
import random
import math
# Input varibles
#k = 3
#Threshold = 1
DATA = [[2, 1, 1, 2, 1, 1, 1, 5], [ 6, 8, 1, 3, 4, 3, 7, 1],[4, 1, 3, 2, 1, 3, 1, 1],[3, 1, 1, 2, 1, 2, 1, 1],[3 ,1 ,1 ,1, 1, 2, 1, 1],[6, 1, 1, 1, 1, 7, 1, 1],[6, 10, 2, 8, 10, 7, 3, 3]]
BIG_NUMBER = math.pow(10, 10)
data = []
centroids = []
class DataPoint:
def __init__(self, x, y):
self.x = x
self.y = y
def set_x(self, x):
self.x = x
def get_x(self):
return self.x
def set_y(self, y):
self.y = y
def get_y(self):
return self.y
def set_cluster(self, clusterNumber):
self.clusterNumber = clusterNumber
def get_cluster(self):
return self.clusterNumber
class Centroid:
def __init__(self, x, y):
self.x = x
self.y = y
def set_x(self, x):
self.x = x
def get_x(self):
return self.x
def set_y(self, y):
self.y = y
def get_y(self):
return self.y
# Initializing The Centroids
def initialize_centroids(k,DATA):
#find data range in x and y
max_x = max(x for x,y in DATA)
max_y = max(y for x,y in DATA)
min_x = min(x for x,y in DATA)
min_y = min(y for x,y in DATA)
#chosse random x and y between this data range
#assign to centroids
for j in range(k):
#x = random.choice(DATA)
random_x = random.uniform(min_x,max_x)
random_y = random.uniform(min_y,max_y)
centroids.append(Centroid(random_x, random_y))
#print("(", centroids[j].get_x(), ",", centroids[j].get_y(), ")")
return centroids
# Assigning Datapoints to nearest Centroids
def initialize_datapoints(k,DATA):
for i in range(len(DATA)):
newpoint = DataPoint(DATA[i][0], DATA[i][1])
bestMinimum = BIG_NUMBER
data.append(newpoint)
for j in range(k):
distance = get_distance(newpoint.get_x(), newpoint.get_y(), centroids[j].get_x(), centroids[j].get_y())
if(distance < bestMinimum):
bestMinimum = distance
newpoint.set_cluster(j)
return
# Calculating Euclidean distance
def get_distance(dataPointX, dataPointY, centroidX, centroidY):
return math.sqrt(math.pow((centroidY - dataPointY), 2) + math.pow((centroidX - dataPointX), 2))
# Updating Centroid and Clusters till the threshold is met
def update_centroids_n_clusters(k,DATA,Threshold):
dist = 0.0
#print ("a")
for j in range(k):
prev_x = centroids[j].get_x()
prev_y = centroids[j].get_y()
totalX = 0
totalY = 0
totalInCluster = 0
for z in range(len(data)):
if (data[z].get_cluster() == j):
totalX += data[z].get_x()
totalY += data[z].get_y()
totalInCluster += 1
if (totalInCluster > 0):
s_x = (totalX / totalInCluster)
s_y = (totalY / totalInCluster)
centroids[j].set_x(s_x)
centroids[j].set_y(s_y)
x1 = centroids[j].get_x()
y1 = centroids[j].get_y()
x2 = prev_x
y2 = prev_y
dist += get_distance(x1,y1,x2,y2)
conv_val = (1/k)*dist
if(conv_val >= Threshold):
for i in range(len(DATA)):
bestMinimum = BIG_NUMBER
currentCluster = 0
for j in range(k):
distance = get_distance(data[i].get_x(), data[i].get_y(), centroids[j].get_x(), centroids[j].get_y())
if (distance < bestMinimum):
bestMinimum = distance
currentCluster = j
data[i].set_cluster(currentCluster)
update_centroids_n_clusters(k, DATA, Threshold)
return
# Performing K_Means
def Kmeans(k, DATA, Threshold):
initialize_centroids(k,DATA)
initialize_datapoints(k, DATA)
update_centroids_n_clusters(k, DATA, Threshold)
for i in range(k):
p = 0
print()
print("Centroid ", i, " is at")
print("(",centroids[i].get_x(), ",", centroids[i].get_y(), ")")
print("Cluster ", i, " includes:")
for j in range(len(DATA)):
if (data[j].get_cluster() == i):
#print("(", data[j].get_x(), ", ", data[j].get_y(), ")")
p += 1
print(p,"points")
return
Kmeans(3,DATA,0.1)
How should I modify my class Centroid and class DataPoint in this code? Thanks!!
Note: The code is in Python 3
Use arrays instead of x and y.
You want e.g. your distance function to be
def distance(array1, array2):
return (array1 - array2)**2
(assuming you use numpy)

Resources