Need Matrix multiplication assistance - python-3.x

So I have these three complete functions that return the desired row of a matrix, column of a matrix, and dot product of the row and column, and I need to somehow mesh them together in a fourth function in order to find the product of two matrices being multiplied together. Any suggestions?
#Test matrices
A = [[2,4], [7,0], [6,3]]
B = [[3,1], [-1,8], [-3, 3]]
C = [[4,1,9], [6,2,8], [7,3,5]]
D = [[2,9], [5,2], [1,0]]
def row(A,i):
Z = []
Z.append(A[i])
return Z[0]
def col(B,j):
Z=[]
for i in range(len(B)):
Z.append(B[i][j])
return Z
def dotProduct(x,y):
prod=0
for i in range(len(x)):
prod=prod+x[i]*y[i]
return prod
def matrixMUL(A, B):
Z = []
....
return Z

Solution with 3 more auxiliary functions:
from functools import partial
#Test matrices
A = [[2,4], [7,0], [6,3]]
B = [[3,1], [-1,8], [-3, 3]]
C = [[4,1,9], [6,2,8], [7,3,5]]
D = [[2,9], [5,2], [1,0]]
def row(A,i):
Z = []
Z.append(A[i])
return Z[0]
def col(B,j):
Z=[]
for i in range(len(B)):
Z.append(B[i][j])
return Z
def dotProduct(x,y):
prod=0
for i in range(len(x)):
prod=prod+x[i]*y[i]
return prod
def shape(A):
num_rows = len(A)
num_cols = len(A[0]) if A else 0
return num_rows, num_cols
def matrix_product_entry(A, B, i, j):
return dotProduct(row(A, i), col(B, j))
def matrix_make(rows, cols, entry_fn):
return [[entry_fn(i, j) for j in range(cols)]
for i in range(rows)]
def matrixMUL(A, B):
n1, k1 = shape(A)
print(n1, k1)
n2, k2 = shape(B)
if k1 != n2:
raise ArithmeticError("matrices shapes are not compatible!")
return matrix_make(n1, k2, partial(matrix_product_entry, A, B))
print (matrixMUL(C, D))
# returns [[22, 38], [30, 58], [34, 69]]

Related

DFS vs. Kruskal runtime (maze generation)

I have written two algorithms for creating unique mazes, one of them using depth-first-search (DFS) and the other using Kruskal's. The DFS algorithm performs as expected, however Kruskal's algorithm runs marginally slower than DFS and I do not know why.
I had written Kruskal's algorithm in Python.
I suspect the random.choice() function seems to be the underlying problem. The difference in runtime becomes noticeable when (r, c) > 30.
Here is the code for Kruskal's algorithm:
# Create a list of all possible edges
def create_edges(r, c):
edges = []
for y in range(r):
for x in range(c):
i = (y, x)
for d in ((0, 1), (0, -1), (1, 0), (-1, 0)):
p = tuple(map(sum, zip(d, i)))
py = p[0]
px = p[1]
if px in range(c) and py in range(r):
edges.append([i, p])
return edges
def kruskal(r, c, sz):
path = []
# Create a list of parent root nodes
roots = {(y, x) : [(y, x)] for y in range(r) for x in range(c)}
edges = create_edges(r, c)
while edges:
# Choose a random edge
edge = random.choice(edges)
parent = edge[0]
child = edge[1]
parent_set = get_set(roots, parent)
child_set = get_set(roots, child)
# Check if the parent / child are already in the same set
if parent_set == child_set:
rev_edge = edge.reverse()
if rev_edge in edges:
edges.remove(rev_edge)
edges.remove(edge)
continue
roots[parent_set] += roots[child_set]
roots.pop(child_set)
path.extend((parent, child))
rev_edge = edge.reverse()
if rev_edge in edges:
edges.remove(rev_edge)
edges.remove(edge)
return path
def get_set(roots, member):
s = None
for parent, children in roots.items():
if member in children:
s = parent
return s
def create_maze(t, r, c, sz):
maze = [['|_' for _ in range(c)] for _ in range(r)]
for cell in maze: cell.append('| ')
wd = {'DOWN' : ( 1, 0),
'UP' : (-1, 0),
'LEFT' : ( 0, -1),
'RIGHT': ( 0, 1)}
for n in range(len(t) - 1):
a = n
b = n + 1
p1 = t[a]
p2 = t[b]
ay, ax = p1[0], p1[1]
by, bx = p2[0], p2[1]
w = tuple(numpy.array(p2) - numpy.array(p1))
if w in wd.values():
k = list(wd.keys())[list(wd.values()).index(w)]
if k == 'DOWN': maze[ay][ax] = maze[ay][ax].replace('_', ' ')
if k == 'UP': maze[by][bx] = maze[by][bx].replace('_', ' ')
if k == 'LEFT': maze[ay][ax] = maze[ay][ax].replace('|', ' ')
if k == 'RIGHT': maze[by][bx] = maze[by][bx].replace('|', ' ')
return maze
def print_maze(maze, r, c, delay = 0):
s, l = min((r, c)), max((r, c))
a = 1 / (4 * r * c)
e = (1 / (s * l)) ** 2
delay = (a * 2.718 ** (-1 * e)) ** 0.5
time.sleep(delay)
print(' _' * c)
for iy in range(r):
for ix in range(c + 1):
print(maze[iy][ix], end = '')
print('')
print('')
def main():
r = 30
c = 30
sz = r * c
path = kruskal(r, c, sz)
maze = create_maze(path, r, c, sz)
print_maze(maze, r, c)
if __name__ == "__main__":
main()

I tried to apply function "cliff" from installed package named "lace", I got the following error "TypeError: 'map' object is not subscriptable"

I read about the error and try to cast map into list, but the error still appeared, I will show you the main file that contain the error.
def power(L, C, Erange):
assert len(L) == len(C), "The L and C must be corresponded to each other"
E = copy.deepcopy(Erange)
E[0] -= 1
power_table = dict()
for c in set(C): # for each type of class
first = [index for index, eachc in enumerate(C) if eachc == c]
rest = [index for index, eachc in enumerate(C) if eachc != c]
p_first = len(first) / len(L)
p_rest = len(rest) / len(L)
powerc = []
for u, v in zip(E[0:-1], E[1:]): # checking the range (u,v]
like_first = sum([1 for i in first if u < L[i] <= v]) / len(first) * p_first
like_rest = sum([1 for i in rest if u < L[i] <= v]) / len(rest) * p_rest
try:
powerc.append((like_first ** 2 / (like_first + like_rest)))
except ZeroDivisionError:
powerc.append(0)
power_table[c] = powerc
power = []
for l, c in zip(L, C):
for e_cursor in range(len(E)):
if E[e_cursor] >= l: break
power.append(round(power_table[c][e_cursor - 1], 2))
return power
def cliff_core(data, percentage, obj_as_binary, handled_obj=False):
if len(data) < 50:
logging.debug("no enough data to cliff. return the whole dataset")
return range(len(data))
classes = map(toolkit.str2num, zip(*data)[-1])
if not handled_obj:
if obj_as_binary:
classes = [1 if i > 0 else 0 for i in classes]
else:
classes = toolkit.apply_bin_range(classes)
data_power = list()
for col in zip(*data):
col = map(toolkit.str2num, col)
E = toolkit.binrange(col)
data_power.append(power(col, classes, E))
data_power = map(list, zip(*data_power)) # transposing the data power
row_sum = [sum(row) for row in data_power]
index = range(len(data))
zips = zip(data, classes, row_sum, index)
output = list()
for cls in set(classes):
matched = filter(lambda z: z[1] == cls, zips)
random.shuffle(matched)
matched = sorted(matched, key=lambda z: z[2], reverse=True)
if len(matched) < 5:
output.extend([m[3] for m in matched]) # all saved
continue
for i in range(int(len(matched) * percentage)):
output.append(matched[i][3])
return sorted(output)
def cliff(attribute_names,data_matrix,independent_attrs,objective_attr,objective_as_binary=False,
cliff_percentage=0.4):
ori_attrs, alldata = attribute_names, data_matrix # load the database
alldata_t = map(list, zip(*alldata))
valued_data_t = list()
for attr, col in zip(ori_attrs, alldata_t):
if attr in independent_attrs:
valued_data_t.append(col)
valued_data_t.append(alldata_t[attribute_names.index(objective_attr)])
alldata = map(list, zip(*valued_data_t))
alldata = map(lambda row: map(toolkit.str2num, row), alldata) # numbering the 2d table
after_cliff = cliff_core(alldata, cliff_percentage, objective_as_binary)
res = [data_matrix[i] for i in after_cliff]
return res

How to define a callable derivative function

For a project I am working on, I am creating a class of polynomials that I can operate on. The polynomial class can do addition, subtraction, multiplication, synthetic division, and more. It also represents it properly.
For the project, we are required to do create a class for Newton's Method. I was able to create a callable function class for f, such that
>f=polynomial(2,3,4)
>f
2+3x+4x^2
>f(3)
47
I have a derivative function polynomial.derivative(f) outputs 3+8x.
I want to define a function labeled Df so that in my Newtons Method code, I can say, Df(x). It would work so that if x=2:
>Df(2)
19
The derivative of a polynomial is still a polynomial. Thus, instead of returning the string 3+8x, your polynomial.derivative function should return a new polynomial.
class polynomial:
def __init__(c, b, a):
self.coefs = [c, b, a]
[...]
def derivative(self):
return polynomial(*[i*c for i,c in enumerate(self.coefs) if i > 0], 0)
Hence you can use it as follow:
> f = polynomial(2, 3, 4)
> Df = f.derivative()
> f
2+3x+4x^2
> Df
3+8x+0x^2
> f(3)
47
> Df(2)
19
Edit
Of course, it is enumerate and not enumerates. As well, the __init__ misses the self argument. I code this directly on SO without any syntax check.
Of course you can write this in a .py file. Here is a complete working example:
class Polynomial:
def __init__(self, c, b, a):
self.coefs = [c, b, a]
self._derivative = None
#property
def derivative(self):
if self._derivative is None:
self._derivative = Polynomial(*[i*c for i,c in enumerate(self.coefs) if i > 0], 0)
return self._derivative
def __str__(self):
return "+".join([
str(c) + ("x" if i > 0 else "") + (f"^{i}" if i > 1 else "")
for i, c in enumerate(self.coefs)
if c != 0
])
def __call__(self, x):
return sum([c * (x**i) for i, c in enumerate(self.coefs)])
if __name__ == '__main__':
f = Polynomial(2, 3, 4)
print(f"f: y={f}")
print(f"f(3) = {f(3)}")
print(f"f': y={f.derivative}")
print(f"f'(2) = {f.derivative(2)}")
f: y=2+3x+4x^2
f(3) = 47
f': y=3+8x
f'(2) = 19
You can rename the property with the name you prefer: derivative, Df, prime, etc.

How can I find correct parameters in cvxopt.solvers.qp() for SVM?

I have some data and I want to classification like svm with cvxopt function.
In documentation of cvxopt.solvers.qp, there were some matrixes with vectorized and transposed.
How can I find correct params (P, q, G, h, A, b) when I know n_samples and n_features?
solution = cvxopt.solvers.qp(P, q, G, h, A, b)
Solved it. I got a hint how to set parameters of cvx.opt from here. Code is as bellows.
I made a matrix (n_samples, n_samples) and get other parameters with cvxopt.matrix.
class SVM(object):
def __init__(self, kernel=linear_kernel, C=None):
self.kernel = kernel
self.C = C
if self.C is not None: self.C = float(self.C)
def fit(self, X, y):
n_samples, n_features = X.shape
# Gram matrix
K = np.zeros((n_samples, n_samples))
for i in range(n_samples):
for j in range(n_samples):
K[i,j] = self.kernel(X[i], X[j])
P = cvxopt.matrix(np.outer(y,y) * K)
q = cvxopt.matrix(np.ones(n_samples) * -1)
A = cvxopt.matrix(y, (1,n_samples))
b = cvxopt.matrix(0.0)
if self.C is None:
G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))
h = cvxopt.matrix(np.zeros(n_samples))
else:
tmp1 = np.diag(np.ones(n_samples) * -1)
tmp2 = np.identity(n_samples)
G = cvxopt.matrix(np.vstack((tmp1, tmp2)))
tmp1 = np.zeros(n_samples)
tmp2 = np.ones(n_samples) * self.C
h = cvxopt.matrix(np.hstack((tmp1, tmp2)))

How to modify my K-Means clustering algorithm to increase the dimensions upto 8?

I have created my k means algorithm for 2 dimensions. I want to modify it for 8 dimensions i.e. the datapoints can take 8-dimensional values and finally return 8-dimensional centroid values.
The code is following :
import random
import math
# Input varibles
#k = 3
#Threshold = 1
DATA = [[2, 1, 1, 2, 1, 1, 1, 5], [ 6, 8, 1, 3, 4, 3, 7, 1],[4, 1, 3, 2, 1, 3, 1, 1],[3, 1, 1, 2, 1, 2, 1, 1],[3 ,1 ,1 ,1, 1, 2, 1, 1],[6, 1, 1, 1, 1, 7, 1, 1],[6, 10, 2, 8, 10, 7, 3, 3]]
BIG_NUMBER = math.pow(10, 10)
data = []
centroids = []
class DataPoint:
def __init__(self, x, y):
self.x = x
self.y = y
def set_x(self, x):
self.x = x
def get_x(self):
return self.x
def set_y(self, y):
self.y = y
def get_y(self):
return self.y
def set_cluster(self, clusterNumber):
self.clusterNumber = clusterNumber
def get_cluster(self):
return self.clusterNumber
class Centroid:
def __init__(self, x, y):
self.x = x
self.y = y
def set_x(self, x):
self.x = x
def get_x(self):
return self.x
def set_y(self, y):
self.y = y
def get_y(self):
return self.y
# Initializing The Centroids
def initialize_centroids(k,DATA):
#find data range in x and y
max_x = max(x for x,y in DATA)
max_y = max(y for x,y in DATA)
min_x = min(x for x,y in DATA)
min_y = min(y for x,y in DATA)
#chosse random x and y between this data range
#assign to centroids
for j in range(k):
#x = random.choice(DATA)
random_x = random.uniform(min_x,max_x)
random_y = random.uniform(min_y,max_y)
centroids.append(Centroid(random_x, random_y))
#print("(", centroids[j].get_x(), ",", centroids[j].get_y(), ")")
return centroids
# Assigning Datapoints to nearest Centroids
def initialize_datapoints(k,DATA):
for i in range(len(DATA)):
newpoint = DataPoint(DATA[i][0], DATA[i][1])
bestMinimum = BIG_NUMBER
data.append(newpoint)
for j in range(k):
distance = get_distance(newpoint.get_x(), newpoint.get_y(), centroids[j].get_x(), centroids[j].get_y())
if(distance < bestMinimum):
bestMinimum = distance
newpoint.set_cluster(j)
return
# Calculating Euclidean distance
def get_distance(dataPointX, dataPointY, centroidX, centroidY):
return math.sqrt(math.pow((centroidY - dataPointY), 2) + math.pow((centroidX - dataPointX), 2))
# Updating Centroid and Clusters till the threshold is met
def update_centroids_n_clusters(k,DATA,Threshold):
dist = 0.0
#print ("a")
for j in range(k):
prev_x = centroids[j].get_x()
prev_y = centroids[j].get_y()
totalX = 0
totalY = 0
totalInCluster = 0
for z in range(len(data)):
if (data[z].get_cluster() == j):
totalX += data[z].get_x()
totalY += data[z].get_y()
totalInCluster += 1
if (totalInCluster > 0):
s_x = (totalX / totalInCluster)
s_y = (totalY / totalInCluster)
centroids[j].set_x(s_x)
centroids[j].set_y(s_y)
x1 = centroids[j].get_x()
y1 = centroids[j].get_y()
x2 = prev_x
y2 = prev_y
dist += get_distance(x1,y1,x2,y2)
conv_val = (1/k)*dist
if(conv_val >= Threshold):
for i in range(len(DATA)):
bestMinimum = BIG_NUMBER
currentCluster = 0
for j in range(k):
distance = get_distance(data[i].get_x(), data[i].get_y(), centroids[j].get_x(), centroids[j].get_y())
if (distance < bestMinimum):
bestMinimum = distance
currentCluster = j
data[i].set_cluster(currentCluster)
update_centroids_n_clusters(k, DATA, Threshold)
return
# Performing K_Means
def Kmeans(k, DATA, Threshold):
initialize_centroids(k,DATA)
initialize_datapoints(k, DATA)
update_centroids_n_clusters(k, DATA, Threshold)
for i in range(k):
p = 0
print()
print("Centroid ", i, " is at")
print("(",centroids[i].get_x(), ",", centroids[i].get_y(), ")")
print("Cluster ", i, " includes:")
for j in range(len(DATA)):
if (data[j].get_cluster() == i):
#print("(", data[j].get_x(), ", ", data[j].get_y(), ")")
p += 1
print(p,"points")
return
Kmeans(3,DATA,0.1)
How should I modify my class Centroid and class DataPoint in this code? Thanks!!
Note: The code is in Python 3
Use arrays instead of x and y.
You want e.g. your distance function to be
def distance(array1, array2):
return (array1 - array2)**2
(assuming you use numpy)

Resources