How can I pass different types of parameters (ex: array) into a functional class? - python-3.x

I am trying to learn how to group functions by class. As an example, I tried to code a generalized least squares method to find the equation of a best-fitting line between a set of (x,y) coordinates. For my particular case, I chose a simple line y = x + 5, so slope should be close to 1 and y-intercept should be close to 5. Running my attempt at a coded solution below produces the error TypeError: set_x() takes 1 positional argument but 2 were given, though I am trying to pass an array of x-points. How can I circumvent this error?
import numpy as np
from scipy.optimize import minimize
class GeneralizedLeastSquares:
    """Fit a straight line ``y = slope * x + intercept`` to (x, y) points.

    The fit minimises one of two error measures selected by ``method``:

    * ``'residuals'``   -- weighted sum of squared residuals,
    * ``'std of mean'`` -- standard error of ``y_true`` (independent of the
      line parameters, so the optimiser leaves the initial guess in place).

    Attributes:
        residuals: squared differences between ``y_true`` and ``y_fit``.
        parameters: ``[slope, intercept]`` or the string ``'estimate'``.
        x: sequence of x-coordinates.
        y_true: sequence of observed y-values.
        y_fit: y-values predicted by the current parameters.
        weights: per-point weights applied to the squared residuals.
        method: name of the error measure to minimise.
    """

    def __init__(self, residuals=None, parameters=None, x=None, y_true=None, y_fit=None, weights=None, method=None):
        self.residuals = residuals
        self.parameters = parameters
        self.x = x
        self.y_true = y_true
        self.y_fit = y_fit
        self.weights = weights
        self.method = method

    def set_residuals(self, residuals):
        """Store the squared residuals."""
        self.residuals = residuals

    def set_parameters(self, parameters):
        """Store ``[slope, intercept]`` or the string ``'estimate'``."""
        self.parameters = parameters

    def set_x(self, x):
        """Store the x-coordinates."""
        self.x = x

    def set_y_true(self, y_true):
        """Store the observed y-values."""
        self.y_true = y_true

    def set_y_fit(self, y_fit):
        """Store the fitted y-values."""
        self.y_fit = y_fit

    def set_weights(self, weights):
        """Store the per-point weights."""
        self.weights = weights

    def set_method(self, method):
        """Store the name of the error measure."""
        self.method = method

    def get_residuals(self):
        """Return the squared differences between ``y_true`` and ``y_fit``.

        Raises:
            ValueError: if ``y_true`` and ``y_fit`` differ in length (the
                previous implementation silently returned an empty list).
        """
        if len(self.y_true) != len(self.y_fit):
            raise ValueError("y_true and y_fit must have the same length")
        return [(observed - fitted) ** 2 for observed, fitted in zip(self.y_true, self.y_fit)]

    def get_parameters(self):
        """Return the stored parameters."""
        return self.parameters

    def get_x(self):
        """Return the stored x-coordinates."""
        return self.x

    def get_y_true(self):
        """Return the stored observed y-values."""
        return self.y_true

    def get_y_fit(self):
        """Return the line ``parameters[0] * x + parameters[1]`` evaluated at every x."""
        slope, intercept = self.parameters[0], self.parameters[1]
        return [slope * x_value + intercept for x_value in self.x]

    def get_weights(self):
        """Return the stored weights."""
        return self.weights

    def update_weights(self):
        """Return weights proportional to ``|1 / residual|``, normalised to sum to 1.

        Every residual must be non-zero, since its reciprocal is taken.
        """
        inverse_residuals_abs = [abs(1 / residual) for residual in self.residuals]
        residual_abs_total = sum(inverse_residuals_abs)
        return [value / residual_abs_total for value in inverse_residuals_abs]

    def get_method(self):
        """Return the stored method name."""
        return self.method

    def get_error_by_residuals(self):
        """Return the weighted sum of the stored squared residuals."""
        return sum(weight * residual for weight, residual in zip(self.weights, self.residuals))

    def get_error_by_std_mean(self):
        """Return the standard deviation of ``y_true`` divided by ``sqrt(len(y_true))``."""
        return np.std(self.y_true) / np.sqrt(len(self.y_true))

    def get_linear_fit(self):
        """Optimise slope and intercept with Nelder-Mead and refresh the fit state.

        If ``parameters`` is ``'estimate'``, the initial guess is the line
        through the first and last data points.

        Returns:
            Tuple ``(parameters, y_fit, residuals, weights)`` after the fit.

        Raises:
            ValueError: if ``parameters`` is neither ``'estimate'`` nor a
                list/array, or if ``method`` is not a supported name.
        """
        # Test for str first: comparing an ndarray with a string is ambiguous.
        if isinstance(self.parameters, str) and self.parameters == 'estimate':
            slope_init = (self.y_true[-1] - self.y_true[0]) / (self.x[-1] - self.x[0])
            b_init = np.mean([self.y_true[-1] - slope_init * self.x[-1], self.y_true[0] - slope_init * self.x[0]])
            self.parameters = [slope_init, b_init]
        elif not isinstance(self.parameters, (list, np.ndarray)):
            raise ValueError("parameters = 'estimate' or [slope, y-intercept]")

        # Bound methods, so each error function already carries ``self``.
        error_funcs = {
            'residuals': self.get_error_by_residuals,
            'std of mean': self.get_error_by_std_mean,
        }
        if self.method not in error_funcs:
            raise ValueError("method = 'residuals' or 'std of mean'")
        error_func = error_funcs[self.method]

        def objective(trial_parameters):
            # minimize() passes only the trial parameter vector; refresh the
            # dependent state so the error function sees the trial line.
            self.parameters = list(trial_parameters)
            self.y_fit = self.get_y_fit()
            self.residuals = self.get_residuals()
            return error_func()

        res = minimize(objective, x0=list(self.parameters), method='Nelder-Mead')
        self.parameters = [res.x[0], res.x[1]]
        self.y_fit = self.get_y_fit()
        self.residuals = self.get_residuals()
        self.weights = self.update_weights()
        return self.parameters, self.y_fit, self.residuals, self.weights
x = np.linspace(0, 4, 5)
y_true = np.linspace(5, 9, 5) ## using slope=1, y-intercept=5
y_actual = np.array([4.8, 6.2, 7, 8.1, 8.9]) ## test data
GLS = GeneralizedLeastSquares()
GLS.set_x(x)
GLS.set_y_true(y_actual)
GLS.set_weights(np.ones(len(x)))
GLS.set_parameters('estimate')
# GLS.set_parameters([1.2, 4.9])
GLS.set_method('residuals')
results = GLS.get_linear_fit()
print(results)

Your method is not taking an argument. It should be:
def set_x(self, x):
self.x = x
Wrapping properties in get/set methods is a very Java / outdated way of doing things. It is much easier to access the underlying property outside of your class. I.e. rather than: GLS.set_x(12), consider the more Pythonic: GLS.x = 12. This way you don't have to write a get and set method for each property.
Also, it might make more sense to put the heavy-lifting method of your object, get_linear_fit, in the __call__ method. This way, you can run the regression by just typing GLS() rather than GLS.get_linear_fit().

Related

Control the `__getitem__` in custom dataset class based on sampling vector

I have custom datasets that have the __getitem__ method in them.
I created the following MUXDataset class, which is supposed to select a random dataset and get an item from that dataset:
class MUXDataset(Dataset):
    """Multiplex several datasets behind a single dataset interface.

    ``mux_dataset_params.datasets`` holds the wrapped datasets and
    ``mux_dataset_params.sampling`` is either the name of a weighting
    strategy (``'uniform'`` or ``'proportional'``) or a ready-made list of
    per-dataset weights. The resolved weights are kept in ``sampling_vec``.
    """

    def __init__(self, mux_dataset_params: "MultiplexDatasetParams") -> None:
        self._params = mux_dataset_params
        # A string names a strategy that must be expanded into weights;
        # anything else is taken as an explicit weight vector. (The original
        # condition was inverted and stored the strategy name itself.)
        if isinstance(mux_dataset_params.sampling, str):
            self.sampling_vec = self.init_sampling()
        else:
            self.sampling_vec = mux_dataset_params.sampling

    def init_sampling(self) -> List[float]:
        """Expand the configured strategy name into one weight per dataset.

        Raises:
            ValueError: if the strategy name is not supported.
        """
        datasets = self._params.datasets
        if self._params.sampling == 'uniform':
            num_datasets = len(datasets)
            sampling = [1 / num_datasets for _ in datasets]
        elif self._params.sampling == 'proportional':
            size_vec = [len(_d) for _d in datasets]
            total_size = sum(size_vec)
            # Each weight is one minus the dataset's share of all items,
            # then the weights are renormalised to sum to one.
            sampling = [1 - _s / total_size for _s in size_vec]
            normaliser = sum(sampling)
            if normaliser == 0:
                # A single dataset owns the whole share, leaving nothing to
                # normalise by; fall back to equal weights.
                sampling = [1 / len(size_vec) for _ in size_vec]
            else:
                sampling = [_s / normaliser for _s in sampling]
        else:
            raise ValueError(f'{self._params.sampling} is not supported')
        return sampling

    def __len__(self) -> int:
        """Return the number of wrapped datasets."""
        return len(self._params.datasets)

    def __getitem__(self, idx):
        """Return a uniformly random item from the dataset at position ``idx``."""
        curr_ds = self._params.datasets[idx]
        return curr_ds.__getitem__(random.randint(0, len(curr_ds) - 1))
Now I want the __getitem__ of MUXDataset to choose the dataset based on the sampling vector, but I couldn't find a way to implement it within the class.
I tried the following inside the __getitem__:
def __getitem__(self, idx):
ds_idx = random.choices(population, weights=self.sampling, k=1)
curr_ds = self._params.datasets[ds_idx]
return curr_ds.__getitem__(random.randint(0, len(curr_ds) - 1))

Using self in init part of a class in Python

Is there any difference between the following two codes related to initializing a class in Python?
class summation:
def __init__(self, f, s):
self.first = f
self.second = s
self.summ = self.first + self.second
.
.
.
class summation:
def __init__(self, f, s):
self.first = f
self.second = s
self.summ = f + s
.
.
.
If there exists any difference, what is that, and which code is preferable?
Edit: I am going to write an artificial neural network with Python (and Pytorch). In fact, the above two codes are just some examples. In the actual case, I have seen in various resources that when there exists self.input = input in the initialization of a class, in other parts it is used as self.input, not input.
My questions: What are the differences between these two approaches? Why is the use of self.input preferable, in my case?
Example: (from https://docs.dgl.ai/en/latest/tutorials/models/1_gnn/4_rgcn.html#sphx-glr-tutorials-models-1-gnn-4-rgcn-py)
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
import dgl.function as fn
from functools import partial
class RGCNLayer(nn.Module):
    """Relational graph convolution layer with optional basis decomposition.

    Args:
        in_feat: input feature size.
        out_feat: output feature size.
        num_rels: number of relation types.
        num_bases: number of weight bases; values ``<= 0`` or ``> num_rels``
            are replaced by ``num_rels`` (no decomposition).
        bias: truthy to add a learnable bias of size ``out_feat``.
        activation: optional callable applied to the node features.
        is_input_layer: if true, messages are built by embedding lookup.
    """

    def __init__(self, in_feat, out_feat, num_rels, num_bases=-1, bias=None,
                 activation=None, is_input_layer=False):
        super(RGCNLayer, self).__init__()
        self.in_feat = in_feat
        self.out_feat = out_feat
        self.num_rels = num_rels
        self.num_bases = num_bases
        self.activation = activation
        self.is_input_layer = is_input_layer

        # sanity check
        if self.num_bases <= 0 or self.num_bases > self.num_rels:
            self.num_bases = self.num_rels

        # weight bases in equation (3)
        self.weight = nn.Parameter(torch.Tensor(self.num_bases, self.in_feat,
                                                self.out_feat))
        if self.num_bases < self.num_rels:
            # linear combination coefficients in equation (3)
            self.w_comp = nn.Parameter(torch.Tensor(self.num_rels, self.num_bases))

        # add bias; the attribute is either a Parameter or None so later
        # checks never truth-test a multi-element tensor
        self.bias = nn.Parameter(torch.Tensor(out_feat)) if bias else None

        # init trainable parameters
        nn.init.xavier_uniform_(self.weight,
                                gain=nn.init.calculate_gain('relu'))
        if self.num_bases < self.num_rels:
            nn.init.xavier_uniform_(self.w_comp,
                                    gain=nn.init.calculate_gain('relu'))
        if self.bias is not None:
            # Xavier init needs at least two dimensions, so the 1-D bias
            # is zero-initialised instead.
            nn.init.zeros_(self.bias)

    def forward(self, g):
        """Run one round of message passing on graph ``g`` via ``g.update_all``."""
        if self.num_bases < self.num_rels:
            # generate all weights from bases (equation (3))
            weight = self.weight.view(self.in_feat, self.num_bases, self.out_feat)
            weight = torch.matmul(self.w_comp, weight).view(self.num_rels,
                                                            self.in_feat, self.out_feat)
        else:
            weight = self.weight

        if self.is_input_layer:
            def message_func(edges):
                # for input layer, matrix multiply can be converted to be
                # an embedding lookup using source node id
                embed = weight.view(-1, self.out_feat)
                index = edges.data['rel_type'] * self.in_feat + edges.src['id']
                return {'msg': embed[index] * edges.data['norm']}
        else:
            def message_func(edges):
                w = weight[edges.data['rel_type']]
                # squeeze only the dimension added by unsqueeze(1); a bare
                # squeeze() would also drop a size-1 edge or feature axis
                msg = torch.bmm(edges.src['h'].unsqueeze(1), w).squeeze(1)
                msg = msg * edges.data['norm']
                return {'msg': msg}

        def apply_func(nodes):
            h = nodes.data['h']
            if self.bias is not None:
                h = h + self.bias
            if self.activation:
                h = self.activation(h)
            return {'h': h}

        g.update_all(message_func, fn.sum(msg='msg', out='h'), apply_func)
No, with this level of information there is no difference between these two approaches in your case. But could there be one? Yes, there could, if the attributes have setters or getters that modify the values. Later in my answer I'll show you how.
First of all, I prefer using this one:
class summation:
    """Pair of addends whose sum is computed on demand."""

    def __init__(self, f, s):
        self.first = f
        self.second = s

    @property
    def summ(self):
        """Return the current value of ``first + second``."""
        return self.first + self.second
The above implementation calculates the summation on demand, so when you change self.first or self.second, summ is recalculated automatically. You can access the sum as you did before.
s = summation(1,9)
print(s.summ)
# 10
s.first = 2
s.second = 3
print(s.summ)
# 5
So, How could they be different?
Let's implement them as follows. In the setters I double the inputs to show you how setters can affect the results. It's just an imaginary example and is not exactly what you wrote.
class summation1:
    """Sum computed from the attributes, so it reflects what the setters stored."""

    def __init__(self, f, s):
        self.first = f
        self.second = s
        # Reads back through the properties, i.e. the doubled values.
        self.summ = self.first + self.second

    @property
    def first(self):
        """Return the stored first addend."""
        return self.__first

    @first.setter
    def first(self, f):
        # Doubling on assignment is deliberate: it makes the setter's effect visible.
        self.__first = f * 2

    @property
    def second(self):
        """Return the stored second addend."""
        return self.__second

    @second.setter
    def second(self, s):
        self.__second = s * 2
class summation2:
    """Sum computed from the raw constructor arguments, bypassing the setters."""

    def __init__(self, f, s):
        self.first = f
        self.second = s
        # Uses the raw arguments, so the doubling done by the setters is not seen.
        self.summ = f + s

    @property
    def first(self):
        """Return the stored first addend."""
        return self.__first

    @first.setter
    def first(self, f):
        # Doubling on assignment is deliberate: it makes the setter's effect visible.
        self.__first = f * 2

    @property
    def second(self):
        """Return the stored second addend."""
        return self.__second

    @second.setter
    def second(self, s):
        self.__second = s * 2
now let's take a look at the outputs:
a = 3
b = 2
s1 = summation1(a,b)
s2 = summation2(a,b)
print(s1.summ)
# 10
print(s2.summ)
# 5
so, if you are not sure what to choose between those two, maybe the first approach is what you need.

Pytorch autograd: Make gradient of a parameter a function of another parameter

In Pytorch, how can I make the gradient of a parameter a function itself?
Here is a simple code snippet:
import torch
def fun(q):
    """Build a closure that back-propagates ``w * q`` and returns ``w.grad``."""

    def result(w):
        product = torch.mul(w, q)
        # backward() accumulates into the .grad of both w and q.
        product.backward()
        return w.grad

    return result
w = torch.tensor((2.), requires_grad=True)
q = torch.tensor((3.), requires_grad=True)
f = fun(q)
print(f(w))
In the code above, how can I make f(w) have gradient with respect to q?
EDIT: based on the accepted answer I was able to write a code that works. Essentially I am alternating between 2 optimization steps. For dim == 1 it works and for dim == 2 it does not. I get the error "RuntimeError: Trying to backward through the graph a second time, but the saved intermediate results have already been freed. Specify retain_graph=True when calling backward the first time."
import torch
class f_class():
    """Holder for one weight (dim 1) or a pair of weights (dim 2) that scales an input."""

    def __init__(self, dim):
        self.dim = dim
        if dim not in (1, 2):
            raise ValueError("dim 1 or 2")
        leading = torch.tensor((3.), requires_grad=True)
        if dim == 1:
            self.w = leading
        else:
            self.w = [leading, torch.tensor((5.), requires_grad=True)]

    def forward(self, x):
        """Return ``w * x`` for dim 1 and ``w[0] * w[1] * x`` for dim 2."""
        if self.dim == 1:
            factor = self.w
        elif self.dim == 2:
            factor = torch.mul(self.w[0], self.w[1])
        else:
            return None
        return torch.mul(factor, x)

    def set_w(self, w):
        """Replace the stored weight(s)."""
        self.w = w

    def get_w(self):
        """Return the stored weight(s)."""
        return self.w
class g_class():
    """Holder for a single scalar ``q`` that scales whatever it is given."""

    def __init__(self):
        self.q = torch.tensor((4.), requires_grad=True)

    def forward(self, f):
        """Return ``q * f``."""
        scaled = torch.mul(self.q, f)
        return scaled

    def set_q(self, q):
        """Replace the stored scalar."""
        self.q = q

    def get_q(self):
        """Return the stored scalar."""
        return self.q
def w_new(f, g, dim):
    """Return the weight(s) of ``f`` shifted by the gradient of ``g(f(xd))``.

    The gradient is taken with ``create_graph=True``, so the result stays
    connected to the graph. For ``dim == 1`` a single tensor is returned,
    otherwise a list with one tensor per weight.
    """
    loss_g = g.forward(f.forward(xd))
    all_grads = torch.autograd.grad(loss_g, f.get_w(), create_graph=True, only_inputs=True)
    if dim == 1:
        return f.get_w().detach() + all_grads[0]
    return [wi.detach() + gi for wi, gi in zip(f.get_w(), all_grads)]
def q_new(f, g):
    """Return a fresh leaf tensor equal to ``q`` plus the gradient of ``2 * f(xd)``.

    ``backward()`` is called on the loss, and the gradient is then read from
    the ``.grad`` of the scalar held by ``g``.
    """
    loss_f = 2 * f.forward(xd)
    loss_f.backward()
    current_q = g.get_q()
    updated = current_q.detach() + g.get_q().grad
    # Mark the sum as requiring grad so it can act as the next trainable q.
    updated.requires_grad = True
    return updated
dim = 1
xd = torch.tensor((2.))
f = f_class(dim)
g = g_class()
for _ in range(3):
print(f.get_w(), g.get_q())
wnew = w_new(f, g, dim)
f.set_w(wnew)
print(f.get_w(), g.get_q())
qnew = q_new(f, g)
g.set_q(qnew)
print(f.get_w(), g.get_q())
When computing gradients, if you want to construct a computation graph for the gradient itself you need to specify create_graph=True to autograd.
A potential source of error in your code is using Tensor.backward within f. The problem here is that w.grad and q.grad will be populated with the gradient of l. This means that when you call f(w).backward(), the gradients of both f and l will be added to w.grad and q.grad. In effect you will end up with w.grad being equal to dl/dw + df/dw and similarly for q.grad. One way to get around this is to zero the gradients after f(w) but before .backward(). A better way is to use torch.autograd.grad within f. Using the latter approach, the grad attribute of w and q will not be populated when calling f, only when calling .backward(). This leaves room for things like gradient accumulation during training.
import torch
def fun(q):
    """Build a closure returning ``d(w * q)/dw`` as a differentiable tensor."""

    def result(w):
        l = w * q
        # create_graph=True builds a graph for the gradient itself, so the
        # caller can call .backward() on it; retain_graph=True alone gives a
        # tensor with no grad_fn and .backward() raises.
        return torch.autograd.grad(l, w, create_graph=True)[0]

    return result
w = torch.tensor((2.), requires_grad=True)
q = torch.tensor((3.), requires_grad=True)
f = fun(q)
f(w).backward()
print('w.grad:', w.grad)
print('q.grad:', q.grad)
which results in
w.grad: None
q.grad: tensor(1.)
Note that w.grad was not populated. This is because f(w) = dl/dw = q is not a function of w, and therefore w is not part of the computation graph. If you're using a standard pytorch optimizer this is fine since None gradients are implicitly assumed to be zero.
If l were instead a non-linear function of w, then w.grad would have been populated after f(w).backward(). For example
import torch
def fun(q):
    """Build a closure returning ``d(w**2 * q)/dw`` as a differentiable tensor."""

    def result(w):
        # now dl/dw = 2 * w * q
        squared_term = torch.pow(w, 2) * q
        (grad_w,) = torch.autograd.grad(squared_term, w, only_inputs=True, create_graph=True)
        return grad_w

    return result
w = torch.tensor((2.), requires_grad=True)
q = torch.tensor((3.), requires_grad=True)
f = fun(q)
f(w).backward()
print('w.grad:', w.grad)
print('q.grad:', q.grad)
which results in
w.grad: tensor(6.)
q.grad: tensor(4.)

sigmoid() takes 1 positional argument but 2 were given

Why am I getting this error
sigmoid() takes 1 positional argument but 2 were given
while using function yHat = NN.forward(X)??
# Two-layer network as posted in the question: 2 inputs, 3 hidden units, 1 output.
# The code is kept exactly as asked about; the NOTE(review) comments mark the defects.
class Neural_Networks(object):
def __init__(self):
self.inputLayerSize =2
self.outputLayerSize =1
self.hiddenLayerSize = 3
# weights: W1 is (inputLayerSize, hiddenLayerSize), W2 is (hiddenLayerSize, outputLayerSize)
self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)
def forward(self,X):
# propagates input through network
self.z2 = np.dot(X, self.W1)
self.a2 = self.sigmoid( self.z2 )
# NOTE(review): stored as self.Z3 (capital Z) but read back as self.z3 on the next line.
self.Z3 = np.dot(self.a2,self.W2)
yHat = self.sigmoid(self.z3)
return yHat
# NOTE(review): declared without `self` but called as self.sigmoid(...) above, so the
# instance is passed as `z` and the call receives two positional arguments.
def sigmoid(z):
return 1/(1+np.exp(-z))
You are using it as an instance method so you must include self as the first argument
class Neural_Networks(object):
    """Two-layer feed-forward network: 2 inputs, 3 hidden units, 1 output."""

    def __init__(self):
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayerSize = 3
        # weights
        self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)

    def forward(self, X):
        """Propagate ``X`` through the network and return the output activations."""
        self.z2 = np.dot(X, self.W1)
        self.a2 = self.sigmoid(self.z2)
        # Stored under the same lower-case name that is read on the next line
        # (the original assigned self.Z3 but read self.z3).
        self.z3 = np.dot(self.a2, self.W2)
        yHat = self.sigmoid(self.z3)
        return yHat

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))``."""
        return 1 / (1 + np.exp(-z))
Conversely, if you want to use sigmoid as a static method, then you'll need to add a @staticmethod decorator to it, e.g.:
#staticmethod
def sigmoid(z):
return 1/(1+np.exp(-z))
Making it a static method is likely the right option since you don't use self in the method.

A weird error with updates in theano

I designed a variable net, but I ran into some problems with Theano. The general idea is that different inputs produce different nets that share the same parameters, something like a recursive neural network with an auto-encoder.
There are two cases in my code: one case runs combine_feat_gt1_1() if c > 1, and the other case runs combine_feat_gt1_0().
Strangely, the code runs without errors if I comment out updates=updates, which is not what I want (the train_test theano function in the code). However, if I uncomment updates=updates, an error occurs (the train_test_bug theano function in the code). The latter is the one I'd like to implement.
I have already spent several days on this bug. Can anyone help me? I would appreciate it.
import os
import sys
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from theano.ifelse import ifelse
# Builds a Theano expression over the shared parameters W_r and b_r and derives a
# summed squared-error cost together with gradient-descent updates for them.
class Test(object):
def __init__(
self,
numpy_rng,
input=None,
output=None,
n_output=6,
n_input=3,
n_group=2,
W_r=None,
b_r=None
):
self.n_output = n_output
self.n_input = n_input
self.n_group = n_group
# No W_r supplied: draw an (n_input, n_input) matrix uniformly from
# +/- 4 * sqrt(6 / (n_input + n_input)) and wrap it in a shared variable.
if not W_r:
initial_W_r = numpy.asarray(
numpy_rng.uniform(
low=-4 * numpy.sqrt(6. / (n_input + n_input)),
high=4 * numpy.sqrt(6. / (n_input + n_input)),
size=(n_input, n_input)
),
dtype=theano.config.floatX
)
W_r = theano.shared(value=initial_W_r, name='W_r', borrow=True)
# No b_r supplied: use a zero vector of length n_input as a shared variable.
if not b_r:
b_r = theano.shared(
value=numpy.zeros(
n_input,
dtype=theano.config.floatX
),
borrow=True
)
self.W_r = W_r
self.b_r = b_r
# Fall back to fresh symbolic variables when the caller passes none.
if input is None:
self.x = T.tensor4(name='input', dtype=theano.config.floatX)
else:
self.x = input
if output is None:
self.y = T.matrix(name='output', dtype=theano.config.floatX)
else:
self.y = output
# Parameters that get_cost_updates differentiates the cost with respect to.
self.params = [self.W_r, self.b_r]
# Returns the symbolic feature row for `input`, choosing between the two
# combine_* expressions on whether c (the third entry of input.shape) exceeds 1.
def get_output_values(self, input):
# Only c is used below.
a, b, c, d = input.shape
# One scan step: the previous state and the current element are both multiplied
# by wr, and hr is added.
def recusive(x_t, h_tm1, wr, hr):
h_t = T.dot(h_tm1, wr) + T.dot(x_t, wr) + hr
return h_t
# Scans over data[1:] with data[0] as the initial state and returns the last state.
def combine_recusive(data):
hidden, _ = theano.scan(fn=recusive,
sequences=data[1:],
outputs_info=data[0],
non_sequences=[self.W_r, self.b_r],
n_steps=data[1:].shape[0],
strict=True)
return hidden[-1]
# Applies combine_recusive to each slice of input[0] and flattens the result to one row.
def combine_feat_gt1_1(input):
feats, _ = theano.scan(fn=combine_recusive,
sequences=input[0],
outputs_info=None,
n_steps=input[0].shape[0])
# NOTE(review): recusive_flag is assigned but never read.
recusive_flag = T.ones(1)
return T.reshape(feats, (1,-1)) # concatenation
# Flattens input[0] to one row without any scan.
def combine_feat_gt1_0(input):
feats = input[0]
# NOTE(review): recusive_flag is assigned but never read.
recusive_flag = T.zeros(1)
return T.reshape(feats, (1,-1)) # concatenation
# Both branch expressions are constructed here; ifelse selects between them on c > 1.
feat = ifelse(T.gt(c, 1), combine_feat_gt1_1(input), combine_feat_gt1_0(input))
# debug code snippet
self.debug_ifelse = theano.function([input], T.gt(c, 1))
self.debug_1_0 = theano.function([input], ifelse(T.gt(c, 1), 1, 0))
return feat
# Returns (cost, updates): the summed squared error against self.y and one
# (param, param - learning_rate * gradient) pair per entry of self.params.
def get_cost_updates(self):
learning_rate = 0.1
self.y_given_x = self.get_output_values(self.x)
cost = T.sum(( self.y_given_x - self.y) ** 2)
gparams = T.grad(cost, self.params)
updates = [
(param, param - learning_rate * gparam)
for param, gparam in zip(self.params, gparams)
]
return (cost, updates)
if __name__ == "__main__":
toy_data = numpy.array([[[[1,1,1],[2,2,2]], [[3, 4,5],[4,5,6]]]],dtype=theano.config.floatX)
lable = numpy.array([[1,2,3,4,5,6]],dtype=theano.config.floatX)
toy_data2 = numpy.array([[[[1,1,1]], [[3,4,5]]]],dtype=theano.config.floatX)
lable2 = numpy.array([[6,5,4,3,2,1]],dtype=theano.config.floatX)
x = T.tensor4('x', dtype=theano.config.floatX)
y = T.matrix('y', dtype=theano.config.floatX)
newX = T.tensor4(dtype=x.dtype)
newY = T.matrix(dtype=y.dtype)
rng = numpy.random.RandomState(123)
test = Test(
numpy_rng=rng,
input=x,
output=y,
n_group=2,
n_input=3,
n_output=6
)
cost, updates= test.get_cost_updates()
train_test = theano.function(
[newX, newY],
cost,
# updates=updates,
givens={
x : newX,
y : newY
}
)
train_test_bug = theano.function(
[newX, newY],
cost,
updates=updates,
givens={
x : newX,
y : newY
}
)
print train_test(toy_data, lable)
print train_test(toy_data2, lable2)
# code with bug
# print train_test_bug(toy_data, lable)
# print train_test_bug(toy_data2, lable2)
EDIT (by @danielrenshaw)
I've cut the code down to a simpler demonstration of the problem.
The cause is in the gradient computation of a double-nested scan expression. The problem disappears when a modified inner-most recursive expression is used (see comments in first function below).
import numpy
import theano
import theano.tensor as tt
import theano.ifelse
# Inner scan step: returns the previous state h_tm1 multiplied by w, plus the
# current element x_t_t.
def inner_scan_step(x_t_t, h_tm1, w):
# Fails when using this recursive expression
h_t = tt.dot(h_tm1, w) + x_t_t
# No failure when using this recursive expression
# h_t = h_tm1 + tt.dot(x_t_t, w)
return h_t
# Outer scan step: runs the inner scan over x_t[1:], seeded with x_t[0] as the
# initial state, and returns the last state.
def outer_scan_step(x_t, w):
h, _ = theano.scan(inner_scan_step,
sequences=[x_t[1:]],
outputs_info=[x_t[0]],
non_sequences=[w],
strict=True)
return h[-1]
# Scans outer_scan_step over x and returns the gradient of the summed features
# with respect to w (the gradient goes through both nested scans).
def get_outputs(x, w):
features, _ = theano.scan(outer_scan_step,
sequences=[x],
non_sequences=[w],
strict=True)
return tt.grad(features.sum(), w)
# Compiles get_outputs for a (2, 2, 3) test input and a 3x3 all-ones shared w,
# then prints the result. Uses the Python 2 print statement.
def main():
# Evaluate test values while the graph is built, raising if one is missing.
theano.config.compute_test_value = 'raise'
x_value = numpy.arange(12, dtype=theano.config.floatX).reshape((2, 2, 3))
x = tt.tensor3()
x.tag.test_value = x_value
w = theano.shared(value=numpy.ones((3, 3), dtype=theano.config.floatX), borrow=True)
f = theano.function(inputs=[x], outputs=get_outputs(x, w))
print f(x_value)
if __name__ == "__main__":
main()
I solved the reduced problem posted by danielrenshaw. When I pass a separate h0 as outputs_info, it works. Before that I used the first element of the sequence as outputs_info, which I think caused the error. But I still cannot solve my original problem.
import numpy
import theano
import theano.tensor as tt
import theano.ifelse
# Inner scan step: returns the previous state h_tm1 multiplied by w, plus the
# current element x_t_t.
# NOTE(review): the two comments below were carried over from the earlier snippet;
# the accompanying text says this variant works once h0 is passed as outputs_info.
def inner_scan_step(x_t_t, h_tm1, w):
# Fails when using this recursive expression
h_t = tt.dot(h_tm1, w) + x_t_t
# No failure when using this recursive expression
# h_t = h_tm1 + tt.dot(x_t_t, w)
return h_t
# Outer scan step: runs the inner scan over all of x_t, seeded with the separate
# initial state h0, and returns the last state.
def outer_scan_step(x_t, w, h0):
h, _ = theano.scan(inner_scan_step,
sequences=[x_t],
outputs_info=[h0],
non_sequences=[w],
strict=True)
return h[-1]
# Scans outer_scan_step over x, passing w and h0 as non-sequences, and returns
# the gradient of the summed features with respect to w.
def get_outputs(x, w, h0):
features, _ = theano.scan(outer_scan_step,
sequences=[x],
non_sequences=[w, h0],
strict=True)
return tt.grad(features.sum(), w)
# Compiles get_outputs for a (2, 2, 3) test input, a 3x3 all-ones shared w and a
# length-3 zero shared h0, then prints the result. Uses the Python 2 print statement.
def main():
# Evaluate test values while the graph is built, raising if one is missing.
theano.config.compute_test_value = 'raise'
x_value = numpy.arange(12, dtype=theano.config.floatX).reshape((2, 2, 3))
x = tt.tensor3()
x.tag.test_value = x_value
w = theano.shared(value=numpy.ones((3, 3), dtype=theano.config.floatX), borrow=True)
h0 = theano.shared(value=numpy.zeros(3, dtype=theano.config.floatX), borrow=True)
f = theano.function(inputs=[x], outputs=get_outputs(x, w, h0))
print f(x_value)
if __name__ == "__main__":
main()
I encountered the same issue and fixed it by setting optimizer=fast_compile in THEANO_FLAGS. I guess it is a bug in Theano.

Resources