Pytorch mask like Gluon npx.sequence_mask - nlp

I am trying to do this tutorial: https://d2l.ai/chapter_attention-mechanisms/attention.html but in Pytorch and I'm stuck in this function:
npx.sequence_mask()
I tried using torch.masked_fill and masked_scatter but to no success. Namely, I want:
a = torch.randn(2, 2, 4)
b = torch.randn(2, 3)
and to get a result like npx.sequence_mask()
sequence_mask documentation
([[[0.488994 , 0.511006 , 0. , 0. ],
[0.43654838, 0.56345165, 0. , 0. ]],
[[0.28817102, 0.3519408 , 0.3598882 , 0. ],
[0.29034293, 0.25239873, 0.45725834, 0. ]]])
Could anyone help me out with any ideas?

Maybe this works, but any better solution?
def mask_softmax(vec, mask):
    """Softmax over the last axis of ``vec`` restricted to a valid prefix.

    For every leading index ``k`` only the first ``int(mask[k])`` columns of
    ``vec[k]`` take part in the softmax; all remaining columns are returned
    as exactly ``0``.

    Args:
        vec: floating-point tensor indexed as ``vec[k, row, col]``.
        mask: one valid length per leading index of ``vec``. Values are
            truncated towards zero, like ``int()``.

    Returns:
        A new tensor with the shape of ``vec``. A slice whose valid length
        is 0 comes back as all zeros. ``vec`` itself is not modified.
    """
    cols = vec.shape[2]
    valid_len = torch.as_tensor(mask, device=vec.device).long()
    positions = torch.arange(cols, device=vec.device)
    # Shape (leafs, 1, cols): True where the column is inside the valid
    # prefix; the middle axis broadcasts over the rows.
    keep = (positions[None, :] < valid_len[:, None])[:, None, :]
    # Mask by position, not by sign: a valid score may be zero or negative.
    scores = vec.masked_fill(~keep, float("-inf"))
    probs = torch.softmax(scores, dim=-1)
    # A fully masked row yields nan from softmax(-inf, ...); selecting zeros
    # wherever `keep` is False removes those nans as well.
    return torch.where(keep, probs, torch.zeros_like(probs))
# testing
# Random scores of shape (2, 2, 4); `mask` holds one valid length per
# leading index of `a`.
a = torch.rand((2,2,4))
mask = torch.Tensor((1,3))
mask_softmax(a, mask)
>>> tensor([[[0.5027, 0.4973, 0.0000, 0.0000],
[0.6494, 0.3506, 0.0000, 0.0000]],
[[0.3412, 0.3614, 0.2975, 0.0000],
[0.2699, 0.3978, 0.3323, 0.0000]]])

d2l now provides an official PyTorch version, in which the following function is defined as equivalent to npx.sequence_mask:
def sequence_mask(X, valid_len, value=0):
    """Mask irrelevant entries in sequences.

    Every position ``X[i, j]`` with ``j >= valid_len[i]`` is overwritten
    with ``value``; trailing axes of ``X`` (if any) are overwritten whole.

    Args:
        X: tensor whose axis 0 indexes sequences and axis 1 indexes steps.
        valid_len: one valid length per sequence; a tensor or any sequence
            accepted by ``torch.as_tensor``.
        value: fill value for the masked positions.

    Returns:
        ``X`` itself -- the masking is done in place.
    """
    maxlen = X.size(1)
    # Coerce so that plain lists and tensors living on another device work.
    valid_len = torch.as_tensor(valid_len, device=X.device)
    mask = torch.arange(maxlen, dtype=torch.float32,
                        device=X.device)[None, :] < valid_len[:, None]
    X[~mask] = value
    return X
Ref: https://d2l.ai/chapter_recurrent-modern/seq2seq.html#loss-function

Related

Interpolation function for single data point

I'm using the interp1d function for interpolation
from scipy.interpolate import interp1d
# Known sample points (x) and their values (y).
x = [0, 3, 6, 10, 15, 20]
y = [1.87, 1.76, 1.27, 1.185, 0.995, 0.855]
# bounds_error=False: a query outside the sampled range does not raise.
f = interp1d(x, y, bounds_error=False)
# The last query (28) lies beyond the largest sample point (20).
x_find = [0, 5, 8, 10, 28]
print(f(x_find))
Using bounds_error=False in f = interp1d(x, y, bounds_error=False) returns nan value for x=28 in x_find.
Since interp1d raises an error for single datapoints, I tried the following for single datapoint.
# The single known data point: x0[0] maps to y0[0].
x0 = [1.87]
y0 = [0.93]
# Scalar-only lookup: for a list argument np.isclose returns an array, and
# the conditional expression cannot take the truth value of an array.
f0 = lambda x: y0[0] if np.isclose(x, x0[0]) else np.NaN
print(f0(x0[0]))
This doesn't work when I try f0(x_find).
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Expected output:
f0(x_find) returns nan for every value in x_find that is not present in x0, just as bounds_error=False does for out-of-range values.
Suggestions on how to do this will be really helpful
EDIT:
Question:
Would it be possible to modify the interpolation function of the single datapoint so that we could do just f(x_find), something similar to the one returned by f = interp1d() ?
My guess is that you are missing one simple step: f0() takes a single value, so to evaluate a whole list of values, call it once per element with a list comprehension.
Does the following code:
import numpy as np
from scipy.interpolate import interp1d
# Multi-point case: scipy's interp1d handles it directly.
x = [0, 3, 6, 10, 15, 20]
y = [1.87, 1.76, 1.27, 1.185, 0.995, 0.855]
f = interp1d(x, y, bounds_error=False)
x_find = [0, 5, 8, 10, 28]
print(f(x_find))

# Single-point case: interp1d cannot be used, so look the point up by hand.
x0 = [1.87]
y0 = [0.93]


def f0(x):
    """Return y0[0] when scalar ``x`` is (numerically) x0[0], otherwise nan."""
    # np.nan rather than np.NaN: the capitalised alias is gone in NumPy 2.0.
    return y0[0] if np.isclose(x, x0[0]) else np.nan


print(f0(x0[0]))
# f0 is scalar-only, so evaluate a list of queries element by element.
print([f0(x) for x in x_find])
which prints:
[1.87 1.43333333 1.2275 1.185 nan]
0.93
[nan, nan, nan, nan, nan]
meet your expectations?
You can also redefine f0 to cover the case of passing a list of values to it as follows:
def f0(x):
    """Look up the single data point 1.87 -> 0.93 for a scalar or a list.

    Args:
        x: a real number (float, int or NumPy scalar) or a list of them.

    Returns:
        For a number: 0.93 if it is numerically close to 1.87, else nan.
        For a list: the list of per-element results.
        For anything else: an explanatory message string (no exception is
        raised).
    """
    import numbers

    import numpy as np

    x0 = [1.87]
    y0 = [0.93]

    def lookup(value):
        # np.nan rather than np.NaN: the alias was removed in NumPy 2.0.
        return y0[0] if np.isclose(value, x0[0]) else np.nan

    if isinstance(x, list):
        return [lookup(z) for z in x]
    # numbers.Real also admits ints and NumPy scalars, which a plain
    # isinstance(x, float) check would turn away.
    if isinstance(x, numbers.Real):
        return lookup(x)
    return "f0() accepts only float and lists of floats as parameter"
print('---')
# Exercise the three branches of f0: scalar, list (x_find from the script
# above) and an unsupported type.
print(f0(1.87))
print(f0(x_find))
print(f0("5"))
The output of the code above is:
---
0.93
[nan, nan, nan, nan, nan]
f0() accepts only float and lists of floats as parameter
FINALLY you can also redefine f0 as f_i which is a bit complex code simulating the behavior of scipy interp1d as follows:
def f_i(X=(0, 1.87), Y=(1.87, 0.93), bounds_error=False):
    """Build a piecewise-linear interpolator loosely modelled on interp1d.

    Unlike scipy's interp1d, a table with a single point is accepted; it
    then only matches queries equal to that point.

    Args:
        X: sample positions (any order).
        Y: sample values, paired with ``X`` by index.
        bounds_error: if true, a query outside the sampled range raises
            ValueError; otherwise (default) it evaluates to None.

    Returns:
        A function ``r_f(w)``. For a real number it returns the
        interpolated value (or None when out of range); for a list it
        returns the list of per-element results; for any other argument it
        returns an explanatory message string.

    Raises:
        ValueError: if ``X`` is empty or ``X`` and ``Y`` differ in length.
    """
    import numbers

    if len(X) == 0 or len(X) != len(Y):
        raise ValueError("X and Y must be non-empty and of equal length")

    # Sort once here instead of on every lookup.
    points = sorted(zip(X, Y))

    def out_of_range(u):
        if bounds_error:
            raise ValueError(
                "x value {!r} is outside the interpolation range".format(u))
        return None

    def f_interpolate(u):
        if not (points[0][0] <= u <= points[-1][0]):
            return out_of_range(u)
        for (x_lo, y_lo), (x_hi, y_hi) in zip(points, points[1:]):
            if x_lo <= u <= x_hi:
                # Return knots exactly; this also avoids 0/0 when two
                # sample positions coincide.
                if u == x_lo:
                    return y_lo
                if u == x_hi:
                    return y_hi
                slope = (y_hi - y_lo) / (x_hi - x_lo)
                return y_lo + slope * (u - x_lo)
        return None

    def f(v):
        # Single-point table: only an exact match has a value.
        if v == points[0][0]:
            return points[0][1]
        return out_of_range(v)

    f_l = f_interpolate if len(points) > 1 else f

    def r_f(w):
        if isinstance(w, list):
            return [f_l(z) for z in w]
        # numbers.Real admits ints and NumPy scalars as well as floats.
        if isinstance(w, numbers.Real):
            return f_l(w)
        return "ValueErrorMessage: param. not float or list of floats"

    return r_f
# Multi-point table (x, y) and single-point table (x0, y0).
y = [1.87, 1.76, 1.27, 1.185, 0.995, 0.855]
x = [ 0, 3, 6, 10, 15, 20 ]
y0 = [0.93]
x0 = [1.87]
print('---')
# Single-point interpolator: queried at exactly its one known position.
f = f_i(x0, y0)
print(f(1.87))
# Multi-point interpolator: list query, then an unsupported (str) argument.
f = f_i(x, y)
x_find = [0, 5, 8, 10, 28]
print(f(x_find))
print(f("5"))
which gives following output:
---
0.93
[1.87, 1.4333333333333333, 1.2275, 1.185, None]
ValueErrorMessage: param. not float or list of floats

I'm stuck with GurobiError: Unable to convert argument to an expression

Could you help me with the
GurobiError: Unable to convert argument to an expression
I first declared the variables individually (e.g. x1, x2, x3, etc.), but that produced a 'Non-iterable object' error, so I switched to iterable containers. Gurobi still cannot convert the expression: the error is now raised only at line 96, but I cannot find the cause. What should I do?
My code:
import gurobipy as grb
# Problem data. lX/uX and lY/uY are used below as lower/upper variable
# bounds and lV as lower bounds; the meaning of the remaining coefficients
# is not stated in this snippet.
f = [5.5, 5.2, 5]
s = 3.8
lX = [0, 0, 0]
uX = [45000, 4000, 1000000]
lV = [0, 0]
lY = [0, 0]
uY = [1000000, 30000]
r = [3.25, 3.4]
pProc = 0.35
pConv = 0.25
# NOTE(review): `p` is not referenced again in this snippet.
p = [5.75, 4.0]
OreProcessingModel = grb.Model(name="MIP Model")
OreProcessingModel.ModelSense = grb.GRB.MAXIMIZE
# x: three continuous variables bounded by lX[i] .. uX[i].
x = {i: OreProcessingModel.addVar(vtype=grb.GRB.CONTINUOUS,
lb=lX[i],
ub= uX[i],
name="x_{0}".format(i))
for i in range(3)}
# v: two continuous variables with lower bound lV[i] and no explicit ub.
v = {i: OreProcessingModel.addVar(vtype=grb.GRB.CONTINUOUS,
lb=lV[i],
name="v_{0}".format(i))
for i in range(2)}
# y: two continuous variables bounded by lY[i] .. uY[i].
# NOTE(review): the name template is "v_{0}", the same as for `v` above --
# presumably "y_{0}" was intended.
y = {i: OreProcessingModel.addVar(vtype=grb.GRB.CONTINUOUS,
lb=lY[i],
ub=uY[i],
name="v_{0}".format(i))
for i in range(2)}
conv = OreProcessingModel.addVar(vtype=grb.GRB.CONTINUOUS, lb=0, ub=50000, name="conv")
# NOTE(review): vlms2 and vlmsitrtr are not referenced again in this snippet.
vlms2 = [v[0], v[1]]
vlmsitrtr = 2
# y[0] + y[1] <= 100000.
# NOTE(review): the name is the literal text "constraint_{1}" (no .format call).
constraint_1 = {1:
OreProcessingModel.addConstr(
lhs=grb.quicksum(y[i] for i in range(2)),
sense=grb.GRB.LESS_EQUAL,
rhs=100000,
name="constraint_{1}")
}
# v[0] + v[1] <= 50000.
constraint_2 = {1:
OreProcessingModel.addConstr(
lhs=grb.quicksum(v[i] for i in range(2)),
sense=grb.GRB.LESS_EQUAL,
rhs=50000,
name="constraint_{2}")
}
# NOTE(review): the `for i in range(3)` clause sits outside quicksum's
# parentheses, so quicksum receives a single expression and setObjective
# receives a generator. In addition range(3) indexes v, y and r, which only
# have entries 0 and 1.
OreProcessingModel.setObjective(grb.quicksum((f[i] * x[i] + s * v[i]) - (y[i]*r[i] + pProc*(y[i]) + pConv*conv))for i in range(3))
OreProcessingModel.optimize()
print(OreProcessingModel)
You should not work with dictionaries like that. You can turn this code
# Original form: one addVar call per index, collected in a dict comprehension.
x = {i: OreProcessingModel.addVar(vtype=grb.GRB.CONTINUOUS,
lb=lX[i],
ub= uX[i],
name="x_{0}".format(i))
for i in range(3)}
into this:
# Replacement: a single addVars call creating len(lX) variables, with the
# bound lists passed whole as lb/ub.
x = OreProcessingModel.addVars(len(lX), vtype=grb.GRB.CONTINUOUS, lb=lX, ub=uX, name="x")
Furthermore, the dimensions of your variables don't match. You are looping over range(3) in the setObjective() call and try to access v[i] and y[i] but these dicts are only of length 2.

What is the range for nll_loss?

I thought the range was only within the positive domain, but I am getting both negative numbers and positive numbers for F.nll_loss?
Why is that? I am confused. Softmax ranges from 0 to 1 and -log(0 to 1) is from infinity to 0. So, why am I getting negative numbers?
Here is how the function is implemented:
f = F.nll_loss
def NLLLoss(logs, targets):
    """Negated average of the entries ``logs[k, targets[k]]``.

    Args:
        logs: 2-D tensor; row ``k`` holds the scores of sample ``k``.
        targets: for each row, the column index to pick.

    Returns:
        Minus the sum of the picked entries divided by their count. No
        softmax or log is applied, so the sign of the result depends
        entirely on the values stored in ``logs``.
    """
    row_index = range(len(targets))
    picked = logs[row_index, targets]
    total = picked.sum()
    return -total / len(picked)
# Raw standard-normal scores: NOT log-probabilities, so entries may be
# positive as well as negative.
i = torch.randn(3, 5)
print(i)
# One integer class index per row, drawn via random_(0, 5).
t = torch.empty(3).random_(0, 5).to(dtype=torch.long)
print(t)
# First with the library function (f was bound to F.nll_loss above) ...
o = f(i,t)
print(o)
# ... then with the hand-written version, on the same inputs.
f = NLLLoss
o = f(i,t)
print(o)
# Output of one run (input, targets, then both losses -- identical):
# tensor([[ 0.0684, 0.9493, -0.9932, -1.9325, -0.1642],
# [ 1.7073, 0.8153, -0.6435, -1.0128, 0.9894],
# [ 0.6948, -1.3770, -0.0932, -0.0951, -1.4226]])
# tensor([2, 0, 3])
# tensor(-0.2063)
# tensor(-0.2063)
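F.nll_loss applies neither softmax nor log itself: as the hand-written version shows, it just picks input[k, target[k]] for each row, negates, and averages. The result is therefore non-negative only when the input already holds log-probabilities (e.g. the output of log_softmax); for arbitrary inputs such as torch.randn, it can be anywhere from -inf to inf.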

during conversion of XYZ to linear sRGB answer is out of range [0,1]

My task was to convert the RGB image into LuvImage.
Then perform linear stretching in this domain, and finally convert the result back to the RGB domain.
Original Image:
[[ 0 0 0]
[255 0 0]
[100 100 100]
[ 0 100 100]]
Luv image after linear stretching in Luv Domain
[[0 , 0, 0],
[100 , 175, 37.7],
[79.64, 0, 0],
[71.2 ,-29.29,-6.339]]
Now, I am converting it into XYZ image. The answer is,
[[0,0, 0],
[1.5, 1, 0.53],
[0.533, 0.56, 0.61],
[0.344, 0.425, 0.523]]
Now, after that I am converting it into linear sRGB image
by multiplying image with matrix:
[[3.240479, -1.53715, -0.498535],
[-0.969256, 1.875991, 0.041556],
[0.055648, -0.204043, 1.057311]]
The answer for this conversion - linear sRGB image,
[[0. 0. 0. ],
[3.07132001 0.44046801 0.44082034],
[0.55904669 0.55972465 0.55993322],
[0.20106868 0.4850426 0.48520307]]
The problem here is that for the 2nd pixel sRGB values are not in the range of [0,1]. For all other pixels I am getting the correct value.
def XYZToLinearRGB(self, XYZImage):
    '''
    Convert an XYZ image to a linear sRGB image.

    Each pixel (X, Y, Z) is multiplied by the static array
    [[3.240479, -1.53715, -0.498535],
    [-0.969256, 1.875991, 0.041556],
    [0.055648, -0.204043, 1.057311]]

    XYZImage: array-like of shape (rows, cols, 3).
    Returns a float array of the same shape. The values are NOT clamped,
    so they can fall outside [0, 1].
    Raises ValueError if XYZImage is not of shape (rows, cols, 3).
    '''
    multiplierMatrix = np.array([[3.240479, -1.53715, -0.498535],
                                 [-0.969256, 1.875991, 0.041556],
                                 [0.055648, -0.204043, 1.057311]])
    xyz = np.asarray(XYZImage, dtype=float)
    if xyz.ndim != 3 or xyz.shape[2] != 3:
        raise ValueError("XYZImage must have shape (rows, cols, 3)")
    # One matmul over the last axis replaces the per-pixel Python loop:
    # out[i, j] == multiplierMatrix @ xyz[i, j].
    return xyz @ multiplierMatrix.T
The code for this conversion is as per above. Can someone point out what I am doing wrong for 2nd pixel, and how to fix it?
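After some research, the simplest solution I found is to clip the values. The stretched Luv colour presumably lies outside the sRGB gamut, so there is no valid linear sRGB value to recover for that pixel.
So, if a value is out of range, say r<0, then we assign r = 0.
The same goes for values that are too large: if r>1 (3.07 in my case), then we assign r = 1.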
So the latest version of my code:
def XYZToLinearRGB(self, XYZImage):
    '''
    Convert an XYZ image to a linear sRGB image, clamped to [0, 1].

    Each pixel (X, Y, Z) is multiplied by the static array
    [[3.240479, -1.53715, -0.498535],
    [-0.969256, 1.875991, 0.041556],
    [0.055648, -0.204043, 1.057311]]
    and every resulting channel below 0 is set to 0, every channel above 1
    is set to 1.

    XYZImage: array-like of shape (rows, cols, 3).
    Returns a float array of the same shape.
    Raises ValueError if XYZImage is not of shape (rows, cols, 3).
    '''
    multiplierMatrix = np.array([[3.240479, -1.53715, -0.498535],
                                 [-0.969256, 1.875991, 0.041556],
                                 [0.055648, -0.204043, 1.057311]])
    xyz = np.asarray(XYZImage, dtype=float)
    if xyz.ndim != 3 or xyz.shape[2] != 3:
        raise ValueError("XYZImage must have shape (rows, cols, 3)")
    # Matmul over the last axis, then one vectorised clamp, instead of the
    # per-pixel / per-channel Python loops.
    return np.clip(xyz @ multiplierMatrix.T, 0.0, 1.0)
That said, if anyone has a better suggestion, it is most welcome.

finding optimum lambda and features for polynomial regression

I am new to Data Mining/ML. I've been trying to solve a polynomial regression problem of predicting the price from given input parameters (already normalized within range[0, 1])
I'm quite close: my output is roughly in proportion to the correct one, but it looks suppressed. I believe my algorithm is correct; what I don't know is how to arrive at an appropriate lambda (the regularization parameter), or how far I should expand the features, given that the problem says: "The prices per square foot, are (approximately) a polynomial function of the features. This polynomial always has an order less than 4".
Is there a way we could visualize data to find optimum value for these parameters, like we find optimal alpha (step size) and number of iterations by visualizing cost function in linear regression using gradient descent.
Here is my code : http://ideone.com/6ctDFh
from numpy import *
def mapFeature(X1, X2, degree=2):
    """Expand two feature vectors into all monomials up to ``degree``.

    Args:
        X1, X2: 1-D arrays with one entry per sample.
        degree: highest total power to generate (default 2).

    Returns:
        A 2-D array with one row per sample. Row ``k`` holds the mapped
        features of ``X1[k], X2[k]``: a leading 1 followed, for each total
        power i = 1..degree, by ``X1**(i-j) * X2**j`` for j = 0..i. With
        the default degree the columns are 1, X1, X2, X1^2, X1*X2, X2^2.
    """
    n_samples = shape(X1)[0]
    columns = [ones((n_samples, 1))]
    for i in range(1, degree + 1):
        for j in range(0, i + 1):
            term = (X1 ** (i - j)) * (X2 ** j)
            columns.append(term.reshape(n_samples, 1))
    # Stack once at the end rather than re-copying `out` for every term.
    return hstack(columns)
def solve():
    """Read a training set from stdin, fit theta and print four predictions.

    The first input line holds ``n m``; each of the following ``m`` lines
    holds at least ``n + 1`` numbers, the last of the first ``n + 1`` being
    the target. Only the first two feature columns are fed to mapFeature.
    The fit itself is delegated to findMinTheta with lamda = 2.
    """
    n_text, m_text = input().split()
    m = int(m_text)
    n = int(n_text)

    # Fill the (m, n + 1) table row by row from the input lines.
    data = zeros((m, n + 1))
    for row in range(m):
        fields = input().split()
        col = 0
        while col <= n:
            data[row, col] = float(fields[col])
            col += 1

    features = data[:, :n]
    y = data[:, n]
    theta = zeros((6, 1))
    X = mapFeature(features[:, 0], features[:, 1])
    # The result of this call is not used afterwards.
    computeCostVect(X, y, theta)
    # print(X)
    print("Results usning BFGS : ")
    lamda = 2
    theta, cost = findMinTheta(theta, X, y, lamda)

    # Four (x1, x2) query pairs, stored flat.
    test = [0.05, 0.54, 0.91, 0.91, 0.31, 0.76, 0.51, 0.31]
    print("prediction for 0.31 , 0.76 (using BFGS) : ")
    for first, second in zip(test[0:7:2], test[1:8:2]):
        print(mapFeature(array([first]), array([second])).dot(theta))
# pyplot.plot(X[:, 1], y, 'rx', markersize = 5)
# fig = pyplot.figure()
# ax = fig.add_subplot(1,1,1)
# ax.scatter(X[:, 1],X[:, 2], s=y) # Added third variable income as size of the bubble
# pyplot.show()
The current output is:
183.43478288
349.10716957
236.94627602
208.61071682
The correct output should be:
180.38
1312.07
440.13
343.72

Resources