I defined a custom loss function, but calling backward raises an error. Could someone tell me how to fix it? - pytorch

class loss(Function):
    """Batch-averaged ``log10(1 + H_hat Q H_hat^T)`` loss evaluated with NumPy.

    Each row of ``INPUT`` is read as ``R`` (cols 0:8), ``H`` (cols 8:16) and a
    4x4 ``T`` (cols 16:32 and 32:48).  Each row of ``x`` supplies the diagonal
    of ``phi`` (cols 0:4) and the entries of ``Q`` (cols 4:20).  The ``_r`` /
    ``_i`` halves are presumably real and imaginary parts, stacked into real
    block matrices ``[[A, B], [-B, A]]`` -- TODO confirm.

    The forward pass leaves the autograd graph (``detach().numpy()``), so the
    only gradients autograd sees are whatever ``backward`` returns.
    """

    @staticmethod
    def forward(ctx, x, INPUT):
        """Return the batch mean of ``log10(1 + (H_hat Q H_hat_H)[0, 0])``.

        Args:
            ctx: autograd context (not used here).
            x: 2-D tensor; columns 0:20 of every row are consumed.
            INPUT: 2-D tensor; columns 0:48 of every row are consumed.

        Returns:
            A 0-dim tensor created from the NumPy result.
        """
        batch_size = x.shape[0]
        X = x.detach().numpy()
        inp = INPUT.detach().numpy()
        diag = np.diag_indices(4)
        upper = np.triu_indices(4, 1)

        Loss = 0
        for i in range(batch_size):
            # Row vectors laid out as [first half | second half], shape (1, 8).
            R = inp[i, 0:8][np.newaxis, :]
            H = inp[i, 8:16][np.newaxis, :]
            T_r = inp[i, 16:32].reshape(4, 4)
            T_i = inp[i, 32:48].reshape(4, 4)
            T = np.block([[T_r, T_i], [-T_i, T_r]])

            # phi: diagonal blocks built from x[i, 0:4] and 1 - x[i, 0:4] ** 2.
            phi_r = np.zeros((4, 4))
            phi_r[diag] = X[i, 0:4]
            phi_i = np.zeros((4, 4))
            phi_i[diag] = 1 - np.power(X[i, 0:4], 2)
            phi = np.block([[phi_r, phi_i], [-phi_i, phi_r]])

            H_hat = H + np.matmul(np.matmul(R, phi), T)

            # Q_r is symmetric (upper triangle + explicit diagonal),
            # Q_i is antisymmetric (upper triangle minus its transpose).
            Q_r = np.zeros((4, 4))
            Q_r[upper] = X[i, 4:10]
            Q_r = Q_r + Q_r.T
            Q_r[diag] = X[i, 10:14]
            Q_i = np.zeros((4, 4))
            Q_i[upper] = X[i, 14:20]
            Q_i = Q_i - Q_i.T
            Q = np.block([[Q_r, Q_i], [-Q_i, Q_r]])

            H_hat_r = H_hat[0, 0:4][np.newaxis, :]
            H_hat_i = H_hat[0, 4:8][np.newaxis, :]
            H_hat_H = np.concatenate(
                (H_hat.T, np.concatenate((-H_hat_i.T, H_hat_r.T), axis=0)),
                axis=1,
            )
            quad = np.matmul(np.matmul(H_hat, Q), H_hat_H)
            # Only the [0, 0] entry (H_hat Q H_hat^T) enters the loss.
            Loss += np.log10(1 + quad[0][0])
        Loss = t.from_numpy(np.array(Loss / batch_size))
        return Loss

    @staticmethod
    def backward(ctx, grad_output):
        """Pass ``grad_output`` through unchanged (placeholder gradient)."""
        print('gradient')
        # NOTE(review): forward takes two tensor inputs (x, INPUT), so autograd
        # expects two return values here; returning one raises "returned an
        # incorrect number of gradients (expected 2, got 1)".  The true
        # gradient of the NumPy forward is not derived in this block, so the
        # behaviour is deliberately left as written.
        return grad_output
def criterion(output, input):
    """Evaluate the custom ``loss`` Function on ``output`` and ``input``."""
    loss_value = loss.apply(output, input)
    return loss_value
This is my loss function, but it raises the following error:
Traceback (most recent call last):
File "/Users/mrfang/channel_capacity/training.py", line 24, in
loss.backward() File "/Users/mrfang/anaconda3/lib/python3.6/site-packages/torch/tensor.py",
line 150, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph) File
"/Users/mrfang/anaconda3/lib/python3.6/site-packages/torch/autograd/init.py",
line 99, in backward
allow_unreachable=True) # allow_unreachable flag RuntimeError: function lossBackward returned an incorrect number of gradients
(expected 2, got 1)
How can I fix it? Thanks very much.

Your forward(ctx,x,INPUT) takes two inputs, x and INPUT, thus backward should output two gradients as well, grad_x and grad_INPUT.
In addition, in your snippet, you're not really computing a custom gradient, so you could compute that with Pytorch's autograd, without having to define a special Function.
If this is working code and you're going to define the custom loss, here's a quick boilerplate of what backward should comprise:
@staticmethod
def forward(ctx, x, INPUT):
    """Boilerplate forward: stash the inputs, then compute the custom result."""
    # this is required so they're available during the backwards call
    ctx.save_for_backward(x, INPUT)
    # custom forward
@staticmethod
def backward(ctx, grad_output):
    """Boilerplate backward: return one gradient slot per forward input."""
    x, INPUT = ctx.saved_tensors
    # None marks an input that gets no gradient.
    grad_x = grad_INPUT = None
    # compute grad here
    return grad_x, grad_INPUT
You don't need to return gradients for inputs that don't require it, thus you can return None for them.
More info here and here.

Related

How to remove inplace operation error in Pytorch?

I get this error from the following Pytorch code:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.DoubleTensor [3]] is at version 10; expected version 9 instead.
As can be seen, the code does not appear to contain any in-place operations.
import torch
device = torch.device('cpu')
class MesNet(torch.nn.Module):
    """Single double-precision linear layer mapping 6 features to 5 outputs."""

    def __init__(self):
        super().__init__()
        # Kept inside a Sequential so parameter names stay ``cov_lin.0.*``.
        self.cov_lin = torch.nn.Sequential(torch.nn.Linear(6, 5)).double()

    def forward(self, u):
        """Apply the linear layer to ``u`` after swapping dims 0 and 2.

        ``u`` is transposed on dims (0, 2), the trailing dim is squeezed, and
        the result is fed to the 6 -> 5 linear layer.
        """
        z_cov = self.cov_lin(u.transpose(0, 2).squeeze(-1))
        return z_cov
class UpdateModel(torch.nn.Module):
# Reproduction model: run_KF feeds random data through MesNet, updates the rows
# of `v` one by one, and back-propagates an MSE loss through the result.
def __init__(self):
torch.nn.Module.__init__(self)
self.P_dim = 18
self.Id3 = torch.eye(3).double()
# Build the inputs, run the update loop, compute the loss and call backward().
def run_KF(self):
N = 10
u = torch.randn(N, 6).double()
v = torch.zeros(N, 3).double()
model = MesNet()
measurements_covs_l = model(u.t().unsqueeze(0))
# remember to remove this afterwards
torch.autograd.set_detect_anomaly(True)
for i in range(1, N):
# NOTE(review): this assigns into `v` in place on every iteration, while
# `v[i].detach()` (a view of the same tensor) is used inside update_pos;
# likely the source of the reported in-place RuntimeError -- TODO confirm.
v[i] = self.update_pos(v[i].detach(), measurements_covs_l[i-1])
criterion = torch.nn.MSELoss(reduction="sum")
targ = torch.rand(10, 3).double()
loss = criterion(v, targ)
loss = torch.mean(loss)
loss.backward()
# NOTE(review): `p` is never assigned in this method.
return v, p
# Build a 3x3 matrix from `measurement_cov` and apply it to `v`.
def update_pos(self, v, measurement_cov):
# `Omega` is created but not used below.
Omega = torch.eye(3).double()
H = torch.ones((5, self.P_dim)).double()
R = torch.diag(measurement_cov)
Kt = H.t().mm(torch.inverse(R))
# it is indicating inplace error even with this:
# Kt = H.t().mm(R)
dx = Kt.mv(torch.ones(5).double())
dR = self.trans(dx[:9].clone())
v_up = dR.mv(v)
return v_up
# Return a 3x3 matrix derived from the norm of the first three entries of `xi`.
def trans(self, xi):
phi = xi[:3].clone()
angle = torch.norm(phi.clone())
if angle.abs().lt(1e-10):
skew_phi = torch.eye(3).double()
# `J` is computed but not returned.
J = self.Id3 + 0.5 * skew_phi
Rot = self.Id3 + skew_phi
else:
# `axis`, `skew_axis` and `s` are computed but do not enter `Rot`.
axis = phi / angle
skew_axis = torch.eye(3).double()
s = torch.sin(angle)
c = torch.cos(angle)
Rot = c * self.Id3
return Rot
# Instantiate the model and run the reproduction (run_KF calls loss.backward()).
net = UpdateModel()
net.run_KF()
I think the issue is that you are overwriting v[i] elements.
You could instead construct an auxiliary list v_ in the loop, then convert it to a tensor:
# Collect the rows in a fresh list so that `v` itself is never written in place.
v_ = [v[0]]
v_.extend(
    self.update_pos(v[i].detach(), measurements_covs_l[i-1])
    for i in range(1, N)
)
v = torch.stack(v_)

How to pass model input to loss function in tensorflow keras?

I am training a neural network with three different output predictions. To compute the loss of one output I need one of the inputs that is passed into the network. I am not able to access it, because the training data is fed into the network by a Keras data generator object. Is there any workaround for this problem?
This is the generator class that feeds data into the model:
class DataGenerator(tf.keras.utils.Sequence):
# Keras Sequence that yields (images, {center, size, offset heatmaps}) batches.
# Relies on names defined outside this class: `path`, `cv2`, `gaussian_2d`,
# `size_off_heatmap`.
def __init__(self,list_ID,centers,sizes,batch_size=2,dims=(512,512),n_channels=3,n_classes=10,shuffle=True) -> None:
assert len(list_ID) == len(centers)
self.dims = dims
self.batch_size = batch_size
self.list_ID = list_ID
self.centers = centers
self.n_channels = n_channels
self.n_classes = n_classes
self.shuffle = shuffle
self.sizes = sizes
self.on_epoch_end()
# Stays None until __datageneration has produced a batch.
self.mask = None
# Number of full batches; a trailing partial batch is dropped by the floor.
def __len__(self):
return int(np.floor(len(self.list_ID) / self.batch_size))
# Rebuild (and optionally shuffle) the sample order.
def on_epoch_end(self):
self.indexes = np.arange(len(self.list_ID))
if self.shuffle:
np.random.shuffle(self.indexes)
# Return batch number `index` as (X, y).
def __getitem__(self, index):
indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
list_ID_temp = [self.list_ID[k] for k in indexes]
centers_temp = [self.centers[k] for k in indexes]
sizes_temp = [self.sizes[k] for k in indexes]
X, y = self.__datageneration(list_ID_temp, centers_temp,sizes_temp)
return X, y
# Load the images of one batch and build the three target maps.
def __datageneration(self, list_ID_temp,centers_temp,sizes_temp):
X = np.empty((self.batch_size,*self.dims,self.n_channels))
Y_center = np.empty((self.batch_size,128,128,1))
Y_dimension = np.empty((self.batch_size,128,128,2))
Y_offset = np.empty((self.batch_size,128,128,2))
# self.mask is replaced with a new array on every call to this method.
self.mask = np.empty((self.batch_size,128,128,1))
for i,ID in enumerate(list_ID_temp):
# `path` is not defined in this class -- presumably a module-level variable.
image = cv2.imread(path+'/'+ID) / 255.0
heat_center, self.mask[i,] = gaussian_2d(centers_temp[i],image.shape)
'''Here I tried to save mask which is what I need,
as an attribute to data generator but when accessed by loss function
the value is just None which is what I initialized it as in init method'''
heat_size,heat_off = size_off_heatmap(sizes_temp[i], centers_temp[i],image.shape)
# The heatmaps above use the shape of the image before this resize.
image = cv2.resize(image,(512,512))
X[i,] = image
Y_center[i,] = heat_center
Y_dimension[i,] = heat_size
Y_offset[i,] = heat_off
# Targets are keyed by name ('center_output', 'size_output', 'offset_output').
return (X,{'center_output':Y_center,'size_output':Y_dimension,'offset_output':Y_offset})
This is the generator class I implemented. I need the mask, which I tried to store as an attribute of the data generator object (see the note inside the code). For reference, I will also include the function that returns the mask and the loss function that requires the mask.
Function returning mask
def gaussian_2d(centers, img_shape, out_size=128):
    """Render object centres as a Gaussian heatmap plus a binary centre mask.

    Args:
        centers: iterable of ``(x, y)`` positions in image pixel coordinates.
            May be empty, in which case both outputs are all zeros.
        img_shape: shape of the source image; ``img_shape[0]`` is the height
            and ``img_shape[1]`` the width.
        out_size: side length of the square output grid (default 128).

    Returns:
        ``(heatmap, mask)``, both of shape ``(out_size, out_size, 1)``.  The
        heatmap is the element-wise maximum over one Gaussian per centre; the
        mask is 1 at every centre cell and 0 elsewhere.
    """
    cols = np.tile(np.arange(out_size), (out_size, 1))  # cols[r, c] == c
    rows = cols.T                                        # rows[r, c] == r
    mask = np.zeros((out_size, out_size, 1))
    heatmap = np.zeros((out_size, out_size))
    height, width = img_shape[0], img_shape[1]
    for x_o, y_o in centers:
        # A centre lying exactly on the right/bottom edge would scale to
        # out_size and index out of bounds, so keep it in the last cell.
        x = min(int(x_o / width * out_size), out_size - 1)
        y = min(int(y_o / height * out_size), out_size - 1)
        mask[y, x] = 1
        # NOTE(review): by precedence this is -(d2 / 2) * 0.2 ** 2, i.e. an
        # effective sigma of 5 cells, not d2 / (2 * 0.2 ** 2).  Kept as
        # written -- confirm the intended width.
        gauss = np.exp(-((rows - y) ** 2 + (cols - x) ** 2) / 2 * 0.2 ** 2)
        # Running maximum; every Gaussian is positive, so starting from zeros
        # gives the same result as stacking and taking the max.
        np.maximum(heatmap, gauss, out=heatmap)
    return heatmap.reshape((out_size, out_size, 1)), mask
Loss function
def final_loss(mask):
    """Return an L1 loss closure normalised by the number of 1-entries in ``mask``.

    ``mask`` is captured when this factory is called; the returned function
    has the ``(y_true, y_pred)`` signature and divides the summed absolute
    error by the count of mask entries equal to 1.0 when that count is
    positive, otherwise it returns the plain sum.
    """

    def l1_loss(y_true, y_pred):
        truth = tf.cast(y_true, tf.float32)
        pred = tf.cast(y_pred, tf.float32)
        is_center = tf.equal(mask, 1.0)
        n = tf.reduce_sum(tf.cast(is_center, dtype=tf.float32))
        tot_loss = tf.reduce_sum(tf.abs(pred - truth))
        if tf.greater(n, 0):
            loss = tot_loss / (n)
        else:
            loss = tot_loss
        return loss

    return l1_loss
The error shown is as below:
Epoch 1/10
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-27-74a28b075f52> in <module>()
----> 1 model.fit(gen,epochs=10,verbose=1,callbacks=Callback(patience=4))
9 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
975 except Exception as e: # pylint:disable=broad-except
976 if hasattr(e, "ag_error_metadata"):
--> 977 raise e.ag_error_metadata.to_exception(e)
978 else:
979 raise
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
<ipython-input-24-c45fe131feb7>:5 l1_loss *
n = tf.reduce_sum(tf.cast(tf.equal(mask, 1.0),dtype=tf.float32))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper **
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:1679 equal
return gen_math_ops.equal(x, y, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py:3179 equal
name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py:540 _apply_op_helper
(input_name, err))
ValueError: Tried to convert 'x' to a tensor and failed. Error: None values not supported.
'''

Why do I get the error "UnboundLocalError: local variable 'top_performer' referenced before assignment"?

Here is my Python code. I don't understand why I am getting the following error. Any guidance or help would be much appreciated.
UnboundLocalError: local variable 'top_performer' referenced before assignment
def create(X, y, **kwargs):
# Feature-engineering entry point; the strategy is chosen by kwargs["method"].
# For "fs_GA" a genetic algorithm searches over 100-bit feature masks and X is
# reduced to the columns selected by the best mask found.
method = kwargs.get("method", None)
#method = kwargs.get("method", "Binary_operators")
#method = kwargs.get("method", "Binning")
#method = kwargs.pop("method", "Cluster")
#categorical_cols = [c for c, t in zip(X.columns, X_column_types) if t in [DATATYPE_CATEGORY_INT, DATATYPE_CATEGORY_STRING]]
#numerical_cols = [c for c, t in zip(X.columns, X_column_types) if t == DATATYPE_NUMBER]
#categorical = X[categorical_cols]
#numerical = X[numerical_cols]
# Split columns by dtype: object columns are treated as categorical.
categorical = X.select_dtypes(include=[object])
numerical = X.select_dtypes(exclude=[object])
# feature selection using Genetic Algorithm
if method == "fs_GA":
print("fs_GA")
# One-hot encode the categorical columns and append them to the numeric ones.
enc = OneHotEncoder()
enc.fit(categorical)
Data_cat=pd.DataFrame(enc.transform(categorical).toarray())
X_data = pd.concat([numerical, Data_cat], axis=1)
if y.dtype == int:
y = y
else:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le.fit(y)
y = le.transform(y)
X_train, X_test, y_train, y_test = train_test_split(X_data, y, train_size=0.8, random_state=42)
# Fitness of a bit mask: 100 / RMSE of a random forest trained on the masked columns.
def get_fitness(individual):
# NOTE(review): `y` may already have been label-encoded above, in which case
# this dtype test appears to always pick the regressor -- confirm.
if y.dtype == int:
rg = RandomForestRegressor(random_state=42)
else:
rg = RandomForestClassifier(random_state=42)
columns = [column for (column, binary_value) in zip(X_train.columns, individual) if binary_value]
training_set = X_train[columns]
test_set = X_test[columns]
rg.fit(training_set.values, y_train)
preds = rg.predict(test_set.values)
return 100 / np.sqrt(mean_squared_error(y_test, preds))
# The result of this call is discarded.
individual = [1] * 100
get_fitness(individual)
# Pair every individual with its fitness, best first.
def get_population_fitness(population):
return sorted([(individual, get_fitness(individual)) for individual in population], key=lambda tup: tup[1], reverse=True)
# Single-point crossover on 100-element masks.
def crossover(individual_a, individual_b):
crossing_point = random.randint(0, 99)
offspring_a = individual_a[0:crossing_point] + individual_b[crossing_point:100]
offspring_b = individual_b[0:crossing_point] + individual_a[crossing_point:100]
return offspring_a, offspring_b
# Draw 5 of the first 20 individuals and cross two chosen with fitness-proportional probability.
def tournament(current_population):
index = sorted(random.sample(range(0, 20), 5))
tournament_members = [current_population[i] for i in index]
total_fitness = sum([individual[1] for individual in tournament_members])
probabilities = [individual[1] / total_fitness for individual in tournament_members]
index_a, index_b = np.random.choice(5, size=2, p=probabilities)
return crossover(tournament_members[index_a][0], tournament_members[index_b][0])
# Flip one random bit of `individual` in place.
def mutation(individual):
mutation_point = random.randint(0, 99)
if(individual[mutation_point]):
individual[mutation_point] = 0
else:
individual[mutation_point] = 1
# Next generation = best individual + one random individual + 9 tournaments x 2 offspring.
def build_next_generation(current_population, mutation_rate):
next_generation = []
next_generation.append(current_population[0][0]) # elitism
next_generation.append(current_population[random.randint(1,19)][0]) # randomness
for i in range(9): # tournaments
offspring_a, offspring_b = tournament(current_population)
next_generation.append(offspring_a)
next_generation.append(offspring_b)
for individual in next_generation: # mutation
# Each individual mutates with probability 1 / mutation_rate.
if(random.randint(1,mutation_rate) == 1):
mutation(individual)
return next_generation
# Run the GA and return the fittest (individual, fitness) pair of each generation.
def run_ga(current_population, num_of_generations, mutation_rate=1000):
fittest_individuals = []
for i in range(num_of_generations):
current_population = get_population_fitness(current_population) # get pop fitness
fittest_individuals.append(current_population[0]) # record fittest individual (for graphing and analysis)
current_population = build_next_generation(current_population, mutation_rate) # make new population
return fittest_individuals
initial_population = [[random.randint(0, 1) for i in range(100)] for i in range(20)]
high_mutation_fittest = run_ga(initial_population, 100, mutation_rate=5)
high_mutation_fitness = [ind[1] for ind in high_mutation_fittest]
# NOTE(review): the loop skips the last entry ([:-1]) while max() below looks at
# every entry. If the maximum occurs only in the last generation, nothing
# matches, `top_performer` is never assigned, and the print below raises
# UnboundLocalError.
for item in high_mutation_fittest[:-1]:
if item[1] == max(high_mutation_fitness):
top_performer = item
break
print("Total features included: " + str(top_performer[0].count(1)))
selected_features = [column for (column, binary_value) in zip(X.columns, top_performer[0]) if binary_value]
excluded_features = [column for (column, binary_value) in zip(X.columns, top_performer[0]) if not binary_value]
X = X[selected_features]
# NOTE(review): indentation was lost here, so the nesting of the following
# if/else chain cannot be determined from this text.
if method == "Binary_operators":
print("binaryoperators")
if method == "Binning":
print("binning")
else:
print("Discretization")
if method == "Cluster":
print("clustering")
else:
print("no-cluster")
print("normal_autocross")
So when I run the code I get the following error and I don't seem to understand what it means. Can someone please explain to me why i'm getting this error?
create(X, y, method="fs_GA")
fs_GA
UnboundLocalError Traceback (most recent call last)
in
----> 1 create(X, y, method="fs_GA")
in create(X, y, **kwargs)
107 top_performer = item
108 break
--> 109 print("Total features included: " + str(top_performer[0].count(1)))
110
111 selected_features = [column for (column, binary_value) in zip(X.columns, top_performer[0]) if binary_value]
UnboundLocalError: local variable 'top_performer' referenced before assignment
# NOTE(review): initialising to the int 0 avoids the UnboundLocalError, but if
# no item matches in the loop, `top_performer[0]` below indexes an int and
# raises TypeError instead.
top_performer = 0
for item in high_mutation_fittest[:-1]:
if item[1] == max(high_mutation_fitness):
top_performer = item
break
print("Total features included: " + str(top_performer[0].count(1)))
According to your code, top_performer is an int variable, not an array, so str(top_performer) is the correct way to use it. str(top_performer).count('1') could be what you are looking for; count is a method of strings, not of ints.

AttributeError: 'NoneType' object has no attribute '_inbound_nodes'

I want to implement the
loss function defined here.
I use FCN-VGG16 to obtain a map x and add an activation layer (x is the output of the FCN-VGG16 net). Then I apply a few operations to get the extracted features.
# `x`, `img_input`, `AddMean`, `HighLight` and `co_attention_loss` are defined outside this snippet.
co_map = Activation('sigmoid')(x)
#add mean values
img = Lambda(AddMean, name = 'addmean')(img_input)
#img map multiply
img_o = Lambda(HighLight, name='highlightlayer1')([img, co_map])
# NOTE(review): `1-co_map` is evaluated outside any Keras layer; presumably this
# yields a tensor without layer history, which can surface as
# "'NoneType' object has no attribute '_inbound_nodes'" when the Model is
# built -- TODO confirm.
img_b = Lambda(HighLight, name='highlightlayer2')([img, 1-co_map])
# Frozen ImageNet ResNet50 used as a feature extractor for both images.
extractor = ResNet50(weights = 'imagenet', include_top = False, pooling = 'avg')
extractor.trainable = False
extractor.summary()
o_feature = extractor(img_o)
b_feature = extractor(img_b)
# The loss value itself is used as the model output.
loss = Lambda(co_attention_loss,name='name')([o_feature,b_feature])
model = Model(inputs=img_input, outputs= loss ,name='generator')
The error I get is at this line: model = Model(inputs=img_input, outputs= loss ,name='generator')
I think it is because the way I calculate the loss makes it an unacceptable output for Keras models.
def co_attention_loss(args):
# Sum of -log(p) over all index pairs i < j among the first five entries of
# the two feature tensors unpacked from `args`.
loss = []
o_feature,b_feature = args
# presumably the feature dimension of the extractor output -- TODO confirm
c = 2048
for i in range(5):
for j in range(i,5):
if i!=j:
print("feature shape : "+str(o_feature.shape))
# d1: squared distance between o_feature[i] and o_feature[j], divided by c.
d1 = K.sum(K.pow(o_feature[i] - o_feature[j],2))/c
# d2 / d3: squared distance between o_feature and b_feature at index i / j.
d2 = K.sum(K.pow(o_feature[i] - b_feature[i],2))
d3 = K.sum(K.pow(o_feature[j] - b_feature[j],2))
# NOTE(review): by precedence only d3 is divided by 2*c here;
# (d2 + d3)/(2*c) may have been intended -- confirm against the paper.
d4 = d2 + d3/(2*c)
p = K.exp(-d1)/K.sum([K.exp(-d1),K.exp(-d4)])
loss.append(-K.log(p))
return K.sum(loss)
How can I modify my loss function to make this work?
loss = Lambda(co_attention_loss,name='name')([o_feature,b_feature])
means that the args you pass in are a list, but you unpack args as if it were a tuple:
o_feature,b_feature = args
you could change the loss code to
def co_attention_loss(args):
    """Sum ``-log(p)`` over all index pairs ``i < j`` among the first five features.

    ``args[0]`` and ``args[1]`` are the two feature tensors; both are indexed
    along their first axis with indices 0..4.
    """
    o_feature = args[0]
    b_feature = args[1]
    c = 2048
    terms = []
    for i in range(5):
        # Starting at i + 1 visits exactly the pairs with i != j and j >= i.
        for j in range(i + 1, 5):
            print("feature shape : "+str(o_feature.shape))
            pair_dist = K.sum(K.pow(o_feature[i] - o_feature[j],2))/c
            dist_i = K.sum(K.pow(o_feature[i] - b_feature[i],2))
            dist_j = K.sum(K.pow(o_feature[j] - b_feature[j],2))
            mixed = dist_i + dist_j/(2*c)
            p = K.exp(-pair_dist)/K.sum([K.exp(-pair_dist),K.exp(-mixed)])
            terms.append(-K.log(p))
    return K.sum(terms)
NOTE: this code has not been tested.

DMN neural network with poor validation results -- only 50%

I have this problem with my Neural Network. I'm trying to implement what's called a DMN (Dynamic Memory Network) for the babi data set. A paper about the DMN model can be found here: http://arxiv.org/abs/1506.07285 Another paper about DMNs can be found here: https://yerevann.github.io/2016/02/05/implementing-dynamic-memory-networks/
Here's my problem. btw I'm using PyTorch.
I split the training and testing data into parts for training, testing, and validation. I use 1000 parts for training, 500 parts for testing and 500 parts for validation. I run into a problem. I can train successfully but when I go to the validation step I never get a score above 50% accuracy. With the babi data set it is documented that you should be able to get 100% accuracy with the first test set. (There are 20 test sets in all). I can get 100% accuracy during training, but only 50% in validation. My question to you is what part of the program would be responsible for this kind of behavior? In other words, can you tell me why I'm always getting 50% ?? Thanks for your time. I'm limiting my experiments to the first babi test for now.
I thought I had this all figured out but my problem has cropped up again. I really don't have a clue what it is. Here is a link to the code. If you could take a look I would be most grateful. https://github.com/radiodee1/awesome-chatbot/blob/master/model/babi_iv.py
Some code is included below.
class WrapMemRNN(nn.Module):
# Wrapper module chaining two encoders, two memory RNNs, an attention module
# and an answer module. Relies on names defined outside this class: `hparams`,
# `Encoder`, `MemRNN`, `EpisodicAttn`, `AnswerModule`, `init`, `F`, `math`.
# NOTE(review): indentation was lost in this listing; comments below describe
# individual statements and avoid asserting how loops and branches nest.
# NOTE(review): `bad_token_lst=[]` is a mutable default and is stored on self.
def __init__(self,vocab_size, embed_dim, hidden_size, n_layers, dropout=0.3, do_babi=True, bad_token_lst=[], freeze_embedding=False, embedding=None, print_to_screen=False):
super(WrapMemRNN, self).__init__()
self.hidden_size = hidden_size
self.n_layers = n_layers
self.do_babi = do_babi
self.print_to_screen = print_to_screen
self.bad_token_lst = bad_token_lst
self.embedding = embedding
self.freeze_embedding = freeze_embedding
self.teacher_forcing_ratio = hparams['teacher_forcing_ratio']
# Always 0: every sub-module given `gru_dropout` runs with dropout 0.
gru_dropout = dropout * 0
self.model_1_enc = Encoder(vocab_size, embed_dim, hidden_size, n_layers, dropout=dropout,embedding=embedding, bidirectional=False)
self.model_2_enc = Encoder(vocab_size, embed_dim, hidden_size, n_layers, dropout=gru_dropout, embedding=embedding, bidirectional=False)
self.model_3_mem_a = MemRNN(hidden_size, dropout=gru_dropout)
self.model_3_mem_b = MemRNN(hidden_size, dropout=gru_dropout)
self.model_4_att = EpisodicAttn(hidden_size, dropout=gru_dropout)
self.model_5_ans = AnswerModule(vocab_size, hidden_size,dropout=dropout)
self.input_var = None # for input
self.q_var = None # for question
self.answer_var = None # for answer
self.q_q = None # extra question
self.inp_c = None # extra input
self.inp_c_seq = None
self.all_mem = None
self.last_mem = None # output of mem unit
self.prediction = None # final single word prediction
self.memory_hops = hparams['babi_memory_hops']
self.reset_parameters()
# Embeddings are frozen when requested or whenever an embedding was supplied.
if self.freeze_embedding or self.embedding is not None:
self.new_freeze_embedding()
#self.criterion = nn.CrossEntropyLoss()
pass
# Re-initialise parameters: uniform in [-stdv, stdv], with xavier_normal_
# applied to parameters that have more than one dimension.
def reset_parameters(self):
#print('reset')
stdv = 1.0 / math.sqrt(self.hidden_size)
for weight in self.parameters():
#print('here...')
weight.data.uniform_(-stdv, stdv)
if len(weight.size()) > 1:
init.xavier_normal_(weight)
# Run input, episodic and answer modules. `target_variable` and `criterion`
# are not used in this method. Returns (outputs, None, ans, None).
def forward(self, input_variable, question_variable, target_variable, criterion=None):
self.new_input_module(input_variable, question_variable)
self.new_episodic_module()
outputs, ans = self.new_answer_module_simple()
return outputs, None, ans, None
# Disable gradients for the embedding weights of both encoders.
def new_freeze_embedding(self):
self.model_1_enc.embed.weight.requires_grad = False
self.model_2_enc.embed.weight.requires_grad = False
print('freeze embedding')
pass
# Encode each input with model_1_enc and each question with model_2_enc;
# results are stored in self.inp_c_seq, self.inp_c and self.q_q.
def new_input_module(self, input_variable, question_variable):
prev_h1 = []
for ii in input_variable:
ii = self.prune_tensor(ii, 2)
out1, hidden1 = self.model_1_enc(ii, None)
prev_h1.append(hidden1)
self.inp_c_seq = prev_h1
self.inp_c = prev_h1[-1]
# `prev_h2` is filled but not read again in this method.
prev_h2 = []
for ii in question_variable:
ii = self.prune_tensor(ii, 2)
out2, hidden2 = self.model_2_enc(ii, None)
prev_h2.append(hidden2)
# q_q is taken from `hidden2` of a single question encoding, not from prev_h2.
self.q_q = hidden2[:,-1,:]
return
# For each encoded input sequence, iterate attention + memory updates for
# self.memory_hops hops, then concatenate the memories into self.last_mem.
def new_episodic_module(self):
if True:
mem_list = []
sequences = self.inp_c_seq
for i in range(len(sequences)):
# Memory list starts from a copy of the question encoding.
m_list = [self.q_q.clone()]
#print(sequences[i].size(),'seq')
for iter in range(self.memory_hops):
x = self.new_attention_step(sequences[i], None, m_list[iter], self.q_q)
if self.print_to_screen and not self.training:
print(x,'x -- after', len(x), sequences[i].size())
e, _ = self.new_episode_small_step(sequences[i], x.permute(1,0), None)
assert len(sequences[i].size()) == 3
#print(e.size(),'e')
ee = e[:, 0, -1]#.permute(2,1,0)
_, out = self.model_3_mem_a(ee.unsqueeze(0), self.prune_tensor(m_list[iter], 3))
m_list.append(out)
mem_list.append(m_list[self.memory_hops])
mm_list = torch.cat(mem_list, dim=1)
self.last_mem = mm_list
#print(self.last_mem.size(),'lm')
return None
# Step model_3_mem_b over the positions of `ct`, multiplying its output by the
# gate values in `g`. Returns (h, gru).
def new_episode_small_step(self, ct, g, prev_h):
assert len(ct.size()) == 3
bat, sen, emb = ct.size()
#print(ct.size(),'ct')
#print(sen,'sen', g.size())
last = [prev_h]
ep = []
for iii in range(sen):
# Only index 0 of the first dimension of `ct` is read.
c = ct[0,iii,:].unsqueeze(0)
if prev_h is not None:
prev_h = self.prune_tensor(prev_h, 3)
out, gru = self.model_3_mem_b(c, last[iii] )
last.append(out)
g = g.squeeze(0)
gru = gru.squeeze(0).permute(1,0)
#if not self.training: print(g.size(),'g', iii)
#ggg = g[:, iii]
ggg = g[iii]
h = torch.mul(ggg , gru)# + torch.mul((1 - g[iii]) , prev_h.permute(1,0))
index = -1 #-1 # -2
if last[iii + index] is not None:
#print(last[iii].size(),'last -',ggg.size(), ggg, sen)
# Disabled by `if False`: the (1 - gate) * previous-state term is never added.
if False: h = h + torch.mul((1 - ggg), last[iii + index])
#print(h.size(),'hsize')
# `ep` receives h only when iii is the last position.
if iii == sen - 1 : ep.append(h.unsqueeze(1))
h = torch.cat(ep, dim=1)
#print(h.size(),ep[0].size(),'h',sen, gru.size())
return h, gru
# Build a 7-part feature vector per position (c, mem, q, c*q, c*mem, |c-q|,
# |c-mem|), feed it through model_4_att and apply a sigmoid. `prev_g` is
# not used in this method.
def new_attention_step(self, ct, prev_g, mem, q_q):
q_q = self.prune_tensor(q_q,3)
mem = self.prune_tensor(mem,3)
assert len(ct.size()) == 3
bat, sen, emb = ct.size()
#print(sen,'len sen')
att = []
for iii in range(sen):
c = ct[0,iii,:]
concat_list = [
c.unsqueeze(0),
mem.squeeze(0),
q_q.squeeze(0),
(c * q_q).squeeze(0),
(c * mem).squeeze(0),
(torch.abs(c - q_q) ).squeeze(0),
(torch.abs(c - mem) ).squeeze(0)
]
#for ii in concat_list: print(ii.size())
#print(sen,'sen')
#exit()
#z = F.sigmoid(z)
concat_list = torch.cat(concat_list, dim=1)
#print(concat_list.size(),'cl')
att.append(concat_list)
att = torch.cat(att, dim=0)
#z = torch.cat(att, dim=0)
z = self.model_4_att(att)
z = F.sigmoid(z)
#z = F.softmax(z, dim=1) #F.sigmoid(z)
#print(z.size(),'z')
return z
# Add one leading dimension if `input` has fewer than `size` dims, and apply
# squeeze(0) once if it has more. Each adjustment happens at most once.
def prune_tensor(self, input, size):
if len(input.size()) < size:
input = input.unsqueeze(0)
if len(input.size()) > size:
input = input.squeeze(0)
return input
# Run the answer module on self.last_mem; optionally print debug information.
# Returns ([None], ansx).
def new_answer_module_simple(self):
#outputs
ansx = self.model_5_ans(self.last_mem, None)
#ansx = F.softmax(ansx, dim=0)
if self.print_to_screen:
print(ansx, 'ansx printed')
print(ansx.size(), 'ansx')
vocab, sen = ansx.size()
aa = torch.argmax(ansx, dim=0)
print(aa.size(),'aa')
for i in range(sen):
zz = aa[i]
z = ansx[:, i]
a = torch.argmax(z, dim=0)
print(a.item(), zz.item())
print('----')
#ans = torch.argmax(ansx,dim=1)#[0]
return [None], ansx
pass

Resources