How to print a TensorVariable - Theano
I am a beginner with Theano and am studying it now. I'd like to print the value and shape of a TensorVariable while a theano.function is running. When I used Python's print function, it ran before the Theano function was even compiled, so I learned that a plain print is useless here. Therefore I tried another approach and added the following code to use theano.printing.Print.
(cce is the return value of theano.scan, so maybe it is not a symbolic variable.
Actually, I am confused about the concepts of TensorVariable and shared variable. Is a TensorVariable a sort of shared variable?)
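If it helps, here is the small throwaway snippet I used to check the difference between the two kinds of variables (the variable names are just for this test, not from my project):
import numpy as np
import theano
import theano.tensor as T
a = T.tensor3("a")  # symbolic TensorVariable: it has no value until a compiled function is called with data
w = theano.shared(np.zeros((2, 3), dtype=theano.config.floatX), name="w")  # shared variable: it holds a value right now
print(a)             # only prints the symbolic name "a", not a value
print(w.get_value()) # a shared variable can be inspected directly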
x = theano.tensor.tensor3() # define data type
t_print = theano.printing.Print("cce value is : ")(x)
f = theano.function([x], t_print) # define theano.function
f(cce) # call f (print value of cce)
Then the following error occurred:
TypeError: ('Bad input argument to theano function with name "seq2seq.py : 98" at index 0(0-based)', 'Expected an array-like object, but found a Variable: maybe you are trying to call a function on a (possibly shared) variable instead of a numeric array?')
Could you let me know how to correct this code so it prints the value of cce (a TensorVariable)? Or is it impossible to print the value of a TensorVariable while theano.function is in progress?
Thank you for reading my question.
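For reference, this is a minimal standalone check I wrote (feeding a concrete numpy array instead of a symbolic variable), and it does print the value at run time, so I suspect my mistake is in how I call f above:
import numpy as np
import theano
import theano.tensor as T
x = T.tensor3()
x_printed = theano.printing.Print("x value is : ")(x)
f = theano.function([x], x_printed)
f(np.ones((2, 3, 4), dtype=theano.config.floatX))  # the Print op fires here, while f is running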
ADDED -
Here is my source code. This is the big picture: theano.function() is called in the last line, loss_func is the categorical_crossentropy function, and the last four lines set up the Theano function.
def categorical_crossentropy(y_true, y_pred):
y_pred = T.clip(y_pred, epsilon, 1.0 - epsilon)
y_pred = y_pred.reshape( (-1, voca_dim_g) )
y_true = y_true.reshape( (-1, voca_dim_g) )
cce, updates = theano.scan(
fn=T.nnet.categorical_crossentropy,
sequences=[y_pred,y_true]
)
##### I want to print cce HERE #######
return T.mean(cce)
@staticmethod
def step(
x_t, h_tm1, c_tm1,
Ui, Wi, bi, Uf, Wf, bf,
Uo, Wo, bo, Ug, Wg, bg
):
"""
x_t.shape = (timestep=1, dim)
x_t.shape = (n_samples, timestep=1, dim)
"""
i_t = T.nnet.sigmoid(T.dot(x_t, Ui) + T.dot(h_tm1, Wi) + bi)
f_t = T.nnet.sigmoid(T.dot(x_t, Uf) + T.dot(h_tm1, Wf) + bf)
o_t = T.nnet.sigmoid(T.dot(x_t, Uo) + T.dot(h_tm1, Wo) + bo)
g_t = T.tanh(T.dot(x_t, Ug) + T.dot(h_tm1, Wg) + bg)
c_t = c_tm1 * f_t + g_t * i_t
h_t = T.tanh(c_t) * o_t
return h_t, c_t
#########################################################################################################################
def forward(self, X):
states, updates = theano.scan(
fn=self.step,
sequences=[ X ],
outputs_info=[self.h_tm1, self.c_tm1],
non_sequences=[
self.Ui, self.Wi, self.bi,
self.Uf, self.Wf, self.bf,
self.Uo, self.Wo, self.bo,
self.Ug, self.Wg, self.bg
]
)
updates = [(self.h_tm1, states[0][-1]), (self.c_tm1, states[1][-1])]
return states, updates
#########################################################################################################################
def encode(self, X):
states, updates = self.forward(X)
h_t = states[0][-1]
c_t = states[1][-1]
return h_t, c_t, updates
def decode_step(
self, y_t, h_tm1, c_tm1,
Ui, Wi, bi, Uf, Wf, bf,
Uo, Wo, bo, Ug, Wg, bg,
Wh, bh
):
h_t, c_t = self.step(
y_t, h_tm1, c_tm1,
Ui, Wi, bi, Uf, Wf, bf,
Uo, Wo, bo, Ug, Wg, bg
)
y_t = T.dot(h_t, Wh) + bh
return y_t, h_t, c_t
def decode(self, h_tm1, c_tm1, timesteps):
outputs, updates = theano.scan(
fn=self.decode_step,
outputs_info=[self.y_t, h_tm1, c_tm1],
non_sequences=[
self.Ui, self.Wi, self.bi,
self.Uf, self.Wf, self.bf,
self.Uo, self.Wo, self.bo,
self.Ug, self.Wg, self.bg,
self.Wh, self.bh
],
n_steps=timesteps
)
updates = [
(self.h_tm1, outputs[1][-1]),
(self.c_tm1, outputs[2][-1])
]
return outputs[0], updates
h_tm1, c_tm1, updates_encode = encode(seq_input)
seq_predict, updates_decode = decode(h_tm1, c_tm1, T.shape(seq_target)[0])
loss = loss_func(seq_predict, seq_target)
self._train = theano.function([seq_input, seq_target], loss, updates = updates)
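To be concrete, this is the kind of change I was hoping would let me see cce inside categorical_crossentropy (just my guess at how the Print op should be spliced into the graph; the message string is arbitrary):
def categorical_crossentropy(y_true, y_pred):
    y_pred = T.clip(y_pred, epsilon, 1.0 - epsilon)
    y_pred = y_pred.reshape((-1, voca_dim_g))
    y_true = y_true.reshape((-1, voca_dim_g))
    cce, updates = theano.scan(
        fn=T.nnet.categorical_crossentropy,
        sequences=[y_pred, y_true]
    )
    cce = theano.printing.Print("cce value is : ")(cce)  # wrap the scan output; its value should print when self._train runs
    return T.mean(cce)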
Below is the full source code.
# -*- coding: utf-8 -*-
__modifier__ = "Lee Guk Beom, Lee Jae Sang, Jang Jae Kwang (alphabetical Order)"
import readFile
import numpy as np
import theano
import theano.tensor as T
from six.moves import zip
from theano.compile.debugmode import DebugMode
import nltk
import sys
import os
from nltk.tokenize import sent_tokenize
import codecs
#theano.config.optimizer='fast_compile'
#theano.config.exception_verbosity='high'
#theano.config.compute_test_value = 'warn'
epsilon = 1e-6
dtype = theano.config.floatX
minibatch_size_g = 0
longest_seq_g = 0
voca_dim_g = 0
n_time_step_input_g = 0
n_timestep_target_g = 0
word_to_index_input_g = dict()
word_to_index_targrt_g = dict()
index_to_word_target_g = dict()
#########################################################################################################################
def shared(value, name=None):
return theano.shared(value.astype(dtype), name=name)
#########################################################################################################################
def shared_zeros(shape, name=None):
return shared(value=np.zeros(shape), name=name)
#########################################################################################################################
def shared_zeros_like(x, name=None):
return shared_zeros(shape=x.shape, name=name)
#########################################################################################################################
def init_weights(shape, name=None):
bound = np.sqrt(1.0/shape[1])
w = np.random.uniform(-bound, bound, shape)
return shared(value=w, name=name)
#########################################################################################################################
def adadelta(params, cost, lr=1.0, rho=0.95):
# from https://github.com/fchollet/keras/blob/master/keras/optimizers.py
cost = cost.astype('float32')
grads = T.grad(cost, params)
accus = [shared_zeros_like(p.get_value()) for p in params]
delta_accus = [shared_zeros_like(p.get_value()) for p in params]
updates = []
for p, g, a, d_a in zip(params, grads, accus, delta_accus):
new_a = rho * a + (1.0 - rho) * T.square(g)
updates.append((a, new_a))
update = g * T.sqrt(d_a + epsilon) / T.sqrt(new_a + epsilon)
new_p = p - lr * update
updates.append((p, new_p))
new_d_a = rho * d_a + (1.0 - rho) * T.square(update)
updates.append((d_a, new_d_a))
return updates
#########################################################################################################################
def categorical_crossentropy(y_true, y_pred):
# from https://github.com/fchollet/keras/blob/master/keras/objectives.py
y_pred = T.clip(y_pred, epsilon, 1.0 - epsilon)
# y_true = y_true.reshape( (-1, minibatch_size_g, voca_dim_g) )
'''
cce = T.nnet.categorical_crossentropy(y_pred,y_true)
# only matrix can be calculated
'''
# Y_PRED SOFTMAX
y_pred = y_pred.reshape( (-1, voca_dim_g) )
# y_pred_flat = T.nnet.softmax(y_pred)
y_true = y_true.reshape( (-1, voca_dim_g) )
cce, updates = theano.scan(
fn=T.nnet.categorical_crossentropy,
sequences=[y_pred,y_true]
)
return T.mean(cce)
#########################################################################################################################
def mean_square_error(y_true, y_pred):
return T.mean(T.square(y_pred - y_true))
#########################################################################################################################
class LSTM(object):
def __init__(self, size, dim):
self.size = size
self.dim = dim
shape_b = (minibatch_size_g, size)
shape_U = (dim, size)
shape_W = (size, size)
self.h_tm1 = shared_zeros(shape_b, "h_tm1")
self.c_tm1 = shared_zeros(shape_b, "c_tm1")
self.Ui = init_weights(shape_U, "Ui")
self.Wi = init_weights(shape_W, "Wi")
self.bi = shared_zeros(shape_b, "bi")
self.Uf = init_weights(shape_U, "Uf")
self.Wf = init_weights(shape_W, "Wf")
self.bf = shared_zeros(shape_b, "bf")
self.Uo = init_weights(shape_U, "Uo")
self.Wo = init_weights(shape_W, "Wo")
self.bo = shared_zeros(shape_b, "bo")
self.Ug = init_weights(shape_U, "Ug")
self.Wg = init_weights(shape_W, "Wg")
self.bg = shared_zeros(shape_b, "bg")
self.params = [
self.Ui, self.Wi, self.bi,
self.Uf, self.Wf, self.bf,
self.Uo, self.Wo, self.bo,
self.Ug, self.Wg, self.bg
]
def set_state(self, h, c):
self.h_tm1.set_value(h.get_value())
self.c_tm1.set_value(c.get_value())
def reset_state(self):
self.h_tm1 = shared_zeros((1, self.size), "h_tm1")
self.c_tm1 = shared_zeros((1, self.size), "c_tm1")
#########################################################################################################################
@staticmethod
def step(
x_t, h_tm1, c_tm1,
Ui, Wi, bi, Uf, Wf, bf,
Uo, Wo, bo, Ug, Wg, bg
):
"""
x_t.shape = (timestep=1, dim)
x_t.shape = (n_samples, timestep=1, dim)
"""
i_t = T.nnet.sigmoid(T.dot(x_t, Ui) + T.dot(h_tm1, Wi) + bi)
f_t = T.nnet.sigmoid(T.dot(x_t, Uf) + T.dot(h_tm1, Wf) + bf)
o_t = T.nnet.sigmoid(T.dot(x_t, Uo) + T.dot(h_tm1, Wo) + bo)
g_t = T.tanh(T.dot(x_t, Ug) + T.dot(h_tm1, Wg) + bg)
c_t = c_tm1 * f_t + g_t * i_t
h_t = T.tanh(c_t) * o_t
return h_t, c_t
#########################################################################################################################
def forward(self, X):
states, updates = theano.scan(
fn=self.step,
sequences=[ X ],
outputs_info=[self.h_tm1, self.c_tm1],
non_sequences=[
self.Ui, self.Wi, self.bi,
self.Uf, self.Wf, self.bf,
self.Uo, self.Wo, self.bo,
self.Ug, self.Wg, self.bg
]
)
updates = [(self.h_tm1, states[0][-1]), (self.c_tm1, states[1][-1])]
return states, updates
#########################################################################################################################
class LSTMEncoder(LSTM):
def encode(self, X):
states, updates = self.forward(X)
h_t = states[0][-1]
c_t = states[1][-1]
return h_t, c_t, updates
class LSTMDecoder(LSTM):
def __init__(self, size, dim, h_tm1=None, c_tm1=None):
super(LSTMDecoder, self).__init__(size=size, dim=dim)
self.Wh = init_weights((size, dim), "Wh")
self.bh = shared_zeros((minibatch_size_g, dim), "bh")
self.h_tm1 = h_tm1 or shared_zeros((minibatch_size_g, size), "h_tm1")
self.c_tm1 = c_tm1 or shared_zeros((minibatch_size_g, size), "c_tm1")
self.y_t = shared_zeros((minibatch_size_g, dim), "y_t")
# self.decode_length = theano.shared(decode_length)
self.params.append(self.Wh)
self.params.append(self.bh)
def decode_step(
self, y_t, h_tm1, c_tm1,
Ui, Wi, bi, Uf, Wf, bf,
Uo, Wo, bo, Ug, Wg, bg,
Wh, bh
):
h_t, c_t = self.step(
y_t, h_tm1, c_tm1,
Ui, Wi, bi, Uf, Wf, bf,
Uo, Wo, bo, Ug, Wg, bg
)
y_t = T.dot(h_t, Wh) + bh
return y_t, h_t, c_t
def decode(self, h_tm1, c_tm1, timesteps):
outputs, updates = theano.scan(
fn=self.decode_step,
outputs_info=[self.y_t, h_tm1, c_tm1],
non_sequences=[
self.Ui, self.Wi, self.bi,
self.Uf, self.Wf, self.bf,
self.Uo, self.Wo, self.bo,
self.Ug, self.Wg, self.bg,
self.Wh, self.bh
],
n_steps=timesteps
)
updates = [
(self.h_tm1, outputs[1][-1]),
(self.c_tm1, outputs[2][-1])
]
# return T.flatten(outputs[0], 3), updates
return outputs[0], updates
@staticmethod
def argmax(seq):
seq = T.argmax(seq, axis=2)
return seq
#########################################################################################################################
class Seq2Seq(object):
def __init__(self, size, dim):
self.encoder = LSTMEncoder(size, dim)
self.decoder = LSTMDecoder(size, dim)
self.params = []
self.params += self.encoder.params
self.params += self.decoder.params
self._predict = None
self._train = None
self._test = None
def compile(self, loss_func, optimizer):
seq_input = T.tensor3()
seq_target = T.tensor3()
decode_timesteps = T.iscalar()
h_tm1, c_tm1, updates_encode = self.encoder.encode(seq_input)
seq_predict_flex, updates_decode_flex = self.decoder.decode(h_tm1, c_tm1, decode_timesteps)
seq_argmax = self.decoder.argmax(seq_predict_flex)
seq_predict, updates_decode = self.decoder.decode(h_tm1, c_tm1, T.shape(seq_target)[0])
loss = loss_func(seq_predict, seq_target)
self._predict = theano.function([seq_input, decode_timesteps], seq_argmax, updates=updates_encode+updates_decode_flex)
self._test = theano.function([seq_input, seq_target], loss, updates=updates_encode+updates_decode)
updates = []
updates += updates_encode
updates += updates_decode
updates += optimizer(self.params, loss)
self._train = theano.function([seq_input, seq_target], loss, updates = updates)
def predict(self, seq_input, decode_timesteps):
self.encoder.reset_state()
self.decoder.reset_state()
return self._predict(seq_input, decode_timesteps)
def train(self, seq_input, seq_target):
self.encoder.reset_state()
self.decoder.reset_state()
return self._train(seq_input, seq_target)
def test(self, seq_input, seq_target):
self.encoder.reset_state()
self.decoder.reset_state()
return self._test(seq_input, seq_target)
#########################################################################################################################
def train(x, target):
for mini_batch, target in zip(x,target):
mini_batch = mini_batch.astype(dtype)
target = target.astype(dtype)
print("result of train function(loss or update) :", seq2seq.train(mini_batch, target))
#########################################################################################################################
# save weight information to a pickle file
# information from the Encoder and Decoder classes of the Seq2Seq class
# the Encoder and Decoder classes should have a function that returns the values of the weight variables
# one list contains the elements that hold the weights' information
def save_weight():
None
#########################################################################################################################
def gen_processed_seq(input_sentence):
tokenized_seq = nltk.word_tokenize( input_sentence )
input_sentences = [ None for _ in range(1) ]
input_sentences[0] = tokenized_seq
seq_input = readFile.word_to_idx(input_sentences, word_to_index_input_g )
sorted_seq_input = [ None for _ in range(minibatch_size_g) ]
sorted_seq_input[0] = seq_input[0]
input_len = len(seq_input[0])
for i in range(minibatch_size_g-1):
for j in range(input_len):
sorted_seq_input[i+1] = [-1]
input_finally = []
input_finally.append(sorted_seq_input)
return input_finally
#########################################################################################################################
def gen_one_hot(input_len, input_seq):
one_hot = readFile.seq_to_1hot(n_time_step_input_g, input_seq, "predict", 1, 1)
one_hot[0] = one_hot[0].astype(dtype)
print("one_hot : ", one_hot)
return one_hot
def get_idx(argmax, num_of_word):
idx_list = argmax[ : num_of_word, 0]
return idx_list
#########################################################################################################################
def predict():
input_sentence = raw_input("Input the English Sentence You Want to Translate into Spanish : ")
input_seq = gen_processed_seq(input_sentence)
print("input_seq[0][0] : ",input_seq[0][0])
num_of_word = len(input_seq[0][0])
one_hot = gen_one_hot(n_time_step_input_g, input_seq)
argmax = seq2seq.predict(one_hot[0] , n_time_step_input_g )
print("argmax_fin shape : ", argmax.shape)
print("argmax_fin : ", argmax)
idx_list_np = get_idx(argmax, num_of_word)
idx_list_py = idx_list_np.tolist()
print("index_to_word_target_g : ",index_to_word_target_g)
print("index_to_word_target_g[6] :", index_to_word_target_g[6])
result = readFile.idx_to_word(idx_list_py, index_to_word_target_g)
translated = ""
for elem in result :
translated += elem
translated += " "
print("translated : " , translated)
print("Translation End")
#########################################################################################################################
def gen_global_var(word_to_index_input, word_to_index_targrt, voca_dim, si, st, index_to_word_target):
global word_to_index_input_g
global word_to_index_targrt_g
global voca_dim_g
global minibatch_size_g
global n_time_step_input_g
global n_timestep_target_g
global index_to_word_target_g
word_to_index_input_g = word_to_index_input
word_to_index_targrt_g = word_to_index_targrt
voca_dim_g = voca_dim + 2
minibatch_size_g = si[0].shape[1]
n_time_step_input_g = si[0].shape[0]
n_timestep_target_g = st[0].shape[0]
index_to_word_target_g = index_to_word_target
return
#########################################################################################################################
def menu(si, st):
None
#########################################################################################################################
def gen_object():
return None
#########################################################################################################################
if __name__ == "__main__":
si, st, maxlen_input, minibatch_size, voca_dim, word_to_index_input, word_to_index_targrt, index_to_word_target = readFile.preprocessing()
gen_global_var(word_to_index_input, word_to_index_targrt, voca_dim, si, st, index_to_word_target)
seq2seq = Seq2Seq(n_time_step_input_g, voca_dim_g )
seq2seq.compile(loss_func=categorical_crossentropy, optimizer=adadelta)
while(True):
print("select a menu")
print("1. Training")
print("2. Translate specific English sentence into Spanish.")
val = input("selection : ")
if val == 1:
train(si, st)
elif val == 2:
predict()
And readFile.py is:
import numpy as np
import itertools
import nltk
import sys
import os
from nltk.tokenize import sent_tokenize
import codecs
unknown_token = 'UNKNOWN_TOKEN'
start_token = '_S'
end_token = '__E'
num_of_seq = 0
input_path = "./europarl-v7.es-en.en"
target_path = "./europarl-v7.es-en.es"
minibatch_unit = 100
voca_dim = 3000
SEQ_NUM_LIMIT = 1000
##########################################################################################
def file_tokenize(file):
f = codecs.open( file, "r", "utf-8" )
tokenized_seq = []
sentences = []
total_sentence_num = 0
# sequence tokenize
for i,line in enumerate(f):
print("tokenized Sentence No." , i)
# strip() method to remove the newline character at the end of the input line.
tokenized_seq = nltk.word_tokenize( line.strip() )
tokenized_seq.insert(0, start_token)
tokenized_seq.append(end_token)
sentences.append(tokenized_seq)
total_sentence_num += 1;
if(total_sentence_num == SEQ_NUM_LIMIT):
break
return sentences,total_sentence_num
##########################################################################################
# Count the word frequencies
def cntWordFreq(sentences):
word_freq = nltk.FreqDist(itertools.chain(*sentences))
return word_freq
##########################################################################################
# Get the most common words and build index_to_word and word_to_index vectors
def build_WordToIdx_IdxtoWord(word_freq):
vocab = word_freq.most_common(voca_dim-1)
index_to_word = [x[0] for x in vocab]
index_to_word.append(unknown_token)
word_to_index = dict([(w,i) for i,w in enumerate(index_to_word)])
return index_to_word, word_to_index
##########################################################################################
# change word to index
def word_to_idx(sequences, word_to_index ) :
for i, sent in enumerate(sequences):
sequences[i] = [w if w in word_to_index else unknown_token for w in sent]
sequences[i] = [word_to_index[w] if w in word_to_index else -1 for w in sequences[i]]
return sequences
##########################################################################################
def idx_to_word(seq, index_to_word):
for i, sent in enumerate(seq):
seq[i] = index_to_word[sent]
#seq[i] = [index_to_word[sent] if sent in index_to_word else '?' ]
return seq
##########################################################################################
def sortByLen(seqs_input, seqs_target) :
# check maximum sentence length
max_len_input = 0
max_len_target = 0
for sentence in seqs_input :
tmp = len(sentence)
if max_len_input < tmp:
max_len_input = tmp
for sentence in seqs_target :
tmp = len(sentence)
if max_len_target < tmp:
max_len_target = tmp
seqs_sorted_input = [ [] for _ in range(max_len_input+1) ]
seqs_sorted_target = [ [] for _ in range(max_len_input+1) ]
i = 0
for sentence_input, sentence_target in zip(seqs_input, seqs_target) :
sentence_len = len(sentence_input)
seqs_sorted_input[sentence_len].append(sentence_input)
seqs_sorted_target[sentence_len].append(sentence_target)
i+=1
return seqs_sorted_input, seqs_sorted_target, max_len_input, max_len_target
##########################################################################################
def find_maxlen(sentence_group):
max_seq_len = 0
for seq in sentence_group :
if len(seq) > max_seq_len :
max_seq_len = len(seq)
return max_seq_len
##########################################################################################
def sort_by_timestep(sentence_group):
same_len_seq = np.asarray(sentence_group)
same_len_seq = apply_to_m1(same_len_seq)
sorted_seq = same_len_seq.transpose()
return sorted_seq
##########################################################################################
def seq_to_1hot(max_len, sorted_sentences, type, minibatch_unit, num_of_seq):
one_hot = [None for _ in range( len(sorted_sentences) )]
for i, sentence_group in enumerate(sorted_sentences):
if sentence_group and len(sentence_group[0]) != 0 :
max_seq_len = find_maxlen(sentence_group)
row = max_seq_len * minibatch_unit
one_hot[i] = np.zeros( (row, voca_dim + 2) )
time_step_seq = sort_by_timestep(sentence_group)
j = 0
for word_idx in np.nditer( time_step_seq ) :
if word_idx != -1:
one_hot[i][j][word_idx] = 1
j+=1
one_hot[i] = np.reshape(one_hot[i], ( max_seq_len, -1, voca_dim+2) )
return one_hot
##########################################################################################
def apply_to_m1(lst, dtype=np.int64):
inner_max_len = max(map(len, lst))
result = np.zeros( [len(lst), inner_max_len], dtype )
result[:] = -1
for i, row in enumerate(lst):
for j, val in enumerate(row):
result[i][j] = val
return result
##########################################################################################
def seq_group_by_mini_batch_size(minibatch_unit, sorted_seq, num_of_seq):
idx = 0
cnt = 0
minibatch_seq = [ [] for _ in range( (num_of_seq/minibatch_unit)+1) ]
for seqs in sorted_seq :
if seqs :
for seq in seqs :
if seq:
minibatch_seq[idx].append(seq)
cnt+=1
if minibatch_unit == cnt:
cnt = 0
idx+= 1
for i, seq in enumerate (minibatch_seq):
if seq == []:
minibatch_seq = minibatch_seq[: i- 1]
break
return minibatch_seq
##########################################################################################
def preprocessing():
global num_of_seq
global minibatch_unit
global input_path
global target_path
print("Start Preprocessing")
sentences_input, total_sentence_num = file_tokenize(input_path)
sentences_target, total_sentence_num_target = file_tokenize(target_path)
print("FINISHED : file_tokenize ")
word_freq_input = cntWordFreq(sentences_input)
word_freq_target = cntWordFreq(sentences_target)
print("FINISHED : cntWordFreq ")
index_to_word_input, word_to_index_input = build_WordToIdx_IdxtoWord(word_freq_input)
index_to_word_target, word_to_index_targrt = build_WordToIdx_IdxtoWord(word_freq_target)
print("FINISHED : build_WordToIdx_IdxtoWord ")
seqs_input = word_to_idx(sentences_input, word_to_index_input)
seqs_target = word_to_idx(sentences_target, word_to_index_targrt)
print("FINISHED : word_to_idx ")
seqs_sorted_input, seqs_sorted_target, maxlen_input, maxlen_target = sortByLen(seqs_input, seqs_target)
print("FINISHED : sortByLen ")
for seqs in seqs_input:
if seqs:
for seq in seqs:
if seq:
num_of_seq+=1
seq_by_mini_batch_size_input = seq_group_by_mini_batch_size(minibatch_unit, seqs_sorted_input, num_of_seq)
seq_by_mini_batch_size_target = seq_group_by_mini_batch_size(minibatch_unit, seqs_sorted_target, num_of_seq)
print("FINISHED : seq_group_by_mini_batch_size ")
_1hot_input = seq_to_1hot(maxlen_input, seq_by_mini_batch_size_input, "input",minibatch_unit, num_of_seq)
_1hot_target = seq_to_1hot(maxlen_target, seq_by_mini_batch_size_target, "target",minibatch_unit, num_of_seq)
print("FINISHED : seq_to_1hot ")
if minibatch_unit > total_sentence_num:
minibatch_unit = total_sentence_num
print("exit preprocessing")
return _1hot_input, _1hot_target, maxlen_input, minibatch_unit, voca_dim, word_to_index_input, word_to_index_targrt, index_to_word_target
Related
RuntimeError: mat1 and mat2 shapes cannot be multiplied (256x726 and 1000x1000)
I'm trying to measure the latent space clustering but the error raised. class AutoEncoder(nn.Module): def __init__(self, input_dim1, input_dim2, hidden_dims, agg, sep_decode): super(AutoEncoder, self).__init__() self.agg = agg self.sep_decode = sep_decode print("hidden_dims:", hidden_dims) self.encoder_layers = [] self.encoder2_layers = [] dims = [[input_dim1, input_dim2]] + hidden_dims for i in range(len(dims) - 1): if i == 0: layer = nn.Sequential(nn.Linear(dims[i][0], dims[i+1]), nn.ReLU()) layer2 = nn.Sequential(nn.Linear(dims[i][1], dims[i+1]), nn.ReLU()) elif i != 0 and i < len(dims) - 2: layer = nn.Sequential(nn.Linear(dims[i], dims[i+1]), nn.ReLU()) layer2 = nn.Sequential(nn.Linear(dims[i], dims[i+1]), nn.ReLU()) else: layer = nn.Linear(dims[i], dims[i+1]) layer2 = nn.Linear(dims[i], dims[i+1]) self.encoder_layers.append(layer) self.encoder2_layers.append(layer2) self.encoder = nn.Sequential(*self.encoder_layers) self.encoder2 = nn.Sequential(*self.encoder2_layers) self.decoder_layers = [] self.decoder2_layers = [] hidden_dims.reverse() dims = hidden_dims + [[input_dim1, input_dim2]] if self.agg == "concat" and not self.sep_decode: dims[0] = 2 * dims[0] for i in range(len(dims) - 1): if i < len(dims) - 2: layer = nn.Sequential(nn.Linear(dims[i], dims[i+1]), nn.ReLU()) layer2 = nn.Sequential(nn.Linear(dims[i], dims[i+1]), nn.ReLU()) else: layer = nn.Linear(dims[i], dims[i+1][0]) layer2 = nn.Linear(dims[i], dims[i+1][1]) self.decoder_layers.append(layer) self.decoder2_layers.append(layer2) self.decoder = nn.Sequential(*self.decoder_layers) self.decoder2 = nn.Sequential(*self.decoder2_layers) def forward(self, x1, x2): z1 = self.encoder(x1) z2 = self.encoder2(x2) if self.agg == "max": z = torch.max(z1, z2) elif self.agg == "multi": z = z1 * z2 elif self.agg == "sum": z = z1 + z2 elif self.agg == "concat": z = torch.cat([z1, z2], dim=1) if self.sep_decode: x_bar1 = self.decoder(z1) x_bar1 = F.normalize(x_bar1, dim=-1) x_bar2 = self.decoder2(z2) x_bar2 = F.normalize(x_bar2, dim=-1) else: x_bar1 = self.decoder(z) x_bar1 = F.normalize(x_bar1, dim=-1) x_bar2 = self.decoder2(z) x_bar2 = F.normalize(x_bar2, dim=-1) return x_bar1, x_bar2, z class TopicCluster(nn.Module): def __init__(self, args): super(TopicCluster, self).__init__() self.alpha = 1.0 self.dataset_path = args.dataset_path self.args = args self.device = args.device self.temperature = args.temperature self.distribution = args.distribution self.agg_method = args.agg_method self.sep_decode = (args.sep_decode == 1) input_dim1 = args.input_dim1 input_dim2 = args.input_dim2 hidden_dims = eval(args.hidden_dims) self.model = AutoEncoder(input_dim1, input_dim2, hidden_dims, self.agg_method, self.sep_decode) if self.agg_method == "concat": self.topic_emb = Parameter(torch.Tensor(args.n_clusters, 2*hidden_dims[-1])) else: self.topic_emb = Parameter(torch.Tensor(args.n_clusters, hidden_dims[-1])) torch.nn.init.xavier_normal_(self.topic_emb.data) def pretrain(self, input_data, pretrain_epoch=200): pretrained_path = os.path.join(self.dataset_path, f"pretrained_{args.suffix}.pt") if os.path.exists(pretrained_path) and self.args.load_pretrain: # load pretrain weights print(f"loading pretrained model from {pretrained_path}") self.model.load_state_dict(torch.load(pretrained_path)) else: train_loader = DataLoader(input_data, batch_size=self.args.batch_size, shuffle=True) optimizer = Adam(self.model.parameters(), lr=self.args.lr) for epoch in range(pretrain_epoch): total_loss = 0 for batch_idx, (x1, x2, _, weight) in enumerate(train_loader): x1 = 
x1.to(self.device) x2 = x2.to(self.device) weight = weight.to(self.device) optimizer.zero_grad() x_bar1, x_bar2, z = self.model(x1, x2) loss = cosine_dist(x_bar1, x1) + cosine_dist(x_bar2, x2) #, weight) total_loss += loss.item() loss.backward() optimizer.step() print(f"epoch {epoch}: loss = {total_loss / (batch_idx+1):.4f}") torch.save(self.model.state_dict(), pretrained_path) print(f"model saved to {pretrained_path}") def cluster_assign(self, z): if self.distribution == 'student': p = 1.0 / (1.0 + torch.sum( torch.pow(z.unsqueeze(1) - self.topic_emb, 2), 2) / self.alpha) p = p.pow((self.alpha + 1.0) / 2.0) p = (p.t() / torch.sum(p, 1)).t() else: self.topic_emb.data = F.normalize(self.topic_emb.data, dim=-1) z = F.normalize(z, dim=-1) sim = torch.matmul(z, self.topic_emb.t()) / self.temperature p = F.softmax(sim, dim=-1) return p def forward(self, x1, x2): x_bar1, x_bar2, z = self.model(x1, x2) p = self.cluster_assign(z) return x_bar1, x_bar2, z, p def target_distribution(self, x1, x2, freq, method='all', top_num=0): _, _, z = self.model(x1, x2) p = self.cluster_assign(z).detach() if method == 'all': q = p**2 / (p * freq.unsqueeze(-1)).sum(dim=0) q = (q.t() / q.sum(dim=1)).t() elif method == 'top': assert top_num > 0 q = p.clone() sim = torch.matmul(self.topic_emb, z.t()) _, selected_idx = sim.topk(k=top_num, dim=-1) for i, topic_idx in enumerate(selected_idx): q[topic_idx] = 0 q[topic_idx, i] = 1 return p, q def cosine_dist(x_bar, x, weight=None): if weight is None: weight = torch.ones(x.size(0), device=x.device) cos_sim = (x_bar * x).sum(-1) cos_dist = 1 - cos_sim cos_dist = (cos_dist * weight).sum() / weight.sum() return cos_dist def train(args, emb_dict): # ipdb.set_trace() inv_vocab = {k: " ".join(v) for k, v in emb_dict["inv_vocab"].items()} vocab = {" ".join(k):v for k, v in emb_dict["vocab"].items()} print(f"Vocab size: {len(vocab)}") embs = F.normalize(torch.tensor(emb_dict["vs_emb"]), dim=-1) embs2 = F.normalize(torch.tensor(emb_dict["oh_emb"]), dim=-1) freq = np.array(emb_dict["tuple_freq"]) if not args.use_freq: freq = np.ones_like(freq) input_data = TensorDataset(embs, embs2, torch.arange(embs.size(0)), torch.tensor(freq)) topic_cluster = TopicCluster(args).to(args.device) topic_cluster.pretrain(input_data, args.pretrain_epoch) train_loader = DataLoader(input_data, batch_size=args.batch_size, shuffle=False) optimizer = Adam(topic_cluster.parameters(), lr=args.lr) # topic embedding initialization embs = embs.to(args.device) embs2 = embs2.to(args.device) x_bar1, x_bar2, z = topic_cluster.model(embs, embs2) z = F.normalize(z, dim=-1) print(f"Running K-Means for initialization") kmeans = KMeans(n_clusters=args.n_clusters, n_init=5) if args.use_freq: y_pred = kmeans.fit_predict(z.data.cpu().numpy(), sample_weight=freq) else: y_pred = kmeans.fit_predict(z.data.cpu().numpy()) print(f"Finish K-Means") freq = torch.tensor(freq).to(args.device) y_pred_last = y_pred topic_cluster.topic_emb.data = torch.tensor(kmeans.cluster_centers_).to(args.device) topic_cluster.train() i = 0 for epoch in range(50): if epoch % 5 == 0: _, _, z, p = topic_cluster(embs, embs2) z = F.normalize(z, dim=-1) topic_cluster.topic_emb.data = F.normalize(topic_cluster.topic_emb.data, dim=-1) if not os.path.exists(os.path.join(args.dataset_path, f"clusters_{args.suffix}")): os.makedirs(os.path.join(args.dataset_path, f"clusters_{args.suffix}")) embed_save_path = os.path.join(args.dataset_path, f"clusters_{args.suffix}/embed_{epoch}.pt") torch.save({ "inv_vocab": emb_dict['inv_vocab'], "embed": 
z.detach().cpu().numpy(), "topic_embed": topic_cluster.topic_emb.detach().cpu().numpy(), }, embed_save_path) f = open(os.path.join(args.dataset_path, f"clusters_{args.suffix}/{epoch}.txt"), 'w') pred_cluster = p.argmax(-1) result_strings = [] for j in range(args.n_clusters): if args.sort_method == 'discriminative': word_idx = torch.arange(embs.size(0))[pred_cluster == j] sorted_idx = torch.argsort(p[pred_cluster == j][:, j], descending=True) word_idx = word_idx[sorted_idx] else: sim = torch.matmul(topic_cluster.topic_emb[j], z.t()) _, word_idx = sim.topk(k=30, dim=-1) word_cluster = [] freq_sum = 0 for idx in word_idx: freq_sum += freq[idx].item() if inv_vocab[idx.item()] not in word_cluster: word_cluster.append(inv_vocab[idx.item()]) if len(word_cluster) >= 10: break result_strings.append((freq_sum, f"Topic {j} ({freq_sum}): " + ', '.join(word_cluster)+'\n')) result_strings = sorted(result_strings, key=lambda x: x[0], reverse=True) for result_string in result_strings: f.write(result_string[1]) for x1, x2, idx, weight in train_loader: if i % args.update_interval == 0: p, q = topic_cluster.target_distribution(embs, embs2, freq.clone().fill_(1), method='all', top_num=epoch+1) y_pred = p.cpu().numpy().argmax(1) delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0] y_pred_last = y_pred if i > 0 and delta_label < args.tol: print(f'delta_label {delta_label:.4f} < tol ({args.tol})') print('Reached tolerance threshold. Stopping training.') return None i += 1 x1 = x1.to(args.device) x2 = x2.to(args.device) idx = idx.to(args.device) weight = weight.to(args.device) x_bar1, x_bar2, _, p = topic_cluster(x1, x2) reconstr_loss = cosine_dist(x_bar1, x1) + cosine_dist(x_bar2, x2) #, weight) kl_loss = F.kl_div(p.log(), q[idx], reduction='none').sum(-1) kl_loss = (kl_loss * weight).sum() / weight.sum() loss = args.gamma * kl_loss + reconstr_loss if i % args.update_interval == 0: print(f"KL loss: {kl_loss}; Reconstruction loss: {reconstr_loss}") optimizer.zero_grad() loss.backward() optimizer.step() return None if __name__ == "__main__": # CUDA_VISIBLE_DEVICES=0 python3 latent_space_clustering.py --dataset_path ./pandemic --input_emb_name po_tuple_features_all_svos.pk parser = argparse.ArgumentParser( description='train', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--dataset_path', type=str) parser.add_argument('--input_emb_name', type=str) parser.add_argument('--lr', type=float, default=5e-4) parser.add_argument('--n_clusters', default=30, type=int) parser.add_argument('--input_dim1', default=1000, type=int) parser.add_argument('--input_dim2', default=1000, type=int) parser.add_argument('--agg_method', default="multi", choices=["sum", "multi", "concat", "attend"], type=str) parser.add_argument('--sep_decode', default=0, choices=[0, 1], type=int) parser.add_argument('--pretrain_epoch', default=100, type=int) parser.add_argument('--load_pretrain', default=False, action='store_true') parser.add_argument('--temperature', default=0.1, type=float) parser.add_argument('--sort_method', default='generative', choices=['generative', 'discriminative']) parser.add_argument('--distribution', default='softmax', choices=['softmax', 'student']) parser.add_argument('--batch_size', default=256, type=int) parser.add_argument('--use_freq', default=False, action='store_true') parser.add_argument('--hidden_dims', default='[1000, 2000, 1000, 100]', type=str) parser.add_argument('--suffix', type=str, default='') parser.add_argument('--gamma', default=5, type=float, 
help='weight of clustering loss') parser.add_argument('--update_interval', default=100, type=int) parser.add_argument('--tol', default=0.001, type=float) args = parser.parse_args() args.cuda = torch.cuda.is_available() print("use cuda: {}".format(args.cuda)) args.device = torch.device("cuda" if args.cuda else "cpu") print(args) with open(os.path.join(args.dataset_path, args.input_emb_name), "rb") as fin: emb_dict = pk.load(fin) candidate_idx = train(args, emb_dict) print(candidate_idx) The error I'm getting is: RuntimeError: mat1 and mat2 shapes cannot be multiplied (256x726 and 1000x1000). I cannot figure out which part is the problem. Please help me.. Thank you so much for the images runtime error like enter image description here
Building a dataset with dataloader pytorch getting error cannot import name 'read_data_sets'
Loading data into dataset using pytorch dataloader. Getting error cannot import name 'read_data_sets' Tried searaching for results from similar issues. If there is confusion about file instead of module and it can't find read_data_sets in your file How do i change to fix? class MRDataset(data.Dataset): def __init__(self, root_dir, task, plane, train=True, transform=None, weights=None): super().__init__() self.task = task self.plane = plane self.root_dir = root_dir self.train = train if self.train: self.folder_path = self.root_dir + 'train/{0}/'.format(plane) self.records = pd.read_csv( self.root_dir + 'train-{0}.csv'.format(task), header=None, names=['id', 'label']) else: transform = None self.folder_path = self.root_dir + 'valid/{0}/'.format(plane) self.records = pd.read_csv( self.root_dir + 'valid-{0}.csv'.format(task), header=None, names=['id', 'label']) self.records['id'] = self.records['id'].map( lambda i: '0' * (4 - len(str(i))) + str(i)) self.paths = [self.folder_path + filename + '.npy' for filename in self.records['id'].tolist()] self.labels = self.records['label'].tolist() self.transform = transform if weights is None: pos = np.sum(self.labels) neg = len(self.labels) - pos self.weights = torch.FloatTensor([1, neg / pos]) else: self.weights = torch.FloatTensor(weights) def __len__(self): return len(self.paths) def __getitem__(self, index): array = np.load(self.paths[index]) label = self.labels[index] if label == 1: label = torch.FloatTensor([[0, 1]]) elif label == 0: label = torch.FloatTensor([[1, 0]]) if self.transform: array = self.transform(array) else: array = np.stack((array,)*3, axis=1) array = torch.FloatTensor(array) # if label.item() == 1: # weight = np.array([self.weights[1]]) # weight = torch.FloatTensor(weight) # else: # weight = np.array([self.weights[0]]) # weight = torch.FloatTensor(weight) return array, label, self.weights There is a model and train class to run this. Arguments specified in train. Running the train should load data and run through model
How can I implement custom GRU in keras
I am trying to implement a custom GRU layer in keras 2.1.2-py36_0 where i want to use the following gate equations: zt = act ( Wz.ht-1 + xt ) rt = act ( Wr.ht-1 + xt ) ht = act ( Wh.(r * ht-1) + xt ) instead of keras current implementation of gates as: zt = act ( Wz.ht-1 + Uzxt ) rt = act ( Wr.ht-1 + Urxt ) ht = act ( Wh.(r * ht-1) + Uhxt ) Customizing GRU cell for the data class CGRUCell(Layer): def __init__(self, units, activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0., recurrent_dropout=0., implementation=1, **kwargs): super(CGRUCell, self).__init__(**kwargs) self.units = units self.activation = activations.get(activation) self.recurrent_activation = activations.get(recurrent_activation) self.use_bias = use_bias self.kernel_initializer = initializers.get(kernel_initializer) self.recurrent_initializer = initializers.get(recurrent_initializer) self.bias_initializer = initializers.get(bias_initializer) self.kernel_regularizer = regularizers.get(kernel_regularizer) self.recurrent_regularizer = regularizers.get(recurrent_regularizer) self.bias_regularizer = regularizers.get(bias_regularizer) self.kernel_constraint = constraints.get(kernel_constraint) self.recurrent_constraint = constraints.get(recurrent_constraint) self.bias_constraint = constraints.get(bias_constraint) self.dropout = min(1., max(0., dropout)) self.recurrent_dropout = min(1., max(0., recurrent_dropout)) self.implementation = implementation self.state_size = self.units self._dropout_mask = None self._recurrent_dropout_mask = None def build(self, input_shape): input_dim = input_shape[-1] #self.kernel = self.add_weight(shape=(input_dim, self.units * 3), # name='kernel', # initializer=self.kernel_initializer, # regularizer=self.kernel_regularizer, # constraint=self.kernel_constraint) self.recurrent_kernel = self.add_weight( shape=(self.units, self.units * 3), name='recurrent_kernel', initializer=self.recurrent_initializer, regularizer=self.recurrent_regularizer, constraint=self.recurrent_constraint) if self.use_bias: self.bias = self.add_weight(shape=(self.units * 3,), name='bias', initializer=self.bias_initializer, regularizer=self.bias_regularizer, constraint=self.bias_constraint) else: self.bias = None #self.kernel_z = self.kernel[:, :self.units] self.recurrent_kernel_z = self.recurrent_kernel[:, :self.units] #self.kernel_r = self.kernel[:, self.units: self.units * 2] self.recurrent_kernel_r = self.recurrent_kernel[:, self.units: self.units * 2] #self.kernel_h = self.kernel[:, self.units * 2:] self.recurrent_kernel_h = self.recurrent_kernel[:, self.units * 2:] if self.use_bias: self.bias_z = self.bias[:self.units] self.bias_r = self.bias[self.units: self.units * 2] self.bias_h = self.bias[self.units * 2:] else: self.bias_z = None self.bias_r = None self.bias_h = None self.built = True def call(self, inputs, states, training=None): h_tm1 = states[0] # previous memory if 0 < self.dropout < 1 and self._dropout_mask is None: self._dropout_mask = _generate_dropout_mask( _generate_dropout_ones(inputs, K.shape(inputs)[-1]), self.dropout, training=training, count=3) if (0 < self.recurrent_dropout < 1 and self._recurrent_dropout_mask is None): self._recurrent_dropout_mask = _generate_dropout_mask( _generate_dropout_ones(inputs, self.units), 
self.recurrent_dropout, training=training, count=3) # dropout matrices for input units dp_mask = self._dropout_mask # dropout matrices for recurrent units rec_dp_mask = self._recurrent_dropout_mask if self.implementation == 1: if 0. < self.dropout < 1.: inputs_z = inputs * dp_mask[0] inputs_r = inputs * dp_mask[1] inputs_h = inputs * dp_mask[2] else: inputs_z = inputs inputs_r = inputs inputs_h = inputs print(inputs) # Custom implementation of inputs which are already embedding parameters #x_z = K.dot(inputs_z, self.kernel_z) #x_r = K.dot(inputs_r, self.kernel_r) #x_h = K.dot(inputs_h, self.kernel_h) #if self.use_bias: # x_z = K.bias_add(x_z, self.bias_z) # x_r = K.bias_add(x_r, self.bias_r) # x_h = K.bias_add(x_h, self.bias_h) x_z = inputs_z x_r = inputs_r x_h = inputs_h if 0. < self.recurrent_dropout < 1.: h_tm1_z = h_tm1 * rec_dp_mask[0] h_tm1_r = h_tm1 * rec_dp_mask[1] h_tm1_h = h_tm1 * rec_dp_mask[2] else: h_tm1_z = h_tm1 h_tm1_r = h_tm1 h_tm1_h = h_tm1 z = self.recurrent_activation(x_z + K.dot(h_tm1_z, self.recurrent_kernel_z)) r = self.recurrent_activation(x_r + K.dot(h_tm1_r, self.recurrent_kernel_r)) hh = self.activation(x_h + K.dot(r * h_tm1_h, self.recurrent_kernel_h)) else: if 0. < self.dropout < 1.: inputs *= dp_mask[0] # Custom implementation of inputs which are already embedding parameters #matrix_x = K.dot(inputs, self.kernel) #if self.use_bias: # matrix_x = K.bias_add(matrix_x, self.bias) matrix_x = inputs if 0. < self.recurrent_dropout < 1.: h_tm1 *= rec_dp_mask[0] matrix_inner = K.dot(h_tm1, self.recurrent_kernel[:, :2 * self.units]) x_z = matrix_x[:, :self.units] x_r = matrix_x[:, self.units: 2 * self.units] recurrent_z = matrix_inner[:, :self.units] recurrent_r = matrix_inner[:, self.units: 2 * self.units] z = self.recurrent_activation(x_z + recurrent_z) r = self.recurrent_activation(x_r + recurrent_r) x_h = matrix_x[:, 2 * self.units:] recurrent_h = K.dot(r * h_tm1, self.recurrent_kernel[:, 2 * self.units:]) hh = self.activation(x_h + recurrent_h) h = z * h_tm1 + (1 - z) * hh if 0 < self.dropout + self.recurrent_dropout: if training is None: h._uses_learning_phase = True return h, [h] def get_config(self): config = {'units': self.units, 'activation': activations.serialize(self.activation), 'recurrent_activation': activations.serialize(self.recurrent_activation), 'use_bias': self.use_bias, 'kernel_initializer': initializers.serialize(self.kernel_initializer), 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), 'bias_initializer': initializers.serialize(self.bias_initializer), 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 'kernel_constraint': constraints.serialize(self.kernel_constraint), 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), 'bias_constraint': constraints.serialize(self.bias_constraint), 'dropout': self.dropout, 'recurrent_dropout': self.recurrent_dropout, 'implementation': self.implementation} base_config = super(CGRUCell, self).get_config() return dict(list(base_config.items()) + list(config.items())) class CGRU(RNN): #interfaces.legacy_recurrent_support def __init__(self, units, activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, 
activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0., recurrent_dropout=0., implementation=1, return_sequences=False, return_state=False, go_backwards=False, stateful=False, unroll=False, **kwargs): if implementation == 0: warnings.warn('`implementation=0` has been deprecated, ' 'and now defaults to `implementation=1`.' 'Please update your layer call.') cell = CGRUCell(units, activation=activation, recurrent_activation=recurrent_activation, use_bias=use_bias, kernel_initializer=kernel_initializer, recurrent_initializer=recurrent_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, recurrent_regularizer=recurrent_regularizer, bias_regularizer=bias_regularizer, kernel_constraint=kernel_constraint, recurrent_constraint=recurrent_constraint, bias_constraint=bias_constraint, dropout=dropout, recurrent_dropout=recurrent_dropout, implementation=implementation) super(CGRU, self).__init__(cell, return_sequences=return_sequences, return_state=return_state, go_backwards=go_backwards, stateful=stateful, unroll=unroll, **kwargs) self.activity_regularizer = regularizers.get(activity_regularizer) def call(self, inputs, mask=None, training=None, initial_state=None): self.cell._dropout_mask = None self.cell._recurrent_dropout_mask = None return super(CGRU, self).call(inputs, mask=mask, training=training, initial_state=initial_state) #property def units(self): return self.cell.units #property def activation(self): return self.cell.activation #property def recurrent_activation(self): return self.cell.recurrent_activation #property def use_bias(self): return self.cell.use_bias #property def kernel_initializer(self): return self.cell.kernel_initializer #property def recurrent_initializer(self): return self.cell.recurrent_initializer #property def bias_initializer(self): return self.cell.bias_initializer #property def kernel_regularizer(self): return self.cell.kernel_regularizer #property def recurrent_regularizer(self): return self.cell.recurrent_regularizer #property def bias_regularizer(self): return self.cell.bias_regularizer #property def kernel_constraint(self): return self.cell.kernel_constraint #property def recurrent_constraint(self): return self.cell.recurrent_constraint #property def bias_constraint(self): return self.cell.bias_constraint #property def dropout(self): return self.cell.dropout #property def recurrent_dropout(self): return self.cell.recurrent_dropout #property def implementation(self): return self.cell.implementation def get_config(self): config = {'units': self.units, 'activation': activations.serialize(self.activation), 'recurrent_activation': activations.serialize(self.recurrent_activation), 'use_bias': self.use_bias, 'kernel_initializer': initializers.serialize(self.kernel_initializer), 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), 'bias_initializer': initializers.serialize(self.bias_initializer), 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 'activity_regularizer': regularizers.serialize(self.activity_regularizer), 'kernel_constraint': constraints.serialize(self.kernel_constraint), 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), 'bias_constraint': constraints.serialize(self.bias_constraint), 'dropout': self.dropout, 'recurrent_dropout': self.recurrent_dropout, 
'implementation': self.implementation} base_config = super(CGRU, self).get_config() del base_config['cell'] return dict(list(base_config.items()) + list(config.items())) #classmethod def from_config(cls, config): if 'implementation' in config and config['implementation'] == 0: config['implementation'] = 1 return cls(**config) Model Implementation is as follows: user_input = Input(batch_shape=(batch_size,chunk_size,), dtype='int32', name='user_inputs') user_emb = Embedding(input_dim=num_users+1, output_dim=out_dim, input_length=chunk_size)(user_input) item_input = Input(batch_shape=(batch_size,chunk_size,), dtype='int32', name='item_inputs') item_emb = Embedding(input_dim=num_items+1, output_dim=out_dim, input_length=chunk_size)(item_input) inputs = keras.layers.add([user_emb, item_emb]) gru_args = { "units":hidden_size, "return_sequences":True, #"return_state":True, "stateful":True, "unroll":False } gru = CGRU(**gru_args)(inputs) outputs = Dense(num_items+1, activation='softmax')(gru) [recc_model = Model(inputs=\[user_input,item_input\], outputs=outputs) recc_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=\[metrics.cate][1]gorical_accuracy]) #metrics=[metrics.sparse_categorical_accuracy]) But on running the code I am getting the following error which seems is due to gradients are getting computed to None: ValueError: Tried to convert 'x' to a tensor and failed. Error: None values not supported. Find the complete error here: https://pastebin.com/n9UzCRiP
The error occurs because the bias weights are added to the model but not used anywhere. When you call self.add_weight(...), you have to make sure these weights are used somewhere in your model. Otherwise, since these weights are not connected to the loss tensor, TF cannot compute the gradient and an error will be raised. If you don't need the bias weights, you can either remove the add_weight lines, or set use_bias=False in your cell. Also, I think you don't need to re-implement a CGRU layer to use a custom cell. Just wrap your custom cell with the built-in RNN layer should work. gru = RNN(CGRUCell(hidden_size, use_bias=False), return_sequences=True, stateful=True, unroll=False)(inputs)
TypeError: __init__() takes from 1 to 4 positional arguments but 9 were given
when l run the following program l got this error : originDataset = dataset.lmdbDataset(originPath, 'abc', *args) TypeError: __init__() takes from 1 to 4 positional arguments but 9 were given This error is relate to the second code source l presented below. it's strange because l don't have 9 argument. what's wrong with my code ? import sys origin_path = sys.path sys.path.append("..") import dataset sys.path = origin_path import lmdb def writeCache(env, cache): with env.begin(write=True) as txn: for k, v in cache.iteritems(): txn.put(k, v) def convert(originPath, outputPath): args = [0] * 6 originDataset = dataset.lmdbDataset(originPath, 'abc', *args) print('Origin dataset has %d samples' % len(originDataset)) labelStrList = [] for i in range(len(originDataset)): label = originDataset.getLabel(i + 1) labelStrList.append(label) if i % 10000 == 0: print(i) lengthList = [len(s) for s in labelStrList] items = zip(lengthList, range(len(labelStrList))) items.sort(key=lambda item: item[0]) env = lmdb.open(outputPath, map_size=1099511627776) cnt = 1 cache = {} nSamples = len(items) for i in range(nSamples): imageKey = 'image-%09d' % cnt labelKey = 'label-%09d' % cnt origin_i = items[i][1] img, label = originDataset[origin_i + 1] cache[labelKey] = label cache[imageKey] = img if cnt % 1000 == 0 or cnt == nSamples: writeCache(env, cache) cache = {} print('Written %d / %d' % (cnt, nSamples)) cnt += 1 nSamples = cnt - 1 cache['num-samples'] = str(nSamples) writeCache(env, cache) print('Convert dataset with %d samples' % nSamples) if __name__ == "__main__": convert('/share/datasets/scene_text/Synth90k/synth90k-val-lmdb', '/share/datasets/scene_text/Synth90k/synth90k-val-ordered-lmdb') convert('/share/datasets/scene_text/Synth90k/synth90k-train-lmdb', '/share/datasets/scene_text/Synth90k/synth90k-train-ordered-lmdb') which calls the following program : #!/usr/bin/python # encoding: utf-8 import random import torch from torch.utils.data import Dataset from torch.utils.data import sampler import torchvision.transforms as transforms import lmdb import six import sys from PIL import Image import numpy as np class lmdbDataset(Dataset): def __init__(self, root=None, transform=None, target_transform=None): self.env = lmdb.open( root, max_readers=1, readonly=True, lock=False, readahead=False, meminit=False) if not self.env: print('cannot creat lmdb from %s' % (root)) sys.exit(0) with self.env.begin(write=False) as txn: nSamples = int(txn.get('num-samples')) self.nSamples = nSamples self.transform = transform self.target_transform = target_transform def __len__(self): return self.nSamples def __getitem__(self, index): assert index <= len(self), 'index range error' index += 1 with self.env.begin(write=False) as txn: img_key = 'image-%09d' % index imgbuf = txn.get(img_key) buf = six.BytesIO() buf.write(imgbuf) buf.seek(0) try: img = Image.open(buf).convert('L') except IOError: print('Corrupted image for %d' % index) return self[index + 1] if self.transform is not None: img = self.transform(img) label_key = 'label-%09d' % index label = str(txn.get(label_key)) if self.target_transform is not None: label = self.target_transform(label) return (img, label) class resizeNormalize(object): def __init__(self, size, interpolation=Image.BILINEAR): self.size = size self.interpolation = interpolation self.toTensor = transforms.ToTensor() def __call__(self, img): img = img.resize(self.size, self.interpolation) img = self.toTensor(img) img.sub_(0.5).div_(0.5) return img class randomSequentialSampler(sampler.Sampler): def 
__init__(self, data_source, batch_size): self.num_samples = len(data_source) self.batch_size = batch_size def __iter__(self): n_batch = len(self) // self.batch_size tail = len(self) % self.batch_size index = torch.LongTensor(len(self)).fill_(0) for i in range(n_batch): random_start = random.randint(0, len(self) - self.batch_size) batch_index = random_start + torch.range(0, self.batch_size - 1) index[i * self.batch_size:(i + 1) * self.batch_size] = batch_index # deal with tail if tail: random_start = random.randint(0, len(self) - self.batch_size) tail_index = random_start + torch.range(0, tail - 1) index[(i + 1) * self.batch_size:] = tail_index return iter(index) def __len__(self): return self.num_samples class alignCollate(object): def __init__(self, imgH=32, imgW=128, keep_ratio=False, min_ratio=1): self.imgH = imgH self.imgW = imgW self.keep_ratio = keep_ratio self.min_ratio = min_ratio def __call__(self, batch): images, labels = zip(*batch) imgH = self.imgH imgW = self.imgW if self.keep_ratio: ratios = [] for image in images: w, h = image.size ratios.append(w / float(h)) ratios.sort() max_ratio = ratios[-1] imgW = int(np.floor(max_ratio * imgH)) imgW = max(imgH * self.min_ratio, imgW) # assure imgH >= imgW transform = resizeNormalize((imgW, imgH)) images = [transform(image) for image in images] images = torch.cat([t.unsqueeze(0) for t in images], 0) return images, labels
how 'sequences' parameter works in theano scan function
In prior to get to the point, I apologize for my English sentences that can be rather awkward. Because English is not my first language. Now I am struggle with using theano.tensor.scan function properly. But, I don’t know how ‘sequences parameter’ works. I created 3 dimensional array (42,10,7002) and input as sequences. I expect the unit of sequences would be 2 dimensional array (10,7002) and the number of steps would be 42. But it seems like unit of sequences is (1,7002) how can I handle unit of sequences that should be row 10 and column 7002 ? thanks for reading this question. add # -*- coding: utf-8 -*- __author__ = "Haizhou Qu" import readFile import numpy as np import theano import theano.tensor as T from six.moves import zip # from theano.compile.debugmode import DebugMode theano.config.optimizer='fast_compile' theano.config.exception_verbosity='high' theano.config.compute_test_value = 'warn' epsilon = 1e-6 dtype = theano.config.floatX minibatch_size_g = 0 longest_seq_g = 0 voca_dim_global = 0 n_time_step_input_g = 0 n_timestep_target_g = 0 def printT(x): t = theano.printing.Print('T')(x) return t def shared(value, name=None): return theano.shared(value.astype(dtype), name=name) def shared_zeros(shape, name=None): return shared(value=np.zeros(shape), name=name) def shared_zeros_like(x, name=None): return shared_zeros(shape=x.shape, name=name) def init_weights(shape, name=None): bound = np.sqrt(1.0/shape[1]) w = np.random.uniform(-bound, bound, shape) return shared(value=w, name=name) def adadelta(params, cost, lr=1.0, rho=0.95): # from https://github.com/fchollet/keras/blob/master/keras/optimizers.py cost = cost.astype('float32') grads = T.grad(cost, params) accus = [shared_zeros_like(p.get_value()) for p in params] delta_accus = [shared_zeros_like(p.get_value()) for p in params] updates = [] for p, g, a, d_a in zip(params, grads, accus, delta_accus): new_a = rho * a + (1.0 - rho) * T.square(g) updates.append((a, new_a)) update = g * T.sqrt(d_a + epsilon) / T.sqrt(new_a + epsilon) new_p = p - lr * update updates.append((p, new_p)) new_d_a = rho * d_a + (1.0 - rho) * T.square(update) updates.append((d_a, new_d_a)) return updates def categorical_crossentropy(y_true, y_pred): # from https://github.com/fchollet/keras/blob/master/keras/objectives.py y_pred = T.clip(y_pred, epsilon, 1.0 - epsilon) y_pred = y_true.astype('int64') # only matrix can be calculated cce, updates = theano.scan( fn=T.nnet.categorical_crossentropy, sequences=[y_pred,y_true] ) cce.astype('float32') return T.mean(cce) def mean_square_error(y_true, y_pred): return T.mean(T.square(y_pred - y_true)) class LSTM(object): def __init__(self, size, dim): self.size = size self.dim = dim shape_b = (minibatch_size_g, size) shape_U = (dim, size) shape_W = (size, size) self.h_tm1 = shared_zeros(shape_b, "h_tm1") self.c_tm1 = shared_zeros(shape_b, "c_tm1") self.Ui = init_weights(shape_U, "Ui") self.Wi = init_weights(shape_W, "Wi") self.bi = shared_zeros(shape_b, "bi") self.Uf = init_weights(shape_U, "Uf") self.Wf = init_weights(shape_W, "Wf") self.bf = shared_zeros(shape_b, "bf") self.Uo = init_weights(shape_U, "Uo") self.Wo = init_weights(shape_W, "Wo") self.bo = shared_zeros(shape_b, "bo") self.Ug = init_weights(shape_U, "Ug") self.Wg = init_weights(shape_W, "Wg") self.bg = shared_zeros(shape_b, "bg") self.params = [ self.Ui, self.Wi, self.bi, self.Uf, self.Wf, self.bf, self.Uo, self.Wo, self.bo, self.Ug, self.Wg, self.bg ] def set_state(self, h, c): self.h_tm1.set_value(h.get_value()) self.c_tm1.set_value(c.get_value()) def 
    def reset_state(self):
        self.h_tm1 = shared_zeros((1, self.size), "h_tm1")
        self.c_tm1 = shared_zeros((1, self.size), "c_tm1")

    #staticmethod
    def step(
        x_t, h_tm1, c_tm1,
        Ui, Wi, bi, Uf, Wf, bf,
        Uo, Wo, bo, Ug, Wg, bg
    ):
        """
        x_t.shape = (timestep=1, dim)
        x_t.shape = (n_samples, timestep=1, dim)
        """
        # x_t.eval().shape
        x_t = x_t.reshape( (minibatch_size_g, -1) )
        # x_t = x_t.reshape( (voca_dim_global, -1) )
        h_tm1 = h_tm1.reshape( (-1, n_time_step_input_g) )
        c_tm1 = c_tm1.reshape( (-1, n_time_step_input_g) )
        i_t = T.nnet.sigmoid(T.dot(x_t, Ui) + T.dot(h_tm1, Wi) + bi)
        a = T.dot(x_t, Uf)
        b = T.dot(h_tm1, Wf)
        c = a + b
        f_t = c + bf
        # f_t = T.nnet.sigmoid(T.dot(x_t, Uf) + T.dot(h_tm1, Wf) + bf)
        o_t = T.nnet.sigmoid(T.dot(x_t, Uo) + T.dot(h_tm1, Wo) + bo)
        g_t = T.tanh(T.dot(x_t, Ug) + T.dot(h_tm1, Wg) + bg)
        c_t = c_tm1 * f_t + g_t * i_t
        h_t = T.tanh(c_t) * o_t
        # c_t = c_t.reshape( (1, -1) )
        # h_t = h_t.reshape( (1, -1) )
        return h_t, c_t

    def forward(self, X):
        """
        X.shape = (timesteps, dim)
        X.shape = (n_samples, timesteps, dim)
        """
        X = X.reshape( (-1, voca_dim_global * minibatch_size_g) )
        states, updates = theano.scan(
            fn=self.step,
            sequences=[ X ],
            outputs_info=[self.h_tm1, self.c_tm1],
            non_sequences=[
                self.Ui, self.Wi, self.bi,
                self.Uf, self.Wf, self.bf,
                self.Uo, self.Wo, self.bo,
                self.Ug, self.Wg, self.bg
            ]
        )
        updates = [(self.h_tm1, states[0][-1]), (self.c_tm1, states[1][-1])]
        print("### forward completed ###")
        return states, updates

class LSTMEncoder(LSTM):
    def encode(self, X):
        states, updates = self.forward(X)
        h_t = states[0][-1]
        c_t = states[1][-1]
        return h_t, c_t, updates

class LSTMDecoder(LSTM):
    def __init__(self, size, dim, h_tm1=None, c_tm1=None):
        super(LSTMDecoder, self).__init__(size=size, dim=dim)
        self.Wh = init_weights((size, dim), "Wh")
        self.bh = shared_zeros((1, dim), "bh")
        self.h_tm1 = h_tm1 or shared_zeros((1, size), "h_tm1")
        self.c_tm1 = c_tm1 or shared_zeros((1, size), "c_tm1")
        self.y_t = shared_zeros((1, dim), "y_t")
        # self.decode_length = theano.shared(decode_length)
        self.params.append(self.Wh)
        self.params.append(self.bh)

    def decode_step(
        self, y_t, h_tm1, c_tm1,
        Ui, Wi, bi, Uf, Wf, bf,
        Uo, Wo, bo, Ug, Wg, bg,
        Wh, bh
    ):
        h_t, c_t = self.step(
            y_t, h_tm1, c_tm1,
            Ui, Wi, bi, Uf, Wf, bf,
            Uo, Wo, bo, Ug, Wg, bg
        )
        y_t = T.dot(h_t, Wh) + bh
        return y_t, h_t, c_t

    def decode(self, h_tm1, c_tm1, timesteps):
        outputs, updates = theano.scan(
            fn=self.decode_step,
            outputs_info=[self.y_t, h_tm1, c_tm1],
            non_sequences=[
                self.Ui, self.Wi, self.bi,
                self.Uf, self.Wf, self.bf,
                self.Uo, self.Wo, self.bo,
                self.Ug, self.Wg, self.bg,
                self.Wh, self.bh
            ],
            n_steps=timesteps
        )
        updates = [
            (self.h_tm1, outputs[1][-1]),
            (self.c_tm1, outputs[2][-1])
        ]
        return T.flatten(outputs[0], 2), updates

class Seq2Seq(object):
    def __init__(self, size, dim):
        self.encoder = LSTMEncoder(size, dim)
        self.decoder = LSTMDecoder(size, dim)
        self.params = []
        self.params += self.encoder.params
        self.params += self.decoder.params
        self._predict = None
        self._train = None
        self._test = None

    def compile(self, loss_func, optimizer):
        seq_input = T.tensor3()
        seq_target = T.tensor3()
        decode_timesteps = T.iscalar()
        h_tm1, c_tm1, updates_encode = self.encoder.encode(seq_input)
        seq_predict_flex, updates_decode_flex = self.decoder.decode(h_tm1, c_tm1, decode_timesteps)
        seq_predict, updates_decode = self.decoder.decode(h_tm1, c_tm1, T.shape(seq_target)[0])
        loss = loss_func(seq_predict, seq_target)
        self._predict = theano.function([seq_input, decode_timesteps], seq_predict_flex, updates=updates_encode + updates_decode_flex)
        self._test = theano.function([seq_input, seq_target], loss, updates=updates_encode + updates_decode)
        updates = []
        updates += updates_encode
        updates += updates_decode
        updates += optimizer(self.params, loss)
        self._train = theano.function([seq_input, seq_target], loss, updates=updates)

    def predict(self, seq_input, decode_timesteps):
        self.encoder.reset_state()
        self.decoder.reset_state()
        return self._predict(seq_input, decode_timesteps)

    def train(self, seq_input, seq_target):
        self.encoder.reset_state()
        self.decoder.reset_state()
        return self._train(seq_input, seq_target)

    def test(self, seq_input, seq_target):
        self.encoder.reset_state()
        self.decoder.reset_state()
        return self._test(seq_input, seq_target)

def train(x, target):
    for mini_batch, target in zip(x, target):
        print("mini_batch shape :", mini_batch.shape)
        mini_batch = mini_batch.astype(dtype)
        target = target.astype(dtype)
        print(seq2seq.train(mini_batch, target))

def predict(x, target):
    for mini_batch, target in zip(x, target):
        so = seq2seq.predict(x, n_time_step_output_g)
        print(so)
        loss = seq2seq.test(x, so)
        print(loss)

if __name__ == "__main__":
    si, st, maxlen_input, minibatch_size, voca_dim = readFile.preprocessing()
    voca_dim_global = voca_dim + 2
    minibatch_size_g = si[0].shape[1]
    print("minibatch_size_g : ", 10)
    print("minibatch_size_g : ", si[0].shape[1])
    n_time_step_input_g = si[0].shape[0]
    n_time_step_output_g = st[0].shape[0]
    seq2seq = Seq2Seq(n_time_step_input_g, voca_dim_global)
    seq2seq.compile(loss_func=categorical_crossentropy, optimizer=adadelta)
    print("select a menu")
    print("1. Training")
    print("2. Predict and test translated sentence.")
    val = input("selection : ")
    if val == 1:
        train(si, st)
    elif val == 2:
        predict(si, st)
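To illustrate the slicing behaviour the question asks about, here is a minimal, self-contained sketch (not part of the original post; the names X, step, f, and data are only illustrative). theano.scan iterates over the leading axis of each entry in sequences, so a tensor of shape (42, 10, 7002) passed directly to scan gives 42 steps, and each step receives one matrix of shape (10, 7002).
# Minimal sketch: scan slices its sequences along the first axis.
# The names below (X, step, f, data) are illustrative only.
import numpy as np
import theano
import theano.tensor as T

X = T.tensor3("X")  # conceptually (n_steps, rows, cols) = (42, 10, 7002)

def step(x_t):
    # x_t is one slice along axis 0, i.e. a matrix of shape (rows, cols)
    return x_t.shape

slice_shapes, _ = theano.scan(fn=step, sequences=[X])
f = theano.function([X], slice_shapes)

data = np.zeros((42, 10, 7002), dtype=theano.config.floatX)
print(f(data)[0])  # prints [  10 7002]: each of the 42 steps sees a (10, 7002) matrix
If each step instead appears to receive a flattened or (1, 7002)-shaped slice, a common cause is reshaping the 3-D tensor to a matrix before handing it to scan (as forward() above does with X.reshape(...)): once the input is 2-D, scan iterates over the rows of that matrix rather than over the original first axis.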