class StockDataset(Dataset):
# The dataset's job is to return the i-th record
def __init__(self, symbol, x_frames, y_frames, start, end):
self.symbol = symbol
self.x_frames = x_frames
self.y_frames = y_frames
self.start = datetime.datetime(*start)
self.end = datetime.datetime(*end)
# Download the data for the symbol and date range specified above
self.data = pdr.DataReader(self.symbol, 'yahoo', self.start, self.end)
def __len__(self):
return len(self.data) - (self.x_frames + self.y_frames) + 1
def __getitem__(self, idx):
global data
#global data_set
# Given index i, return that window of the data (slicing the data into a list-like window)
idx += self.x_frames
data = self.data.iloc[idx-self.x_frames:idx+self.y_frames]
data = data[['High', 'Low', 'Open', 'Close', 'Adj Close', 'Volume']]
data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1)) # convert to log returns; 1 is added first to guard against possible missing/zero values
global x_ex
global y_ex
x_ex= data[:self.x_frames]
y_ex= data[self.x_frames:]
data = data.values # convert to a NumPy array
X = data[:self.x_frames]
y = data[self.x_frames:]
return X, y
This is the dataset class.
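A minimal usage sketch of the class above (the dates are just examples, and it assumes the Yahoo endpoint used by pandas_datareader is still reachable):
ds = StockDataset('005930.KS', x_frames=5, y_frames=5, start=(2012, 1, 1), end=(2021, 1, 1))
print(len(ds))            # len(self.data) - (x_frames + y_frames) + 1 windows
X, y = ds[0]              # both are NumPy arrays
print(X.shape, y.shape)   # (5, 6) and (5, 6): 5 days x 6 features each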
class LSTM(nn.Module):
# the model explanation starts after the 50-minute mark
def __init__(self, input_dim, hidden_dim, output_dim, num_layers, batch_size, dropout, use_bn):
super(LSTM, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.output_dim = output_dim
self.num_layers = num_layers
self.batch_size = batch_size
self.dropout = dropout
self.use_bn = use_bn
self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)
self.hidden = self.init_hidden()
self.regressor = self.make_regressor()
def init_hidden(self):
return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))
def make_regressor(self):
layers = []
if self.use_bn:
layers.append(nn.BatchNorm1d(self.hidden_dim))
layers.append(nn.Dropout(self.dropout))
layers.append(nn.Linear(self.hidden_dim, self.hidden_dim // 2))
layers.append(nn.ReLU())
layers.append(nn.Linear(self.hidden_dim // 2, self.output_dim))
regressor = nn.Sequential(*layers)
return regressor
def forward(self, x):
lstm_out, self.hidden = self.lstm(x, self.hidden)
y_pred = self.regressor(lstm_out[-1].view(self.batch_size, -1))
return y_pred
This is the model.
def test(model, partition, args):
global y_true
global y_pred
global X
testloader = DataLoader(partition['test'],
batch_size=args.batch_size,
shuffle=False, drop_last=True)
model.eval()
test_acc = 0.0
with torch.no_grad():
for i, (X, y) in enumerate(testloader):
X = X.transpose(0, 1).float().to(args.device)
y_true = y[:, :, 3].float().to(args.device)
model.hidden = [hidden.to(args.device) for hidden in model.init_hidden()]
y_pred = model(X)
test_acc += metric(y_pred, y_true)[0]
test_acc = test_acc / len(testloader)
return test_acc
This is the test function and its data loader.
# ====== Random Seed Initialization ====== #
seed = 666
np.random.seed(seed)
torch.manual_seed(seed)
parser = argparse.ArgumentParser()
args = parser.parse_args("")
args.exp_name = "exp1_lr"
args.device = 'cuda' if torch.cuda.is_available() else 'cpu'
# ====== Data Loading ====== #
args.symbol = '005930.KS' # desired ticker symbol
args.batch_size = 4 # batch size
args.x_frames = 5 # x: previous n days of data used as input; if this is too long, a one-week prediction is not possible
args.y_frames = 5 # y: following n days of data to predict; if this is too long, a one-week prediction is not possible
# ====== Model Capacity ===== #
args.input_dim = 6
args.hid_dim = 50
args.n_layers = 2 # (number of hidden LSTM layers) see https://justkode.kr/deep-learning/pytorch-rnn
# ====== Regularization ======= #
args.l2 = 0.0001
args.dropout = 0.3
args.use_bn = True
# ====== Optimizer & Training ====== #
args.optim = 'RMSprop' #'RMSprop' #SGD, RMSprop, ADAM...
args.lr = 0.001
args.epoch = 1
# ====== Experiment Variable ====== #
name_var1 = 'lr' # lr = learning rate
name_var2 = 'n_layers' # how many layers to stack in the network
list_var1 = [0.001, 0.0001, 0.00001]
list_var2 = [1,2,3]
# actually build the datasets
trainset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2012,1,1), (2021,1,1)) # training period
valset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2021,1,2), (2021,12,30)) # validation period; unless it spans at least about 6 months + 19 days (as of 2021), a "float division by zero" error occurs. Why?
testset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2022,1,10), (2022,1,14)) # test period; unless it spans at least about 6 months + 25 days (as of 2022), a "float division by zero" error occurs.
# It seems the period has to cover a minimum number of trading days: at least 146. With fewer, it errors out. Why?
partition = {'train': trainset, 'val':valset, 'test':testset}
for var1 in list_var1:
for var2 in list_var2:
setattr(args, name_var1, var1)
setattr(args, name_var2, var2)
print(args)
setting, result = experiment(partition, deepcopy(args))
save_exp_result(setting, result)
# Be sure to delete the files in the directory before plotting, otherwise all the results get overlaid.
This is the hyperparameter setup.
How can I get a result when the test set is only 5 days long (e.g. (2022,1,10) to (2022,1,14))?
The code does not work unless the test set spans at least about 7 months (maybe 146+ trading days).
With fewer than 146 days the error is "float division by zero".
If I set the length to 146 days or more, the code works fine.
I think this line causes the error:
data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1))
My guess is that the log values are so small that the error occurs.
The data is Yahoo Finance data. Thanks for reading.
When I comment out the line below, the data becomes infinite:
data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1))
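For reference, a minimal sketch (with hypothetical numbers, using the batch_size, x_frames, y_frames and drop_last=True settings from above) of how the dataset length and the number of test batches work out for a very short window:
import torch
from torch.utils.data import DataLoader, TensorDataset
n_rows = 5                                   # trading days in a one-week window (assumed)
x_frames, y_frames, batch_size = 5, 5, 4
n_samples = max(n_rows - (x_frames + y_frames) + 1, 0)   # StockDataset.__len__ would be <= 0 here; clamped for the toy dataset
dataset = TensorDataset(torch.zeros(n_samples, 1))
loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=True)
print(len(loader))                           # 0, in which case test_acc / len(testloader) would divide by zero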
I'm trying to measure the latent space clustering, but an error was raised.
class AutoEncoder(nn.Module):
def __init__(self, input_dim1, input_dim2, hidden_dims, agg, sep_decode):
super(AutoEncoder, self).__init__()
self.agg = agg
self.sep_decode = sep_decode
print("hidden_dims:", hidden_dims)
self.encoder_layers = []
self.encoder2_layers = []
dims = [[input_dim1, input_dim2]] + hidden_dims
for i in range(len(dims) - 1):
if i == 0:
layer = nn.Sequential(nn.Linear(dims[i][0], dims[i+1]), nn.ReLU())
layer2 = nn.Sequential(nn.Linear(dims[i][1], dims[i+1]), nn.ReLU())
elif i != 0 and i < len(dims) - 2:
layer = nn.Sequential(nn.Linear(dims[i], dims[i+1]), nn.ReLU())
layer2 = nn.Sequential(nn.Linear(dims[i], dims[i+1]), nn.ReLU())
else:
layer = nn.Linear(dims[i], dims[i+1])
layer2 = nn.Linear(dims[i], dims[i+1])
self.encoder_layers.append(layer)
self.encoder2_layers.append(layer2)
self.encoder = nn.Sequential(*self.encoder_layers)
self.encoder2 = nn.Sequential(*self.encoder2_layers)
self.decoder_layers = []
self.decoder2_layers = []
hidden_dims.reverse()
dims = hidden_dims + [[input_dim1, input_dim2]]
if self.agg == "concat" and not self.sep_decode:
dims[0] = 2 * dims[0]
for i in range(len(dims) - 1):
if i < len(dims) - 2:
layer = nn.Sequential(nn.Linear(dims[i], dims[i+1]), nn.ReLU())
layer2 = nn.Sequential(nn.Linear(dims[i], dims[i+1]), nn.ReLU())
else:
layer = nn.Linear(dims[i], dims[i+1][0])
layer2 = nn.Linear(dims[i], dims[i+1][1])
self.decoder_layers.append(layer)
self.decoder2_layers.append(layer2)
self.decoder = nn.Sequential(*self.decoder_layers)
self.decoder2 = nn.Sequential(*self.decoder2_layers)
def forward(self, x1, x2):
z1 = self.encoder(x1)
z2 = self.encoder2(x2)
if self.agg == "max":
z = torch.max(z1, z2)
elif self.agg == "multi":
z = z1 * z2
elif self.agg == "sum":
z = z1 + z2
elif self.agg == "concat":
z = torch.cat([z1, z2], dim=1)
if self.sep_decode:
x_bar1 = self.decoder(z1)
x_bar1 = F.normalize(x_bar1, dim=-1)
x_bar2 = self.decoder2(z2)
x_bar2 = F.normalize(x_bar2, dim=-1)
else:
x_bar1 = self.decoder(z)
x_bar1 = F.normalize(x_bar1, dim=-1)
x_bar2 = self.decoder2(z)
x_bar2 = F.normalize(x_bar2, dim=-1)
return x_bar1, x_bar2, z
class TopicCluster(nn.Module):
def __init__(self, args):
super(TopicCluster, self).__init__()
self.alpha = 1.0
self.dataset_path = args.dataset_path
self.args = args
self.device = args.device
self.temperature = args.temperature
self.distribution = args.distribution
self.agg_method = args.agg_method
self.sep_decode = (args.sep_decode == 1)
input_dim1 = args.input_dim1
input_dim2 = args.input_dim2
hidden_dims = eval(args.hidden_dims)
self.model = AutoEncoder(input_dim1, input_dim2, hidden_dims, self.agg_method, self.sep_decode)
if self.agg_method == "concat":
self.topic_emb = Parameter(torch.Tensor(args.n_clusters, 2*hidden_dims[-1]))
else:
self.topic_emb = Parameter(torch.Tensor(args.n_clusters, hidden_dims[-1]))
torch.nn.init.xavier_normal_(self.topic_emb.data)
def pretrain(self, input_data, pretrain_epoch=200):
pretrained_path = os.path.join(self.dataset_path, f"pretrained_{args.suffix}.pt")
if os.path.exists(pretrained_path) and self.args.load_pretrain:
# load pretrain weights
print(f"loading pretrained model from {pretrained_path}")
self.model.load_state_dict(torch.load(pretrained_path))
else:
train_loader = DataLoader(input_data, batch_size=self.args.batch_size, shuffle=True)
optimizer = Adam(self.model.parameters(), lr=self.args.lr)
for epoch in range(pretrain_epoch):
total_loss = 0
for batch_idx, (x1, x2, _, weight) in enumerate(train_loader):
x1 = x1.to(self.device)
x2 = x2.to(self.device)
weight = weight.to(self.device)
optimizer.zero_grad()
x_bar1, x_bar2, z = self.model(x1, x2)
loss = cosine_dist(x_bar1, x1) + cosine_dist(x_bar2, x2) #, weight)
total_loss += loss.item()
loss.backward()
optimizer.step()
print(f"epoch {epoch}: loss = {total_loss / (batch_idx+1):.4f}")
torch.save(self.model.state_dict(), pretrained_path)
print(f"model saved to {pretrained_path}")
def cluster_assign(self, z):
if self.distribution == 'student':
p = 1.0 / (1.0 + torch.sum(
torch.pow(z.unsqueeze(1) - self.topic_emb, 2), 2) / self.alpha)
p = p.pow((self.alpha + 1.0) / 2.0)
p = (p.t() / torch.sum(p, 1)).t()
else:
self.topic_emb.data = F.normalize(self.topic_emb.data, dim=-1)
z = F.normalize(z, dim=-1)
sim = torch.matmul(z, self.topic_emb.t()) / self.temperature
p = F.softmax(sim, dim=-1)
return p
def forward(self, x1, x2):
x_bar1, x_bar2, z = self.model(x1, x2)
p = self.cluster_assign(z)
return x_bar1, x_bar2, z, p
def target_distribution(self, x1, x2, freq, method='all', top_num=0):
_, _, z = self.model(x1, x2)
p = self.cluster_assign(z).detach()
if method == 'all':
q = p**2 / (p * freq.unsqueeze(-1)).sum(dim=0)
q = (q.t() / q.sum(dim=1)).t()
elif method == 'top':
assert top_num > 0
q = p.clone()
sim = torch.matmul(self.topic_emb, z.t())
_, selected_idx = sim.topk(k=top_num, dim=-1)
for i, topic_idx in enumerate(selected_idx):
q[topic_idx] = 0
q[topic_idx, i] = 1
return p, q
def cosine_dist(x_bar, x, weight=None):
if weight is None:
weight = torch.ones(x.size(0), device=x.device)
cos_sim = (x_bar * x).sum(-1)
cos_dist = 1 - cos_sim
cos_dist = (cos_dist * weight).sum() / weight.sum()
return cos_dist
def train(args, emb_dict):
# ipdb.set_trace()
inv_vocab = {k: " ".join(v) for k, v in emb_dict["inv_vocab"].items()}
vocab = {" ".join(k):v for k, v in emb_dict["vocab"].items()}
print(f"Vocab size: {len(vocab)}")
embs = F.normalize(torch.tensor(emb_dict["vs_emb"]), dim=-1)
embs2 = F.normalize(torch.tensor(emb_dict["oh_emb"]), dim=-1)
freq = np.array(emb_dict["tuple_freq"])
if not args.use_freq:
freq = np.ones_like(freq)
input_data = TensorDataset(embs, embs2, torch.arange(embs.size(0)), torch.tensor(freq))
topic_cluster = TopicCluster(args).to(args.device)
topic_cluster.pretrain(input_data, args.pretrain_epoch)
train_loader = DataLoader(input_data, batch_size=args.batch_size, shuffle=False)
optimizer = Adam(topic_cluster.parameters(), lr=args.lr)
# topic embedding initialization
embs = embs.to(args.device)
embs2 = embs2.to(args.device)
x_bar1, x_bar2, z = topic_cluster.model(embs, embs2)
z = F.normalize(z, dim=-1)
print(f"Running K-Means for initialization")
kmeans = KMeans(n_clusters=args.n_clusters, n_init=5)
if args.use_freq:
y_pred = kmeans.fit_predict(z.data.cpu().numpy(), sample_weight=freq)
else:
y_pred = kmeans.fit_predict(z.data.cpu().numpy())
print(f"Finish K-Means")
freq = torch.tensor(freq).to(args.device)
y_pred_last = y_pred
topic_cluster.topic_emb.data = torch.tensor(kmeans.cluster_centers_).to(args.device)
topic_cluster.train()
i = 0
for epoch in range(50):
if epoch % 5 == 0:
_, _, z, p = topic_cluster(embs, embs2)
z = F.normalize(z, dim=-1)
topic_cluster.topic_emb.data = F.normalize(topic_cluster.topic_emb.data, dim=-1)
if not os.path.exists(os.path.join(args.dataset_path, f"clusters_{args.suffix}")):
os.makedirs(os.path.join(args.dataset_path, f"clusters_{args.suffix}"))
embed_save_path = os.path.join(args.dataset_path, f"clusters_{args.suffix}/embed_{epoch}.pt")
torch.save({
"inv_vocab": emb_dict['inv_vocab'],
"embed": z.detach().cpu().numpy(),
"topic_embed": topic_cluster.topic_emb.detach().cpu().numpy(),
}, embed_save_path)
f = open(os.path.join(args.dataset_path, f"clusters_{args.suffix}/{epoch}.txt"), 'w')
pred_cluster = p.argmax(-1)
result_strings = []
for j in range(args.n_clusters):
if args.sort_method == 'discriminative':
word_idx = torch.arange(embs.size(0))[pred_cluster == j]
sorted_idx = torch.argsort(p[pred_cluster == j][:, j], descending=True)
word_idx = word_idx[sorted_idx]
else:
sim = torch.matmul(topic_cluster.topic_emb[j], z.t())
_, word_idx = sim.topk(k=30, dim=-1)
word_cluster = []
freq_sum = 0
for idx in word_idx:
freq_sum += freq[idx].item()
if inv_vocab[idx.item()] not in word_cluster:
word_cluster.append(inv_vocab[idx.item()])
if len(word_cluster) >= 10:
break
result_strings.append((freq_sum, f"Topic {j} ({freq_sum}): " + ', '.join(word_cluster)+'\n'))
result_strings = sorted(result_strings, key=lambda x: x[0], reverse=True)
for result_string in result_strings:
f.write(result_string[1])
for x1, x2, idx, weight in train_loader:
if i % args.update_interval == 0:
p, q = topic_cluster.target_distribution(embs, embs2, freq.clone().fill_(1), method='all', top_num=epoch+1)
y_pred = p.cpu().numpy().argmax(1)
delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
y_pred_last = y_pred
if i > 0 and delta_label < args.tol:
print(f'delta_label {delta_label:.4f} < tol ({args.tol})')
print('Reached tolerance threshold. Stopping training.')
return None
i += 1
x1 = x1.to(args.device)
x2 = x2.to(args.device)
idx = idx.to(args.device)
weight = weight.to(args.device)
x_bar1, x_bar2, _, p = topic_cluster(x1, x2)
reconstr_loss = cosine_dist(x_bar1, x1) + cosine_dist(x_bar2, x2) #, weight)
kl_loss = F.kl_div(p.log(), q[idx], reduction='none').sum(-1)
kl_loss = (kl_loss * weight).sum() / weight.sum()
loss = args.gamma * kl_loss + reconstr_loss
if i % args.update_interval == 0:
print(f"KL loss: {kl_loss}; Reconstruction loss: {reconstr_loss}")
optimizer.zero_grad()
loss.backward()
optimizer.step()
return None
if __name__ == "__main__":
# CUDA_VISIBLE_DEVICES=0 python3 latent_space_clustering.py --dataset_path ./pandemic --input_emb_name po_tuple_features_all_svos.pk
parser = argparse.ArgumentParser(
description='train',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--dataset_path', type=str)
parser.add_argument('--input_emb_name', type=str)
parser.add_argument('--lr', type=float, default=5e-4)
parser.add_argument('--n_clusters', default=30, type=int)
parser.add_argument('--input_dim1', default=1000, type=int)
parser.add_argument('--input_dim2', default=1000, type=int)
parser.add_argument('--agg_method', default="multi", choices=["sum", "multi", "concat", "attend"], type=str)
parser.add_argument('--sep_decode', default=0, choices=[0, 1], type=int)
parser.add_argument('--pretrain_epoch', default=100, type=int)
parser.add_argument('--load_pretrain', default=False, action='store_true')
parser.add_argument('--temperature', default=0.1, type=float)
parser.add_argument('--sort_method', default='generative', choices=['generative', 'discriminative'])
parser.add_argument('--distribution', default='softmax', choices=['softmax', 'student'])
parser.add_argument('--batch_size', default=256, type=int)
parser.add_argument('--use_freq', default=False, action='store_true')
parser.add_argument('--hidden_dims', default='[1000, 2000, 1000, 100]', type=str)
parser.add_argument('--suffix', type=str, default='')
parser.add_argument('--gamma', default=5, type=float, help='weight of clustering loss')
parser.add_argument('--update_interval', default=100, type=int)
parser.add_argument('--tol', default=0.001, type=float)
args = parser.parse_args()
args.cuda = torch.cuda.is_available()
print("use cuda: {}".format(args.cuda))
args.device = torch.device("cuda" if args.cuda else "cpu")
print(args)
with open(os.path.join(args.dataset_path, args.input_emb_name), "rb") as fin:
emb_dict = pk.load(fin)
candidate_idx = train(args, emb_dict)
print(candidate_idx)
The error I'm getting is: RuntimeError: mat1 and mat2 shapes cannot be multiplied (256x726 and 1000x1000). I cannot figure out which part is the problem. Please help me. Thank you so much.
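For reference, a small check that could be run on the pickle before building the model: the first encoder layer is nn.Linear(input_dim1, hidden_dims[0]), so the last dimension of each embedding has to match --input_dim1 / --input_dim2 (default 1000). The path below is just the one from the comment in __main__; which of the two embeddings is 726 wide is an assumption.
import pickle as pk
import torch
with open("./pandemic/po_tuple_features_all_svos.pk", "rb") as fin:
    emb_dict = pk.load(fin)
vs_emb = torch.tensor(emb_dict["vs_emb"])
oh_emb = torch.tensor(emb_dict["oh_emb"])
print(vs_emb.shape, oh_emb.shape)   # if one of these ends in 726 instead of 1000,
                                    # --input_dim1 / --input_dim2 must be set to that width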
I have the following network where I am trying to do triplet loss:
First, I have a custom convolution class:
class ConvBlock(nn.Module):
def __init__(self, ngpu, input_c, output_c, mode=0):
super(ConvBlock, self).__init__()
self.ngpu = ngpu
self.input_c = input_c
self.output_c = output_c
self.mode = mode
self.b1 = nn.Sequential(
nn.Conv2d(input_c, output_c, 3, stride=1, padding=1),
#nn.BatchNorm2d(output_c),
nn.PReLU(),
)
self.b2 = nn.Sequential(
nn.Conv2d(output_c, output_c, 3, stride=1, padding=1),
#nn.BatchNorm2d(output_c),
nn.PReLU(),
)
self.pool = nn.Sequential(
nn.MaxPool2d(2, 2),
)
def forward(self, input):
batch_size = input.size(0)
if self.mode == 0:
b1 = self.b1(input)
hidden = self.pool(b1)
return hidden, b1
elif self.mode == 1:
b1 = self.b1(input)
b2 = self.b2(b1)
hidden = self.pool(b2)
return hidden, b2
elif self.mode == 2:
b1 = self.b1(input)
hidden = self.b2(b1)
return hidden
I now have an encoder module:
class _Encoder(nn.Module):
def __init__(self, ngpu,nc,nef,out_size,nz):
super(_Encoder, self).__init__()
self.ngpu = ngpu
self.nc = nc
self.nef = nef
self.out_size = out_size
self.nz = nz
self.c1 = ConvBlock(self.ngpu, nc, nef, 0) # 3 - 64
self.c2 = ConvBlock(self.ngpu, nef, nef*2, 0) # 64-128
self.c3 = ConvBlock(self.ngpu, nef*2, nef*4, 1) # 128-256
self.c4 = ConvBlock(self.ngpu, nef*4, nef*8, 1) # 256 -512
self.c5 = ConvBlock(self.ngpu, nef*8, nef*8, 2) # 512-512
# 8 because the depth went from 32 to 32*8
self.mean = nn.Linear(nef * 8 * out_size * (out_size/2), nz)
self.logvar = nn.Linear(nef * 8 * out_size * (out_size/2), nz)
#for reparametrization trick
def sampler(self, mean, logvar):
std = logvar.mul(0.5).exp_()
if args.cuda:
eps = torch.cuda.FloatTensor(std.size()).normal_()
else:
eps = torch.FloatTensor(std.size()).normal_()
eps = Variable(eps)
return eps.mul(std).add_(mean)
def forward(self, input):
batch_size = input.size(0)
if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
c1_out, c1_x = nn.parallel.data_parallel(self.c1, input, range(self.ngpu))
c2_out, c2_x = nn.parallel.data_parallel(self.c2, c1_out, range(self.ngpu))
c3_out, c3_x = nn.parallel.data_parallel(self.c3, c2_out, range(self.ngpu))
c4_out, c4_x = nn.parallel.data_parallel(self.c4, c3_out, range(self.ngpu))
hidden = nn.parallel.data_parallel(self.c5, c4_out, range(self.ngpu))
# hidden = nn.parallel.data_parallel(self.encoder, input, range(self.ngpu))
hidden = hidden.view(batch_size, -1)
mean = nn.parallel.data_parallel(self.mean, hidden, range(self.ngpu))
logvar = nn.parallel.data_parallel(self.logvar, hidden, range(self.ngpu))
else:
c1_out, c1_x = self.c1(input)
c2_out, c2_x = self.c2(c1_out)
c3_out, c3_x = self.c3(c2_out)
c4_out, c4_x = self.c4(c3_out)
hidden = self.c5(c4_out)
# hidden = self.encoder(input)
hidden = hidden.view(batch_size, -1)
mean, logvar = self.mean(hidden), self.logvar(hidden)
latent_z = self.sampler(mean, logvar)
if ADD_SKIP_CONNECTION:
return latent_z,mean,logvar,{"c1_x":c1_x, "c2_x":c2_x, "c3_x":c3_x, "c4_x":c4_x}
else:
return latent_z,mean,logvar,{"c1_x":None, "c2_x":None, "c3_x":None, "c4_x":None}
I initialize my encoder as a single object:
encoder = _Encoder(ngpu,nc,nef,out_size,nz)
encoder = encoder.cuda()
Then I apply the encoder to the three inputs and compute the triplet loss:
latent_x,mean_x,logvar_x,skip_x = self.encoder(x)
latent_y,mean_y,logvar_y,skip_y = self.encoder(y)
latent_z,mean_z,logvar_z,skip_z = self.encoder(z)
dist_a = F.pairwise_distance(mean_x, mean_y, 2)
dist_b = F.pairwise_distance(mean_x, mean_z, 2)
loss_triplet = triplet_loss(dist_a, dist_b, target)
optimizer.zero_grad()
loss_triplet.backward()
optimizer.step()
I am starting to doubt whether the weights are actually being shared across the three encoder calls. Please help me check and tell me whether they are.
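For reference, a minimal self-contained sketch of the PyTorch behaviour in question: calling one nn.Module instance several times reuses the same parameter tensors, so the three encoder calls above share weights and their gradients accumulate into the same tensors (a toy module stands in for _Encoder here):
import torch
import torch.nn as nn
enc = nn.Linear(4, 2)                              # toy stand-in for the encoder
a, b, c = torch.randn(3, 4), torch.randn(3, 4), torch.randn(3, 4)
print(sum(p.numel() for p in enc.parameters()))    # one parameter set (10 values), not three
loss = enc(a).sum() + enc(b).sum() + enc(c).sum()  # three calls, same weights
loss.backward()
print(enc.weight.grad.shape)                       # a single gradient tensor, summed over all calls
In the code above, the analogous check would be printing encoder.mean.weight.data_ptr() after each of the three calls; it stays the same because there is only one _Encoder object.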
I am a beginner with Theano and am studying it now. I'd like to print the value and shape of a TensorVariable while a theano.function is running. When I used Python's print function, it ran before the Theano function was compiled, so I learned that a plain print is useless here. Therefore I tried another approach and added the following code to call theano.printing.Print.
(cce is the return value of theano.scan, so maybe it is not a symbolic variable.
Actually, I am confused by the concepts of TensorVariable and shared variable. Is a TensorVariable a sort of shared variable?)
x = theano.tensor.tensor3() # define data type
t_print =theano.printing.Print("cce value is : ")(x)
f = theano.function([x], t_print) # define theano.function
f(cce) # call f (print value of cce)
Then the following error occurred:
TypeError: ('Bad input argument to theano function with name "seq2seq.py : 98" at index 0(0-based)', 'Expected an array-like object, but found a Variable: maybe you are trying to call a function on a (possibly shared) variable instead of a numeric array?')
Could you possibly let me know how to correct this code so that it prints the value of cce (a TensorVariable)? Or is it impossible to print the value of a TensorVariable while a theano.function is in progress?
Thank you for reading my question.
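For reference, a minimal standalone sketch of how theano.printing.Print is usually wired in: the wrapped node has to be part of the graph that the compiled function actually computes, rather than being passed in as a runtime argument (the variable names here are just examples, separate from my seq2seq code):
import numpy as np
import theano
import theano.tensor as T
x = T.tensor3()
x_printed = theano.printing.Print("cce value is : ")(x)   # wrap a node of the graph
y = x_printed.sum()
f = theano.function([x], y)                               # the Print op is inside the compiled graph
f(np.ones((2, 3, 4), dtype=theano.config.floatX))         # prints the array each time f runs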
ADDED -
Here is my source code, to give the bigger picture. theano.function() is called on the last line; loss_func is the categorical_crossentropy function, and the last four lines set up the Theano function.
def categorical_crossentropy(y_true, y_pred):
y_pred = T.clip(y_pred, epsilon, 1.0 - epsilon)
y_pred = y_pred.reshape( (-1, voca_dim_g) )
y_true = y_true.reshape( (-1, voca_dim_g) )
cce, updates = theano.scan(
fn=T.nnet.categorical_crossentropy,
sequences=[y_pred,y_true]
)
##### I want to print cce HERE #######
return T.mean(cce)
@staticmethod
def step(
x_t, h_tm1, c_tm1,
Ui, Wi, bi, Uf, Wf, bf,
Uo, Wo, bo, Ug, Wg, bg
):
"""
x_t.shape = (timestep=1, dim)
x_t.shape = (n_samples, timestep=1, dim)
"""
i_t = T.nnet.sigmoid(T.dot(x_t, Ui) + T.dot(h_tm1, Wi) + bi)
f_t = T.nnet.sigmoid(T.dot(x_t, Uf) + T.dot(h_tm1, Wf) + bf)
o_t = T.nnet.sigmoid(T.dot(x_t, Uo) + T.dot(h_tm1, Wo) + bo)
g_t = T.tanh(T.dot(x_t, Ug) + T.dot(h_tm1, Wg) + bg)
c_t = c_tm1 * f_t + g_t * i_t
h_t = T.tanh(c_t) * o_t
return h_t, c_t
#########################################################################################################################
def forward(self, X):
states, updates = theano.scan(
fn=self.step,
sequences=[ X ],
outputs_info=[self.h_tm1, self.c_tm1],
non_sequences=[
self.Ui, self.Wi, self.bi,
self.Uf, self.Wf, self.bf,
self.Uo, self.Wo, self.bo,
self.Ug, self.Wg, self.bg
]
)
updates = [(self.h_tm1, states[0][-1]), (self.c_tm1, states[1][-1])]
return states, updates
#########################################################################################################################
def encode(self, X):
states, updates = self.forward(X)
h_t = states[0][-1]
c_t = states[1][-1]
return h_t, c_t, updates
def decode_step(
self, y_t, h_tm1, c_tm1,
Ui, Wi, bi, Uf, Wf, bf,
Uo, Wo, bo, Ug, Wg, bg,
Wh, bh
):
h_t, c_t = self.step(
y_t, h_tm1, c_tm1,
Ui, Wi, bi, Uf, Wf, bf,
Uo, Wo, bo, Ug, Wg, bg
)
y_t = T.dot(h_t, Wh) + bh
return y_t, h_t, c_t
def decode(self, h_tm1, c_tm1, timesteps):
outputs, updates = theano.scan(
fn=self.decode_step,
outputs_info=[self.y_t, h_tm1, c_tm1],
non_sequences=[
self.Ui, self.Wi, self.bi,
self.Uf, self.Wf, self.bf,
self.Uo, self.Wo, self.bo,
self.Ug, self.Wg, self.bg,
self.Wh, self.bh
],
n_steps=timesteps
)
updates = [
(self.h_tm1, outputs[1][-1]),
(self.c_tm1, outputs[2][-1])
]
return outputs[0], updates
h_tm1, c_tm1, updates_encode = encode(seq_input)
seq_predict, updates_decode = decode(h_tm1, c_tm1, T.shape(seq_target)[0])
loss = loss_func(seq_predict, seq_target)
self._train = theano.function([seq_input, seq_target], loss, updates = updates)
Below is the full source code.
# -*- coding: utf-8 -*-
__modifier__ = "Lee Guk Beom, Lee Jae Sang, Jang Jae Kwang (alphabetical Order)"
import readFile
import numpy as np
import theano
import theano.tensor as T
from six.moves import zip
from theano.compile.debugmode import DebugMode
import nltk
import sys
import os
from nltk.tokenize import sent_tokenize
import codecs
#theano.config.optimizer='fast_compile'
#theano.config.exception_verbosity='high'
#theano.config.compute_test_value = 'warn'
epsilon = 1e-6
dtype = theano.config.floatX
minibatch_size_g = 0
longest_seq_g = 0
voca_dim_g = 0
n_time_step_input_g = 0
n_timestep_target_g = 0
word_to_index_input_g = dict()
word_to_index_targrt_g = dict()
index_to_word_target_g = dict()
#########################################################################################################################
def shared(value, name=None):
return theano.shared(value.astype(dtype), name=name)
#########################################################################################################################
def shared_zeros(shape, name=None):
return shared(value=np.zeros(shape), name=name)
#########################################################################################################################
def shared_zeros_like(x, name=None):
return shared_zeros(shape=x.shape, name=name)
#########################################################################################################################
def init_weights(shape, name=None):
bound = np.sqrt(1.0/shape[1])
w = np.random.uniform(-bound, bound, shape)
return shared(value=w, name=name)
#########################################################################################################################
def adadelta(params, cost, lr=1.0, rho=0.95):
# from https://github.com/fchollet/keras/blob/master/keras/optimizers.py
cost = cost.astype('float32')
grads = T.grad(cost, params)
accus = [shared_zeros_like(p.get_value()) for p in params]
delta_accus = [shared_zeros_like(p.get_value()) for p in params]
updates = []
for p, g, a, d_a in zip(params, grads, accus, delta_accus):
new_a = rho * a + (1.0 - rho) * T.square(g)
updates.append((a, new_a))
update = g * T.sqrt(d_a + epsilon) / T.sqrt(new_a + epsilon)
new_p = p - lr * update
updates.append((p, new_p))
new_d_a = rho * d_a + (1.0 - rho) * T.square(update)
updates.append((d_a, new_d_a))
return updates
#########################################################################################################################
def categorical_crossentropy(y_true, y_pred):
# from https://github.com/fchollet/keras/blob/master/keras/objectives.py
y_pred = T.clip(y_pred, epsilon, 1.0 - epsilon)
# y_true = y_true.reshape( (-1, minibatch_size_g, voca_dim_g) )
'''
cce = T.nnet.categorical_crossentropy(y_pred,y_true)
# only matrix can be calculated
'''
# Y_PRED SOFTMAX
y_pred = y_pred.reshape( (-1, voca_dim_g) )
# y_pred_flat = T.nnet.softmax(y_pred)
y_true = y_true.reshape( (-1, voca_dim_g) )
cce, updates = theano.scan(
fn=T.nnet.categorical_crossentropy,
sequences=[y_pred,y_true]
)
return T.mean(cce)
#########################################################################################################################
def mean_square_error(y_true, y_pred):
return T.mean(T.square(y_pred - y_true))
#########################################################################################################################
class LSTM(object):
def __init__(self, size, dim):
self.size = size
self.dim = dim
shape_b = (minibatch_size_g, size)
shape_U = (dim, size)
shape_W = (size, size)
self.h_tm1 = shared_zeros(shape_b, "h_tm1")
self.c_tm1 = shared_zeros(shape_b, "c_tm1")
self.Ui = init_weights(shape_U, "Ui")
self.Wi = init_weights(shape_W, "Wi")
self.bi = shared_zeros(shape_b, "bi")
self.Uf = init_weights(shape_U, "Uf")
self.Wf = init_weights(shape_W, "Wf")
self.bf = shared_zeros(shape_b, "bf")
self.Uo = init_weights(shape_U, "Uo")
self.Wo = init_weights(shape_W, "Wo")
self.bo = shared_zeros(shape_b, "bo")
self.Ug = init_weights(shape_U, "Ug")
self.Wg = init_weights(shape_W, "Wg")
self.bg = shared_zeros(shape_b, "bg")
self.params = [
self.Ui, self.Wi, self.bi,
self.Uf, self.Wf, self.bf,
self.Uo, self.Wo, self.bo,
self.Ug, self.Wg, self.bg
]
def set_state(self, h, c):
self.h_tm1.set_value(h.get_value())
self.c_tm1.set_value(c.get_value())
def reset_state(self):
self.h_tm1 = shared_zeros((1, self.size), "h_tm1")
self.c_tm1 = shared_zeros((1, self.size), "c_tm1")
#########################################################################################################################
@staticmethod
def step(
x_t, h_tm1, c_tm1,
Ui, Wi, bi, Uf, Wf, bf,
Uo, Wo, bo, Ug, Wg, bg
):
"""
x_t.shape = (timestep=1, dim)
x_t.shape = (n_samples, timestep=1, dim)
"""
i_t = T.nnet.sigmoid(T.dot(x_t, Ui) + T.dot(h_tm1, Wi) + bi)
f_t = T.nnet.sigmoid(T.dot(x_t, Uf) + T.dot(h_tm1, Wf) + bf)
o_t = T.nnet.sigmoid(T.dot(x_t, Uo) + T.dot(h_tm1, Wo) + bo)
g_t = T.tanh(T.dot(x_t, Ug) + T.dot(h_tm1, Wg) + bg)
c_t = c_tm1 * f_t + g_t * i_t
h_t = T.tanh(c_t) * o_t
return h_t, c_t
#########################################################################################################################
def forward(self, X):
states, updates = theano.scan(
fn=self.step,
sequences=[ X ],
outputs_info=[self.h_tm1, self.c_tm1],
non_sequences=[
self.Ui, self.Wi, self.bi,
self.Uf, self.Wf, self.bf,
self.Uo, self.Wo, self.bo,
self.Ug, self.Wg, self.bg
]
)
updates = [(self.h_tm1, states[0][-1]), (self.c_tm1, states[1][-1])]
return states, updates
#########################################################################################################################
class LSTMEncoder(LSTM):
def encode(self, X):
states, updates = self.forward(X)
h_t = states[0][-1]
c_t = states[1][-1]
return h_t, c_t, updates
class LSTMDecoder(LSTM):
def __init__(self, size, dim, h_tm1=None, c_tm1=None):
super(LSTMDecoder, self).__init__(size=size, dim=dim)
self.Wh = init_weights((size, dim), "Wh")
self.bh = shared_zeros((minibatch_size_g, dim), "bh")
self.h_tm1 = h_tm1 or shared_zeros((minibatch_size_g, size), "h_tm1")
self.c_tm1 = c_tm1 or shared_zeros((minibatch_size_g, size), "c_tm1")
self.y_t = shared_zeros((minibatch_size_g, dim), "y_t")
# self.decode_length = theano.shared(decode_length)
self.params.append(self.Wh)
self.params.append(self.bh)
def decode_step(
self, y_t, h_tm1, c_tm1,
Ui, Wi, bi, Uf, Wf, bf,
Uo, Wo, bo, Ug, Wg, bg,
Wh, bh
):
h_t, c_t = self.step(
y_t, h_tm1, c_tm1,
Ui, Wi, bi, Uf, Wf, bf,
Uo, Wo, bo, Ug, Wg, bg
)
y_t = T.dot(h_t, Wh) + bh
return y_t, h_t, c_t
def decode(self, h_tm1, c_tm1, timesteps):
outputs, updates = theano.scan(
fn=self.decode_step,
outputs_info=[self.y_t, h_tm1, c_tm1],
non_sequences=[
self.Ui, self.Wi, self.bi,
self.Uf, self.Wf, self.bf,
self.Uo, self.Wo, self.bo,
self.Ug, self.Wg, self.bg,
self.Wh, self.bh
],
n_steps=timesteps
)
updates = [
(self.h_tm1, outputs[1][-1]),
(self.c_tm1, outputs[2][-1])
]
# return T.flatten(outputs[0], 3), updates
return outputs[0], updates
@staticmethod
def argmax(seq):
seq = T.argmax(seq, axis=2)
return seq
#########################################################################################################################
class Seq2Seq(object):
def __init__(self, size, dim):
self.encoder = LSTMEncoder(size, dim)
self.decoder = LSTMDecoder(size, dim)
self.params = []
self.params += self.encoder.params
self.params += self.decoder.params
self._predict = None
self._train = None
self._test = None
def compile(self, loss_func, optimizer):
seq_input = T.tensor3()
seq_target = T.tensor3()
decode_timesteps = T.iscalar()
h_tm1, c_tm1, updates_encode = self.encoder.encode(seq_input)
seq_predict_flex, updates_decode_flex = self.decoder.decode(h_tm1, c_tm1, decode_timesteps)
seq_argmax = self.decoder.argmax(seq_predict_flex)
seq_predict, updates_decode = self.decoder.decode(h_tm1, c_tm1, T.shape(seq_target)[0])
loss = loss_func(seq_predict, seq_target)
self._predict = theano.function([seq_input, decode_timesteps], seq_argmax, updates=updates_encode+updates_decode_flex)
self._test = theano.function([seq_input, seq_target], loss, updates=updates_encode+updates_decode)
updates = []
updates += updates_encode
updates += updates_decode
updates += optimizer(self.params, loss)
self._train = theano.function([seq_input, seq_target], loss, updates = updates)
def predict(self, seq_input, decode_timesteps):
self.encoder.reset_state()
self.decoder.reset_state()
return self._predict(seq_input, decode_timesteps)
def train(self, seq_input, seq_target):
self.encoder.reset_state()
self.decoder.reset_state()
return self._train(seq_input, seq_target)
def test(self, seq_input, seq_target):
self.encoder.reset_state()
self.decoder.reset_state()
return self._test(seq_input, seq_target)
#########################################################################################################################
def train(x, target):
for mini_batch, target in zip(x,target):
mini_batch = mini_batch.astype(dtype)
target = target.astype(dtype)
print("result of train function(loss or update) :", seq2seq.train(mini_batch, target))
#########################################################################################################################
# save weight information to a pickle file
# covers the encoder and decoder classes of the Seq2Seq class
# the encoder and decoder classes should have a function that returns the values of their weight variables
# one list holds the elements that store the weight information
def save_weight():
None
#########################################################################################################################
def gen_processed_seq(input_sentence):
tokenized_seq = nltk.word_tokenize( input_sentence )
input_sentences = [ None for _ in range(1) ]
input_sentences[0] = tokenized_seq
seq_input = readFile.word_to_idx(input_sentences, word_to_index_input_g )
sorted_seq_input = [ None for _ in range(minibatch_size_g) ]
sorted_seq_input[0] = seq_input[0]
input_len = len(seq_input[0])
for i in range(minibatch_size_g-1):
for j in range(input_len):
sorted_seq_input[i+1] = [-1]
input_finally = []
input_finally.append(sorted_seq_input)
return input_finally
#########################################################################################################################
def gen_one_hot(input_len, input_seq):
one_hot = readFile.seq_to_1hot(n_time_step_input_g, input_seq, "predict", 1, 1)
one_hot[0] = one_hot[0].astype(dtype)
print("one_hot : ", one_hot)
return one_hot
def get_idx(argmax, num_of_word):
idx_list = argmax[ : num_of_word, 0]
return idx_list
#########################################################################################################################
def predict():
input_sentence = raw_input("Input the English Sentence You Want to Translate into Spanish : ")
input_seq = gen_processed_seq(input_sentence)
print("input_seq[0][0] : ",input_seq[0][0])
num_of_word = len(input_seq[0][0])
one_hot = gen_one_hot(n_time_step_input_g, input_seq)
argmax = seq2seq.predict(one_hot[0] , n_time_step_input_g )
print("argmax_fin shape : ", argmax.shape)
print("argmax_fin : ", argmax)
idx_list_np = get_idx(argmax, num_of_word)
idx_list_py = idx_list_np.tolist()
print("index_to_word_target_g : ",index_to_word_target_g)
print("index_to_word_target_g[6] :", index_to_word_target_g[6])
result = readFile.idx_to_word(idx_list_py, index_to_word_target_g)
translated = ""
for elem in result :
translated += elem
translated += " "
print("translated : " , translated)
print("Translation End")
#########################################################################################################################
def gen_global_var(word_to_index_input, word_to_index_targrt, voca_dim, si, st, index_to_word_target):
global word_to_index_input_g
global word_to_index_targrt_g
global voca_dim_g
global minibatch_size_g
global n_time_step_input_g
global n_timestep_target_g
global index_to_word_target_g
word_to_index_input_g = word_to_index_input
word_to_index_targrt_g = word_to_index_targrt
voca_dim_g = voca_dim + 2
minibatch_size_g = si[0].shape[1]
n_time_step_input_g = si[0].shape[0]
n_timestep_target_g = st[0].shape[0]
index_to_word_target_g = index_to_word_target
return
#########################################################################################################################
def menu(si, st):
None
#########################################################################################################################
def gen_object():
return None
#########################################################################################################################
if __name__ == "__main__":
si, st, maxlen_input, minibatch_size, voca_dim, word_to_index_input, word_to_index_targrt, index_to_word_target = readFile.preprocessing()
gen_global_var(word_to_index_input, word_to_index_targrt, voca_dim, si, st, index_to_word_target)
seq2seq = Seq2Seq(n_time_step_input_g, voca_dim_g )
seq2seq.compile(loss_func=categorical_crossentropy, optimizer=adadelta)
while(True):
print("select a menu")
print("1. Training")
print("2. Translate specific English sentence into Spanish.")
val = input("selection : ")
if val == 1:
train(si, st)
elif val == 2:
predict()
And readFile.py is:
import numpy as np
import itertools
import nltk
import sys
import os
from nltk.tokenize import sent_tokenize
import codecs
unknown_token = 'UNKNOWN_TOKEN'
start_token = '_S'
end_token = '__E'
num_of_seq = 0
input_path = "./europarl-v7.es-en.en"
target_path = "./europarl-v7.es-en.es"
minibatch_unit = 100
voca_dim = 3000
SEQ_NUM_LIMIT = 1000
##########################################################################################
def file_tokenize(file):
f = codecs.open( file, "r", "utf-8" )
tokenized_seq = []
sentences = []
total_sentence_num = 0
# sequence tokenize
for i,line in enumerate(f):
print("tokenized Sentence No." , i)
# strip() method to remove the newline character at the end of the input line.
tokenized_seq = nltk.word_tokenize( line.strip() )
tokenized_seq.insert(0, start_token)
tokenized_seq.append(end_token)
sentences.append(tokenized_seq)
total_sentence_num += 1;
if(total_sentence_num == SEQ_NUM_LIMIT):
break
return sentences,total_sentence_num
##########################################################################################
# Count the word frequencies
def cntWordFreq(sentences):
word_freq = nltk.FreqDist(itertools.chain(*sentences))
return word_freq
##########################################################################################
# Get the most common words and build index_to_word and word_to_index vectors
def build_WordToIdx_IdxtoWord(word_freq):
vocab = word_freq.most_common(voca_dim-1)
index_to_word = [x[0] for x in vocab]
index_to_word.append(unknown_token)
word_to_index = dict([(w,i) for i,w in enumerate(index_to_word)])
return index_to_word, word_to_index
##########################################################################################
# change word to index
def word_to_idx(sequences, word_to_index ) :
for i, sent in enumerate(sequences):
sequences[i] = [w if w in word_to_index else unknown_token for w in sent]
sequences[i] = [word_to_index[w] if w in word_to_index else -1 for w in sequences[i]]
return sequences
##########################################################################################
def idx_to_word(seq, index_to_word):
for i, sent in enumerate(seq):
seq[i] = index_to_word[sent]
#seq[i] = [index_to_word[sent] if sent in index_to_word else '?' ]
return seq
##########################################################################################
def sortByLen(seqs_input, seqs_target) :
# check maximum sentence length
max_len_input = 0
max_len_target = 0
for sentence in seqs_input :
tmp = len(sentence)
if max_len_input < tmp:
max_len_input = tmp
for sentence in seqs_target :
tmp = len(sentence)
if max_len_target < tmp:
max_len_target = tmp
seqs_sorted_input = [ [] for _ in range(max_len_input+1) ]
seqs_sorted_target = [ [] for _ in range(max_len_input+1) ]
i = 0
for sentence_input, sentence_target in zip(seqs_input, seqs_target) :
sentence_len = len(sentence_input)
seqs_sorted_input[sentence_len].append(sentence_input)
seqs_sorted_target[sentence_len].append(sentence_target)
i+=1
return seqs_sorted_input, seqs_sorted_target, max_len_input, max_len_target
##########################################################################################
def find_maxlen(sentence_group):
max_seq_len = 0
for seq in sentence_group :
if len(seq) > max_seq_len :
max_seq_len = len(seq)
return max_seq_len
##########################################################################################
def sort_by_timestep(sentence_group):
same_len_seq = np.asarray(sentence_group)
same_len_seq = apply_to_m1(same_len_seq)
sorted_seq = same_len_seq.transpose()
return sorted_seq
##########################################################################################
def seq_to_1hot(max_len, sorted_sentences, type, minibatch_unit, num_of_seq):
one_hot = [None for _ in range( len(sorted_sentences) )]
for i, sentence_group in enumerate(sorted_sentences):
if sentence_group and len(sentence_group[0]) != 0 :
max_seq_len = find_maxlen(sentence_group)
row = max_seq_len * minibatch_unit
one_hot[i] = np.zeros( (row, voca_dim + 2) )
time_step_seq = sort_by_timestep(sentence_group)
j = 0
for word_idx in np.nditer( time_step_seq ) :
if word_idx != -1:
one_hot[i][j][word_idx] = 1
j+=1
one_hot[i] = np.reshape(one_hot[i], ( max_seq_len, -1, voca_dim+2) )
return one_hot
##########################################################################################
def apply_to_m1(lst, dtype=np.int64):
inner_max_len = max(map(len, lst))
result = np.zeros( [len(lst), inner_max_len], dtype )
result[:] = -1
for i, row in enumerate(lst):
for j, val in enumerate(row):
result[i][j] = val
return result
##########################################################################################
def seq_group_by_mini_batch_size(minibatch_unit, sorted_seq, num_of_seq):
idx = 0
cnt = 0
minibatch_seq = [ [] for _ in range( (num_of_seq/minibatch_unit)+1) ]
for seqs in sorted_seq :
if seqs :
for seq in seqs :
if seq:
minibatch_seq[idx].append(seq)
cnt+=1
if minibatch_unit == cnt:
cnt = 0
idx+= 1
for i, seq in enumerate (minibatch_seq):
if seq == []:
minibatch_seq = minibatch_seq[: i- 1]
break
return minibatch_seq
##########################################################################################
def preprocessing():
global num_of_seq
global minibatch_unit
global input_path
global target_path
print("Start Preprocessing")
sentences_input, total_sentence_num = file_tokenize(input_path)
sentences_target, total_sentence_num_target = file_tokenize(target_path)
print("FINISHED : file_tokenize ")
word_freq_input = cntWordFreq(sentences_input)
word_freq_target = cntWordFreq(sentences_target)
print("FINISHED : cntWordFreq ")
index_to_word_input, word_to_index_input = build_WordToIdx_IdxtoWord(word_freq_input)
index_to_word_target, word_to_index_targrt = build_WordToIdx_IdxtoWord(word_freq_target)
print("FINISHED : build_WordToIdx_IdxtoWord ")
seqs_input = word_to_idx(sentences_input, word_to_index_input)
seqs_target = word_to_idx(sentences_target, word_to_index_targrt)
print("FINISHED : word_to_idx ")
seqs_sorted_input, seqs_sorted_target, maxlen_input, maxlen_target = sortByLen(seqs_input, seqs_target)
print("FINISHED : sortByLen ")
for seqs in seqs_input:
if seqs:
for seq in seqs:
if seq:
num_of_seq+=1
seq_by_mini_batch_size_input = seq_group_by_mini_batch_size(minibatch_unit, seqs_sorted_input, num_of_seq)
seq_by_mini_batch_size_target = seq_group_by_mini_batch_size(minibatch_unit, seqs_sorted_target, num_of_seq)
print("FINISHED : seq_group_by_mini_batch_size ")
_1hot_input = seq_to_1hot(maxlen_input, seq_by_mini_batch_size_input, "input",minibatch_unit, num_of_seq)
_1hot_target = seq_to_1hot(maxlen_target, seq_by_mini_batch_size_target, "target",minibatch_unit, num_of_seq)
print("FINISHED : seq_to_1hot ")
if minibatch_unit > total_sentence_num:
minibatch_unit = total_sentence_num
print("exit preprocessing")
return _1hot_input, _1hot_target, maxlen_input, minibatch_unit, voca_dim, word_to_index_input, word_to_index_targrt, index_to_word_target