DDPG - Actor Critic Network does not converge with continous Actionspace - python-3.x

I'm currently a bit confused. I implemented an Actor-Critic Network and depending on the setup ist either begins to converge a little but produces values far from right. Or it produces the nearly the same loss values over and over again but right from the start produces values which are kind of correct.
I really have no clue how that is possible.
This is my current Model which produces values but does not converge:
def create_actor_model(self):
state_input = Input(shape=self.observation_space_shape)
h1 = Dense(18, activation='linear')(state_input)
h1l = LeakyReLU(alpha=0.01)(h1)
h3 = Dense(18, activation='tanh')(h1l)
h3n = BatchNormalization()(h3)
output = Dense(self.action_space_shape[0], activation='tanh')(h3n)
model = Model(input=state_input, output=output)
adam = Adam(lr=self.action_space_shape)
model.compile(loss="mse", optimizer=adam)
return state_input, model
def create_critic_model(self):
state_input = Input(shape=self.observation_space_shape)
state_h1 = Dense(18, activation='relu')(state_input)
state_h2 = Dense(36)(state_h1)
action_input = Input(shape=self.action_space_shape)
action_h1 = Dense(36)(action_input)
merged = Add()([state_h2, action_h1])
l_h1 = LeakyReLU(alpha=0.01)(merged)
merged_h1 = Dense(18, activation='tanh')(l_h1)
h1n = BatchNormalization()(merged_h1)
output = Dense(1, activation='tanh')(h1n)
model = Model(input=[state_input, action_input], output=output)
adam = Adam(lr=self.action_space_shape)
model.compile(loss="mse", optimizer=adam, metrics=['mae', 'mse', 'msle'])
return state_input, action_input, model
def _train_actor_batch(self, batch_size, s_batch, a_batch, r_batch, s2_batch):
predicted_action = self.actor_model.predict_on_batch(s_batch)
grads = self.sess.run(self.critic_grads, feed_dict={
self.critic_state_input: s_batch,
self.critic_action_input: predicted_action
})
self.sess.run(self.optimize, feed_dict={
self.actor_state_input: s_batch,
self.actor_critic_grad: grads[0]
})
def _train_critic_batch(self, batch_size, s_batch, a_batch, r_batch, s2_batch):
target_action = self.target_actor_model.predict_on_batch(s2_batch)
future_reward = self.target_critic_model.predict_on_batch([s2_batch, target_action])
rewards = []
for k in range(batch_size):
this_future_reward = future_reward[k] if batch_size > 1 else future_reward
rewards.append(r_batch[k] + self.gamma * this_future_reward)
return self.critic_model.train_on_batch([s_batch, a_batch], np.reshape(rewards, batch_size))
def replay(self, batch_size):
memory_length = len(self.memory)
if memory_length < batch_size:
samples = random.sample(self.memory, memory_length)
else:
samples = random.sample(self.memory, batch_size)
s_batch = np.array([cur_state[0] for cur_state, _, _, _ in samples])
a_batch = np.array([float(action[0]) for _, action, _, _ in samples])
r_batch = np.array([reward[0] for _, _, reward, _ in samples])
s2_batch = np.array([new_state[0] for _, _, _, new_state in samples])
critic_loss = self._train_critic_batch(len(s_batch), s_batch, a_batch, r_batch, s2_batch)
self._train_actor_batch(len(s_batch), s_batch, a_batch, r_batch, s2_batch)
self.update_target()
return critic_loss
def _update_actor_target(self):
actor_model_weights = self.actor_model.get_weights()
actor_target_weights = self.target_actor_model.get_weights()
for i in range(len(actor_target_weights)):
actor_target_weights[i] = actor_model_weights[i] * self.tau + actor_target_weights[i] * (1 - self.tau)
self.target_actor_model.set_weights(actor_target_weights)
def _update_critic_target(self):
critic_model_weights = self.critic_model.get_weights()
critic_target_weights = self.target_critic_model.get_weights()
for i in range(len(critic_target_weights)):
critic_target_weights[i] = critic_model_weights[i] * self.tau + critic_target_weights[i] * (1 - self.tau)
self.target_critic_model.set_weights(critic_target_weights)
def update_target(self):
self._update_actor_target()
self._update_critic_target()
def __init__(self):
self.memory = deque(maxlen=2000)
self.actor_state_input, self.actor_model = self.create_actor_model()
_, self.target_actor_model = self.create_actor_model()
self.actor_critic_grad = tf.placeholder(tf.float32, [None, self.action_space_shape[0]])
actor_model_weights = self.actor_model.trainable_weights
self.actor_grads = tf.gradients(self.actor_model.output,
actor_model_weights, -self.actor_critic_grad)
grads = zip(self.actor_grads, actor_model_weights)
self.optimize = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(grads)
self.critic_state_input, self.critic_action_input, \
self.critic_model = self.create_critic_model()
_, _, self.target_critic_model = self.create_critic_model()
self.critic_grads = tf.gradients(self.critic_model.output,
self.critic_action_input)
self.sess.run(tf.initialize_all_variables())
And then i am training with the following method which is called for each epoch (at the end, the memory which is getting cleared is the experience-replay memory):
def train(self, states, epoch, env, is_new_epoch):
train_size = int(len(states) * 0.70)
train = dict(list(states.items())[0:train_size])
test = dict(list(states.items())[train_size:len(states)])
with warnings.catch_warnings():
warnings.simplefilter("ignore")
working_states = copy(train)
critic_eval = list()
rewards = dict()
for last_day, (last_state_vec, _, last_action) in working_states.items():
this_day = last_day + timedelta(days=1)
if this_day in working_states:
(new_state_vec, _, _) = working_states.get(this_day)
rewards[last_day] = env.get_reward_by_states(last_state_vec, new_state_vec)
amt = len(working_states)
i = 0
for last_day, (last_state_vec, _, last_action) in working_states.items():
i+= 1
this_day = last_day + timedelta(days=1)
if this_day in working_states:
(new_state_vec, _, _) = working_states.get(this_day)
reward = np.reshape(rewards[last_day], [1, ])
self.remember(last_state_vec, [last_action], reward, new_state_vec)
new_eval = self.replay(env.batch_size)
critic_eval.append(new_eval)
self.memory.clear()
These are the loss values i got over 15 epochs:
One Sample as it comes from the memory:
state
[8 79 48246 53607 29 34 37 Decimal('1.0000000000') 6]
action
0.85
reward
0.2703302
next state
[9 79 48074 57869 27 28 32 Decimal('1.0000000000') 0]

Related

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu thanks

Screenshots of all error messages are in the picture link
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm #产生进度条
import dataloader4kg
from sklearn.metrics import precision_score,recall_score,accuracy_score
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
class KGCN( nn.Module ):
def __init__( self, n_users, n_entitys, n_relations,
e_dim, adj_entity, adj_relation, n_neighbors,
aggregator_method = 'sum',
act_method = F.relu, drop_rate=0.5):
super( KGCN, self ).__init__()
self.e_dim = e_dim # 特征向量维度
self.aggregator_method = aggregator_method #消息聚合方法
self.n_neighbors = n_neighbors #邻居的数量
self.user_embedding = nn.Embedding( n_users, e_dim, max_norm = 1 )#
self.entity_embedding = nn.Embedding( n_entitys, e_dim, max_norm = 1)
self.relation_embedding = nn.Embedding( n_relations, e_dim, max_norm = 1)
self.adj_entity = adj_entity #节点的邻接列表
self.adj_relation = adj_relation #关系的邻接列表
#线性层
self.linear_layer = nn.Linear(
in_features = self.e_dim * 2 if self.aggregator_method == 'concat' else self.e_dim,
out_features = self.e_dim,
bias = True)
self.act = act_method #激活函数
self.drop_rate = drop_rate #drop out 的比率
def forward(self, users, items, is_evaluate = False):
neighbor_entitys, neighbor_relations = self.get_neighbors( items )
user_embeddings = self.user_embedding( users)
item_embeddings = self.entity_embedding( items )
#得到v波浪线
neighbor_vectors = self.__get_neighbor_vectors( neighbor_entitys, neighbor_relations, user_embeddings )
out_item_embeddings = self.aggregator( item_embeddings, neighbor_vectors,is_evaluate)
# print(self.user_embedding)
# print(self.entity_embedding)
# print(self.relation_embedding)
# print(out_item_embeddings)
out = torch.sigmoid( torch.sum( user_embeddings * out_item_embeddings, axis = -1 ) )
# print(out)
return out
def get_neighbors( self, items ):#得到邻居的节点embedding,和关系embedding
#[[1,2,3,4,5],[2,1,3,4,5]...[]]#总共batchsize个n_neigbor的id
entity_ids = [ self.adj_entity[item] for item in items ]
relation_ids = [ self.adj_relation[item] for item in items ]
neighbor_entities = [ torch.unsqueeze(self.entity_embedding(torch.LongTensor(one_ids)),0) for one_ids in entity_ids]
neighbor_relations = [ torch.unsqueeze(self.relation_embedding(torch.LongTensor(one_ids)),0) for one_ids in relation_ids]
# [batch_size, n_neighbor, dim]
neighbor_entities = torch.cat( neighbor_entities, dim=0 )
neighbor_relations = torch.cat( neighbor_relations, dim=0 )
return neighbor_entities, neighbor_relations
#得到v波浪线
def __get_neighbor_vectors(self, neighbor_entitys, neighbor_relations, user_embeddings):
# [batch_size, n_neighbor, dim]
user_embeddings = torch.cat([torch.unsqueeze(user_embeddings,1) for _ in range(self.n_neighbors)],dim=1)
# [batch_size, n_neighbor]
user_relation_scores = torch.sum(user_embeddings * neighbor_relations, axis=2)
# [batch_size, n_neighbor]
user_relation_scores_normalized = F.softmax(user_relation_scores, dim=-1)
# [batch_size, n_neighbor, 1]
user_relation_scores_normalized = torch.unsqueeze(user_relation_scores_normalized, 2)
# [batch_size, dim]
neighbor_vectors = torch.sum(user_relation_scores_normalized * neighbor_entitys, axis=1)
return neighbor_vectors
#经过进一步的聚合与线性层得到v
def aggregator(self,item_embeddings, neighbor_vectors, is_evaluate):
# [batch_size, dim]
if self.aggregator_method == 'sum':
output = item_embeddings + neighbor_vectors
elif self.aggregator_method == 'concat':
# [batch_size, dim * 2]
output = torch.cat([item_embeddings, neighbor_vectors], axis=-1)
else:#neighbor
output = neighbor_vectors
if not is_evaluate:
output = F.dropout(output, self.drop_rate)
# [batch_size, dim]
output = self.linear_layer(output)
return self.act(output)
#验证
def do_evaluate( model, testSet ):
testSet = torch.LongTensor(testSet)
model.eval()
with torch.no_grad():
user_ids = testSet[:, 0]
item_ids = testSet[:, 1]
labels = testSet[:, 2]
user_ids = user_ids.to(device)
item_ids = item_ids.to(device)
labels = labels.to(device)
logits = model( user_ids, item_ids, True )
predictions = [1 if i >= 0.5 else 0 for i in logits]
p = precision_score(y_true = labels, y_pred = predictions)
r = recall_score(y_true = labels, y_pred = predictions)
acc = accuracy_score(labels, y_pred = predictions)
return p,r,acc
def train( epochs, batchSize, lr,
n_users, n_entitys, n_relations,
adj_entity, adj_relation,
train_set, test_set,
n_neighbors,
aggregator_method = 'sum',
act_method = F.relu, drop_rate = 0.5, weight_decay=5e-4
):
model = KGCN( n_users, n_entitys, n_relations,
10, adj_entity, adj_relation,
n_neighbors = n_neighbors,
aggregator_method = aggregator_method,
act_method = act_method,
drop_rate = drop_rate ).to(device)
optimizer = torch.optim.Adam( model.parameters(), lr = lr, weight_decay = weight_decay )
loss_fcn = nn.BCELoss()
dataIter = dataloader4kg.DataIter()
print(len(train_set)//batchSize)
for epoch in range( epochs ):
total_loss = 0.0
for datas in tqdm( dataIter.iter( train_set, batchSize = batchSize ) ):
user_ids = datas[:, 0]
item_ids = datas[:, 1]
labels = datas[:, 2]
user_ids=user_ids.to(device)
item_ids = item_ids.to(device)
labels = labels.to(device)
logits = model.forward( user_ids, item_ids )
loss = loss_fcn( logits, labels.float() )
optimizer.zero_grad()
loss.backward()
optimizer.step()
total_loss += loss.item()
p, r, acc = do_evaluate(model,test_set)
print("Epoch {} | Loss {:.4f} | Precision {:.4f} | Recall {:.4f} | Accuracy {:.4f} "
.format(epoch, total_loss/(len(train_set)//batchSize), p, r, acc))
if __name__ == '__main__':
n_neighbors = 20
users, items, train_set, test_set = dataloader4kg.readRecData(dataloader4kg.Ml_100K.RATING,dataloader4kg.Ml_100K.KG)
entitys, relations, kgTriples = dataloader4kg.readKgData(dataloader4kg.Ml_100K.KG)
adj_kg = dataloader4kg.construct_kg(kgTriples)
adj_entity, adj_relation = dataloader4kg.construct_adj(n_neighbors, adj_kg, len(entitys))
train( epochs = 40, batchSize = 1024, lr = 0.001,
n_users = max( users ) + 1, n_entitys = max( entitys ) + 1,
n_relations = max( relations ) + 1, adj_entity = adj_entity,
adj_relation = adj_relation, train_set = train_set,
test_set = test_set, n_neighbors = n_neighbors,
aggregator_method = 'sum', act_method = F.relu, drop_rate = 0.5 )
This is the code of my model.I have already loaded the model into cuda, and the data has also been loaded into cuda during training. Why is there the problem that it is not on the same device? How should I modify it
I have already loaded the model into cuda, and the data has also been loaded into cuda during training.
Code loaded into gpu
Code loaded into gpu

Multivariate Encoder-Decoder Model approaches value

I am creating my first multivariate multistep encoder-decoder LSTM to forecast revenues.
As you can see, the values move towards a value and then stop at that value. The aim is to create a forecast for a longer period, but there is no deviation at all from this standard value after the first week.
What is wrong and what can I do? To me it doesn't look like it is working at all.
code:
class ModelTrainer:
def __init__(self, prediction_length=30, offset=1):
self.prediction_length = prediction_length
self.offset = offset
self._setup_values()
self.use_scaling = True
self.__prepare_data()
def _setup_values(self):
# Model configuration
self.additional_metrics = ['accuracy']
self.embedding_output_dims = 15
self.max_sequence_length = 300
self.num_distinct_words = 5000
self.verbosity_mode = 1
# DATA
self.WINDOW_LENGTH = 70 # ! SHOULD BE ADJUSTED TO THE AMOUNT OF FORECASTING DAYS
self.SAMPLING_RATE = 1
self.BATCH_SIZE = 128
# MODEL
self.DROPOUT = 0.3
self.NODES_PER_LAYER = 256
self.NUMBER_OF_LAYERS = 3
# TRAINING
self.LEARNING_RATE = 0.001
self.OPTIMIZER = Adam(learning_rate=self.LEARNING_RATE)
self.VALIDATION_SPLIT = 0.20
self.NUMBER_OF_EPOCHS = 10
self.TEST_SIZE = 0.1
self.RANDOM_STATE = 123
self.LOSS_FUNCTION = MeanSquaredError()
def __import_data(self):
self.series = DataOrganizer().df
def __prepare_data(self):
self.__import_data()
self.scaler = preprocessing.MinMaxScaler()
data_scaled = self.scaler.fit_transform(self.series)
self.features, self.target = self._create_feature_target_values_window(
data_scaled)
def _create_feature_target_values_window(self, data):
self.number_of_output_columns = 4
feature_data = data
target_data = data[:, :self.number_of_output_columns]
features, target = list(), list()
in_start = 0
for _ in range(len(data)):
in_end = in_start + self.WINDOW_LENGTH
out_end = in_end + self.prediction_length
if out_end <= len(data):
features.append(feature_data[in_start:in_end, :])
target.append(
target_data[in_end:out_end, 0:self.number_of_output_columns])
in_start += 1
return np.array(features), np.array(target)
def __create_LSTM_model(self):
num_feature_columns = self.features.shape[2]
num_output_columns = self.target.shape[2]
model = Sequential()
model.add(LSTM(self.NODES_PER_LAYER, input_shape=(
self.WINDOW_LENGTH, num_feature_columns)))
model.add(Dropout(self.DROPOUT))
model.add(RepeatVector(self.prediction_length))
model.add(LSTM(self.NODES_PER_LAYER, return_sequences=True))
model.add(Dropout(self.DROPOUT))
model.add(TimeDistributed(Dense(self.NODES_PER_LAYER)))
model.add(Dropout(self.DROPOUT))
model.add(TimeDistributed(Dense(num_output_columns)))
model.summary()
return model
def train_model(self, callbacks=[]):
model = self.__create_LSTM_model()
model.compile(loss=self.LOSS_FUNCTION,
optimizer=self.OPTIMIZER,
metrics=['accuracy', MeanAbsoluteError()]
)
model.fit(
x=self.features,
y=self.target,
epochs=self.NUMBER_OF_EPOCHS,
validation_split=self.TEST_SIZE,
shuffle=False,
callbacks=callbacks
)
self.model = model
def create_forecast(self):
prediction = self.model.predict(self.features[-1:])
# prediction = self.model.predict(self.features[-30:-29]) # Show forecast from a month old
test_X = self.features.copy()
test_X = test_X[:self.prediction_length,
:1, self.number_of_output_columns:]
test_X = test_X.reshape(
self.prediction_length, self.series.shape[1] - self.number_of_output_columns)
prediction = prediction.reshape(self.prediction_length,
self.number_of_output_columns)
inv_yhat = np.concatenate((prediction, test_X), axis=1)
inv_yhat = self.scaler.inverse_transform(inv_yhat)
prediction_df = pd.DataFrame(
inv_yhat, columns=self.scaler.feature_names_in_)
first_date = self.series.last_valid_index() + timedelta(days=1)
last_date = first_date + timedelta(days=self.prediction_length-1)
days = pd.date_range(first_date, last_date, freq='D')
prediction_df.set_index(days, inplace=True)
prediction_df = prediction_df[self.series.columns[0:4]]
Actual
Forecast:
(I know the x-axis description is incorrect. Don't worry about it)

ViVIT PyTorch: RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15

I am trying to run Video Vision Transformer (ViViT) code with my dataset but getting an error using CrossEntropyLoss from Pytorch as the Loss function.
There are 6 classes I have:
['Run', 'Sit', 'Walk', 'Wave', 'Sit', 'Stand']
Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, weight_decay=1e-9, momentum=0.9)
Class Weights
tensor([0.0045, 0.0042, 0.0048, 0.0038, 0.0070, 0.0065])
Loss Function
loss_func = nn.CrossEntropyLoss(weight=class_weights.to(device))
Code Throwning Error
train_epoch(model, optimizer, train_loader, train_loss_history, loss_func)
Error
RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15
Code Calling the transformer
model = ViViT(224, 16, 100, 16).cuda()
Getting Video Frames
def get_frames(filename, n_frames=1):
frames = []
v_cap = cv2.VideoCapture(filename)
v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
frame_list = np.linspace(0, v_len - 1, n_frames + 1, dtype=np.int16)
frame_dims = np.array([224, 224, 3])
for fn in range(v_len):
success, frame = v_cap.read()
if success is False:
continue
if (fn in frame_list):
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = cv2.resize(frame, (frame_dims[0], frame_dims[1]))
frames.append(frame)
v_cap.release()
return frames, v_len
Dataset Preprocessing
class DatasetProcessing(data.Dataset):
def __init__(self, df, root_dir):
super(DatasetProcessing, self).__init__()
# List of all videos path
video_list = df["Video"].apply(lambda x: root_dir + '/' + x)
self.video_list = np.asarray(video_list)
self.df = df
def __getitem__(self, index):
# Ensure that the raw videos are in respective folders and folder name matches the output class label
video_label = self.video_list[index].split('/')[-2]
video_name = self.video_list[index].split('/')[-1]
video_frames, len_ = get_frames(self.video_list[index], n_frames = 15)
video_frames = np.asarray(video_frames)
video_frames = video_frames/255
class_list = ['Run', 'Walk', 'Wave', 'Sit', 'Turn', 'Stand']
class_id_loc = np.where(class_list == video_label)
label = class_id_loc
d = torch.as_tensor(np.array(video_frames).astype('float'))
l = torch.as_tensor(np.array(label).astype('float'))
return (d, l)
def __len__(self):
return self.video_list.shape[0]
Training Epochs
def train_epoch(model, optimizer, data_loader, loss_history, loss_func):
total_samples = len(data_loader.dataset)
model.train()
for i, (data, target) in enumerate(data_loader):
optimizer.zero_grad()
x = data.cuda()
data = rearrange(x, 'b p h w c -> b p c h w').cuda()
target = target.type(torch.LongTensor).cuda()
pred = model(data.float())
output = F.log_softmax(pred, dim=1)
loss = loss_func(output, target.squeeze(1))
loss.backward()
optimizer.step()
if i % 100 == 0:
print('[' + '{:5}'.format(i * len(data)) + '/' + '{:5}'.format(total_samples) +
' (' + '{:3.0f}'.format(100 * i / len(data_loader)) + '%)] Loss: ' +
'{:6.4f}'.format(loss.item()))
loss_history.append(loss.item())
Evaluate Model
def evaluate(model, data_loader, loss_history, loss_func):
model.eval()
total_samples = len(data_loader.dataset)
correct_samples = 0
total_loss = 0
with torch.no_grad():
for data, target in data_loader:
x = data.cuda()
data = rearrange(x, 'b p h w c -> b p c h w').cuda()
target = target.type(torch.LongTensor).cuda()
output = F.log_softmax(model(data.float()), dim=1)
loss = loss_func(output, target)
_, pred = torch.max(output, dim=1)
total_loss += loss.item()
correct_samples += pred.eq(target).sum()
avg_loss = total_loss / total_samples
loss_history.append(avg_loss)
print('\nAverage test loss: ' + '{:.4f}'.format(avg_loss) +
' Accuracy:' + '{:5}'.format(correct_samples) + '/' +
'{:5}'.format(total_samples) + ' (' +
'{:4.2f}'.format(100.0 * correct_samples / total_samples) + '%)\n')
Transformer
class Transformer(nn.Module):
def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0.):
super().__init__()
self.layers = nn.ModuleList([])
self.norm = nn.LayerNorm(dim)
for _ in range(depth):
self.layers.append(nn.ModuleList([
PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout)),
PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout))
]))
def forward(self, x):
for attn, ff in self.layers:
x = attn(x) + x
x = ff(x) + x
return self.norm(x)
ViViT Code
class ViViT(nn.Module):
def __init__(self, image_size, patch_size, num_classes, num_frames, dim = 192, depth = 4, heads = 3, pool = 'cls', in_channels = 3, dim_head = 64, dropout = 0.,
emb_dropout = 0., scale_dim = 4, ):
super().__init__()
assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'
assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
num_patches = (image_size // patch_size) ** 2
patch_dim = in_channels * patch_size ** 2
self.to_patch_embedding = nn.Sequential(
Rearrange('b t c (h p1) (w p2) -> b t (h w) (p1 p2 c)', p1 = patch_size, p2 = patch_size),
nn.Linear(patch_dim, dim),
)
self.pos_embedding = nn.Parameter(torch.randn(1, num_frames, num_patches + 1, dim))
self.space_token = nn.Parameter(torch.randn(1, 1, dim))
self.space_transformer = Transformer(dim, depth, heads, dim_head, dim*scale_dim, dropout)
self.temporal_token = nn.Parameter(torch.randn(1, 1, dim))
self.temporal_transformer = Transformer(dim, depth, heads, dim_head, dim*scale_dim, dropout)
self.dropout = nn.Dropout(emb_dropout)
self.pool = pool
self.mlp_head = nn.Sequential(
nn.LayerNorm(dim),
nn.Linear(dim, num_classes)
)
def forward(self, x):
x = self.to_patch_embedding(x)
b, t, n, _ = x.shape
cls_space_tokens = repeat(self.space_token, '() n d -> b t n d', b = b, t=t)
x = torch.cat((cls_space_tokens, x), dim=2)
x += self.pos_embedding[:, :, :(n + 1)]
x = self.dropout(x)
x = rearrange(x, 'b t n d -> (b t) n d')
x = self.space_transformer(x)
x = rearrange(x[:, 0], '(b t) ... -> b t ...', b=b)
cls_temporal_tokens = repeat(self.temporal_token, '() n d -> b n d', b=b)
x = torch.cat((cls_temporal_tokens, x), dim=1)
x = self.temporal_transformer(x)
x = x.mean(dim = 1) if self.pool == 'mean' else x[:, 0]
return self.mlp_head(x)
Multi target appears to be a feature supported since version 1.10.0.
https://discuss.pytorch.org/t/crossentropyloss-vs-per-class-probabilities-target/138331
Please check your pytorch version.
Please refer to the example of using the UTF101 top5 dataset, which is available on my Colab. The version of pytorch is 1.12.0+cu113, and the code you listed was able to run the training almost exactly as it was written.

Must the input height of a 1D CNN be constant?

I'm currently doing my honours research project on online/dynamic signature verification. I am using the SVC 2004 dataset (Task 2). I have done the following data processing:
def load_dataset_normalized(path):
file_names = os.listdir(path)
num_of_persons = len(file_names)
initial_starting_point = np.zeros(np.shape([7]))
x_dataset = []
y_dataset = []
for infile in file_names:
full_file_name = os.path.join(path, infile)
file = open(full_file_name, "r")
file_lines = file.readlines()
num_of_points = int(file_lines[0])
x = []
y = []
time_stamp = []
button_status = []
azimuth_angles = []
altitude = []
pressure = []
for idx, line in enumerate(file_lines[1:]):
idx+=1
nums = line.split(' ')
if idx == 1:
nums[2] = 0
initial_starting_point = nums
x.append(int(nums[0]))
y.append(int(nums[1]))
time_stamp.append(0)
button_status.append(int(nums[3]))
azimuth_angles.append(int(nums[4]))
altitude.append(int(nums[5]))
pressure.append(int(nums[6]))
else:
x.append(int(nums[0]))
y.append(int(nums[1]))
time_stamp.append(10)
button_status.append(int(nums[3]))
azimuth_angles.append(int(nums[4]))
altitude.append(int(nums[5]))
pressure.append(int(nums[6]))
max_x = max(x)
max_y = max(y)
max_azimuth_angle = max(azimuth_angles)
max_altitude = max(altitude)
max_pressure = max(pressure)
min_x = min(x)
min_y = min(y)
min_azimuth_angle = min(azimuth_angles)
min_altitude = min(altitude)
min_pressure = min(pressure)
#Alignment normalization:
for i in range(num_of_points):
x[i] -= int(initial_starting_point[0])
y[i] -= int(initial_starting_point[1])
azimuth_angles[i] -= int(initial_starting_point[4])
altitude[i] -= int(initial_starting_point[5])
pressure[i] -= int(initial_starting_point[6])
#Size normalization
for i in range(num_of_points):
x[i] = ((x[i] - max_x) / (min_x - max_x))
y[i] = ((y[i] - max_y) / (min_y - max_y))
azimuth_angles[i] = ((azimuth_angles[i] - max_azimuth_angle) / (min_azimuth_angle - max_azimuth_angle))
altitude[i] = ((altitude[i] - max_altitude) / (min_altitude - max_altitude))
pressure[i] = ((pressure[i] - max_pressure) / (min_pressure - max_pressure))
#data points to dataset
x_line = []
for i in range (num_of_points):
x_line.append([x[i], y[i], time_stamp[i], button_status[i], azimuth_angles[i], altitude[i], pressure[i]])
if i == num_of_points-1:
x_dataset.append(x_line)
infile_without_extension = infile.replace('.TXT','')
index_of_s = infile_without_extension.find("S")
index_of_num = index_of_s + 1
sig_ID = int(infile_without_extension[index_of_num:])
if sig_ID < 21:
y_dataset.append([1,0])
else:
y_dataset.append([0,1])
x_dataset = np.asarray(x_dataset)
y_dataset = np.asarray(y_dataset)
return x_dataset, y_dataset
I also have another method that takes the values as they are in the text file and created an "original" dataset.
Now, the aim of my research is to create a CRNN (convolutional recurrent neural network) that can identify if a signature is authentic or forged. Here is the code for the model:
class crnn_model:
def __init__(self, trainX, trainy, testX, testy, optimizer_method):
self.trainX = trainX
self.trainy = trainy
self.testX = testX
self.testy = testy
self.evaluate_model(optimizer_method)
def evaluate_model(self, optimizer_method):
verbose, epochs, batch_size = 0, 40, 10
n_timesteps, n_features, n_outputs = len(self.trainX), 7, 2
print(n_timesteps)
model = keras.Sequential()
model.add(keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features), use_bias=True))
model.add(keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.MaxPooling1D(pool_size=2))
model.add(keras.layers.Flatten())
model.add(keras.layers.LSTM(2, input_shape=[30592,1], return_sequences=True))
model.summary()
# Compile the model
model.compile(optimizer=optimizer_method, loss='categorical_crossentropy', metrics=['accuracy'])
#fit model
model.fit(self.trainX, self.trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
# evaluate model
_, accuracy = model.evaluate(self.testX, self.testy, batch_size=batch_size, verbose=0)
return accuracy
Here is the problem I am having: the number of points used to store each signature is different, hence making the input height of the input matrix vary from one signature to the next. Must I now force the dataset to some uniform/constant number of points?
Much appreciated for your time.

DMN neural network with poor validation results -- only 50%

I have this problem with my Neural Network. I'm trying to implement what's called a DMN (Dynamic Memory Network) for the babi data set. A paper about the DMN model can be found here: http://arxiv.org/abs/1506.07285 Another paper about DMNs can be found here: https://yerevann.github.io/2016/02/05/implementing-dynamic-memory-networks/
Here's my problem. btw I'm using PyTorch.
I split the training and testing data into parts for training, testing, and validation. I use 1000 parts for training, 500 parts for testing and 500 parts for validation. I run into a problem. I can train successfully but when I go to the validation step I never get a score above 50% accuracy. With the babi data set it is documented that you should be able to get 100% accuracy with the first test set. (There are 20 test sets in all). I can get 100% accuracy during training, but only 50% in validation. My question to you is what part of the program would be responsible for this kind of behavior? In other words, can you tell me why I'm always getting 50% ?? Thanks for your time. I'm limiting my experiments to the first babi test for now.
I thought I had this all figured out but my problem has cropped up again. I really don't have a clue what it is. Here is a link to the code. If you could take a look I would be most grateful. https://github.com/radiodee1/awesome-chatbot/blob/master/model/babi_iv.py
Some code is included below.
class WrapMemRNN(nn.Module):
def __init__(self,vocab_size, embed_dim, hidden_size, n_layers, dropout=0.3, do_babi=True, bad_token_lst=[], freeze_embedding=False, embedding=None, print_to_screen=False):
super(WrapMemRNN, self).__init__()
self.hidden_size = hidden_size
self.n_layers = n_layers
self.do_babi = do_babi
self.print_to_screen = print_to_screen
self.bad_token_lst = bad_token_lst
self.embedding = embedding
self.freeze_embedding = freeze_embedding
self.teacher_forcing_ratio = hparams['teacher_forcing_ratio']
gru_dropout = dropout * 0
self.model_1_enc = Encoder(vocab_size, embed_dim, hidden_size, n_layers, dropout=dropout,embedding=embedding, bidirectional=False)
self.model_2_enc = Encoder(vocab_size, embed_dim, hidden_size, n_layers, dropout=gru_dropout, embedding=embedding, bidirectional=False)
self.model_3_mem_a = MemRNN(hidden_size, dropout=gru_dropout)
self.model_3_mem_b = MemRNN(hidden_size, dropout=gru_dropout)
self.model_4_att = EpisodicAttn(hidden_size, dropout=gru_dropout)
self.model_5_ans = AnswerModule(vocab_size, hidden_size,dropout=dropout)
self.input_var = None # for input
self.q_var = None # for question
self.answer_var = None # for answer
self.q_q = None # extra question
self.inp_c = None # extra input
self.inp_c_seq = None
self.all_mem = None
self.last_mem = None # output of mem unit
self.prediction = None # final single word prediction
self.memory_hops = hparams['babi_memory_hops']
self.reset_parameters()
if self.freeze_embedding or self.embedding is not None:
self.new_freeze_embedding()
#self.criterion = nn.CrossEntropyLoss()
pass
def reset_parameters(self):
#print('reset')
stdv = 1.0 / math.sqrt(self.hidden_size)
for weight in self.parameters():
#print('here...')
weight.data.uniform_(-stdv, stdv)
if len(weight.size()) > 1:
init.xavier_normal_(weight)
def forward(self, input_variable, question_variable, target_variable, criterion=None):
self.new_input_module(input_variable, question_variable)
self.new_episodic_module()
outputs, ans = self.new_answer_module_simple()
return outputs, None, ans, None
def new_freeze_embedding(self):
self.model_1_enc.embed.weight.requires_grad = False
self.model_2_enc.embed.weight.requires_grad = False
print('freeze embedding')
pass
def new_input_module(self, input_variable, question_variable):
prev_h1 = []
for ii in input_variable:
ii = self.prune_tensor(ii, 2)
out1, hidden1 = self.model_1_enc(ii, None)
prev_h1.append(hidden1)
self.inp_c_seq = prev_h1
self.inp_c = prev_h1[-1]
prev_h2 = []
for ii in question_variable:
ii = self.prune_tensor(ii, 2)
out2, hidden2 = self.model_2_enc(ii, None)
prev_h2.append(hidden2)
self.q_q = hidden2[:,-1,:]
return
def new_episodic_module(self):
if True:
mem_list = []
sequences = self.inp_c_seq
for i in range(len(sequences)):
m_list = [self.q_q.clone()]
#print(sequences[i].size(),'seq')
for iter in range(self.memory_hops):
x = self.new_attention_step(sequences[i], None, m_list[iter], self.q_q)
if self.print_to_screen and not self.training:
print(x,'x -- after', len(x), sequences[i].size())
e, _ = self.new_episode_small_step(sequences[i], x.permute(1,0), None)
assert len(sequences[i].size()) == 3
#print(e.size(),'e')
ee = e[:, 0, -1]#.permute(2,1,0)
_, out = self.model_3_mem_a(ee.unsqueeze(0), self.prune_tensor(m_list[iter], 3))
m_list.append(out)
mem_list.append(m_list[self.memory_hops])
mm_list = torch.cat(mem_list, dim=1)
self.last_mem = mm_list
#print(self.last_mem.size(),'lm')
return None
def new_episode_small_step(self, ct, g, prev_h):
assert len(ct.size()) == 3
bat, sen, emb = ct.size()
#print(ct.size(),'ct')
#print(sen,'sen', g.size())
last = [prev_h]
ep = []
for iii in range(sen):
c = ct[0,iii,:].unsqueeze(0)
if prev_h is not None:
prev_h = self.prune_tensor(prev_h, 3)
out, gru = self.model_3_mem_b(c, last[iii] )
last.append(out)
g = g.squeeze(0)
gru = gru.squeeze(0).permute(1,0)
#if not self.training: print(g.size(),'g', iii)
#ggg = g[:, iii]
ggg = g[iii]
h = torch.mul(ggg , gru)# + torch.mul((1 - g[iii]) , prev_h.permute(1,0))
index = -1 #-1 # -2
if last[iii + index] is not None:
#print(last[iii].size(),'last -',ggg.size(), ggg, sen)
if False: h = h + torch.mul((1 - ggg), last[iii + index])
#print(h.size(),'hsize')
if iii == sen - 1 : ep.append(h.unsqueeze(1))
h = torch.cat(ep, dim=1)
#print(h.size(),ep[0].size(),'h',sen, gru.size())
return h, gru
def new_attention_step(self, ct, prev_g, mem, q_q):
q_q = self.prune_tensor(q_q,3)
mem = self.prune_tensor(mem,3)
assert len(ct.size()) == 3
bat, sen, emb = ct.size()
#print(sen,'len sen')
att = []
for iii in range(sen):
c = ct[0,iii,:]
concat_list = [
c.unsqueeze(0),
mem.squeeze(0),
q_q.squeeze(0),
(c * q_q).squeeze(0),
(c * mem).squeeze(0),
(torch.abs(c - q_q) ).squeeze(0),
(torch.abs(c - mem) ).squeeze(0)
]
#for ii in concat_list: print(ii.size())
#print(sen,'sen')
#exit()
#z = F.sigmoid(z)
concat_list = torch.cat(concat_list, dim=1)
#print(concat_list.size(),'cl')
att.append(concat_list)
att = torch.cat(att, dim=0)
#z = torch.cat(att, dim=0)
z = self.model_4_att(att)
z = F.sigmoid(z)
#z = F.softmax(z, dim=1) #F.sigmoid(z)
#print(z.size(),'z')
return z
def prune_tensor(self, input, size):
if len(input.size()) < size:
input = input.unsqueeze(0)
if len(input.size()) > size:
input = input.squeeze(0)
return input
def new_answer_module_simple(self):
#outputs
ansx = self.model_5_ans(self.last_mem, None)
#ansx = F.softmax(ansx, dim=0)
if self.print_to_screen:
print(ansx, 'ansx printed')
print(ansx.size(), 'ansx')
vocab, sen = ansx.size()
aa = torch.argmax(ansx, dim=0)
print(aa.size(),'aa')
for i in range(sen):
zz = aa[i]
z = ansx[:, i]
a = torch.argmax(z, dim=0)
print(a.item(), zz.item())
print('----')
#ans = torch.argmax(ansx,dim=1)#[0]
return [None], ansx
pass

Resources