When I am training a model, I should use only 10% of data for trainer.fit(model,datamodule)
so I should call DataModule just for 10% of data
Part of DataModule is:
class DataModule(pl.LightningDataModule):
def __init__(self, train_dataset, val_dataset, batch_size = 1):
super(DataModule, self).__init__()
self.train_dataset = train_dataset
self.val_dataset = val_dataset
self.batch_size = batch_size
def train_dataloader(self):
return DataLoader(self.train_dataset, batch_size = self.batch_size,
collate_fn = collate_fn, shuffle = True, num_workers = 2, pin_memory = True)
def val_dataloader(self):
return DataLoader(self.val_dataset, batch_size = self.batch_size,
collate_fn = collate_fn, shuffle = False, num_workers = 2, pin_memory = True)
So I use a for loop
datamodule = DataModule(train_ds, val_ds)
for i,data in enumerate(datamodule.train_dataloader()):
print( datamodule.train_dataloader(i,data))
But it doesn't work. How can I change it?
I have a question about getting all parameters of the network. My network is defined as follow:
activation = nn.ReLU()
class OneInputBasis(nn.Module):
def __init__(self):
super().__init__()
bo_b = True
bo_last = False
self.l1 = nn.Linear(200, 100, bias = bo_b).to(device)
self.l4 = nn.Linear(100, 100, bias = bo_last).to(device)
def forward(self, v):
v = activation ( self.l1(v) )
v = ( self.l4(v) )
return v
and
class node(nn.Module):
def __init__(self):
super().__init__()
bo_b = True
bo_last = False
self.set_lay = []
for jj in range(dim_output_space_basis):
self.set_lay.append(OneInputBasis())
def forward(self, v):
w = self.set_lay[0](v)
for ii in range(dim_output_space_basis-1):
w = torch.cat((w, self.set_lay[ii+1](v)), dim = 1 )
return w
and
class mesh(nn.Module):
def __init__(self):
super().__init__()
bo_b = True
bo_last = False
self.l3 = nn.Linear(2, 100, bias = bo_b).to(device)
self.l4 = nn.Linear(100, 100, bias = bo_b).to(device)
self.l7 = nn.Linear(100,10, bias = bo_last).to(device)
def forward(self, w):
w = activation ( self.l3(w) )
w = activation ( self.l4(w) )
w = ( self.l7(w) )
return w
finally, I have
activation = nn.ReLU()
class Test(nn.Module):
def __init__(self):
super().__init__()
bo_b = True
bo_last = False
self.top = node()
self.bottom = mesh()
def forward(self, v, w, y):
v = self.top(v)
w = self.bottom(w)
e = torch.bmm(w ,torch.bmm(v, y))
return e[:, :, 0]
Now I define the network:
fnn_adam = Test()
When I print the parameters of the network, as
for p in fnn_adam.parameters():
print(p)
I can only see the parameters associated with fnn_adam.bottom, how can I print out the parameters associated with fnn_adam.top? Are the parameters associated with .top trainable? Thank you!
Calling self.set_lay.append(OneInputBasis()) with the instantiation of node does not register the fully-connected layers
self.l1 = nn.Linear(200, 100, bias = bo_b).to(device)
self.l4 = nn.Linear(100, 100, bias = bo_last).to(device)
to the instance fnn_adam of class Test. This is why the respective parameters do not show up in your code above.
Without loss of generality, I chose
import torch
import torch.nn as nn
import torch.nn.functional as F
dim_output_space_basis = 2
device ='cpu'
and modified the init method of class node. The remainder of your code is perfectly fine. Please see below:
class node(nn.Module):
def __init__(self):
super().__init__()
bo_b = True
bo_last = False
# self.set_lay = [] # Legacy
attributeNames = ['l_btm{}'.format(i) for i in range(dim_output_space_basis)]
for jj_index, jj in enumerate(range(dim_output_space_basis)):
# self.set_lay.append(OneInputBasis()) # Legacy
setattr(self, attributeNames[jj_index], OneInputBasis())
Now, the parameters register as evidenced by running fnn_adam._modules and observing its output
OrderedDict([('top',
node(
(l_btm0): OneInputBasis(
(l1): Linear(in_features=200, out_features=100, bias=True)
(l4): Linear(in_features=100, out_features=100, bias=False)
)
(l_btm1): OneInputBasis(
(l1): Linear(in_features=200, out_features=100, bias=True)
(l4): Linear(in_features=100, out_features=100, bias=False)
)
)),
('bottom',
mesh(
(l3): Linear(in_features=2, out_features=100, bias=True)
(l4): Linear(in_features=100, out_features=100, bias=True)
(l7): Linear(in_features=100, out_features=10, bias=False)
))])
I have made a custom model in tensorflow 2, which uses eager execution.
The model is trained using the inherited .fit() function, about 600k training samples are used in a 10 epoch cycle with a batch size of 128 (up to 8k batch has been done). After training the model is saved as a SavedModel format. This is then used in C++ by using the cppflow library. However, this process requires the inference to use the same batch size as the training of the model, while only requiring to do inference on a single sample at a time. The application requires that things are fast and padding a feature vector array with 127 dummy vectors is slowing everyting down.
The batch size is also used in the NormalizeLayer at the end, which is using a hardcoded units value at the moment to initialize a matrix.
I have searched for a way to use variable batch sizes in Tensorflow 2 custom models, but the only thing that is remotely close are TF1 examples; which are so outdated they are unusable.
My model:
class IndividualFeaturesLayer(tf.keras.layers.Layer):
def __init__(self):
super(IndividualFeaturesLayer, self).__init__()
def build(self, input_shape):
stddev = 2 / np.sqrt(input_shape[-1] + input_shape[-1])
self.w = tf.Variable(tf.random.truncated_normal((input_shape[-1], input_shape[-1]), dtype='float64'), trainable=True)
b_init = tf.zeros_initializer()
self.b = tf.Variable(initial_value=b_init(shape=(input_shape[-1]), dtype='float64'), trainable=True)
def call(self, input):
returnVar = tf.math.add(tf.matmul(input, self.w), self.b)
return returnVar
class FullFeatureLayer(tf.keras.layers.Layer):
def __init__(self):
super(FullFeatureLayer, self).__init__()
self.globalFeatures = IndividualFeaturesLayer()
self.pieceFeatures = IndividualFeaturesLayer()
self.squareFeatures = IndividualFeaturesLayer()
def call(self, input):
globalFeature = input[:, :17]
pieceFeature = input[:, 17:225]
squareFeature = input[:, 225:353]
x = self.globalFeatures(globalFeature)
y = self.pieceFeatures(pieceFeature)
z = self.squareFeatures(squareFeature)
returnVar = tf.concat([x, y, z], 1)
return tf.nn.relu(returnVar)
class FullFullyConnectedFeatureLayer(tf.keras.layers.Layer):
def __init__(self):
super(FullFullyConnectedFeatureLayer, self).__init__()
def build(self, input_shape):
stddev = 2 / np.sqrt(input_shape[-1] + input_shape[-1])
self.w = tf.Variable(tf.random.truncated_normal((input_shape[-1], input_shape[-1]), dtype='float64'), trainable=True)
b_init = tf.zeros_initializer()
self.b = tf.Variable(initial_value=b_init(shape=(input_shape[-1]), dtype='float64'), trainable=True)
def call(self, input):
return tf.nn.relu(tf.math.add(tf.matmul(input, self.w), self.b))
class FullFullyConnectedOutputLayer(tf.keras.layers.Layer):
def __init__(self):
super(FullFullyConnectedOutputLayer, self).__init__()
def build(self, input_shape):
stddev = 2 / np.sqrt(input_shape[-1] + 1)
self.w = tf.Variable(tf.random.truncated_normal((input_shape[-1], 1), dtype='float64'), trainable=True)
b_init = tf.zeros_initializer()
self.b = tf.Variable(initial_value=b_init(shape=(1), dtype='float64'), trainable=True)
def call(self, input):
return tf.matmul(input, self.w) + self.b
class NormalizeLayer(tf.keras.layers.Layer):
def __init__(self, units=128):
super(NormalizeLayer, self).__init__()
self.units = units
def build(self, input_shape):
self.divideTensor = tf.fill((self.units, 1), tf.constant(1500, dtype='float64'))
self.minTensor = tf.fill((self.units, 1), tf.constant(-1, dtype='float64'))
self.maxTensor = tf.fill((self.units, 1), tf.constant(1, dtype='float64'))
def call(self, input):
dividedTensor = tf.divide(input, self.divideTensor)
minimizedTensor = tf.math.minimum(dividedTensor, self.maxTensor)
maximizedTensor = tf.math.maximum(minimizedTensor, self.minTensor)
return maximizedTensor
class FullNetwork(tf.keras.Model):
def __init__(self, batch_size):
super(FullNetwork, self).__init__(name='')
self.inputLayer = FullFeatureLayer()
self.hiddenLayer1 = FullFeatureLayer()
self.hiddenLayer2 = FullFullyConnectedFeatureLayer()
self.outputLayer = FullFullyConnectedOutputLayer()
self.normalizeLayer = NormalizeLayer()
def call(self, input, batch_size):
print(batch_size)
x = self.inputLayer(input)
x = self.hiddenLayer1(x)
x = self.hiddenLayer2(x)
x = self.outputLayer(x)
x = self.normalizeLayer(x)
return x
tf.keras.backend.set_floatx('float64')
fullNetwork = FullNetwork()
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
fullNetwork.compile(optimizer, loss=tf.keras.losses.MeanSquaredError(), metrics=["MeanAbsoluteError"], run_eagerly=True)
fullNetwork.fit(training_feature_array, training_score_array, epochs=10, batch_size=128)
I'm trying to calculate the gradient with tf.GradientTape. When I try to do it using as inputs the loss and Model.trainable_weights (tf.keras.Model) the result that returns me in an array of None. what am I doing wrong? The tensorflow version I use is 1.13.0.
The implemneted algorithm is a OnPolicy DQN(Not usual DQN) so that I don't use a target network(whihch is used as behavioural network in conventional DQN code). So, I wanted to differentiate the Error, which is defined as a minibatch MSE of Y(which is R + gamma * max_a Q(s', a')) and Q(s,a) in the code below.
import gym
import numpy as np
import tensorflow as tf
from collections import deque
# ==== import below from my repo ====
from common.wrappers import MyWrapper # just a wrapper to set a reward at the terminal state -1
from common.params import Parameters # params for training
from common.memory import ReplayBuffer # Experience Replay Buffer
tf.enable_eager_execution()
class Model(tf.keras.Model):
def __init__(self, num_action):
super(Model, self).__init__()
self.dense1 = tf.keras.layers.Dense(16, activation='relu')
self.dense2 = tf.keras.layers.Dense(16, activation='relu')
self.dense3 = tf.keras.layers.Dense(16, activation='relu')
self.pred = tf.keras.layers.Dense(num_action, activation='softmax')
def call(self, inputs):
x = self.dense1(inputs)
x = self.dense2(x)
x = self.dense3(x)
pred = self.pred(x)
return pred
class DQN:
"""
On policy DQN
"""
def __init__(self, num_action):
self.num_action = num_action
self.model = Model(num_action)
self.optimizer = tf.train.AdamOptimizer()
def predict(self, state):
return self.model(tf.convert_to_tensor(state[None, :], dtype=tf.float32)).numpy()[0]
def update(self, state, action, target):
# target: R + gamma * Q(s',a')
# calculate Q(s,a)
q_values = self.predict(state)
actions_one_hot = tf.one_hot(action, self.num_action, 1.0, 0.0)
action_probs = tf.reduce_sum(actions_one_hot * q_values, reduction_indices=-1)
# Minibatch MSE => (1/batch_size) * (R + gamma * Q(s',a') - Q(s,a))^2
loss = tf.reduce_mean(tf.squared_difference(target, action_probs))
return loss
if __name__ == '__main__':
reward_buffer = deque(maxlen=5)
env = MyWrapper(gym.make("CartPole-v0"))
replay_buffer = ReplayBuffer(5000)
params = Parameters(mode="CartPole")
agent = DQN(env.action_space.n)
for i in range(2000):
state = env.reset()
total_reward = 0
for t in range(210):
# env.render()
action = np.argmax(agent.predict(state)) # behave greedily
next_state, reward, done, info = env.step(action)
replay_buffer.add(state, action, reward, next_state, done)
total_reward += reward
state = next_state
if done:
print("Episode {0} finished after {1} timesteps".format(i, t + 1))
if i > 10:
print("Update")
with tf.GradientTape() as tape:
states, actions, rewards, next_states, dones = replay_buffer.sample(params.batch_size)
next_Q = agent.predict(next_states)
Y = rewards + params.gamma * np.max(next_Q, axis=1) * np.logical_not(dones)
loss = agent.update(states, actions, Y)
print(loss)
grads = tape.gradient(loss, agent.model.trainable_weights)
# ==== THIS RETURNS ONLY NONE ====
print(grads)
agent.optimizer.apply_gradients(zip(grads, agent.model.trainable_weights))
break
# store the episode reward
reward_buffer.append(total_reward)
# check the stopping condition
if np.mean(reward_buffer) > 195:
print("GAME OVER!!")
break
env.close()
import gym
import numpy as np
import tensorflow as tf
from collections import deque
# ==== import below from my repo ====
from common.wrappers import MyWrapper # just a wrapper to set a reward at the terminal state -1
from common.params import Parameters # params for training
from common.memory import ReplayBuffer # Experience Replay Buffer
tf.enable_eager_execution()
class Model(tf.keras.Model):
def __init__(self, num_action):
super(Model, self).__init__()
self.dense1 = tf.keras.layers.Dense(16, activation='relu')
self.dense2 = tf.keras.layers.Dense(16, activation='relu')
self.dense3 = tf.keras.layers.Dense(16, activation='relu')
self.pred = tf.keras.layers.Dense(num_action, activation='softmax')
def call(self, inputs):
x = self.dense1(inputs)
x = self.dense2(x)
x = self.dense3(x)
pred = self.pred(x)
return pred
class DQN:
"""
On policy DQN
"""
def __init__(self, num_action):
self.num_action = num_action
self.model = Model(num_action)
self.optimizer = tf.train.AdamOptimizer()
def predict(self, state):
return self.model(tf.convert_to_tensor(state[None, :], dtype=tf.float32)).numpy()[0]
def update(self, state, action, target):
# target: R + gamma * Q(s',a')
# calculate Q(s,a)
q_values = self.predict(state)
actions_one_hot = tf.one_hot(action, self.num_action, 1.0, 0.0)
action_probs = tf.reduce_sum(actions_one_hot * q_values, reduction_indices=-1)
# Minibatch MSE => (1/batch_size) * (R + gamma * Q(s',a') - Q(s,a))^2
loss = tf.reduce_mean(tf.squared_difference(target, action_probs))
return loss
if __name__ == '__main__':
reward_buffer = deque(maxlen=5)
env = MyWrapper(gym.make("CartPole-v0"))
replay_buffer = ReplayBuffer(5000)
params = Parameters(mode="CartPole")
agent = DQN(env.action_space.n)
for i in range(2000):
state = env.reset()
total_reward = 0
for t in range(210):
# env.render()
action = np.argmax(agent.predict(state)) # behave greedily
next_state, reward, done, info = env.step(action)
replay_buffer.add(state, action, reward, next_state, done)
total_reward += reward
state = next_state
if done:
print("Episode {0} finished after {1} timesteps".format(i, t + 1))
if i > 10:
print("Update")
with tf.GradientTape() as tape:
states, actions, rewards, next_states, dones = replay_buffer.sample(params.batch_size)
next_Q = agent.predict(next_states)
Y = rewards + params.gamma * np.max(next_Q, axis=1) * np.logical_not(dones)
loss = agent.update(states, actions, Y)
print(loss)
grads = tape.gradient(loss, agent.model.trainable_weights)
# ==== THIS RETURNS ONLY NONE ====
print(grads)
agent.optimizer.apply_gradients(zip(grads, agent.model.trainable_weights))
break
# store the episode reward
reward_buffer.append(total_reward)
# check the stopping condition
if np.mean(reward_buffer) > 195:
print("GAME OVER!!")
break
env.close()
import gym
import numpy as np
import tensorflow as tf
from collections import deque
# ==== import below from my repo ====
from common.wrappers import MyWrapper # just a wrapper to set a reward at the terminal state -1
from common.params import Parameters # params for training
from common.memory import ReplayBuffer # Experience Replay Buffer
tf.enable_eager_execution()
class Model(tf.keras.Model):
def __init__(self, num_action):
super(Model, self).__init__()
self.dense1 = tf.keras.layers.Dense(16, activation='relu')
self.dense2 = tf.keras.layers.Dense(16, activation='relu')
self.dense3 = tf.keras.layers.Dense(16, activation='relu')
self.pred = tf.keras.layers.Dense(num_action, activation='softmax')
def call(self, inputs):
x = self.dense1(inputs)
x = self.dense2(x)
x = self.dense3(x)
pred = self.pred(x)
return pred
class DQN:
"""
On policy DQN
"""
def __init__(self, num_action):
self.num_action = num_action
self.model = Model(num_action)
self.optimizer = tf.train.AdamOptimizer()
def predict(self, state):
return self.model(tf.convert_to_tensor(state[None, :], dtype=tf.float32)).numpy()[0]
def update(self, state, action, target):
# target: R + gamma * Q(s',a')
# calculate Q(s,a)
q_values = self.predict(state)
actions_one_hot = tf.one_hot(action, self.num_action, 1.0, 0.0)
action_probs = tf.reduce_sum(actions_one_hot * q_values, reduction_indices=-1)
# Minibatch MSE => (1/batch_size) * (R + gamma * Q(s',a') - Q(s,a))^2
loss = tf.reduce_mean(tf.squared_difference(target, action_probs))
return loss
if __name__ == '__main__':
reward_buffer = deque(maxlen=5)
env = MyWrapper(gym.make("CartPole-v0"))
replay_buffer = ReplayBuffer(5000)
params = Parameters(mode="CartPole")
agent = DQN(env.action_space.n)
for i in range(2000):
state = env.reset()
total_reward = 0
for t in range(210):
# env.render()
action = np.argmax(agent.predict(state)) # behave greedily
next_state, reward, done, info = env.step(action)
replay_buffer.add(state, action, reward, next_state, done)
total_reward += reward
state = next_state
if done:
print("Episode {0} finished after {1} timesteps".format(i, t + 1))
if i > 10:
print("Update")
with tf.GradientTape() as tape:
states, actions, rewards, next_states, dones = replay_buffer.sample(params.batch_size)
next_Q = agent.predict(next_states)
Y = rewards + params.gamma * np.max(next_Q, axis=1) * np.logical_not(dones)
loss = agent.update(states, actions, Y)
print(loss)
grads = tape.gradient(loss, agent.model.trainable_weights)
# ==== THIS RETURNS ONLY NONE ====
print(grads)
agent.optimizer.apply_gradients(zip(grads, agent.model.trainable_weights))
break
# store the episode reward
reward_buffer.append(total_reward)
# check the stopping condition
if np.mean(reward_buffer) > 195:
print("GAME OVER!!")
break
env.close()
import gym
import numpy as np
import tensorflow as tf
from collections import deque
# ==== import below from my repo ====
from common.wrappers import MyWrapper # just a wrapper to set a reward at the terminal state -1
from common.params import Parameters # params for training
from common.memory import ReplayBuffer # Experience Replay Buffer
tf.enable_eager_execution()
class Model(tf.keras.Model):
def __init__(self, num_action):
super(Model, self).__init__()
self.dense1 = tf.keras.layers.Dense(16, activation='relu')
self.dense2 = tf.keras.layers.Dense(16, activation='relu')
self.dense3 = tf.keras.layers.Dense(16, activation='relu')
self.pred = tf.keras.layers.Dense(num_action, activation='softmax')
def call(self, inputs):
x = self.dense1(inputs)
x = self.dense2(x)
x = self.dense3(x)
pred = self.pred(x)
return pred
class DQN:
"""
On policy DQN
"""
def __init__(self, num_action):
self.num_action = num_action
self.model = Model(num_action)
self.optimizer = tf.train.AdamOptimizer()
def predict(self, state):
return self.model(tf.convert_to_tensor(state[None, :], dtype=tf.float32)).numpy()[0]
def update(self, state, action, target):
# target: R + gamma * Q(s',a')
# calculate Q(s,a)
q_values = self.predict(state)
actions_one_hot = tf.one_hot(action, self.num_action, 1.0, 0.0)
action_probs = tf.reduce_sum(actions_one_hot * q_values, reduction_indices=-1)
# Minibatch MSE => (1/batch_size) * (R + gamma * Q(s',a') - Q(s,a))^2
loss = tf.reduce_mean(tf.squared_difference(target, action_probs))
return loss
if __name__ == '__main__':
reward_buffer = deque(maxlen=5)
env = MyWrapper(gym.make("CartPole-v0"))
replay_buffer = ReplayBuffer(5000)
params = Parameters(mode="CartPole")
agent = DQN(env.action_space.n)
for i in range(2000):
state = env.reset()
total_reward = 0
for t in range(210):
# env.render()
action = np.argmax(agent.predict(state)) # behave greedily
next_state, reward, done, info = env.step(action)
replay_buffer.add(state, action, reward, next_state, done)
total_reward += reward
state = next_state
if done:
print("Episode {0} finished after {1} timesteps".format(i, t + 1))
if i > 10:
print("Update")
with tf.GradientTape() as tape:
states, actions, rewards, next_states, dones = replay_buffer.sample(params.batch_size)
next_Q = agent.predict(next_states)
Y = rewards + params.gamma * np.max(next_Q, axis=1) * np.logical_not(dones)
loss = agent.update(states, actions, Y)
print(loss)
grads = tape.gradient(loss, agent.model.trainable_weights)
# ==== THIS RETURNS ONLY NONE ====
print(grads)
agent.optimizer.apply_gradients(zip(grads, agent.model.trainable_weights))
break
# store the episode reward
reward_buffer.append(total_reward)
# check the stopping condition
if np.mean(reward_buffer) > 195:
print("GAME OVER!!")
break
env.close()
Try to change your update function to:
def update(self, state, action, target):
# target: R + gamma * Q(s',a')
# calculate Q(s,a)
q_values = self.model(tf.convert_to_tensor(state[None, :], dtype=tf.float32))
actions_one_hot = tf.one_hot(action, self.num_action, 1.0, 0.0)
action_probs = tf.reduce_sum(actions_one_hot * q_values, reduction_indices=-1)
# Minibatch MSE => (1/batch_size) * (R + gamma * Q(s',a') - Q(s,a))^2
loss = tf.reduce_mean(tf.squared_difference(target, action_probs))
return loss
I think with the .numpy() call in the predict function the tape loses the refererence to the weights. (I've not tested my answer)