Training DQN Agent with Multidiscrete action space in gym - openai-gym

I would like to train a DQN Agent with Keras-rl. My environment has both multi-discrete action and observation spaces. I am adapting the code of this video: https://www.youtube.com/watch?v=bD6V3rcr_54&t=5s
Then, I am sharing my code
class ShowerEnv(Env):
    """Gym environment whose action and observation spaces are MultiDiscrete.

    NOTE(review): only the constructor is visible in this excerpt; the other
    Env methods are elided in the original post.
    """

    def __init__(self, max_machine_states_vec, production_rates_vec, production_threshold, scheduling_horizon, operations_horizon = 100):
        """Build the action and observation spaces.

        Args:
            max_machine_states_vec: one entry per machine; used as the sizes
                of the per-machine observation dimensions.
            production_rates_vec: one entry per machine; used as the sizes of
                the per-machine production-rate action dimensions.
            production_threshold: not used in the visible part of the
                constructor.
            scheduling_horizon: the last observation dimension has
                ``scheduling_horizon + 2`` values (the scheduling state,
                including "ns").
            operations_horizon: not used in the visible part of the
                constructor.

        Raises:
            AssertionError: if the two per-machine vectors differ in length.
        """
        num_machines = len(max_machine_states_vec)
        assert len(max_machine_states_vec) == len(production_rates_vec), "Machine states and production rates have different cardinality"
        # list(...) makes `+` a concatenation for any sequence type; with a
        # numpy array `+` would be element-wise, and with a tuple it would raise.
        # Action layout: one production-rate choice per machine, one binary
        # call-to-maintenance per machine, and one binary call-to-schedule.
        self.action_space = MultiDiscrete(list(production_rates_vec) + num_machines*[2] + [2])
        # Observation layout: one state per machine plus the scheduling state
        # (including "ns", i.e. None).
        self.observation_space = MultiDiscrete(list(max_machine_states_vec) + [scheduling_horizon+2])
        # Initial-state setup follows in the original code (elided in this excerpt).
Code going on...
.
.
.
.
def build_model(states, actions):
    """Build the Q-network: flatten the observation window, two hidden layers, linear head.

    Args:
        states: observation shape tuple (e.g. ``env.observation_space.shape``).
        actions: either the total number of discrete actions (int) or a
            vector of per-dimension sizes (e.g. ``action_space.nvec``), in
            which case the head gets one unit per joint action (the product).

    Returns:
        An uncompiled Sequential model with output shape ``(None, n_actions)``.
    """
    # Local imports so this block stands alone.
    # NOTE(review): import Flatten from the same Keras package the file uses
    # for Sequential/Dense (``keras`` vs ``tensorflow.keras``) — confirm.
    import numpy as np
    from tensorflow.keras.layers import Flatten

    # Use the argument instead of the global `env`, and accept int or nvec.
    n_outputs = int(np.prod(actions))
    model = Sequential()
    # keras-rl feeds inputs shaped (batch, window_length, *obs_shape). Without
    # flattening, Dense keeps the window axis and the output is (None, 1, n),
    # which DQNAgent rejects.
    model.add(Flatten(input_shape=(1,) + tuple(states)))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(n_outputs, activation='linear'))
    return model
def build_agent(model, actions):
    """Create a DQNAgent around ``model``.

    Args:
        model: the Q-network passed through to DQNAgent.
        actions: passed through unchanged as ``nb_actions``.

    Returns:
        The (not yet compiled) DQNAgent.
    """
    # Policy is constructed before the replay memory, as in the original.
    agent_settings = dict(
        policy=BoltzmannQPolicy(),
        memory=SequentialMemory(limit=50000, window_length=1),
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )
    return DQNAgent(model=model, **agent_settings)
.
.
.
.
# Shape of one observation; passed to build_model.
states = env.observation_space.shape
# DQN needs a single flat discrete action count: the number of joint actions
# is the product of the MultiDiscrete dimension sizes.
actions_number = int(reduce(lambda a,b: a*b, env.action_space.nvec))
# The original passed an undefined name `actions` here and to build_agent.
model = build_model(states, actions_number)
model.summary()
dqn = build_agent(model, actions_number)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
After initializing with 2 elements, so 5 actions, I get the following error:
ValueError: Model output "Tensor("dense_2/BiasAdd:0", shape=(None, 1, 32), dtype=float32)" has invalid shape. DQN expects a model that has one dimension for each action, in this case [2 2 2 2 2]
How can I solve this? I am fairly sure the cause is that I do not fully understand how to adapt the code in the video to a MultiDiscrete action space.
Thanks :)

I had the same problem, unfortunately it's impossible to use gym.spaces.MultiDiscrete with the DQNAgent in Keras-rl.
Solution:
Use the library stable-baselines3 and use the A2C agent. It's very easy to implement it.

Related

Custom activation function dependant on other output nodes in Keras

I would like to predict a multi-dimensional array using Long Short-Term Memory (LSTM) networks while imposing restrictions on the shape of the surface of interest.
I thought to accomplish this by setting some elements of the output (regions of the surface) in a functional relationship to others (simple scaling conditions).
Is it possible to set such custom activation functions for the output, whose argument are other output nodes, in Keras?
If not, is there any other interface that allows this? Do you have any source to a manual?
The keras-team on the GitHub answered the question about how to make a custom activation function.
There also is a question with a code with a custom activation function.
These pages may help you!
Additional comment
These pages were not enough for this question so I add the comment below;
Maybe PyTorch is better for customization than Keras. I tried to write such a network, though it is a very simple one, based on PyTorch tutorials and "Extending PyTorch with Custom Activation Functions"
I made a custom activation function in which the element at index 1 (counting from 0) of each output vector is set to twice the element at index 0. A very simple one-layer network was used for the training. After training, I checked that the condition was satisfied.
import torch
import matplotlib.pyplot as plt
# Define the custom activation function
# reference: https://towardsdatascience.com/extending-pytorch-with-custom-activation-functions-2d8b065ef2fa
def silu(input):
    """Return a copy of ``input`` whose column 1 equals twice column 0.

    Despite the name, this is not the SiLU/swish function; it is the custom
    constraint activation from the post.

    Args:
        input: tensor indexed as ``[row, column]`` with at least two columns.

    Returns:
        A new tensor; ``input`` itself is left untouched.
    """
    # Work on a clone: the original wrote into `input` in place, which changed
    # the caller's tensor and raised on leaf tensors that require grad.
    output = input.clone()
    output[:, 1] = input[:, 0] * 2
    return output
class SiLU(torch.nn.Module):
    """Parameter-free module that applies the module-level ``silu`` function."""

    def __init__(self):
        # Initialise the torch.nn.Module base class.
        super(SiLU, self).__init__()

    def forward(self, input):
        """Apply ``silu`` to ``input`` and return the result."""
        activated = silu(input)
        return activated
# Training: fit a single Linear(3, 3) layer followed by the custom activation
# to the target y = 2 * x, using hand-written gradient descent.
# reference: https://pytorch.org/tutorials/beginner/pytorch_with_examples.html
k = 10
x = torch.rand([k,3])
y = x * 2
model = torch.nn.Sequential(torch.nn.Linear(3, 3), SiLU())
loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-3
for t in range(2000):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    # Report every 100th step (t = 99, 199, ...).
    if (t + 1) % 100 == 0:
        print(t, loss.item())
    model.zero_grad()
    loss.backward()
    # Parameter update outside autograd tracking.
    with torch.no_grad():
        for param in model.parameters():
            param.sub_(learning_rate * param.grad)
# Compare predictions with the targets.
yy = model(x) # predicted
for heading, values in (('ground truth', y), ('predicted', yy)):
    print(heading)
    print(values)
# Plot the first five samples: solid = target, dotted = prediction.
colorlist = ['#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00']
plt.figure()
for i, color in enumerate(colorlist):
    plt.plot(y[i,:].detach().numpy(), linestyle = "solid", label = "ground truth_" + str(i), color=color)
    plt.plot(yy[i,:].detach().numpy(), linestyle = "dotted", label = "predicted_" + str(i), color=color)
plt.legend()
# Check the constraint: column 1 should equal 2 * column 0.
doubled_first = yy[:,0]*2
second = yy[:,1]
plt.figure()
plt.plot(yy[:,0].detach().numpy()*2, label = '0th * 2')
plt.plot(second.detach().numpy(), label = '1th')
plt.legend()
print(doubled_first)
print(second)

module 'tensorflow' has no attribute 'random_uniform'

I tried to perform some deep learning application and got a module 'tensorflow' has no attribute 'random_uniform' error. On CPU the code works fine but it is really slow. In order to run the code on GPU i needed to change some definitions. Here is my code below. Any ideas?
def CapsNet(input_shape, n_class, routings):
    """Build three Keras models that share one capsule network and one decoder.

    Args:
        input_shape: shape of a single input sample (without the batch axis).
        n_class: number of output capsules; also the width of the label input.
        routings: forwarded to ``CapsuleLayer`` as ``routings``.

    Returns:
        Tuple ``(train_model, eval_model, manipulate_model)``:
        ``train_model`` maps ``[x, y]`` to ``[out_caps, reconstruction]``,
        ``eval_model`` maps ``x`` to ``[out_caps, reconstruction]``, and
        ``manipulate_model`` maps ``[x, y, noise]`` to a reconstruction.

    NOTE(review): ``PrimaryCap``, ``CapsuleLayer``, ``Length`` and ``Mask`` are
    defined outside this excerpt; their behaviour below is taken from the
    original comments, not verified here.
    """
    x = tf.keras.layers.Input(shape=input_shape)
    # Layer 1: a conventional Conv2D layer.
    conv1 = tf.keras.layers.Convolution2D(filters=256, kernel_size=9, strides=1, padding='valid', activation='relu', name='conv1')(x)
    # Layer 2: Conv2D layer with `squash` activation, then reshape to [None, num_capsule, dim_capsule]
    primarycaps = PrimaryCap(conv1, dim_capsule=8, n_channels=32, kernel_size=9, strides=2, padding='valid')
    # Layer 3: capsule layer; the routing algorithm runs here.
    digitcaps = CapsuleLayer(num_capsule=n_class, dim_capsule=16, routings=routings,
                             name='digitcaps')(primarycaps)
    # Layer 4: auxiliary layer that replaces each capsule with its length, so
    # the output matches the shape of the true label.
    out_caps = Length(name='capsnet')(digitcaps)
    # Decoder network.
    y = tf.keras.layers.Input(shape=(n_class,))
    masked_by_y = Mask()([digitcaps, y]) # The true label is used to mask the output of capsule layer. For training
    masked = Mask()(digitcaps) # Mask using the capsule with maximal length. For prediction
    # Decoder shared between training and prediction; its input width is
    # 16 * n_class and its output is reshaped back to input_shape.
    decoder = tf.keras.models.Sequential(name='decoder')
    decoder.add(tf.keras.layers.Dense(512, activation='relu', input_dim=16*n_class))
    decoder.add(tf.keras.layers.Dense(1024, activation='relu'))
    decoder.add(tf.keras.layers.Dense(np.prod(input_shape), activation='sigmoid'))
    decoder.add(tf.keras.layers.Reshape(target_shape=input_shape, name='out_recon'))
    # Models for training and evaluation (prediction)
    train_model = tf.keras.models.Model([x, y], [out_caps, decoder(masked_by_y)])
    eval_model = tf.keras.models.Model(x, [out_caps, decoder(masked)])
    # Manipulation model: add an external noise tensor to the capsule outputs
    # before masking with the label and decoding.
    noise = tf.keras.layers.Input(shape=(n_class, 16))
    noised_digitcaps = tf.keras.layers.Add()([digitcaps, noise])
    masked_noised_y = Mask()([noised_digitcaps, y])
    manipulate_model = tf.keras.models.Model([x, y, noise], decoder(masked_noised_y))
    return train_model, eval_model, manipulate_model
def margin_loss(y_true, y_pred):
    """Margin loss: summed over axis 1, then averaged.

    Entries weighted by ``y_true`` are penalised for ``y_pred`` below 0.9;
    entries weighted by ``1 - y_true`` are penalised (at half weight) for
    ``y_pred`` above 0.1.
    """
    present_penalty = y_true * K.square(K.maximum(0., 0.9 - y_pred))
    absent_penalty = 0.5 * (1 - y_true) * K.square(K.maximum(0., y_pred - 0.1))
    per_class = present_penalty + absent_penalty
    return K.mean(K.sum(per_class, 1))
# Build the three models. The class count is the number of distinct argmax
# indices along axis 1 of train_y (presumably one-hot labels — confirm).
model, eval_model, manipulate_model = CapsNet(
    input_shape=train_x_temp.shape[1:],
    n_class=len(np.unique(np.argmax(train_y, 1))),
    routings=3,
)
The problem lies with your TensorFlow installation — more precisely, with your Python tensorflow library. Make sure you reinstall the package correctly; with Anaconda you need to install it with administrator rights.
Or, if you have the newest version, the function has moved and you need to call it like this instead:
tf.random.uniform(
See for more information the documentation: https://www.tensorflow.org/api_docs/python/tf/random/uniform

Keras model seems untrained after loading weights

I am trying to save and restore the weights for a given model in Keras.
I am successful in saving the weights, using model.save_weights(filepath, ...) and also the weights are actually loaded. I can confirm this by saving model.get_weights() to a file, after saving and after restoring, and diffing the files that I receive that way.
However my model is just as bad as it is at the start. Is there anything I am missing?
def __init__(self, **args):
    """Add the network layers to ``self.model``, restore weights if saved, and compile.

    NOTE(review): ``self.model`` and ``self.nb_actions`` are used here but
    created outside this excerpt; ``**args`` is not used in the visible code.
    """
    # Convolutional stack; per the original author's comment it follows
    # Mnih et al. (2015). Each convolution is followed by a ReLU.
    for n_filters in (32, 64, 64):
        self.model.add(Convolution2D(n_filters, (3, 3), strides=(1, 1)))
        self.model.add(Activation('relu'))
    self.model.add(Flatten())
    self.model.add(Dense(512))
    self.model.add(Activation('relu'))
    # One linear output unit per action.
    self.model.add(Dense(self.nb_actions))
    self.model.add(Activation('linear'))
    # summary() prints the table itself and returns None; wrapping it in
    # print() only added a stray "None" line.
    self.model.summary()
    # Restore previously saved weights, if any, before compiling.
    if os.path.isfile("/home/abcd/model.weights"):
        self.model.load_weights("/home/abcd/model.weights")
    self.compile(Adam(lr=.00025), metrics=['mae'])
...
def compile(self, optimizer, metrics=None):
    """Create and compile the target model and the masked-loss trainable model.

    Args:
        optimizer: optimizer for the trainable model. When
            ``self.target_model_update < 1`` it is wrapped so that each
            step also soft-updates the target model.
        metrics: optional list of metrics; ``mean_q`` is always appended.
            The caller's list is not modified.

    Side effects:
        Sets ``self.target_model``, ``self.trainable_model`` and
        ``self.compiled``; loads weights from the hard-coded files under
        ``/home/abcd`` when they exist.
    """
    # Build a fresh list: the original used a mutable default and `+=`, so
    # mean_q accumulated across calls and the caller's list was mutated.
    metrics = list(metrics or []) + [mean_q]  # register default metrics

    # We never train the target model, hence we can set the optimizer and loss arbitrarily.
    self.target_model = clone_model(self.model, self.custom_model_objects)
    if os.path.isfile("/home/abcd/target_model.weights"):
        self.target_model.load_weights("/home/abcd/target_model.weights")
    self.target_model.compile(optimizer='sgd', loss='mse')
    self.model.compile(optimizer='sgd', loss='mse')

    # Compile model.
    if self.target_model_update < 1.:
        # We use the `AdditionalUpdatesOptimizer` to efficiently soft-update the target model.
        updates = get_soft_target_model_updates(self.target_model, self.model, self.target_model_update)
        optimizer = AdditionalUpdatesOptimizer(optimizer, updates)

    def clipped_masked_error(args):
        # Huber loss, zeroed for every action except the masked-in one,
        # then summed over the action axis.
        y_true, y_pred, mask = args
        loss = huber_loss(y_true, y_pred, self.delta_clip)
        loss *= mask  # apply element-wise mask
        return K.sum(loss, axis=-1)

    # Create trainable model. The problem is that we need to mask the output since we only
    # ever want to update the Q values for a certain action. The way we achieve this is by
    # using a custom Lambda layer that computes the loss. This gives us the necessary flexibility
    # to mask out certain parameters by passing in multiple inputs to the Lambda layer.
    y_pred = self.model.output
    y_true = Input(name='y_true', shape=(self.nb_actions,))
    mask = Input(name='mask', shape=(self.nb_actions,))
    loss_out = Lambda(clipped_masked_error, output_shape=(1,), name='loss')([y_true, y_pred, mask])
    # Normalise the model input(s) to a list so the extra inputs can be appended.
    ins = self.model.input if isinstance(self.model.input, list) else [self.model.input]
    trainable_model = Model(inputs=ins + [y_true, mask], outputs=[loss_out, y_pred])
    assert len(trainable_model.output_names) == 2
    combined_metrics = {trainable_model.output_names[1]: metrics}
    losses = [
        lambda y_true, y_pred: y_pred,  # loss is computed in Lambda layer
        lambda y_true, y_pred: K.zeros_like(y_pred),  # we only include this for the metrics
    ]
    if os.path.isfile("/home/abcd/trainable_model.weights"):
        trainable_model.load_weights("/home/abcd/trainable_model.weights")
    trainable_model.compile(optimizer=optimizer, loss=losses, metrics=combined_metrics)
    self.trainable_model = trainable_model

    self.compiled = True
...
def final(self, state):
    """Called at the end of each game; saves the weights once training ends."""
    # Let the base agent do its end-of-game bookkeeping first.
    PacmanQAgent.final(self, state)

    # NOTE(review): indentation was lost in the source; everything below is
    # assumed to run only when training has just finished — confirm.
    if self.episodesSoFar != self.numTraining:
        return

    # you might want to print your weights here for debugging
    "*** YOUR CODE HERE ***"
    self.training = False
    # Save the three networks, overwriting existing files (second arg True).
    weight_files = (
        ("model", "/home/abcd/model.weights"),
        ("trainable_model", "/home/abcd/trainable_model.weights"),
        ("target_model", "/home/abcd/target_model.weights"),
    )
    for attr_name, path in weight_files:
        getattr(self, attr_name).save_weights(path, True)
I found the problem. Actually the saving and loading worked fine. I was working with an Annealed Epsilon Greedy Policy, so every time I started training, it would, at the start, do basically random steps only anyway.
In addition, my Testing Code was wrong, so testing did not what it was supposed to do. These two combined made it feel like it would actually learn something (training went well) but didn't save the weights (testing went wrong, next training started from 'random').

tensorflow batch normalization after a restore

Let us say that we create a small network:
# Build a small fully connected network (2 -> 5 -> 3 -> 1) in a fresh graph.
tf.reset_default_graph()
layers = [5, 3, 1]
activations = [tf.tanh, tf.tanh, None]
inp = tf.placeholder(dtype=tf.float32, shape=(None, 2 ), name='inp')
out = tf.placeholder(dtype=tf.float32, shape=(None, 1 ), name='out')
isTraining = tf.placeholder(dtype=tf.bool, shape=(), name='isTraining')
N = inp * 1 # start the chain from the input tensor
for width, activation in zip(layers, activations):
    N = tf.layers.dense(N, width, None)
    #N = tf.layers.batch_normalization( N, training = isTraining) # comment this line
    if activation is not None:
        N = activation(N)
# Mean squared error, minimised with Adam after any collected update ops.
err = tf.reduce_mean((N - out)**2)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    opt = tf.train.AdamOptimizer(0.05).minimize(err)
# Variables to save: all trainable variables, plus any global variable whose
# name contains 'batch_normalization' but not 'optimizer'.
tVars = tf.trainable_variables()
graph = tf.get_default_graph()
for v in graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
    is_batch_norm_var = 'batch_normalization' in v.name and 'optimizer' not in v.name
    if is_batch_norm_var and v not in tVars:
        tVars.append(v)
init = tf.global_variables_initializer()
saver = tf.train.Saver(var_list= tVars)
This is a simple NN generated for optimization. The only thing that I am currently interested in is batch normalization (the line that has been commented out). Now, if we train this network, save it, restore it and calculate the error again, we do OK:
# Random inputs with a linear target: y = 2*x0 + 3*x1 + 3, as a column vector.
N = 1000
X = np.random.rand(N, 2)
y = (2*X[:, 0] + 3*X[:, 1] + 3).reshape(-1, 1)
# Train on the full data set, then save a checkpoint.
train_feed = {inp:X, out:y, isTraining:True}
with tf.Session() as sess:
    sess.run(init)
    print('During Training')
    for i in range(3000):
        _, errVal = sess.run([opt, err], feed_dict=train_feed)
        if not i % 500:
            print(errVal)
    # Start from an empty checkpoint directory.
    shutil.rmtree('models1', ignore_errors=True)
    os.makedirs('models1')
    path = saver.save( sess, 'models1/model.ckpt' )
# Restore into a new session and evaluate with isTraining=False.
print('During testing')
test_feed = {inp:X, out:y, isTraining:False}
with tf.Session() as sess:
    saver.restore(sess, path)
    errVal = sess.run(err, feed_dict=test_feed)
    print( errVal )
Here is the output:
During Training
24.4422
0.00330666
0.000314223
0.000106421
6.00441e-05
4.95262e-05
During testing
INFO:tensorflow:Restoring parameters from models1/model.ckpt
5.5899e-05
On the other hand, when we uncomment the batch normalization line, and redo the above calculation:
During Training
31.7372
1.92066e-05
3.87879e-06
2.55274e-06
1.25418e-06
1.43078e-06
During testing
INFO:tensorflow:Restoring parameters from models1/model.ckpt
0.041519
As you can see, the restored value is far from what the model is predicting. Is there anything that I am doing wrong?
Note: I know that for batch-normalization I need to generate mini batches. I have skipped all of that to keep the code simple and yet complete.
Batch normalization layer, as defined in Tensorflow, needs to have access to the placeholder isTraining (https://www.tensorflow.org/api_docs/python/tf/layers/batch_normalization). Make sure you include it when you define the layer: tf.layers.batch_normalization(..., training=isTraining, ...).
The reason for this is that Batch Normalization layers have 2 trainable parameters (beta and gamma) that are trained normally with the rest of the network, but they also have 2 extra parameters (batch mean and variance) that are only updated when you tell the layer it is training. You do this simply by applying the recipe above.
Right now your code seems not to be training mean and variance. Instead, they are randomly fixed and the network is optimized with those. Later on, when you save and restore, they are reinitialized with different values, hence the network doesn't perform as it used to.

Optimizing two estimators (dependent on each other) using Sklearn Grid Search

The flow of my program is in two stages.
I am using Sklearn's ExtraTreesClassifier along with the SelectFromModel method to select the most important features. Note that ExtraTreesClassifier takes many parameters as input (such as n_estimators), and SelectFromModel therefore yields a different set of important features for different values of n_estimators. This means that I can optimize n_estimators to get the best features.
In the second stage, I am training my Keras NN model on the features selected in the first stage. I am using AUROC as the score for the grid search, but this AUROC is calculated using the Keras-based neural network. I want to use Grid Search over n_estimators in my ExtraTreesClassifier to optimize the AUROC of the Keras neural network. I know I have to use Pipeline, but I am confused about how to implement both together; I don't know where to put the Pipeline in my code. I am getting an error which says: TypeError: estimator should be an estimator implementing 'fit' method, <function fs at 0x0000023A12974598> was passed
#################################################################################
I concatenate the CV set and the train set so that I may select the most important features
in both CV and Train together.
##############################################################################
# Stack the training rows on top of the CV rows, for features and labels alike.
train_cv_x = pd.concat([train_x_upsampled, cross_val_x_upsampled])
train_cv_y = pd.concat([train_y_upsampled, cross_val_y_upsampled])
def fs(n_estimators):
    """Fit an ExtraTreesClassifier on the combined train+CV data and wrap it in SelectFromModel.

    Args:
        n_estimators: number of trees for the ExtraTreesClassifier.
    """
    # Use the argument; the original referenced an undefined `tree_number`.
    m = ExtraTreesClassifier(n_estimators = n_estimators)
    m.fit(train_cv_x,train_cv_y)
    sel = SelectFromModel(m, prefit=True)
##################################################
The code below is to get the names of the selected important features
###################################################
# Boolean mask of the selected columns, their names, and the reduced data.
feature_idx = sel.get_support()
feature_name = pd.DataFrame(train_cv_x.columns[feature_idx])
X_new = pd.DataFrame(sel.transform(train_cv_x))
######################################################################
So Now the important features selected are in the data-frame X_new. In
code below, I am again dividing the data into train and CV but this time
only with the important features selected.
####################################################################
# Row boundaries: the first n_train rows are training data, the rest up to
# n_total are cross-validation data.
n_train = train_x_upsampled.shape[0]
n_total = n_train + cross_val_x_upsampled.shape[0]
# Slice positionally and convert straight to numpy arrays.
train_selected_x = X_new.iloc[:n_train, :].values
cv_selected_x = X_new.iloc[n_train:n_total, :].values
train_selected_y = train_cv_y.iloc[:n_train, :].values
cv_selected_y = train_cv_y.iloc[n_train:n_total, :].values
##############################################################
Now with this new data which only contains the important features,
I am training a neural network as below.
#########################################################
def create_model():
    """Build and compile the binary classifier used as ``build_fn``.

    The input width is read from the module-level ``train_selected_x``.

    Returns:
        The compiled Sequential model.
    """
    n_x_new=train_selected_x.shape[1]
    model = Sequential()
    model.add(Dense(n_x_new, input_dim=n_x_new, kernel_initializer='glorot_normal', activation='relu'))
    model.add(Dense(10, kernel_initializer='glorot_normal', activation='relu'))
    model.add(Dropout(0.8))
    model.add(Dense(1, kernel_initializer='glorot_normal', activation='sigmoid'))
    optimizer = keras.optimizers.Adam(lr=0.001)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    # The original fell off the end and returned None, so the wrapper that
    # calls this builder never received a model.
    return model
# Fix numpy's RNG seed.
seed = 7
np.random.seed(seed)
model = KerasClassifier(build_fn=create_model, epochs=20, batch_size=400, verbose=0)
# Candidate tree counts for the search.
n_estimators=[10,20,30]
param_grid = {'n_estimators': n_estimators}
# Single predefined train/CV split, scored by ROC AUC.
split = PredefinedSplit(test_fold=my_test_fold)
grid = GridSearchCV(estimator=fs, param_grid=param_grid,scoring='roc_auc',cv = split, n_jobs=1)
# Fit on train rows followed by CV rows.
all_x = np.concatenate((train_selected_x, cv_selected_x), axis=0)
all_y = np.concatenate((train_selected_y, cv_selected_y), axis=0)
grid_result = grid.fit(all_x, all_y)
I created a pipeline using a Keras classifier and a function. The function does not satisfy the requirements of a scikit-learn custom estimator, so I am still not getting it right.
def feature_selection(n_estimators=10):
    """Select features with ExtraTrees and split the result into train/CV arrays.

    Reads the module-level ``train_cv_x``, ``train_cv_y``,
    ``train_x_upsampled`` and ``cross_val_x_upsampled``.

    Args:
        n_estimators: passed as the first positional argument to
            ExtraTreesClassifier.

    Returns:
        Tuple ``(my_test_fold, train_selected_x, cv_selected_x,
        train_selected_y, cv_selected_y)``, where ``my_test_fold`` holds -1
        for every training row followed by 0 for every CV row.
    """
    forest = ExtraTreesClassifier(n_estimators)
    forest.fit(train_cv_x,train_cv_y)
    selector = SelectFromModel(forest, prefit=True)
    print(" Getting features names ")
    print(" ")
    feature_name = pd.DataFrame(train_cv_x.columns[selector.get_support()])
    X_new = pd.DataFrame(selector.transform(train_cv_x))
    print(" adding names and important feature values ")
    print(" ")
    X_new.columns = feature_name
    print(" dividing the imporrtant features into train and test ")
    print(" ")
    # Row boundaries between the training part and the CV part.
    n_train = train_x_upsampled.shape[0]
    n_total = n_train + cross_val_x_upsampled.shape[0]
    train_part_x = X_new.iloc[:n_train, :]
    cv_part_x = X_new.iloc[n_train:n_total, :]
    train_part_y = train_cv_y.iloc[:n_train, :]
    cv_part_y = train_cv_y.iloc[n_train:n_total, :]
    print(" Converting the selected important festures on train and test into numpy array to be suitable for NN model ")
    print(" ")
    train_selected_x = train_part_x.values
    cv_selected_x = cv_part_x.values
    train_selected_y = train_part_y.values
    cv_selected_y = cv_part_y.values
    print(" Now test fold ")
    # -1 marks training rows, 0 marks rows of the single CV fold.
    my_test_fold = [-1] * len(train_selected_x) + [0] * len(cv_selected_x)
    print(" Now after test fold ")
    return my_test_fold,train_selected_x,cv_selected_x,train_selected_y,cv_selected_y
def create_model():
    """Build and compile a sigmoid-activated binary classifier.

    The input width is read from ``X_new`` in the enclosing/module scope.

    Returns:
        The compiled Sequential model.
    """
    n_x_new=X_new.shape[1]
    np.random.seed(6000)
    # Every layer shares the same initializer and activation.
    shared = dict(kernel_initializer='he_normal', activation='sigmoid')
    net = Sequential()
    net.add(Dense(n_x_new, input_dim=n_x_new, **shared))
    net.add(Dense(10, **shared))
    net.add(Dropout(0.3))
    net.add(Dense(1, **shared))
    net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['binary_crossentropy'])
    return net
# Two-step pipeline: feature selection followed by the Keras classifier.
# NOTE(review): `custom_classifier` is not defined in this excerpt; it is
# presumably the custom feature-selection transformer — confirm.
# NOTE(review): this assignment rebinds the name `pipeline`, shadowing the
# sklearn `pipeline` module, so the statement cannot be re-run as-is.
# build_fn must be the model-building function, not a model object.
pipeline = pipeline.Pipeline(steps=[('featureselection', custom_classifier()),('nn',KerasClassifier(build_fn=create_model, nb_epoch=10, batch_size=1000,
                                                                                                    verbose=0))])
n_estimators=[10,20,30,40]
# Parameters of a pipeline step are addressed as '<step name>__<parameter>';
# a bare 'n_estimators' is not a parameter of the Pipeline itself.
param_grid = dict(featureselection__n_estimators=n_estimators)
grid = GridSearchCV(estimator=pipeline, param_grid=param_grid,scoring='roc_auc',cv = PredefinedSplit(test_fold=my_test_fold), n_jobs=1)
grid_result = grid.fit(np.concatenate((train_selected_x, cv_selected_x), axis=0), np.concatenate((train_selected_y, cv_selected_y), axis=0))
This is how I built my own custom transformer.
class fs(TransformerMixin, BaseEstimator):
    """Feature-selection transformer backed by ExtraTrees + SelectFromModel.

    Args:
        n_estimators: number of trees used by the ExtraTreesClassifier that
            ranks the features.
    """

    def __init__(self, n_estimators=10 ):
        self.ss=None  # fitted SelectFromModel, set by fit()
        self.n_estimators = n_estimators
        self.x_new = None  # result of the most recent transform()

    def fit(self, X, y):
        """Fit the forest on ``(X, y)`` and keep the resulting selector.

        Returns:
            ``self``.
        """
        # Honour the configured tree count; the original hard-coded 10, so
        # changing n_estimators (e.g. from a grid search) had no effect.
        m = ExtraTreesClassifier(n_estimators=self.n_estimators)
        m.fit(X,y)
        self.ss = SelectFromModel(m, prefit=True)
        return self

    def transform(self, X):
        """Return ``X`` reduced to the selected features; prints the result's shape."""
        self.x_new=self.ss.transform(X)
        print(np.shape(self.x_new))
        return self.x_new

Resources