I am trying to use torch.nn.utils.clip_grad_norm_(), which requires an iterable of Tensors. See below:
for epoch in progress_bar(range(num_epochs)):
    lstm.train()
    outputs = lstm(trainX.to(device))
    optimizer.zero_grad()
    torch.nn.utils.clip_grad_norm_(lstm.parameters(), 1)
My code errors with:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-168-4cd34e6fd44d> in <module>
28 lstm.train()
29 outputs = lstm(trainX.to(device))
---> 30 torch.nn.utils.clip_grad_norm_(lstm.parameters(), 1)
31
32
/opt/conda/lib/python3.6/site-packages/torch/nn/utils/clip_grad.py in clip_grad_norm_(parameters, max_norm, norm_type)
28 total_norm = max(p.grad.detach().abs().max() for p in parameters)
29 else:
---> 30 total_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), norm_type) for p in parameters]), norm_type)
31 clip_coef = max_norm / (total_norm + 1e-6)
32 if clip_coef < 1:
RuntimeError: stack expects a non-empty TensorList
If I examine lstm.parameters() I get a list of Parameters, instead of a list of Tensors:
<class 'torch.nn.parameter.Parameter'> torch.Size([2048, 1])
<class 'torch.nn.parameter.Parameter'> torch.Size([2048, 512])
<class 'torch.nn.parameter.Parameter'> torch.Size([2048])
<class 'torch.nn.parameter.Parameter'> torch.Size([2048])
<class 'torch.nn.parameter.Parameter'> torch.Size([2048, 512])
<class 'torch.nn.parameter.Parameter'> torch.Size([2048, 512])
<class 'torch.nn.parameter.Parameter'> torch.Size([2048])
<class 'torch.nn.parameter.Parameter'> torch.Size([2048])
<class 'torch.nn.parameter.Parameter'> torch.Size([1, 512])
<class 'torch.nn.parameter.Parameter'> torch.Size([1])
Looking at the first Parameter, iterating over it yields Tensors:
<class 'torch.Tensor'> torch.Size([1])
<class 'torch.Tensor'> torch.Size([1])
<class 'torch.Tensor'> torch.Size([1])
<class 'torch.Tensor'> torch.Size([1])
<class 'torch.Tensor'> torch.Size([1])
<class 'torch.Tensor'> torch.Size([1])
.
.
.
Does anyone know what is going on here?
PyTorch's clip_grad_norm_, as the name suggests, operates on gradients.
You have to calculate your loss from the outputs, call loss.backward(), and perform gradient clipping afterwards.
Also, you should call optimizer.step() after this operation.
Something like this:
for epoch in progress_bar(range(num_epochs)):
    lstm.train()
    for inputs, targets in dataloader:
        optimizer.zero_grad()
        outputs = lstm(inputs.to(device))
        loss = my_loss(outputs, targets.to(device))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(lstm.parameters(), 1)
        optimizer.step()
You don't have parameter.grad calculated (its value is None), and that is the reason for your error.
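Here is a minimal self-contained version you can run to see the ordering; the toy LSTM, the dimensions, and the random data are placeholders I made up, not your setup:

import torch

# Toy setup: a one-layer LSTM regressor on random data (illustrative only).
lstm = torch.nn.LSTM(input_size=1, hidden_size=8, batch_first=True)
head = torch.nn.Linear(8, 1)
params = list(lstm.parameters()) + list(head.parameters())
optimizer = torch.optim.Adam(params, lr=1e-3)
loss_fn = torch.nn.MSELoss()

x = torch.randn(4, 10, 1)  # (batch, seq_len, features)
y = torch.randn(4, 1)

optimizer.zero_grad()
out, _ = lstm(x)
pred = head(out[:, -1])      # use the last time step
loss = loss_fn(pred, y)
loss.backward()              # populates .grad on every parameter
torch.nn.utils.clip_grad_norm_(params, max_norm=1.0)  # grads exist now, so no error
optimizer.step()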
Say I have a list of tensors, volumes, which I can iterate over:
for volume in volumes:
    print(volume.shape)
    print(type(volume))
torch.Size([3, 512, 512, 222])
<class 'torch.Tensor'>
torch.Size([3, 512, 512, 185])
<class 'torch.Tensor'>
torch.Size([3, 512, 512, 271])
<class 'torch.Tensor'>
torch.Size([3, 512, 512, 261])
<class 'torch.Tensor'>
torch.Size([3, 512, 512, 215])
<class 'torch.Tensor'>
torch.Size([3, 512, 512, 284])
<class 'torch.Tensor'>
torch.Size([3, 512, 512, 191])
<class 'torch.Tensor'>
How can I change the number of channels from 3 to 1 for all volumes?
Thanks
If you are looking to remove the first two channels and keep only the last one (index -1, i.e. index 2 along the channel axis), then:
>>> vnew = [v[2:] for v in volumes] # list of (1, 512, 512, *)
If you want to squeeze the singleton dimensions in the process then do:
>>> vnew = [v[2] for v in volumes] # list of (512, 512, *)
If you would rather keep just the first channel of each volume, you can create a new list like this:
new_volumes = [volume[0, ...] for volume in volumes]  # list of (512, 512, *)
Use volume[0:1, ...] instead if you want to keep the singleton channel dimension, giving (1, 512, 512, *).
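A quick self-contained check of these slicings on dummy volumes; the depths and the small spatial size are made up so the check runs instantly:

import torch

# Dummy volumes mirroring the (3, H, W, D) layout above, with varying depth.
volumes = [torch.randn(3, 8, 8, d) for d in (5, 7, 9)]

last_kept = [v[2:] for v in volumes]           # keep last channel -> (1, 8, 8, D)
last_squeezed = [v[2] for v in volumes]        # keep last channel, squeezed -> (8, 8, D)
first_squeezed = [v[0, ...] for v in volumes]  # keep first channel, squeezed -> (8, 8, D)

print(last_kept[0].shape, last_squeezed[0].shape, first_squeezed[0].shape)
# torch.Size([1, 8, 8, 5]) torch.Size([8, 8, 5]) torch.Size([8, 8, 5])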
I'm experimenting with TF 2.0.
I want to record the gradient and weight norms across my NN. To do so I'm using the following code.
def get_weights_norm(layer, optim_iters, log=False):
    """
    Calculate the norm of the layer's weights and save it as a tf.summary.
    If log is True, also print it.
    """
    w_l = layer.trainable_weights
    name = layer.name
    if log:
        print("Layer " + name)
    for w in w_l:
        shape = str(w.shape.as_list())
        norm = tf.norm(w.numpy(), name="norm").numpy()
        s_name = name + "_layer_norm/ shape-" + shape
        tf.summary.scalar(s_name, norm, step=optim_iters)
        if log:
            print("\tWeights norm: %s shape: %s" % (norm, shape))


def get_grad_norm(g_tape, loss_value, layer, optim_iters, log=False):
    """
    Calculate the norm of the gradients of the loss with respect to the
    layer's weights and save it as a tf.summary.
    If log is True, also print it.
    """
    grad = g_tape.gradient(loss_value, layer.trainable_weights)
    name = layer.name
    if log:
        print("Layer " + name)
    for w in grad:
        shape = str(w.shape.as_list())
        norm = tf.norm(w.numpy(), name="norm").numpy()
        s_name = name + "_layer_grad_norm/ shape-" + shape
        tf.summary.scalar(s_name, norm, step=optim_iters)
        if log:
            print("\tGrad norm: %s shape: %s" % (norm, shape))
            print("{:.2E}".format(norm))
And here is the training loop:
for epoch in range(epochs):
    print('Start of epoch %d' % (epoch,))

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):

        # Open a GradientTape to record the operations run
        # during the forward pass, which enables autodifferentiation.
        with tf.GradientTape(persistent=True) as tape:
            # Run the forward pass of the layer.
            logits = model(x_batch_train)  # Logits for this minibatch
            # Compute the loss value for this minibatch.
            loss_value = loss_fn(y_batch_train, logits)

        # Use the gradient tape to automatically retrieve
        # the gradients of the trainable variables with respect to the loss.
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        g_bidi = tape.gradient(loss_value, model.get_layer("bi_lstm").trainable_weights)
        g_out = tape.gradient(loss_value, model.get_layer("output").trainable_weights)
        g_dense = tape.gradient(loss_value, model.get_layer("dense").trainable_weights)

        print("Out Layer")
        w_out = model.get_layer("output").trainable_weights
        print(model.get_layer("output").name)
        print(float(tf.norm(w_out[0].numpy(), name="norm")))
        print("\tWeights norm: %s shape: %s" % (tf.norm(w_out[0].numpy(), name="norm"), w_out[0].shape))
        print("\tWeights norm: %s shape: %s" % (tf.norm(w_out[1].numpy(), name="norm"), w_out[1].shape))
        print()
        print("\t ||dE/dw_out|| = %s shape: %s" % (tf.norm(g_out[0].numpy(), name='norm'), g_out[0].shape))
        print("\t ||dE/db_out|| = %s shape: %s" % (tf.norm(g_out[1].numpy(), name='norm'), g_out[1].shape))

        get_weights_norm(model.get_layer("output"), optimizer.iterations, True)
        get_grad_norm(tape, loss_value, model.get_layer("output"), optimizer.iterations, True)

        print()
        print()
        print("Bidirect")
        w_bid = model.get_layer("bi_lstm").trainable_weights
        print("\tWeights fwd norm: %s shape %s:" % (tf.norm(w_bid[0].numpy(), name="norm"), w_bid[0].shape))
        print("\tWeights fwd_rec norm: %s shape %s:" % (tf.norm(w_bid[1].numpy(), name="norm"), w_bid[1].shape))
        print("\tWeights fwd bias norm: %s shape %s:" % (tf.norm(w_bid[2].numpy(), name="norm"), w_bid[2].shape))
        print("\tWeights bwd norm: %s shape %s:" % (tf.norm(w_bid[3].numpy(), name="norm"), w_bid[3].shape))
        print("\tWeights bwd_rec norm: %s shape %s:" % (tf.norm(w_bid[4].numpy(), name="norm"), w_bid[4].shape))
        print("\tWeights bwd bias norm: %s shape %s:" % (tf.norm(w_bid[5].numpy(), name="norm"), w_bid[5].shape))
        print()
        print("\t ||dE/dw_forw|| = %s shape: %s" % (tf.norm(g_bidi[0].numpy(), name='norm'), g_bidi[0].shape))
        print("\t ||dE/dw_forw_rec|| = %s shape: %s" % (tf.norm(g_bidi[1].numpy(), name='norm'), g_bidi[1].shape))
        print("\t ||dE/dw_forw_bias|| = %s shape: %s" % (tf.norm(g_bidi[2].numpy(), name='norm'), g_bidi[2].shape))
        print("\t ||dE/dw_bckw|| = %s shape: %s" % (tf.norm(g_bidi[3].numpy(), name='norm'), g_bidi[3].shape))
        print("\t ||dE/dw_bkw_rec|| = %s shape: %s" % (tf.norm(g_bidi[4].numpy(), name='norm'), g_bidi[4].shape))
        print("\t ||dE/dw_bkw_bias|| = %s shape: %s" % (tf.norm(g_bidi[5].numpy(), name='norm'), g_bidi[5].shape))

        get_weights_norm(model.get_layer("bi_lstm"), optimizer.iterations, True)
        get_grad_norm(tape, loss_value, model.get_layer("bi_lstm"), optimizer.iterations, True)
The Problem:
When running the script, the hand-rolled prints and the helper functions report the same values for the gradient norm of the output layer, but different values for the gradient norm of the bidirectional layer (bi_lstm).
Here's the output:
Weights norm: tf.Tensor(0.33847392, shape=(), dtype=float32) shape: (64, 1)
Weights norm: tf.Tensor(88.14, shape=(), dtype=float32) shape: (1,)
||dE/dw_out|| = tf.Tensor(1.7349662, shape=(), dtype=float32) shape: (64, 1)
||dE/db_out|| = tf.Tensor(0.31759995, shape=(), dtype=float32) shape: (1,)
Layer output
Weights norm: 0.33847392 shape: [64, 1]
Weights norm: 88.14 shape: [1]
Bidirect
Weights fwd norm: tf.Tensor(13.112313, shape=(), dtype=float32) shape (256, 128):
Weights fwd_rec norm: tf.Tensor(5.691354, shape=(), dtype=float32) shape (32, 128):
Weights fwd bias norm: tf.Tensor(11.340048, shape=(), dtype=float32) shape (128,):
Weights bwd norm: tf.Tensor(13.147353, shape=(), dtype=float32) shape (256, 128):
Weights bwd_rec norm: tf.Tensor(5.685838, shape=(), dtype=float32) shape (32, 128):
Weights bwd bias norm: tf.Tensor(11.3102255, shape=(), dtype=float32) shape (128,):
||dE/dw_forw|| = tf.Tensor(9.418793e-07, shape=(), dtype=float32) shape: (256, 128)
||dE/dw_forw_rec|| = tf.Tensor(3.8971484e-06, shape=(), dtype=float32) shape: (32, 128)
||dE/dw_forw_bias|| = tf.Tensor(1.0172046e-06, shape=(), dtype=float32) shape: (128,)
||dE/dw_bckw|| = tf.Tensor(9.837944e-07, shape=(), dtype=float32) shape: (256, 128)
||dE/dw_bkw_rec|| = tf.Tensor(4.134917e-06, shape=(), dtype=float32) shape: (32, 128)
||dE/dw_bkw_bias|| = tf.Tensor(1.0577168e-06, shape=(), dtype=float32) shape: (128,)
Layer bi_lstm
Weights norm: 13.112313 shape: [256, 128]
Weights norm: 5.691354 shape: [32, 128]
Weights norm: 11.340048 shape: [128]
Weights norm: 13.147353 shape: [256, 128]
Weights norm: 5.685838 shape: [32, 128]
Weights norm: 11.3102255 shape: [128]
Layer bi_lstm
Grad norm: 0.0 shape: [256, 128]
0.00E+00
Grad norm: 0.0 shape: [32, 128]
0.00E+00
Grad norm: 0.0 shape: [128]
0.00E+00
Grad norm: 0.0 shape: [256, 128]
0.00E+00
Grad norm: 0.0 shape: [32, 128]
0.00E+00
Grad norm: 0.0 shape: [128]
0.00E+00
What am I missing here?
Thanks in advance
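(One thing that can be ruled out: a persistent GradientTape does support repeated gradient() calls, so querying the tape more than once is not by itself the cause of the zeros. A minimal sanity check, unrelated to the model above:)

import tensorflow as tf

x = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as tape:
    y = x * x
print(tape.gradient(y, x))  # tf.Tensor(6.0, ...)
print(tape.gradient(y, x))  # same value again; allowed because persistent=True
del tape  # release the tape's resources when done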
I tried to use keras.preprocessing.image.ImageDataGenerator on a TPU, but I get the error below from the first epoch. The same code works in a Jupyter notebook, but takes hours for training.
My MODEL:
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(220))
model.add(Activation('relu'))
model.add(Dropout(0.4))
model.add(Dense(120))
model.add(Activation('softmax'))
Optimizer
opt = tf.train.AdamOptimizer(learning_rate)
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['acc'])
Convert Keras to TPU
try:
    device_name = os.environ['COLAB_TPU_ADDR']
    TPU_ADDRESS = 'grpc://' + device_name
    print('Found TPU at: {}'.format(TPU_ADDRESS))
except KeyError:
    print('TPU not found')

tpu_model = tf.contrib.tpu.keras_to_tpu_model(
    model,
    strategy=tf.contrib.tpu.TPUDistributionStrategy(
        tf.contrib.cluster_resolver.TPUClusterResolver(TPU_ADDRESS)))
ImageDataGenerator
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')  # binary, categorical

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')
Model Fit
model_fit = tpu_model.fit_generator(
    train_generator,
    epochs=50,
    steps_per_epoch=60,
)
I get this error:
Epoch 1/50
15/33 [============>.................] - ETA: 8s - loss: 4.7722 - acc: 0.0083
INFO:tensorflow:New input shapes; (re-)compiling: mode=train (# of cores 8), [TensorSpec(shape=(0,), dtype=tf.int32, name='core_id_60'), TensorSpec(shape=(0, 128, 128, 3), dtype=tf.float32, name='conv2d_3_input_20'), TensorSpec(shape=(0, 120), dtype=tf.float32, name='activation_13_target_30')]
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
   1658     try:
-> 1659       c_op = c_api.TF_FinishOperation(op_desc)
   1660     except errors.InvalidArgumentError as e:

InvalidArgumentError: slice index 0 of dimension 0 out of bounds. for 'strided_slice_19' (op: 'StridedSlice') with input shapes: [0], [1], [1], [1] and with computed input tensors: input[1] = <0>, input[2] = <1>, input[3] = <1>.

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
17 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
   1660     except errors.InvalidArgumentError as e:
   1661       # Convert to ValueError for backwards compatibility.
-> 1662       raise ValueError(str(e))
   1663
   1664   return c_op

ValueError: slice index 0 of dimension 0 out of bounds. for 'strided_slice_19' (op: 'StridedSlice') with input shapes: [0], [1], [1], [1] and with computed input tensors: input[1] = <0>, input[2] = <1>, input[3] = <1>.
I'm trying to train a Keras model, but I'm having a problem:
g = ImageDataGenerator(featurewise_center=True,
                       featurewise_std_normalization=True,
                       rotation_range=45,
                       width_shift_range=0.2,
                       height_shift_range=0.2,
                       horizontal_flip=True,
                       validation_split=validation_split,
                       preprocessing_function=lambda x: x / 127 - 1)

g_train = g.flow(x_train, y_train,
                 batch_size=batch_size,
                 subset='training')

g_valid = g.flow(x_train, y_train,
                 batch_size=batch_size,
                 shuffle=False,
                 subset='validation')

history = network.fit_generator(g_train,
                                steps_per_epoch=len(x_train) / 32,
                                epochs=epochs)
ValueError: Error when checking target: expected predictions to have 4 dimensions, but got array with shape (256, 1)
Does anyone have an idea why? It seems much like the example in the documentation to me.
x_train.shape
(50000, 32, 32, 1)
y_train.shape
(50000, 1, 1)
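(A hedged first step, assuming network is the compiled model above: the error says the output layer named predictions is 4-D while the targets are (256, 1), so compare the two shapes directly:)

print(network.output_shape)  # 4-D here, per the error message
print(y_train.shape)         # (50000, 1, 1)

# If the model is meant to emit one value per sample, the targets can be
# flattened to match, e.g. y_train = y_train.reshape(len(y_train), 1);
# otherwise the model head needs fixing (e.g. Flatten/Dense) to match y.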
I want to train and test an MLP classifier using the "sklearn.neural_network" lib and the MNIST dataset. When using the "score" method and providing valid data as an array, this error occurs: ValueError: Expected array-like (array or non-string sequence), got array('B', [7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0 ...
The X and Y training datasets have the same length. I don't know what to check in order to solve this problem. I'm using Anaconda/Jupyter Notebook.
code:
clf = MLPClassifier(hidden_layer_sizes=(100,), activation='relu',
                    solver='adam', alpha=0.0001, batch_size='auto',
                    learning_rate='constant', learning_rate_init=0.001,
                    power_t=0.5, max_iter=200, shuffle=True, random_state=None,
                    tol=0.0001, verbose=False, warm_start=False, momentum=0.9,
                    nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1,
                    beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10)

mndata = MNIST('./samples')
mndata.gz = True
images_tr, labels_tr = mndata.load_training()
images_test, labels_test = mndata.load_testing()

clf.fit(images_tr, labels_tr)
acc = clf.score(images_test, labels_test)
print(acc)
error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-38-c20e564e6239> in <module>
18 print(len(images_test))
19 print(len(labels_test))
---> 20 acc = clf.score(images_test, labels_test)
21 print(acc)
~\Anaconda3\lib\site-packages\sklearn\base.py in score(self, X, y, sample_weight)
288 """
289 from .metrics import accuracy_score
--> 290 return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
291
292
~\Anaconda3\lib\site-packages\sklearn\metrics\classification.py in accuracy_score(y_true, y_pred, normalize, sample_weight)
174
175 # Compute accuracy for each possible representation
--> 176 y_type, y_true, y_pred = _check_targets(y_true, y_pred)
177 check_consistent_length(y_true, y_pred, sample_weight)
178 if y_type.startswith('multilabel'):
~\Anaconda3\lib\site-packages\sklearn\metrics\classification.py in _check_targets(y_true, y_pred)
70 """
71 check_consistent_length(y_true, y_pred)
---> 72 type_true = type_of_target(y_true)
73 type_pred = type_of_target(y_pred)
74
~\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py in type_of_target(y)
241 if not valid:
242 raise ValueError('Expected array-like (array or non-string sequence), '
--> 243 'got %r' % y)
244
245 sparseseries = (y.__class__.__name__ == 'SparseSeries')
ValueError: Expected array-like (array or non-string sequence), got array('B', [7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0 ...
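(A hedged observation: array('B', ...) is a Python array.array, which python-mnist returns for labels and which this sklearn version's type_of_target rejects. Converting everything to NumPy arrays before fit/score is one possible fix:)

import numpy as np

# Assumption: images/labels come back as plain Python sequences / array.array
# from python-mnist; NumPy arrays are the array-like sklearn expects.
X_tr = np.asarray(images_tr)
y_tr = np.asarray(labels_tr)
X_te = np.asarray(images_test)
y_te = np.asarray(labels_test)

clf.fit(X_tr, y_tr)
print(clf.score(X_te, y_te))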