Say I import these libraries:
import torch
import torch.nn as nn
import torch.optim as optim        # optimizers, e.g. SGD/Adam (the update step)
import torch.nn.functional as F    # functional API, e.g. activation functions like relu
Suppose I need to build a logistic regression model with more than one output unit (since I want to do multi-label classification).
I found two options (each a pair of an nn.Module and a criterion) to construct it, which leaves me confused about which set to choose:
# SET 1
class LogisticReg(nn.Module):
    def __init__(self, input_size, num_classes):  # e.g. input_size=768, num_classes=10
        super(LogisticReg, self).__init__()
        self.fc1 = nn.Linear(input_size, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = torch.sigmoid(x)  # F.sigmoid is deprecated in favor of torch.sigmoid
        return x

criterion = nn.BCELoss()
# SET 2
class LogisticReg(nn.Module):
    def __init__(self, input_size, num_classes):  # e.g. input_size=768, num_classes=10
        super(LogisticReg, self).__init__()
        self.fc1 = nn.Linear(input_size, num_classes)

    def forward(self, x):
        x = self.fc1(x)  # raw logits, no sigmoid here
        return x

criterion = nn.BCEWithLogitsLoss()
The only difference is that when the nn.Module does not apply a sigmoid in forward, it must be paired with nn.BCEWithLogitsLoss instead of nn.BCELoss, since its outputs are raw logits that have not been passed through a sigmoid yet.
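To illustrate what I mean by equivalent: I would expect the two pairings to produce (nearly) identical loss values on the same data. A quick sketch of that check:

import torch
import torch.nn as nn

logits = torch.randn(4, 3)                     # raw scores: 4 samples, 3 labels
targets = torch.randint(0, 2, (4, 3)).float()  # binary multi-label targets

loss_set1 = nn.BCELoss()(torch.sigmoid(logits), targets)  # SET 1 pairing
loss_set2 = nn.BCEWithLogitsLoss()(logits, targets)       # SET 2 pairing
print(loss_set1.item(), loss_set2.item())  # should match up to floating-point error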
Hence, I have two questions:
1. Are these two sets exactly equivalent?
2. If yes, then what is the point of PyTorch having both BCELoss and BCEWithLogitsLoss?
This is a toy example, as I'm learning PyTorch and using it on a one-dimensional time series, in this case a sine wave.
I'm trying to use Conv1d, but I get the following error:
RuntimeError: Given groups=1, weight of size [5, 1, 2], expected input[1, 994, 5] to have 1 channels, but got 994 channels instead
My 'lookback' is 5 time steps, and the shape of my data batch is [994, 5].
What am I doing wrong?
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import pytorch_lightning as pl
import matplotlib.pyplot as plt  # needed for TsDs.plt
from torch import nn, tensor

class TsDs(torch.utils.data.Dataset):
    def __init__(self, s, l=5):
        super().__init__()
        self.l, self.s = l, s
    def __len__(self):
        return self.s.shape[0] - 1 - self.l
    def __getitem__(self, i):
        return self.s[i:i+self.l], torch.log(self.s[i+self.l+1] / self.s[i+self.l])
    def plt(self):
        plt.plot(self.s)

class TsDm(pl.LightningDataModule):
    def __init__(self, length=5000, batch_size=1000):
        super().__init__()
        self.batch_size = batch_size
        self.s = torch.sin(torch.arange(length) * 0.2) + 5
    def train_dataloader(self):
        return DataLoader(TsDs(self.s[:3999]), batch_size=self.batch_size, shuffle=False)
    def val_dataloader(self):
        return DataLoader(TsDs(self.s[4000:]), batch_size=self.batch_size)

dm = TsDm()

class MyModel(pl.LightningModule):
    def __init__(self, learning_rate=0.01):
        super().__init__()
        self.learning_rate = learning_rate
        self.network = nn.Sequential(nn.Conv1d(1, 5, 2), nn.ReLU(), nn.Linear(5, 3), nn.ReLU(), nn.Linear(3, 1), nn.Tanh())
        # self.network = nn.Sequential(nn.Linear(5, 5), nn.ReLU(), nn.Linear(5, 3), nn.ReLU(), nn.Linear(3, 1), nn.Tanh())
    def forward(self, x):
        return self.network(x)
    def step(self, batch, batch_idx, stage):
        x, y = batch
        loss = -torch.mean(self(x) * y)
        print(loss)
        return loss
    def training_step(self, batch, batch_idx):
        return self.step(batch, batch_idx, "train")
    def validation_step(self, batch, batch_idx):
        return self.step(batch, batch_idx, "val")
    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=self.learning_rate)

mm = MyModel(0.01)
trainer = pl.Trainer(max_epochs=10)
trainer.fit(mm, datamodule=dm)
There are two issues in your code:
Looking at the documentation of nn.Conv1d, your input shape should be (B, C, L). In your default case, you have L=5, the sequence length, but you need to create that extra dimension representing the feature size of a sequence element, here C=1. You can do so by changing TsDs's __getitem__ function to:
def __getitem__(self, i):
    x = self.s[i:i+self.l][None]  # add the channel dim: per-item shape (1, self.l)
    y = torch.log(self.s[i+self.l+1] / self.s[i+self.l])  # per-item scalar target
    return x, y
Your convolutional layer has a stride of 1 and a kernel size of 2, which means its output will be shaped (B, 5, L-1=4). The following layer is a fully connected layer instantiated as nn.Linear(5, 3), which means it expects (*, H_in=5) and will output (*, H_out=3). There are a few ways to resolve this:
You can flatten the Conv1d output with nn.Flatten and feed it to a bigger fully connected layer, for instance nn.Linear(20, 3) (sketched below).
You can use a convolutional layer with a wider kernel; if you use a kernel of 5 (your sequence length), you will end up with a tensor of shape (B, 5, 1), which, after flattening, you can feed to an nn.Linear(5, 3). This approach doesn't really scale when L is changed, though.
You could apply an nn.AvgPool1d to get an average representation of the sequence after the convolutional layers have been applied.
Those are just a few directions...
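For instance, the first option might look like this (a sketch, assuming the default lookback of 5, so the flattened Conv1d output has 5*4 = 20 features):

self.network = nn.Sequential(
    nn.Conv1d(1, 5, 2),  # (B, 1, 5) -> (B, 5, 4)
    nn.ReLU(),
    nn.Flatten(),        # (B, 5, 4) -> (B, 20)
    nn.Linear(20, 3),
    nn.ReLU(),
    nn.Linear(3, 1),
    nn.Tanh())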
I trained a model on some images. Now, to fit a similar dataset but with different colors, I want to load this model, but I also want to drop all running stats from the BatchNorm layers (reset them to their default values, as if totally untrained). Which parameters should I reset? A simple model looks like this:
import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv0 = nn.Conv2d(3, 3, 3, padding=1)
        self.norm = nn.BatchNorm2d(3)
        self.conv = nn.Conv2d(3, 3, 3, padding=1)

    def forward(self, x):
        x = self.conv0(x)
        x = self.norm(x)
        return self.conv(x)

net = Net()
## or for a pretrained model it would be
## net = torch.load('net.pth')

def drop_to_default():
    for m in net.modules():
        if type(m) == nn.BatchNorm2d:
            ####???####

drop_to_default()
The simplest way to do that is to call the reset_running_stats() method on the BatchNorm objects:
def drop_to_default():
    for m in net.modules():
        if type(m) == nn.BatchNorm2d:
            m.reset_running_stats()
Below is this method's source code:

def reset_running_stats(self) -> None:
    if self.track_running_stats:
        # running_mean/running_var/num_batches... are registered at runtime depending
        # if self.track_running_stats is on
        self.running_mean.zero_()         # Zero (neutral) mean
        self.running_var.fill_(1)         # One (neutral) variance
        self.num_batches_tracked.zero_()  # Number of batches tracked
You can see the full source code here, in the _NormBase class.
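Note that reset_running_stats() only resets the running statistics. If you also want to re-initialize the learned affine parameters (weight and bias), BatchNorm modules additionally expose reset_parameters(), which calls reset_running_stats() and then re-initializes weight to ones and bias to zeros:

def drop_to_default():
    for m in net.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.reset_parameters()  # resets running stats AND the affine weight/bias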
I would like to create a sequential model in Keras with one hidden layer that has as many nodes as there are input nodes. Each input node should be connected to only one of the hidden nodes, and all nodes in the hidden layer should be connected to a single output node, as in this image.
I would like to be able to specify the activation function of the hidden layer.
Is it possible to achieve that with a Sequential() model in Keras?
Here is a custom layer where you can do everything you want:
import keras
import tensorflow as tf
from keras.layers import *
from keras import Sequential
import numpy as np

tf.set_random_seed(10)

class MyDenseLayer(keras.layers.Layer):
    def __init__(self):
        super(MyDenseLayer, self).__init__()

    def parametric_relu(self, _x):
        # some more or less complicated activation
        # with its own weight
        pos = tf.nn.relu(_x)
        neg = self.alphas * (_x - abs(_x)) * 0.5
        return pos + neg

    def build(self, input_shape):
        # main weight: one scalar per input node
        self.kernel = self.add_weight("kernel",
                                      shape=[int(input_shape[-1]), ],
                                      initializer=tf.random_normal_initializer())
        # any additional weights here
        self.alphas = self.add_weight('alpha', shape=[int(input_shape[-1]), ],
                                      initializer=tf.constant_initializer(0.0),
                                      dtype=tf.float32)
        self.size = int(input_shape[-1])

    def call(self, input):
        # multiplying by a diagonal matrix keeps only the one-to-one connections
        linear = tf.matmul(input, self.kernel * tf.eye(self.size))
        nonlinear = self.parametric_relu(linear)
        return nonlinear

model = Sequential()
model.add(MyDenseLayer())
model.build((None, 4))
print(model.summary())

x = np.ones((5, 4))
print(model.predict(x))
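To match the single output node from the image, a regular Dense layer could be appended after the custom layer (a sketch):

model = Sequential()
model.add(MyDenseLayer())  # one-to-one "diagonal" hidden layer
model.add(Dense(1))        # all hidden nodes -> one output node
model.build((None, 4))
print(model.summary())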
I need to train a set of models but do not benefit from GPU acceleration using tensorflow-gpu / keras, as training time grows linearly with the number of models trained.
In
class Models(tf.keras.Model):
    def __init__(self, N_MODELS=1):
        super(Models, self).__init__()
        self.block_i = [estimate_affine() for node in range(N_MODELS)]

    def call(self, inputs):
        x = [self.block_i[i](input_i) for i, input_i in enumerate(inputs)]
        return x
a list of N_MODELS layers is built, and since they are independent, they should be parallelizable. As that is not the case (even though the output is what I expect), I guess my implementation is not optimal. Any idea how to make it parallelizable?
Best
Paul
Here is a toy net of N_MODELS linear regressions:
import tensorflow as tf
tf.enable_eager_execution()
from tensorflow.keras import layers
import numpy as np
import time

class estimate_affine(layers.Layer):
    def __init__(self):
        super(estimate_affine, self).__init__()
        self.a = tf.Variable(initial_value=[0.], dtype='float32', trainable=True, name='par1')
        self.b = tf.Variable(initial_value=[0.], dtype='float32', trainable=True, name='par2')

    def call(self, inputs):
        return (self.a, self.b)

class Models(tf.keras.Model):
    def __init__(self, N_MODELS=1):
        super(Models, self).__init__()
        self.block_i = [estimate_affine() for node in range(N_MODELS)]

    def call(self, inputs):
        x = [self.block_i[i](input_i) for i, input_i in enumerate(inputs)]
        return x

N_ITERATIONS = 100
N_POINTS = 100
ls_t = []
for N_MODELS in [5, 10, 50, 100, 1000]:
    t = time.time()
    # Aim is to fit N_MODELS models on N_POINTS points, each dataset basically a*x + b
    a = np.random.randint(0, 10, N_MODELS)
    b = np.random.randint(0, 10, N_MODELS)
    noise = np.random.rand(N_POINTS)
    x = np.linspace(0, 1, N_POINTS)
    dataset = np.array([a_i * (x + noise) + b_i for a_i, b_i in zip(a, b)])
    model = Models(N_MODELS=N_MODELS)
    optimizer = tf.keras.optimizers.SGD(learning_rate=5e-3)
    for i in range(N_ITERATIONS):
        with tf.GradientTape() as tape:
            outputs = model(dataset)
            L = tf.reduce_sum([((outputs[idx][0] * x + outputs[idx][1])
                                - dataset[idx, :]) ** 2 for idx in range(N_MODELS)])
        grads = tape.gradient(L, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
    t_diff = time.time() - t
    print('N_MODELS : {}, time : {}'.format(N_MODELS, t_diff))
    ls_t.append(t_diff)
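One direction that might help (an illustrative sketch with my own hypothetical names, not the original code): since the fits are independent, the Python list of layers can be replaced by a single layer holding vector-valued parameters, so all N_MODELS regressions run as one batched tensor op instead of a per-model loop:

class estimate_affine_vec(layers.Layer):
    def __init__(self, n_models):
        super(estimate_affine_vec, self).__init__()
        # one slope and intercept per model, stacked into vectors
        self.a = tf.Variable(tf.zeros([n_models, 1]), trainable=True, name='a')
        self.b = tf.Variable(tf.zeros([n_models, 1]), trainable=True, name='b')

    def call(self, x):
        x = tf.cast(x, tf.float32)  # x: (N_POINTS,)
        return self.a * x + self.b  # broadcasts to (n_models, N_POINTS)

# The loss then becomes a single tensor expression, e.g.:
#   preds = model_vec(x)  # (N_MODELS, N_POINTS)
#   L = tf.reduce_sum((preds - tf.cast(dataset, tf.float32)) ** 2)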
When implementing a custom layer in Keras, I need to know the real value of batch_size. My shape is (?, 20).
Questions:
1. What is the best way to change (?, 20) to (batch_size, 20)?
I have looked into this, but it cannot be adapted to my problem.
I can pass the batch_size to this layer; in that case, I need to reshape (?, 20) to (batch_size, 20). How can I do that?
2. Is that the best way to do it, or is there a built-in function that can get the real batch_size while building and running the model?
This is my layer:
from scipy.stats import entropy
from keras.engine import Layer
import keras.backend as K
import numpy as np

class measure(Layer):
    def __init__(self, beta, **kwargs):
        self.beta = beta
        self.uses_learning_phase = True
        self.supports_masking = True
        super(measure, self).__init__(**kwargs)

    def call(self, x):
        return K.in_train_phase(self.rev_entropy(x, self.beta), x)

    def get_config(self):
        config = {'beta': self.beta}
        base_config = super(measure, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def rev_entropy(self, x, beta):
        entropy_p_t_w = np.apply_along_axis(entropy, 1, x)
        con = (beta / (1 + entropy_p_t_w)) ** 1.5
        new_f_w_t = x * (con.reshape(con.shape[0], 1))
        norm_const = 1e-30 + np.sum(new_f_w_t, axis=0)
        for t in range(norm_const.shape[0]):
            new_f_w_t[:, t] /= norm_const[t]
        return new_f_w_t
And here is where I call this layer:
encoded = measure(beta=0.08)(encoded)
I am also using fit_generator, in case that helps:

autoencoder.fit_generator(train_gen, steps_per_epoch=num_train_steps, epochs=NUM_EPOCHS,
                          validation_data=test_gen, validation_steps=num_test_steps,
                          callbacks=[checkpoint])

The dimension of the x passed to the layer is (?, 20), and that's why I cannot do my calculation.
Thanks:)
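(For reference, a minimal sketch of the distinction at play here: in the Keras backend, K.int_shape reports None for the batch dimension at graph-build time, which is the ? above, while K.shape returns a symbolic tensor whose batch entry takes its real value per batch. The call below is a hypothetical illustration, not the layer above:)

import keras.backend as K

def call(self, x):
    static_batch = K.int_shape(x)[0]  # None at build time -> the '?' in (?, 20)
    dynamic_batch = K.shape(x)[0]     # symbolic tensor, resolved at run time
    # dynamic_batch can be used inside backend ops,
    # e.g. K.reshape(x, (dynamic_batch, 20))
    return x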