I am passing a batch of 256 images x_s with dimensions [256, 3, 560, 448]. However, whenever I try to feed the images to my CNN I get the following error:
TypeError: __init__() takes 1 positional argument but 2 were given
Not sure what it means here by '1 positional argument'. I am passing in the images using an iterator which I created. Below is my code for the training loop up until the point where it breaks:
for e in range(num_epochs):
    print(f'Epoch{e+1:04d}/ {num_epochs:04d}', end='\n================\n')
    dl_source_iter = iter(dl_source)
    dl_target_iter = iter(dl_target)

    for batch in range(max_batches):
        optimizer.zero_grad()

        p = float(batch + e * max_batches) / (num_epochs * max_batches)
        grl_lambda = 2. / (1. + np.exp(-10 * p)) - 1

        x_s, y_s = next(dl_source_iter)
        y_s_domain = torch.zeros(256, dtype=torch.long)

        class_pred, domain_pred = Cnn(x_s, grl_lambda)  # This is the line which throws the error
Here is my convolutional neural network:
class Cnn(nn.Module):
    def __init__(self):
        super(Cnn, self).__init__()

        self.feature_extract = nn.Sequential(
            nn.Conv2d(3, 64, 5, 1, 1),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2),
            nn.ReLU(True),
            nn.Conv2d(64, 50, 5, 1, 1),
            nn.BatchNorm2d(50),
            nn.MaxPool2d(2),
            nn.ReLU(True),
            nn.Dropout2d(),
        )

        self.num_cnn_features = 50 * 5 * 5

        self.class_classifier = nn.Sequential(
            nn.Linear(self.num_cnn_features, 200),
            nn.BatchNorm1d(200),
            nn.Dropout2d(),
            nn.ReLU(True),
            nn.Linear(200, 200),
            nn.BatchNorm1d(200),
            nn.ReLU(True),
            nn.Linear(200, 182),
            nn.LogSoftmax(dim=1),
        )

        self.DomainClassifier = nn.Sequential(
            nn.Linear(self.num_cnn_features, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(True),
            nn.Linear(100, 2),
            nn.LogSoftmax(dim=1)
        )

    def forward(self, x, grl_lambda=1.0):
        features = self.feature_extract(x)
        features = features.view(-1, self.num_cnn_features)
        features_grl = GradientReversalFn(features, grl_lambda)
        class_pred = self.class_classifier(features)
        domain_pred = self.DomainClassifier(features_grl)
        return class_pred, domain_pred
Does anyone have any guesses as to why this might be happening? I can't seem to figure out what is going wrong. Any help would be greatly appreciated.
You need to create a Cnn object before you can pass data to it. You are calling the Cnn class constructor __init__, which expects no arguments, rather than the forward method for an instance of the Cnn class, which is what you actually want to do.
# outside of loop
model = Cnn()
# inside loop
class_pred, domain_pred = model(x_s, grl_lambda)
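Calling model(x_s, grl_lambda) goes through nn.Module.__call__, which passes the arguments on to your forward method; calling Cnn(x_s, grl_lambda) instead routes them to __init__, which only accepts self, hence the complaint about an extra positional argument.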
I am following this tutorial and can successfully generate 28x28 faces using my GPU. However, I do not know how to increase the size of the faces (currently 28x28) given the logic of the generator function below:
def generator(z, out_channel_dim, is_train=True, alpha=0.2, keep_prob=0.5):
    with tf.variable_scope('generator', reuse=(not is_train)):
        # First fully connected layer, 4x4x1024
        fc = tf.layers.dense(z, 4 * 4 * 1024, use_bias=False)
        fc = tf.reshape(fc, (-1, 4, 4, 1024))
        bn0 = tf.layers.batch_normalization(fc, training=is_train)
        lrelu0 = tf.maximum(alpha * bn0, bn0)
        drop0 = tf.layers.dropout(lrelu0, keep_prob, training=is_train)

        # Deconvolution, 7x7x512
        conv1 = tf.layers.conv2d_transpose(drop0, 512, 4, 1, 'valid', use_bias=False)
        bn1 = tf.layers.batch_normalization(conv1, training=is_train)
        lrelu1 = tf.maximum(alpha * bn1, bn1)
        drop1 = tf.layers.dropout(lrelu1, keep_prob, training=is_train)

        # Deconvolution, 14x14x256
        conv2 = tf.layers.conv2d_transpose(drop1, 256, 5, 2, 'same', use_bias=False)
        bn2 = tf.layers.batch_normalization(conv2, training=is_train)
        lrelu2 = tf.maximum(alpha * bn2, bn2)
        drop2 = tf.layers.dropout(lrelu2, keep_prob, training=is_train)

        # Output layer, 28x28xn
        logits = tf.layers.conv2d_transpose(drop2, out_channel_dim, 5, 2, 'same')
        out = tf.tanh(logits)

        return out
I assume I will need to change something in this line:
conv2 = tf.layers.conv2d_transpose(drop1, 256, 5, 2, 'same', use_bias=False)
The change should be made at the last layer (logits), since that is the layer that determines the size of the output. Note that out_channel_dim only controls the number of output channels; the spatial size comes from the strides of the conv2d_transpose layers, so increasing the stride there, or adding another stride-2 transposed convolution before it, will give you the size you want. This may cause some errors (due to the kernel size and stride values), or the result may not look the same (since your network was not trained for this size).
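To make that concrete, here is a minimal sketch (my own assumption, not the tutorial's code) of one way to go from 28x28 to 56x56 output by inserting one more stride-2 transposed convolution before the output layer; the 128-filter count is an arbitrary choice:

# Sketch only: everything up to drop2 (14x14x256) stays exactly as in the
# original generator; one extra upsampling block is added before the output.

# Extra deconvolution, 28x28x128 (filter count is hypothetical)
conv3 = tf.layers.conv2d_transpose(drop2, 128, 5, 2, 'same', use_bias=False)
bn3 = tf.layers.batch_normalization(conv3, training=is_train)
lrelu3 = tf.maximum(alpha * bn3, bn3)
drop3 = tf.layers.dropout(lrelu3, keep_prob, training=is_train)

# Output layer, now 56x56xout_channel_dim
logits = tf.layers.conv2d_transpose(drop3, out_channel_dim, 5, 2, 'same')
out = tf.tanh(logits)

Note that the training images (and the discriminator) would also have to be adapted to 56x56, and the network retrained at that resolution.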
I've trained a VAE in PyTorch that I need to convert to CoreML. From the thread PyTorch VAE fails conversion to onnx I was able to get the ONNX model to export; however, this just pushed the problem one step further, to the ONNX-to-CoreML stage.
The original function that contains the torch.randn() call is the reparametrize func:
def reparametrize(self, mu, logvar):
    std = logvar.mul(0.5).exp_()
    if self.have_cuda:
        eps = torch.randn(self.bs, self.nz, device='cuda')
    else:
        eps = torch.randn(self.bs, self.nz)
    return eps.mul(std).add_(mu)
The solution is, of course, to create a custom layer, but I'm having problems creating a layer with no inputs (i.e., it's just a randn() call).
I can get the CoreML conversion to complete with this def:
def convert_randn(node):
    params = NeuralNetwork_pb2.CustomLayerParams()
    params.className = "RandomNormal"
    params.description = "Random normal distribution generator"
    params.parameters["dtype"].intValue = node.attrs.get('dtype', 1)
    params.parameters["bs"].intValue = node.attrs.get("shape")[0]
    params.parameters["nz"].intValue = node.attrs.get("shape")[1]
    return params
I do the conversion with:
coreml_model = convert(onnx_model, add_custom_layers=True,
                       image_input_names=['input'],
                       custom_conversion_functions={"RandomNormal": convert_randn})
I should also note that, at the completion of the mlmodel export, the following is printed:
Custom layers have been added to the CoreML model corresponding to the
following ops in the onnx model:
1/1: op type: RandomNormal, op input names and shapes: [], op output
names and shapes: [('62', 'Shape not available')]
Bringing the .mlmodel into Xcode complains that Layer '62' of type 500 has 0 inputs but expects at least 1. So I'm wondering how to specify a kind of "dummy" input to the layer, since it doesn't actually have an input -- it's just a wrapper around torch.randn() (or, more specifically, the ONNX RandomNormal op). I should clarify that I do need the whole VAE, not just the decoder, as I'm actually using the entire process to "error correct" my inputs (i.e., the encoder estimates my z vector, based on an input, then the decoder generates the closest generalizable prediction of the input).
Any help greatly appreciated.
UPDATE: Okay, I finally got a version to load in Xcode (thanks to @MattijsHollemans and his book!). The originalConversion.mlmodel is the initial output of converting my model from ONNX to CoreML. To this, I had to manually insert the input for the RandomNormal layer. I made it (64, 28, 28) for no great reason: I know my batch size is 64, and my inputs are 28 x 28 (but presumably it could also be (1, 1, 1), since it's a "dummy"):
spec = coremltools.utils.load_spec('originalConversion.mlmodel')
nn = spec.neuralNetwork
layers = {l.name:i for i,l in enumerate(nn.layers)}
layer_idx = layers["62"] # '62' is the name of the layer -- see above
layer = nn.layers[layer_idx]
layer.input.extend(["dummy_input"])
inp = spec.description.input.add()
inp.name = "dummy_input"
inp.type.multiArrayType.SetInParent()
spec.description.input[1].type.multiArrayType.shape.append(64)
spec.description.input[1].type.multiArrayType.shape.append(28)
spec.description.input[1].type.multiArrayType.shape.append(28)
spec.description.input[1].type.multiArrayType.dataType = ft.ArrayFeatureType.DOUBLE
coremltools.utils.save_spec(spec, "modelWithInsertedInput.mlmodel")
This loads in Xcode, but I have yet to test the functioning of the model in my app. Since the additional layer is simple, and the input is literally a bogus, non-functional input (just to keep Xcode happy), I don't imagine it will be a problem, but I'll post again if it doesn't run properly.
UPDATE 2: Unfortunately, the model doesn't load at runtime. It fails with [espresso] [Espresso::handle_ex_plan] exception=Failed in 2nd reshape after missing custom layer info. What I find very strange and confusing is that, inspecting model.espresso.shape, I see that almost every node has a shape like:
"62" : {
"k" : 0,
"w" : 0,
"n" : 0,
"seq" : 0,
"h" : 0
}
I have two questions/concerns: 1) Most obviously, why are all the values zero (this is the case with all but the input nodes), and 2) Why does it appear to be a sequential model, when it's just a fairly conventional VAE? Opening model.espresso.shape for a fully-functioning GAN in the same app, I see that the nodes are of the format:
"54" : {
"k" : 256,
"w" : 16,
"n" : 1,
"h" : 16
}
That is, they contain reasonable shape info, and they don't have seq fields.
Very, very confused...
UPDATE 3: I've also just noticed in the compiler report the error: IMPORTANT: new sequence length computation failed, falling back to old path. Your compilation was sucessful, but please file a radar on Core ML | Neural Networks and attach the model that generated this message.
Here's the original PyTorch model:
class VAE(nn.Module):
    def __init__(self, bs, nz):
        super(VAE, self).__init__()
        self.nz = nz
        self.bs = bs
        self.encoder = nn.Sequential(
            # input is (nc) x 28 x 28
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # size = (ndf) x 14 x 14
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # size = (ndf*2) x 7 x 7
            nn.Conv2d(ndf * 2, ndf * 4, 3, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # size = (ndf*4) x 4 x 4
            nn.Conv2d(ndf * 4, 1024, 4, 1, 0, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        )
        self.decoder = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(1024, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # size = (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 3, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # size = (ngf*4) x 8 x 8
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # size = (ngf*2) x 16 x 16
            nn.ConvTranspose2d(ngf * 2, nc, 4, 2, 1, bias=False),
            nn.Sigmoid()
        )
        self.fc1 = nn.Linear(1024, 512)
        self.fc21 = nn.Linear(512, nz)
        self.fc22 = nn.Linear(512, nz)
        self.fc3 = nn.Linear(nz, 512)
        self.fc4 = nn.Linear(512, 1024)
        self.lrelu = nn.LeakyReLU()
        self.relu = nn.ReLU()

    def encode(self, x):
        conv = self.encoder(x)
        h1 = self.fc1(conv.view(-1, 1024))
        return self.fc21(h1), self.fc22(h1)

    def decode(self, z):
        h3 = self.relu(self.fc3(z))
        deconv_input = self.fc4(h3)
        deconv_input = deconv_input.view(-1, 1024, 1, 1)
        return self.decoder(deconv_input)

    def reparametrize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()
        eps = torch.randn(self.bs, self.nz, device='cuda')  # needs custom layer!
        return eps.mul(std).add_(mu)

    def forward(self, x):
        # print("x", x.size())
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        decoded = self.decode(z)
        return decoded, mu, logvar
To add an input to your Core ML model, you can do the following from Python:
import coremltools
spec = coremltools.utils.load_spec("YourModel.mlmodel")
nn = spec.neuralNetworkClassifier # or just spec.neuralNetwork
layers = {l.name:i for i,l in enumerate(nn.layers)}
layer_idx = layers["your_custom_layer"]
layer = nn.layers[layer_idx]
layer.input.extend(["dummy_input"])
inp = spec.description.input.add()
inp.name = "dummy_input"
inp.type.doubleType.SetInParent()
coremltools.utils.save_spec(spec, "NewModel.mlmodel")
Here, "your_custom_layer" is the name of the layer you want to add the dummy input to. In your model it looks like it's called 62. You can look at the layers dictionary to see the names of all the layers in the model.
Notes:
If your model is not a classifier, use nn = spec.neuralNetwork instead of neuralNetworkClassifier.
I made the new dummy input have the type "double". That means your custom layer gets a double value as input.
You need to specify a value for this dummy input when using the model.
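For completeness, here is a minimal sketch (my assumption, not part of the original answer) of supplying a value for that dummy input when calling the converted model from Python with coremltools. The input names "input" and "dummy_input", and the single-channel 28x28 test image, are assumptions based on the question:

# Sketch: calling the patched model; input names and image size are assumed.
import coremltools
from PIL import Image

mlmodel = coremltools.models.MLModel("NewModel.mlmodel")
img = Image.open("example.png").convert("L").resize((28, 28))  # hypothetical test image

prediction = mlmodel.predict({
    "input": img,        # the model's real image input
    "dummy_input": 0.0,  # bogus value, only there to satisfy the added input
})

On the Swift side you would likewise pass some placeholder value for dummy_input alongside the real image input.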
I have been struggling to fix this for the past couple of days; any help would be appreciated.
I have a network that outputs numeric locations (x, y coordinates in an image), hence the int32 datatype. The output is of size (2, 2). The ground truth also has the locations as int32.
Then I run the following two lines of code:
cost = tf.reduce_mean(tf.square(y_true - y_pred_cls))
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
and receive following error:
ValueError: Invalid type tf.int32 for Mean_7:0, expected: [tf.float32, tf.float64, tf.float16, tf.bfloat16].
I understand that the average should be a float value, but if I convert the predicted value to float, I start getting a 'No gradients provided' error.
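For reference, here is a sketch of the cast described above (the exact code is my assumption). The cast does satisfy reduce_mean, but the predictions come out of tf.nn.top_k indices plus integer // and % ops, none of which have gradients, which is why minimize() then reports that no gradients are provided:

# Sketch (assumed code): casting fixes the dtype error for reduce_mean,
# but the top_k indices and the integer // and % ops are non-differentiable,
# so minimize() still finds no gradient path back to the weights.
cost = tf.reduce_mean(tf.square(tf.cast(y_true - y_pred_cls, tf.float32)))
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)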
I have also tried using TensorFlow's softmax_cross_entropy_with_logits_v2 to turn my problem into a classification problem, but I get the same error. I'd really appreciate any help with the solution.
Code
Code for Input and Labels
x_p = tf.placeholder(tf.float32, shape=[None, img_size_flat*num_channels], name='x_pred')
x_pred_image = tf.reshape(x_p, [-1, 10, 10, num_channels])
k = tf.constant(npatches)
y_true = tf.placeholder(tf.int32, shape=[None, 2, 2], name='y_true')
y_true_cls = tf.placeholder(tf.int32, shape=[None, 2, 2], name='y_true_cls')
Convolution Layer
def new_weights(shape, layer_name):
    initializer = tf.contrib.layers.xavier_initializer()
    return tf.Variable(initializer(shape), name=layer_name+'_W')

def new_bias(length, layer_name):
    return tf.Variable(tf.constant(0.05, shape=[length]), name=layer_name+'_b')
def new_conv_layer(input,
                   num_input_channels,
                   filter_size,
                   num_filters,
                   name_scope,
                   layer_name='',
                   use_pooling=True):
    with tf.name_scope(name_scope):
        shape = [filter_size, filter_size, num_input_channels, num_filters]
        weights = new_weights(shape, layer_name)
        biases = new_bias(num_filters, layer_name)
        layer = tf.add(tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME'), biases, name=layer_name)
        if use_pooling:
            layer = tf.nn.max_pool(value=layer,
                                   ksize=[1, 3, 3, 1],
                                   strides=[1, 2, 2, 1],
                                   padding='SAME', name=layer_name+'_max')
        layer = tf.nn.relu(layer, name=layer_name+'_activation')
        return layer, weights
Fully Connected Layer
def new_fc_layer(input,
                 num_inputs,
                 num_outputs,
                 name_scope,
                 layer_name='',
                 use_relu=True):
    with tf.name_scope(name_scope):
        weights = new_weights([num_inputs, num_outputs], layer_name)
        biases = new_bias(num_outputs, layer_name)
        layer = tf.add(tf.matmul(input, weights), biases, name=layer_name)
        # layer = tf.matmul(input, weights) + biases
        if use_relu:
            layer = tf.nn.relu(layer, layer_name+'_activation')
        return layer
Custom Layer Code
with tf.name_scope(name_scope):
    resh_inp = tf.reshape(input, [-1, 10, 10])
    input_shape = tf.shape(resh_inp)
    rows, cols = input_shape[1], input_shape[2]
    d_rows, d_cols = 2, 2
    subm_rows, subm_cols = rows - d_rows + 1, cols - d_cols + 1

    ii, jj = tf.meshgrid(tf.range(subm_rows), tf.range(subm_cols), indexing='ij')
    d_ii, d_jj = tf.meshgrid(tf.range(d_rows), tf.range(d_cols), indexing='ij')

    subm_ii = ii[:, :, tf.newaxis, tf.newaxis] + d_ii
    subm_jj = jj[:, :, tf.newaxis, tf.newaxis] + d_jj

    subm = tf.gather_nd(resh_inp[0, :, :], tf.stack([subm_ii, subm_jj], axis=-1), name=layer_name + "_gather")
    subm_sum = tf.reduce_sum(subm, axis=(2, 3), name=layer_name + "_subm_sum")

    _, top_idx = tf.nn.top_k(tf.reshape(subm_sum, [-1]), tf.minimum(k, tf.size(subm_sum)), name=layer_name + "_top_idx")
    top_row = top_idx // subm_cols
    top_col = top_idx % subm_cols
    result = tf.stack([top_row, top_col], axis=-1, name=layer_name + "_result")

    result_shape = tf.shape(result)
    result = tf.reshape(result, [-1, result_shape[0], result_shape[1]], name=layer_name + "_shaped_result")
The result from the code above is passed as y_pred_cls in the code below:
cost = tf.reduce_mean(tf.square(y_true - y_pred_cls))
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
So I am training a network to classify images in TensorFlow. After I trained the network, I began work on trying to use it to classify other images. The goal is to import an image, feed it to the classifier, and have it print the result. I am having some trouble getting that part off the ground, though. Here is what I have so far. I found that having tf.argmax(y, 1) gave an error, and changing it to 0 fixed that error. However, I am not convinced it is actually working: I tossed two images through the classifier and they both got the same class, even though they are vastly different. Just need some perspective here. Is this valid? Or is there something wrong here that will always feed me the same class (in this case I got class 0 for both of the images I tried)?
Is this even the right way to approach making predictions in TensorFlow? This is just the culmination of my debugging; I'm not sure if it is what should be done or not.
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=20, random_state=0)
X_train, y_train = shuffle(X_train, y_train)
def LeNet(x):
    # Arguments used for tf.truncated_normal, which randomly defines variables
    # for the weights and biases of each layer
    mu = 0
    sigma = 0.1

    # SOLUTION: Layer 1: Convolutional. Input = 32x32x3. Output = 28x28x6.
    conv1_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 3, 6), mean = mu, stddev = sigma))
    conv1_b = tf.Variable(tf.zeros(6))
    conv1 = tf.nn.conv2d(x, conv1_W, strides=[1, 1, 1, 1], padding='VALID') + conv1_b

    # SOLUTION: Activation.
    conv1 = tf.nn.relu(conv1)

    # SOLUTION: Pooling. Input = 28x28x6. Output = 14x14x6.
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    # SOLUTION: Layer 2: Convolutional. Output = 10x10x16.
    conv2_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16), mean = mu, stddev = sigma))
    conv2_b = tf.Variable(tf.zeros(16))
    conv2 = tf.nn.conv2d(conv1, conv2_W, strides=[1, 1, 1, 1], padding='VALID') + conv2_b

    # SOLUTION: Activation.
    conv2 = tf.nn.relu(conv2)

    # SOLUTION: Pooling. Input = 10x10x16. Output = 5x5x16.
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    # SOLUTION: Flatten. Input = 5x5x16. Output = 400.
    fc0 = flatten(conv2)

    # SOLUTION: Layer 3: Fully Connected. Input = 400. Output = 120.
    fc1_W = tf.Variable(tf.truncated_normal(shape=(400, 120), mean = mu, stddev = sigma))
    fc1_b = tf.Variable(tf.zeros(120))
    fc1 = tf.matmul(fc0, fc1_W) + fc1_b

    # SOLUTION: Activation.
    fc1 = tf.nn.relu(fc1)

    # SOLUTION: Layer 4: Fully Connected. Input = 120. Output = 84.
    fc2_W = tf.Variable(tf.truncated_normal(shape=(120, 84), mean = mu, stddev = sigma))
    fc2_b = tf.Variable(tf.zeros(84))
    fc2 = tf.matmul(fc1, fc2_W) + fc2_b

    # SOLUTION: Activation.
    fc2 = tf.nn.relu(fc2)

    # SOLUTION: Layer 5: Fully Connected. Input = 84. Output = 43.
    fc3_W = tf.Variable(tf.truncated_normal(shape=(84, 43), mean = mu, stddev = sigma))
    fc3_b = tf.Variable(tf.zeros(43))
    logits = tf.matmul(fc2, fc3_W) + fc3_b

    return logits
import tensorflow as tf
x = tf.placeholder(tf.float32, (None, 32, 32, 3))
y = tf.placeholder(tf.int32, (None))
one_hot_y = tf.one_hot(y, 43)
EPOCHS=10
BATCH_SIZE=128
rate = 0.001
logits = LeNet(x)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_y, logits=logits)  # labels/logits must be passed as keyword arguments
loss_operation = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate = rate)
training_operation = optimizer.minimize(loss_operation)
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))
accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
saver = tf.train.Saver()
def evaluate(X_data, y_data):
    num_examples = len(X_data)
    total_accuracy = 0
    sess = tf.get_default_session()
    for offset in range(0, num_examples, BATCH_SIZE):
        batch_x, batch_y = X_data[offset:offset+BATCH_SIZE], y_data[offset:offset+BATCH_SIZE]
        accuracy = sess.run(accuracy_operation, feed_dict={x: batch_x, y: batch_y})
        total_accuracy += (accuracy * len(batch_x))
    return total_accuracy / num_examples
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    num_examples = len(X_train)

    print("Training...")
    print()
    for i in range(EPOCHS):
        X_train, y_train = shuffle(X_train, y_train)
        for offset in range(0, num_examples, BATCH_SIZE):
            end = offset + BATCH_SIZE
            batch_x, batch_y = X_train[offset:end], y_train[offset:end]
            sess.run(training_operation, feed_dict={x: batch_x, y: batch_y})

        validation_accuracy = evaluate(X_validation, y_validation)
        print("EPOCH {} ...".format(i+1))
        print("Validation Accuracy = {:.3f}".format(validation_accuracy))
        print()

    saver.save(sess, './lenet')
    print("Model saved")
import cv2

image = cv2.imread('File path')
image = cv2.resize(image, (32, 32))  # classifier takes 32x32 images
image = np.array(image)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver3 = tf.train.import_meta_graph('./lenet.meta')
    saver3.restore(sess, "./lenet")
    pred = tf.nn.softmax(logits)
    predictions = sess.run(tf.argmax(y, 0), feed_dict={x: image})
    print(predictions)
So what had to happen here was, first, clearing the kernel and outputs. Somewhere along the way my placeholders got muddled up, and clearing the kernel fixed that right up. Then I had to realize what really needed to be done: I had to run the softmax function on my new data.
Like this:
pred = tf.nn.softmax(logits)
classification = sess.run(pred, feed_dict={x: image_array})
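As a possible follow-up (my addition, not part of the original answer): the softmax output is a vector of class probabilities per image, so you would typically take its argmax to get the predicted class index, along these lines:

# Assumed follow-up: pick the most probable class from the softmax output.
import numpy as np

predicted_class = np.argmax(classification, axis=1)
print(predicted_class)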