I am new to unsupervised CNN models in Python. I am trying to use a CNN model for image classification with unlabeled spectrogram images, each 523 pixels wide and 393 pixels high. I have tried the following code:
import glob
import cv2
import numpy as np
from scipy import misc
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
from keras.callbacks import TensorBoard

X_data = []
files = glob.glob("C:/train/*.png")
for myFile in files:
    image = cv2.imread(myFile)
    image_resized = misc.imresize(image, (523, 393))
    image_resi = misc.imresize(image_resized, (28, 28))
    assert image_resized.shape == (523, 393, 3), "img %s has shape %r" % (myFile, image_resized.shape)
    X_data.append(image_resi)

X_datatest = []
files = glob.glob("C:/test/*.png")
for myFile in files:
    image = cv2.imread(myFile)
    image_resized = misc.imresize(image, (523, 393))
    image_resi = misc.imresize(image_resized, (28, 28))
    assert image_resized.shape == (523, 393, 3), "img %s has shape %r" % (myFile, image_resized.shape)
    X_datatest.append(image_resi)

X_data = np.array(X_data)
X_datatest = np.array(X_datatest)
X_data = X_data.astype('float32') / 255.
X_datatest = X_datatest.astype('float32') / 255.
X_data = np.reshape(X_data, (len(X_data), 28, 28, 3))  # adapt this if using `channels_first` image data format
X_datatest = np.reshape(X_datatest, (len(X_datatest), 28, 28, 3))  # adapt this if using `channels_first` image data format
noise_factor = 0.5
x_train_noisy = X_data + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=X_data.shape)
x_test_noisy = X_datatest + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=X_datatest.shape)
x_train_noisy = np.clip(x_train_noisy, 0., 1.)
x_test_noisy = np.clip(x_test_noisy, 0., 1.)
input_img = Input(shape=(28, 28, 3)) # adapt this if using `channels_first` image data format
x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)
# at this point the representation is (7, 7, 32)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy', metrics=['accuracy'])
autoencoder.fit(x_train_noisy, X_data,
                epochs=100,
                batch_size=128,
                verbose=2,
                validation_data=(x_test_noisy, X_datatest),
                callbacks=[TensorBoard(log_dir='/tmp/tb', histogram_freq=0, write_graph=False)])
Since the spectrogram data is unlabeled, I added noise to the inputs and used the clean images as the targets. But the accuracy is only 33%, and I don't know why. Can anyone help me with this, and explain what the number of filters, the kernel sizes, and the 28x28 resize are based on? Why not just use the original image size, which here is 523 wide by 393 high?
I am a newbie in CNNs and I am trying to code deconvolution (to generate feature maps) on the MNIST database (because it's the simplest one for a beginner to learn). I want my model to generate feature maps at the end. The idea is to implement the paper Saliency Detection Via Dense Convolution Network to some extent.
Here is the complete code that I am trying to run:
import keras
from keras.datasets import mnist
import keras.backend as K
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Flatten, Dropout, Activation, Reshape
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.pooling import MaxPooling2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D, Conv2DTranspose, UpSampling2D
from keras.initializers import RandomNormal
init = RandomNormal(mean = 0., stddev = 0.02)
def GeneratorDeconv(image_size = 28):
    L = int(image_size)
    inputs = Input(shape = (100, ))
    x = Dense(512*int(L/16)**2)(inputs)  # shape (512*(L/16)**2,)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Reshape((int(L/16), int(L/16), 512))(x)  # shape (L/16, L/16, 512)
    x = Conv2DTranspose(256, (4, 4), strides = (2, 2),
                        kernel_initializer = init,
                        padding = 'same')(x)  # shape (L/8, L/8, 256)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2DTranspose(128, (4, 4), strides = (2, 2),
                        kernel_initializer = init,
                        padding = 'same')(x)  # shape (L/4, L/4, 128)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2DTranspose(64, (4, 4), strides = (2, 2),
                        kernel_initializer = init,
                        padding = 'same')(x)  # shape (L/2, L/2, 64)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2DTranspose(3, (4, 4), strides = (2, 2),
                        kernel_initializer = init,
                        padding = 'same')(x)  # shape (L, L, 3)
    images = Activation('tanh')(x)
    model = Model(inputs = inputs, outputs = images)
    model.summary()
    return model
batch_size = 128
num_classes = 10
epochs = 1
# input image dimensions
img_rows, img_cols = 28, 28
# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
model = GeneratorDeconv()
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
I picked the function def GeneratorDeconv(image_size = 28): from ProgramCreek Python.
Now, I am confused about how to embed it into my own custom model. Up to model.compile(...) the program runs okay, but at model.fit(...) it gives the error:
ValueError: Error when checking input: expected input_2 to have 2
dimensions, but got array with shape (60000, 28, 28, 1)
I don't know how to resolve the issues. Please help.
The input to your model is:
inputs = Input(shape = (100, ))
This will take a vector in the shape of (samples, 100), so it expects a 2D input.
However:
print('x_train shape:', x_train.shape)
>>>x_train shape: (60000, 28, 28, 1)
You are inputting a 4D array when you specified that your input takes a 2D one. That is what is causing the error.
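Note that a generator like this is normally fed batches of random latent vectors rather than images. A minimal sketch of that intended usage (the batch size here is just illustrative):
import numpy as np

model = GeneratorDeconv()
z = np.random.normal(0.0, 1.0, size=(16, 100))  # 16 latent vectors of length 100
generated = model.predict(z)                    # -> array of shape (16, 28, 28, 3)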
If instead you want to feed the MNIST images directly, I made some edits to your architecture so the shapes match up and it actually trains:
def GeneratorDeconv(image_size = 28):
    L = int(image_size)
    inputs = Input(shape = (28, 28, 1))
    x = Dense(512*int(L/16)**2)(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2DTranspose(256, (4, 4), strides = (2, 2),
                        kernel_initializer = init,
                        padding = 'same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2DTranspose(128, (4, 4), strides = (2, 2),
                        kernel_initializer = init,
                        padding = 'same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2DTranspose(64, (4, 4), strides = (2, 2),
                        kernel_initializer = init,
                        padding = 'same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2DTranspose(3, (4, 4), strides = (2, 2),
                        kernel_initializer = init,
                        padding = 'same')(x)
    x = Flatten()(x)
    x = Dense(10)(x)
    images = Activation('tanh')(x)
    model = Model(inputs = inputs, outputs = images)
    model.summary()
    return model
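One caveat worth flagging (my addition, not part of the original answer): ending a 10-way classifier with tanh is an unusual pairing with categorical_crossentropy; the standard choice is a softmax on the final Dense layer, e.g.:
x = Flatten()(x)
x = Dense(10)(x)
outputs = Activation('softmax')(x)  # standard pairing with categorical_crossentropy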
Hi, I am building an image classifier for one-class classification, in which I've used an autoencoder. While running this model I am getting the following error:
ValueError: Layer conv2d_3 was called with an input that isn't a symbolic tensor. Received type: <class 'tuple'>. Full input: [(128, 128, 3)]. All inputs to the layer should be tensors.
num_of_samples = img_data.shape[0]
labels = np.ones((num_of_samples,),dtype='int64')
labels[0:376]=0
names = ['cat']
Y = np_utils.to_categorical(labels, num_class)
input_shape=img_data[0].shape
x,y = shuffle(img_data,Y, random_state=2)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_shape)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)
# at this point the representation is (4, 4, 8) i.e. 128-dimensional
x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
autoencoder = Model(input_shape, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
autoencoder.fit(X_train, X_train,
                epochs=50,
                batch_size=32,
                shuffle=True,
                validation_data=(X_test, X_test),
                callbacks=[TensorBoard(log_dir='/tmp/autoencoder')])
Here:
x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_shape)
A shape is not a tensor.
Do this:
from keras.layers import *
inputTensor = Input(input_shape)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(inputTensor)
Hint about autoencoders
You should separate the encoder and decoder as individual models. Later you will probably want to work with only one of them.
Encoder:
inputTensor = Input(input_shape)
x = ....
encodedData = MaxPooling2D((2, 2), padding='same')(x)
encoderModel = Model(inputTensor,encodedData)
Decoder:
encodedInput = Input((4,4,8))
x = ....
decodedData = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
decoderModel = Model(encodedInput,decodedData)
Autoencoder:
autoencoderInput = Input(input_shape)
encoded = encoderModel(autoencoderInput)
decoded = decoderModel(encoded)
autoencoderModel = Model(autoencoderInput,decoded)
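After training the full autoencoder you can use the parts separately, for example (a small usage sketch; images stands for any batch matching the input shape):
# train end-to-end, then use the submodels on their own
autoencoderModel.compile(optimizer='adadelta', loss='binary_crossentropy')
autoencoderModel.fit(images, images, epochs=50, batch_size=32)

codes = encoderModel.predict(images)           # (4, 4, 8) encoded representations
reconstructions = decoderModel.predict(codes)  # decoded back to image space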
I have images of shape 391 x 400. I attempted to use the autoencoder as described here.
Specifically, I have used the following code:
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
from keras import backend as K
input_img = Input(shape=(391, 400, 1)) # adapt this if using `channels_first` image data format
x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)
# at this point the representation is (4, 4, 8) i.e. 128-dimensional
x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
I am getting the following:
ValueError: Error when checking target: expected conv2d_37 to have shape (None, 392, 400, 1) but got array with shape (500, 391, 400, 1)
What I need: a layer that would drop/crop/reshape the last layer from 392 x 400 to 391 x 400.
Thank you for any help.
There's a layer called Cropping2D. To crop the output of the last layer from 392 x 400 to 391 x 400, you can use it like this:
cropped = Cropping2D(cropping=((1, 0), (0, 0)))(decoded)
autoencoder = Model(input_img, cropped)
The tuple ((1, 0), (0, 0)) means cropping 1 row from the top. If you want to crop from the bottom, use ((0, 1), (0, 0)) instead. See the documentation for a more detailed description of the cropping argument.
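Putting it together with the question's code (a minimal sketch; only the import and the summary check are new):
from keras.layers import Cropping2D

cropped = Cropping2D(cropping=((1, 0), (0, 0)))(decoded)  # (392, 400, 1) -> (391, 400, 1)
autoencoder = Model(input_img, cropped)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
autoencoder.summary()  # the final layer should now report (None, 391, 400, 1)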
I am trying to fit an encoder-decoder network in Keras which predicts images from the MNIST data set very well. Now I have changed the input images so that apples are shown. For reference, here is a link:
http://farm3.static.flickr.com/2133/2203251029_008e350792.jpg
I convert the images to grayscale and resize them to 28x28. However, when I now run the same network as for the MNIST data set, I get only a black image as a prediction. Do you have any ideas how I could improve my network?
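For context, the preprocessing described above would look roughly like this (a minimal sketch, assuming PIL; the filename refers to the linked example image):
import numpy as np
from PIL import Image

img = Image.open('2203251029_008e350792.jpg').convert('L')  # convert to grayscale
img = img.resize((28, 28))                                  # resize to 28x28 like MNIST
x = np.array(img).astype('float32') / 255.                  # scale to [0, 1]
x = x.reshape(1, 28, 28, 1)                                 # batch of one, channels_last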
Here is my code of the network:
input_img = Input(shape=(28, 28, 1)) # adapt this if using `channels_first` image data format
x = Conv2D(32, (3, 3), strides=(2, 2), padding='valid')(input_img)
x = BatchNormalization()(x)
x = Activation('relu')(x)
#x = BatchNormalization()(x)
x = Conv2D(16, (3, 3), strides=(2, 2), padding='valid')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Conv2D(8, (2, 2), strides=(2, 2), padding='valid')(x)
x = BatchNormalization()(x)
encoded = Activation('relu')(x)
x = Conv2D(8, (1, 1), padding='valid')(encoded)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = UpSampling2D((3, 3))(x)
x = Conv2D(16, (2, 2), padding='valid')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(32, (3, 3))(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (2, 2), activation='sigmoid', padding='same')(x)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
How can I improve my network? Any hints are much appreciated!
Cheers,
Andi
I am using Keras autoencoders with the Theano backend, and I want to build an autoencoder for 720x1080 RGB images.
This is my code:
from keras.datasets import mnist
import numpy as np
from keras.layers import Input, LSTM, RepeatVector, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
from PIL import Image
x_train = []
x_train_noisy = []
for i in range(5, 1000):
    image = Image.open('data/trailerframes/frame' + str(i) + '.jpg', 'r')
    x_train.append(np.array(image))
    image = Image.open('data/trailerframes_avg/frame' + str(i) + '.jpg', 'r')
    x_train_noisy.append(np.array(image))
x_train = np.array(x_train)
x_train = x_train.astype('float32') / 255.
x_train_noisy = np.array(x_train_noisy)
x_train_noisy = x_train_noisy.astype('float32') / 255.
input_img = Input(shape=(720, 1080, 3))
x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(32, (3, 3), data_format="channels_last", activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(32, (3, 3), data_format="channels_last", activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), data_format="channels_last", activation='sigmoid', padding='same')(x)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
autoencoder.fit(x_train_noisy, x_train,
                epochs=10,
                batch_size=128,
                shuffle=True,
                validation_data=(x_train_noisy, x_train))
But it gives me this error:
ValueError: Error when checking input: expected input_7 to have shape (None, 720, 1080, 3) but got array with shape (995, 720, 1280, 3)
Input error:
As simple as:
You defined your input as (720,1080,3)
You're trying to train your model with data in the shape (720, 1280, 3)
One of them is wrong, and I think it's a typo in the input:
# change 1080 to 1280
input_img = Input(shape=(720, 1280, 3))
Output error (target):
Now, your target data is shaped like (720, 1280, 3), but your last layer outputs (720, 1280, 1).
A simple fix is:
decoded = Conv2D(3, (3, 3), data_format="channels_last", activation='sigmoid', padding='same')(x)
Using the encoder:
After training that model, you can create submodels for using only the encoder or the decoder:
encoderModel = Model(input_img, encoded)

decoderInput = Input((shape of the encoder output))
x = ...  # apply the autoencoder's decoder layers to decoderInput
decoderModel = Model(decoderInput, x)
These submodels share their weights with the full autoencoder, so training any one model affects all three.
To use them without further training, call model.predict(data) to get the outputs directly.
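With the corrected 720x1280 input, the encoder output works out to (180, 320, 32) (two 2x2 poolings over 720x1280). A minimal sketch of building both submodels from the trained autoencoder (the layer indices assume exactly the architecture above):
from keras.layers import Input
from keras.models import Model

# encoder: reuses the tensors already wired from input_img to encoded
encoderModel = Model(input_img, encoded)

# decoder: call the trained decoder layers again on a fresh input
decoderInput = Input((180, 320, 32))
x = autoencoder.layers[-5](decoderInput)   # Conv2D(32, (3, 3), ...)
x = autoencoder.layers[-4](x)              # UpSampling2D((2, 2))
x = autoencoder.layers[-3](x)              # Conv2D(32, (3, 3), ...)
x = autoencoder.layers[-2](x)              # UpSampling2D((2, 2))
decoderOutput = autoencoder.layers[-1](x)  # Conv2D(3, (3, 3), ...) -> reconstruction
decoderModel = Model(decoderInput, decoderOutput)

codes = encoderModel.predict(x_train_noisy)  # compressed representations
frames = decoderModel.predict(codes)         # reconstructed frames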