How to convert ndarray to spark dataframe for mlflow prediction? - apache-spark

I have a trained model, and test input from mnist
import mlflow
from pyspark.sql.functions import struct, col
logged_model = 'runs:/myid/myModel'
loaded_model = mlflow.pyfunc.spark_udf(spark, model_uri=logged_model,
result_type='double')
(x_train, y_train), (x_test, y_test) = mnist.load_data()
shape of x_test is (10000, 28, 28)
However I am having trouble converting ndarray from mnist to spark dataframe so that I can do prediction for my model
mydf = spark.sparkContext.parallelize(x_test).map(lambda x:
[x.tolist()]).toDF(["Doc2Vec"])
output = mydf.withColumn('predictions', loaded_model(struct(*map(col, mydf.columns))))
when I try to output the result
output.show(10)
I got
An exception was thrown from a UDF: 'ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).'.
What am i missing here?
Here is my training model
import tensorflow as tf
import uuid
BUFFER_SIZE = 10000
BATCH_SIZE = 64
def make_datasets():
(mnist_images, mnist_labels), _ = \
tf.keras.datasets.mnist.load_data(path=str(uuid.uuid4())+'mnist.npz')
dataset = tf.data.Dataset.from_tensor_slices((
tf.cast(mnist_images[..., tf.newaxis] / 255.0, tf.float32),
tf.cast(mnist_labels, tf.int64))
)
dataset = dataset.repeat().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
return dataset
def build_and_compile_cnn_model():
model = tf.keras.Sequential([
tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(64, activation='relu'),
tf.keras.layers.Dense(10, activation='softmax'),
])
model.compile(
loss=tf.keras.losses.sparse_categorical_crossentropy,
optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),
metrics=['accuracy'],
)
return model
train_datasets = make_datasets()
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA
train_datasets = train_datasets.with_options(options)
multi_worker_model = build_and_compile_cnn_model()
multi_worker_model.fit(x=train_datasets, epochs=3, steps_per_epoch=5)

Related

ValueError: Dimensions must be equal (keras)

I'm trying to train an autoencoder but have problems in reshaping my X_train to fit it to my model model().
from tensorflow import keras
from keras.layers import *
from keras.models import Model
from keras.models import Sequential
from keras.optimizers import Adam
from keras.optimizers import RMSprop
from keras.utils import plot_model
X_train = np.array(X_train, dtype=np.float)
X_test =np.array(X_train, dtype=np.float)
X_train = X_train.reshape(len(X_train), 100,1)
X_test = X_test.reshape(len(X_test), 100,1)
#inputs = Input(shape=(230, 1,100))
epoch = 100
batch = 128
def model():
m = Sequential()
# ##m.add(Reshape((,)))
m.add(Flatten())
m.add(Dense(512, activation='relu'))
m.add(Dense(128, activation = 'relu'))
m.add(Dense(2, activation = 'linear'))
m.add(Dense(128, activation = 'relu'))
m.add(Dense(512, activation = 'relu'))
m.add(Dense(784, activation = 'sigmoid'))
m.compile(loss='mean_squared_error', optimizer = 'rmsprop', metrics = ['accuracy'])
# Fit data to model m
m.fit(X_train, X_train, batch_size = batch, epochs = epoch)
m.summary()
#score = m.evaluate(X_test, Y_test, verbose = 0)
#print('Test loss:' score[0])
#print('Test accuracy:', score[1])
#m.summary()
mod = model()
The of dimension of my data is the following:
X_train = (523, 100,1)
X_test = (523, 100,1)
To fix your issue, change the following:
X_train = X_train.reshape((-1, 100))
X_test = X_test.reshape((-1, 100))
Delete the Flatten layer and use 100 neurons for the last layer as stated in the comments.

CNN validation accuracy high, but bad at predictions?

I am trying to build a CNN to distinguish between 3 classes which are genuine faces, printed faces, and replayed faces. I prepared the data as so:
classes = ['Genuine', 'Printed', 'Replay']
base_dir = '/Dataset'
import os
import numpy as np
import glob
import shutil
for cl in classes:
img_path = os.path.join(base_dir, cl)
images = glob.glob(img_path + '/*.jpg')
print("{}: {} Images".format(cl, len(images)))
num_train = int(round(len(images)*0.8))
train, val = images[:num_train], images[num_train:]
for t in train:
if not os.path.exists(os.path.join(base_dir, 'train', cl)):
os.makedirs(os.path.join(base_dir, 'train', cl))
shutil.move(t, os.path.join(base_dir, 'train', cl))
for v in val:
if not os.path.exists(os.path.join(base_dir, 'val', cl)):
os.makedirs(os.path.join(base_dir, 'val', cl))
shutil.move(v, os.path.join(base_dir, 'val', cl))
from tensorflow.keras.preprocessing.image import ImageDataGenerator
image_gen_train = ImageDataGenerator(
rescale=1./255,
rotation_range=45,
width_shift_range=.15,
height_shift_range=.15,
horizontal_flip=True,
zoom_range=0.5
)
batch_size = 32
IMG_SHAPE = 96
train_data_gen = image_gen_train.flow_from_directory(
batch_size=batch_size,
directory=train_dir,
shuffle=True,
target_size=(IMG_SHAPE,IMG_SHAPE),
class_mode='sparse'
)
I built a simple model like the following:
## Model
import tensorflow as tf
from keras import regularizers
from keras.layers.normalization import BatchNormalization
IMG_SHAPE = (96, 96, 3)
batch_size = 32
## Trainable classification head
aConv_layer = tf.keras.layers.Conv2D(576, (3, 3), padding="same",
activation="relu", input_shape= IMG_SHAPE)
aConv_layer = tf.keras.layers.Conv2D(144, (3, 3), padding="same",
activation="relu", input_shape= IMG_SHAPE)
gmaxPool_layer = tf.keras.layers.GlobalMaxPooling2D() #reduces input from 4D to 2D
maxPool_layer = tf.keras.layers.MaxPool2D(pool_size=(1, 1), strides=None,
padding='valid', data_format=None,
)
batNor_layer = tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99,
epsilon=0.001,
center=True, scale=True,
beta_initializer='zeros',
gamma_initializer='ones',
moving_mean_initializer='zeros',
moving_variance_initializer='ones',
beta_regularizer=None, gamma_regularizer=None,
beta_constraint=None, gamma_constraint=None)
flat_layer = tf.keras.layers.Flatten()
dense_layer = tf.keras.layers.Dense(9, activation='softmax',
kernel_regularizer=regularizers.l2(0.01))
prediction_layer = tf.keras.layers.Dense(3, activation='softmax')
model = tf.keras.Sequential([
#base_model,
tf.keras.layers.Conv2D(576, (3, 3), padding="same", activation="relu", input_shape= IMG_SHAPE),
tf.keras.layers.Dense(288, activation='softmax', kernel_regularizer=regularizers.l2(0.01)),
tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=None, padding='valid', data_format=None),
tf.keras.layers.Conv2D(144, (3, 3), padding="same", activation="relu"),
tf.keras.layers.Dense(72, activation='softmax', kernel_regularizer=regularizers.l2(0.01)),
tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=None, padding='valid', data_format=None),
#
batNor_layer,
gmaxPool_layer,
tf.keras.layers.Flatten(),
#tf.keras.layers.Dropout(0.5),
prediction_layer
])
learning_rate = 0.001
## Compiles the model
model.compile(optimizer=tf.keras.optimizers.Adam(lr=learning_rate),
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)
I trained the model and got the following, which I would assume to be great results:
However, whenever I tried to predict an image with the following code, it would almost always get it wrong:
import numpy as np
from google.colab import files
from keras.preprocessing import image
uploaded = files.upload()
for fn in uploaded.keys():
# predicting images
path = fn
img = image.load_img(path, target_size=(96, 96))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
images = np.vstack([x])
classes = model.predict(images, batch_size=10)
print(fn)
print('Genuine | Printout | Replay')
print(np.argmax(classes))
How can the predictions be wrong when the validation accuracy be so high?
Here is the Codelab, if it helps.
Process the images for prediction in the same way that you processed your images for training. Specifically, rescale your images like you did with ImageDataGenerator.
import numpy as np
from google.colab import files
from keras.preprocessing import image
uploaded = files.upload()
for fn in uploaded.keys():
# predicting images
path = fn
img = image.load_img(path, target_size=(96, 96))
x = image.img_to_array(img)
# Rescale image.
x = x / 255.
x = np.expand_dims(x, axis=0)
images = np.vstack([x])
classes = model.predict(images, batch_size=10)
print(fn)
print('Genuine | Printout | Replay')
print(np.argmax(classes))
Somehow, the image generator of Keras works well when combined with fit() or fit_generator() function, but fails miserably when combined
with predict_generator() or the predict() function. The Keras predict() function generally fails when working with batch prediction.
When using Plaid-ML Keras back-end for AMD processor, I would rather loop through all test images one-by-one and get the prediction for each image in each iteration.
import os
from PIL import Image
import keras
import numpy
# code for creating dan training model is not included
print("Prediction result:")
dir = "/path/to/test/images"
files = os.listdir(dir)
correct = 0
total = 0
#dictionary to label all animal category class.
classes = {
0:'This is Cat',
1:'This is Dog',
}
for file_name in files:
total += 1
image = Image.open(dir + "/" + file_name).convert('RGB')
image = image.resize((100,100))
image = numpy.expand_dims(image, axis=0)
image = numpy.array(image)
image = image/255
pred = model.predict_classes([image])[0]
animals_category = classes[pred]
if ("cat" in file_name) and ("cat" in sign):
print(correct,". ", file_name, animals_category)
correct+=1
elif ("dog" in file_name) and ("dog" in animals_category):
print(correct,". ", file_name, animals_category)
correct+=1
print("accuracy: ", (correct/total))

Universal Sentence Encoder Error: Input 0 is incompatible with layer conv1d_6: expected ndim=3, found ndim=2

I'm worked on sentiment analysis task using universal sentence encoder embed_size=512 with CNN but have an error says: Input 0 is incompatible with layer conv1d_6: expected ndim=3, found ndim=2.
and wanna know if this is right to add universal sentence encoder with CNN in this way or not?
pickle_in=open("X.pickle","rb")
X=pickle.load(pickle_in)
X = X.tolist() #convert x to list as The embedding code works once I
converted
the pandas.series data type to list.
X = np.array(X, dtype=object)[:, np.newaxis]
pickle_in=open("Y.pickle","rb")
Y=pickle.load(pickle_in)
Y = np.asarray(pd.get_dummies(Y), dtype = np.int8)
import tensorflow as tf
import tensorflow_hub as hub
module_url = "https://tfhub.dev/google/universal-sentence-encoder-large/3"
embed = hub.Module(module_url)
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.15,
random_state = 42)
X_train, X_Val, Y_train, Y_Val = train_test_split(X_train,Y_train, test_size
= 0.15, random_state = 42)
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)
print(X_Val.shape,Y_Val.shape)
type(Y_test)
embed_size = embed.get_output_info_dict()['default'].get_shape()[1].value
def UniversalEmbedding(x):
return embed(tf.squeeze(tf.cast(x, tf.string)),
signature="default", as_dict=True)["default"]
import keras
seed=7
np.random.seed(seed)
from keras.layers import Input, Dense, concatenate, Activation,
GlobalMaxPooling1D
from keras import layers
from keras.models import Model
input_text = layers.Input(shape=(1,), dtype=tf.string)
embedding = layers.Lambda(UniversalEmbedding,
output_shape=(embed_size,))(input_text)
bigram_branch = Conv1D(filters=64, kernel_size=1, padding='same',
activation='relu', strides=1)(embedding)
bigram_branch = GlobalMaxPooling1D()(bigram_branch)
trigram_branch = Conv1D(filters=64, kernel_size=2, padding='same',
activation='relu', strides=1)(embedding)
trigram_branch = GlobalMaxPooling1D()(trigram_branch)
fourgram_branch = Conv1D(filters=64, kernel_size=3, padding='same',
activation='relu', strides=1)(embedding)
fourgram_branch = GlobalMaxPooling1D()(fourgram_branch)
merged = concatenate([bigram_branch, trigram_branch, fourgram_branch],
axis=1)
merged = Dense(512, activation='relu')(merged)
merged = Dropout(0.8)(merged)
merged = Dense(2)(merged)
output = Activation('sigmoid')(merged)
model = Model(inputs=[tweet_input], outputs=[output])
adam=keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None,
decay=0.0, amsgrad=False)
model.compile(loss='mean_squared_error',
optimizer= adam,
metrics=['accuracy'])
model.summary()
You can not directly pass Universal Sentence Encoder to Conv1D because Conv1D expected a tensor with shape [batch, sequence, feature] while the output of Universal Sentence Encoder is [batch, feature]. It is also stated in tfhub.dev:
The input is variable length English text and the output is a 512
dimensional vector.
How can I fix this?
In my view, the easiest possible solution is to use ELMo on Tensorhub. With ELMo you can map each sentence to [batch, sequence, feature] and then feed into the Conv1D.

Make new predictions with a trained ANN - Keras

I'm new to the world of ANN and I was wondering, how do I pass a new dataset, possible a new csv, into the model that I alredy trained? I understand, for instance, that:
model.predict()
only accept arrays, and those arrays have to be of the same shape in order to work. So, how do I pass a complete new csv to generate predictions?
Here is my code, I know that probably it's gonna be a mess, but I'm working on it.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset
dataset = pd.read_csv('Prova_1.csv')
dataset = dataset[np.isfinite(dataset['ID'])]
X = dataset.iloc[:, 3:6].values
y = dataset.iloc[:, 6].values
# Encoding
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X_1 = LabelEncoder()
X[:, 0] = labelencoder_X_1.fit_transform(X[:, 0])
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
onehotencoder = OneHotEncoder(categorical_features = [2])
X = onehotencoder.fit_transform(X).toarray()
X = X[:, 1:]
# Splitting
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
#ANN
import keras
from keras.models import Sequential
from keras.layers import Dense
# Initialising the ANN
classifier = Sequential()
# Adding the input layer and the first hidden layer
classifier.add(Dense(units = 27, kernel_initializer = 'uniform', activation = 'relu', input_dim = 55))
# Adding the second hidden layer
classifier.add(Dense(units = 27, kernel_initializer = 'uniform', activation = 'relu'))
# Adding the output layer
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
# Compiling the ANN
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# Fitting the ANN to the Training set
classifier.fit(X_train, y_train, batch_size = 1, epochs = 100)
# Prediction
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
You need to do all the preprocessing operations to train data for the new data. Because the model trained over your input shape and input features. Make sure your input shape of your input data same as before, unless it will not work.

Using tf.data.Dataset as training input to Keras model NOT working

I have a simple code, which DOES work, for training a Keras model in Tensorflow using numpy arrays as features and labels. If I then wrap these numpy arrays using tf.data.Dataset.from_tensor_slices in order to train the same Keras model using a tensorflow dataset, I get an error. I haven't been able to figure out why (it may be a tensorflow or keras bug, but I may also be missing something). I'm on python 3, tensorflow is 1.10.0, numpy is 1.14.5, no GPU involved.
OBS1: The possibility of using tf.data.Dataset as a Keras input is showed in https://www.tensorflow.org/guide/keras, under "Input tf.data datasets".
OBS2: In the code below, the code under "#Train with numpy arrays" is being executed, using numpy arrays. If this code is commented and the code under "#Train with tf.data datasets" is used instead, the error will be reproduced.
OBS3: In line 13, which is commented and starts with "###WORKAROUND 1###", if the comment is removed and the line is used for tf.data.Dataset inputs, the error changes, even though I can't completely understand why.
The complete code is:
import tensorflow as tf
import numpy as np
np.random.seed(1)
tf.set_random_seed(1)
print(tf.__version__)
print(np.__version__)
#Import mnist dataset as numpy arrays
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()#Import
x_train, x_test = x_train / 255.0, x_test / 255.0 #normalizing
###WORKAROUND 1###y_train, y_test = (y_train.astype(dtype='float32'), y_test.astype(dtype='float32'))
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1]*x_train.shape[2])) #reshaping 28 x 28 images to 1D vectors, similar to Flatten layer in Keras
batch_size = 32
#Create a tf.data.Dataset object equivalent to this data
tfdata_dataset_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
tfdata_dataset_train = tfdata_dataset_train.batch(batch_size).repeat()
#Creates model
keras_model = tf.keras.models.Sequential([
tf.keras.layers.Dense(512, activation=tf.nn.relu),
tf.keras.layers.Dropout(0.2, seed=1),
tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
#Compile the model
keras_model.compile(optimizer='adam',
loss=tf.keras.losses.sparse_categorical_crossentropy,
metrics=['accuracy'])
#Train with numpy arrays
keras_training_history = keras_model.fit(x_train,
y_train,
initial_epoch=0,
epochs=1,
batch_size=batch_size
)
#Train with tf.data datasets
#keras_training_history = keras_model.fit(tfdata_dataset_train,
# initial_epoch=0,
# epochs=1,
# steps_per_epoch=60000//batch_size
# )
print(keras_training_history.history)
The error observed when using tf.data.Dataset as input is:
(...)
ValueError: Tensor conversion requested dtype uint8 for Tensor with dtype float32: 'Tensor("metrics/acc/Cast:0", shape=(?,), dtype=float32)'
During handling of the above exception, another exception occurred:
(...)
TypeError: Input 'y' of 'Equal' Op has type float32 that does not match type uint8 of argument 'x'.
The error when removing the comment from line 13, as commented above in OBS3, is:
(...)
tensorflow.python.framework.errors_impl.InvalidArgumentError: In[0] is not a matrix
[[Node: dense/MatMul = MatMul[T=DT_FLOAT, _class=["loc:#training/Adam/gradients/dense/MatMul_grad/MatMul_1"], transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_sequential_input_0_0, dense/MatMul/ReadVariableOp)]]
Any help would be appreciated, including comments that you were able to reproduce the errors, so I can report the bug if it is the case.
I just upgraded to Tensorflow 1.10 to execute this code. I think that is the answer which is also discussed in the other Stackoverflow thread
This code executes but only if I remove the normalization as that line seems to use too much CPU memory. I see messages indicating that. I also reduced the cores.
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Input
np.random.seed(1)
tf.set_random_seed(1)
batch_size = 128
NUM_CLASSES = 10
print(tf.__version__)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
#x_train, x_test = x_train / 255.0, x_test / 255.0 #normalizing
def tfdata_generator(images, labels, is_training, batch_size=128):
'''Construct a data generator using tf.Dataset'''
def preprocess_fn(image, label):
'''A transformation function to preprocess raw data
into trainable input. '''
x = tf.reshape(tf.cast(image, tf.float32), (28, 28, 1))
y = tf.one_hot(tf.cast(label, tf.uint8), NUM_CLASSES)
return x, y
dataset = tf.data.Dataset.from_tensor_slices((images, labels))
if is_training:
dataset = dataset.shuffle(1000) # depends on sample size
# Transform and batch data at the same time
dataset = dataset.apply(tf.contrib.data.map_and_batch(
preprocess_fn, batch_size,
num_parallel_batches=2, # cpu cores
drop_remainder=True if is_training else False))
dataset = dataset.repeat()
dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)
return dataset
training_set = tfdata_generator(x_train, y_train,is_training=True, batch_size=batch_size)
testing_set = tfdata_generator(x_test, y_test, is_training=False, batch_size=batch_size)
inputs = Input(shape=(28, 28, 1))
x = Conv2D(32, (3, 3), activation='relu', padding='valid')(inputs)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)
keras_model = tf.keras.Model(inputs, outputs)
#Compile the model
keras_model.compile('adam', 'categorical_crossentropy', metrics=['acc'])
#Train with tf.data datasets
keras_training_history = keras_model.fit(
training_set.make_one_shot_iterator(),
steps_per_epoch=len(x_train) // batch_size,
epochs=5,
validation_data=testing_set.make_one_shot_iterator(),
validation_steps=len(x_test) // batch_size,
verbose=1)
print(keras_training_history.history)
Installing the tf-nightly build, together with changing dtypes of some tensors (the error changes after installing tf-nightly), solved the problem, so it is an issue which (hopefully) will be solved in 1.11.
Related material: https://github.com/tensorflow/tensorflow/issues/21894
I am wondering how Keras is able to do 5 epochs when the
make_one_shot_iterator() which only supports iterating once through a
dataset?
could be given smth like iterations = len(y_train) * epochs - here shown for tf.v1
the code from Mohan Radhakrishnan still works in tf.v2 with little corrections in objects' belongings to new classes (in tf.v2) fixings - to make the code up-to-date... No more make_one_shot_iterator() needed
# >> author: Mohan Radhakrishnan
import tensorflow as tf
import tensorflow.keras
import numpy as np
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Input
np.random.seed(1)
tf.random.set_seed(1)
batch_size = 128
NUM_CLASSES = 10
print(tf.__version__)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
#x_train, x_test = x_train / 255.0, x_test / 255.0 #normalizing
def tfdata_generator(images, labels, is_training, batch_size=128):
'''Construct a data generator using tf.Dataset'''
def preprocess_fn(image, label):
'''A transformation function to preprocess raw data
into trainable input. '''
x = tf.reshape(tf.cast(image, tf.float32), (28, 28, 1))
y = tf.one_hot(tf.cast(label, tf.uint8), NUM_CLASSES)
return x, y
dataset = tf.data.Dataset.from_tensor_slices((images, labels))
if is_training:
dataset = dataset.shuffle(1000) # depends on sample size
# Transform and batch data at the same time
dataset = dataset.apply( tf.data.experimental.map_and_batch(
preprocess_fn, batch_size,
num_parallel_batches=2, # cpu cores
drop_remainder=True if is_training else False))
dataset = dataset.repeat()
dataset = dataset.prefetch( tf.data.experimental.AUTOTUNE)
return dataset
training_set = tfdata_generator(x_train, y_train,is_training=True, batch_size=batch_size)
testing_set = tfdata_generator(x_test, y_test, is_training=False, batch_size=batch_size)
inputs = Input(shape=(28, 28, 1))
x = Conv2D(32, (3, 3), activation='relu', padding='valid')(inputs)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)
keras_model = tf.keras.Model(inputs, outputs)
#Compile the model
keras_model.compile('adam', 'categorical_crossentropy', metrics=['acc'])
#Train with tf.data datasets
# training_set.make_one_shot_iterator() - 'PrefetchDataset' object has no attribute 'make_one_shot_iterator'
keras_training_history = keras_model.fit(
training_set,
steps_per_epoch=len(x_train) // batch_size,
epochs=5,
validation_data=testing_set,
validation_steps=len(x_test) // batch_size,
verbose=1)
print(keras_training_history.history)
not loading data locally, just easy DataFlow - that is very convinient - Thanks a lot - hope my corrections are proper

Resources