I am trying to train some models in parallel using a loop. However, it hangs and never proceeds past the .fit method.
def crossover_during_training(pair):
    print("FOR PAIR NUMBER " + str(pair))
    for index in [1, 5, 10, 15]:
        print("one")
        model = model_keras(x_train, x_test, y_train, y_test, 0)
        model_one = model_keras(x_train, x_test, y_train, y_test, pair)
        model_two = model_keras(x_train, x_test, y_train, y_test, pair + 100)
        print(model, model_one, model_two)
        print(x_train.shape)
        model_information_parent_one = model_one.fit(x_train, y_train, epochs=index,
                                                     batch_size=128, verbose=True,
                                                     validation_data=(x_test, y_test))
        print(model_information_parent_one)
        weights_nn_one = model_one.get_weights()
        model_information_parent_two = model_two.fit(x_train, y_train, epochs=index,
                                                     batch_size=128, verbose=True,
                                                     validation_data=(x_test, y_test))
        weights_nn_two = model_two.get_weights()
        print("two")
This is how I use the multiprocessing module:
from multiprocessing import Pool

all_args = [pair for pair in range(2)]
pool = Pool(2)
results = pool.map(crossover_during_training, all_args)
The crossover_during_training function runs but never gets past model.fit. In other words, it never reaches the print("two") line.
Is there anything I am doing wrong in calling the fit method inside a worker process?
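One common workaround, sketched minimally here under the assumption that the hang comes from fork-based multiprocessing inheriting TensorFlow's state from the parent process: use the "spawn" start method and import/build Keras only inside the worker. The model building and fit loop from the question are assumed to go where the comment marks.

import multiprocessing as mp

def crossover_during_training(pair):
    # Import inside the worker so each spawned process initializes
    # TensorFlow from scratch instead of inheriting a forked session.
    from tensorflow import keras
    # ... build model_one / model_two and run the fit loop from the question ...
    return pair

if __name__ == "__main__":
    ctx = mp.get_context("spawn")  # the default "fork" on Linux is what usually deadlocks TF
    with ctx.Pool(2) as pool:
        results = pool.map(crossover_during_training, range(2))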
import numpy as np
from sklearn.model_selection import train_test_split
import lazypredict
from lazypredict.Supervised import LazyClassifier

y = np.array(skin_new_df['diagnostic'])
X = np.array(skin_new_df.drop(['diagnostic'], axis=1))
print(X.shape)
print(y.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = LazyClassifier(verbose=0,
                     ignore_warnings=True,
                     custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
print(models)
I run this code and get an empty DataFrame as output:
(2298, 25)
(2298,)
100%|██████████| 29/29 [00:08<00:00, 3.61it/s]
Accuracy Balanced Accuracy ROC AUC F1 Score Time Taken
Model
I want to get the accuracy of every model.
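A hedged debugging sketch: LazyClassifier skips estimators that raise during fitting, and with ignore_warnings=True those failures stay silent, which can leave the results frame empty. Rerunning with warnings visible may reveal why every model was dropped:

clf = LazyClassifier(verbose=1,
                     ignore_warnings=False,  # surface per-model errors instead of hiding them
                     custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
print(models)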
I am trying to optimize hyperparameters for the IMDB dataset using Keras with Hyperas, but I am getting an error. I used this code (https://www.kaggle.com/kt66nf/hyperparameter-optimization-using-keras-hyperas) as a reference.
CODE
import numpy as np
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform
from keras import models, layers
from keras.layers import Dropout
from keras.datasets import imdb

def vectorize_sequences(sequences, dimension=10000):
    # Create an all-zero matrix of shape (len(sequences), dimension)
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.  # set specific indices of results[i] to 1s
    return results

def data():
    (train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)
    x_train = vectorize_sequences(train_data)  # vectorized training data
    x_test = vectorize_sequences(test_data)
    y_train = np.asarray(train_labels).astype('float32')  # vectorized labels
    y_test = np.asarray(test_labels).astype('float32')
    return x_train, y_train, x_test, y_test
def create_model(x_train, y_train, x_test, y_test):
    model = models.Sequential()
    model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
    model.add(layers.Dense(16, activation='relu'))
    model.add(Dropout({{uniform(0.5, 1)}}))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(Dropout({{uniform(0.5, 1)}}))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', metrics=['accuracy'],
                  optimizer={{choice(['rmsprop', 'adam', 'sgd'])}})
    model.fit(x_train, y_train,
              batch_size={{choice([16, 32, 64])}},
              epochs={{choice([25, 50, 75, 100])}},
              validation_data=(x_test, y_test))
    score, acc = model.evaluate(x_test, y_test, verbose=0)
    print('Test accuracy:', acc)
    return {'loss': -acc, 'status': STATUS_OK, 'model': model}
Then I mounted Google Drive and set the path to the notebook:
best_run, best_model = optim.minimize(model=create_model,
                                      data=data,
                                      max_evals=15,
                                      algo=tpe.suggest,
                                      notebook_name='Copy of imbd',  # name of the notebook
                                      trials=Trials())
This last call is where I get the error:

File "", line 182
    ]
    ^
SyntaxError: invalid syntax
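A hedged workaround sketch: Hyperas re-parses the source file named by notebook_name, and a SyntaxError pointing at a line that does not exist in your own code usually means that re-parse is failing. Moving data() and create_model() into a plain .py script sidesteps the notebook machinery, since notebook_name can then be omitted:

# run_hyperas.py -- assumes data() and create_model() above are defined in this script
if __name__ == '__main__':
    best_run, best_model = optim.minimize(model=create_model,
                                          data=data,
                                          algo=tpe.suggest,
                                          max_evals=15,
                                          trials=Trials())
    print(best_run)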
I have a dataset with 100 samples. I want to split it into 75%, 25%, 25% for Train, Validate, and Test respectively, and then do the same again with different ratios such as 80%, 10%, 10%.
For this purpose I was using the code below, but I don't think it splits the data correctly in the second step: applying test_size=0.15 twice takes 15% of the remaining 85%, so the final proportions are 85% × 85% ≈ 72% train and 85% × 15% ≈ 13% test of the whole dataset, not the intended ratios.
My question is:
Is there a clean way to do the splitting correctly for any given ratios?
from sklearn.model_selection import train_test_split
# Split Train Test Validate
X_, X_val, Y_, Y_val = train_test_split(X, Y, test_size=0.15, random_state=42)
X_train, X_test, Y_train, Y_test = train_test_split(X_, Y_, test_size=0.15, random_state=42)
You could always do it manually. It's a bit messy, but you can create a function:
import numpy as np

def my_train_test_split(X, y, ratio_train, ratio_val, seed=42):
    # Shuffle the sample indices, then cut them at the cumulative ratios.
    idx = np.arange(X.shape[0])
    np.random.seed(seed)
    np.random.shuffle(idx)
    limit_train = int(ratio_train * X.shape[0])
    limit_val = int((ratio_train + ratio_val) * X.shape[0])
    idx_train = idx[:limit_train]
    idx_val = idx[limit_train:limit_val]
    idx_test = idx[limit_val:]
    X_train, y_train = X[idx_train], y[idx_train]
    X_val, y_val = X[idx_val], y[idx_val]
    X_test, y_test = X[idx_test], y[idx_test]
    return X_train, X_val, X_test, y_train, y_val, y_test
The test ratio is assumed to be 1 - (ratio_train + ratio_val).
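For example, with the 80/10/10 split from the question:

X_train, X_val, X_test, y_train, y_val, y_test = my_train_test_split(X, y, 0.8, 0.1)

Alternatively, a sketch that keeps two train_test_split calls but corrects the second ratio: to end up with 80/10/10 overall, the second split must take 0.1 / (1 - 0.1) of the remainder, not 0.1 of it.

from sklearn.model_selection import train_test_split

X_tmp, X_test, y_tmp, y_test = train_test_split(X, y, test_size=0.10, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_tmp, y_tmp,
                                                  test_size=0.10 / 0.90,  # 10% of the full set
                                                  random_state=42)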
I am preparing input to feed into a Keras neural network for a multiclass problem as follows:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.utils import np_utils

encoder = LabelEncoder()
encoder.fit(y)
encoded_Y = encoder.transform(y)

# convert integers to dummy variables (i.e. one-hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

X_train, X_test, y_train, y_test = train_test_split(X, dummy_y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.06, random_state=42)
After having trained the model, I try to run the following lines to obtain a prediction that reflects the original class names:
y_pred = model.predict_classes(X_test)
y_pred = encoder.inverse_transform(y_pred)
y_test = np.argmax(y_test, axis = 1)
y_test = encoder.inverse_transform(y_test)
However, I obtain a surprisingly low accuracy (0.36), as opposed to training and validation, which reach 0.98. Is this the right way of transforming classes back into the original labels?
I compute accuracies as:
# For training
history.history['acc']
# For testing
accuracy_score(y_test, y_pred)
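A minimal sanity-check sketch, assuming the model ends in a softmax layer: decode predictions and ground truth through the same argmax + inverse_transform path before comparing, so both sides of accuracy_score live in the original label space. (Sequential.predict_classes is deprecated in recent Keras, so argmax over predict is used here instead.)

import numpy as np
from sklearn.metrics import accuracy_score

y_pred_int = np.argmax(model.predict(X_test), axis=1)  # predicted class indices
y_true_int = np.argmax(y_test, axis=1)                 # true indices from the one-hot targets
y_pred_lab = encoder.inverse_transform(y_pred_int)     # back to the original label names
y_true_lab = encoder.inverse_transform(y_true_int)
print(accuracy_score(y_true_lab, y_pred_lab))

If this round trip is correct and the score is still 0.36, the gap is more likely genuine overfitting than a decoding bug.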
I am currently using this code for training:
for x, y in generator.flow(x_train, y_train, batch_size=10240):
    fit = model.fit(x, y[:, 0],
                    batch_size=1024,
                    epochs=10,
                    verbose=1,
                    validation_split=.3,
                    shuffle=True,
                    callbacks=[checkpoint])
How can I convert it to use model.fit_generator() and behave the same way?
gen = generator.flow(x_train, y_train, batch_size=10240)
model.fit_generator(gen, steps_per_epoch=number_of_samples//batch_size, ...)
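A fuller hedged sketch of the same idea, using names (generator, checkpoint) from the question. Two caveats are needed to match the loop above: the label slice y[:, 0] must be applied before the data reaches flow, and fit_generator does not support validation_split, so a separate validation generator or array pair would be needed:

batch_size = 10240
gen = generator.flow(x_train, y_train[:, 0], batch_size=batch_size)

model.fit_generator(gen,
                    steps_per_epoch=len(x_train) // batch_size,
                    epochs=10,
                    verbose=1,
                    callbacks=[checkpoint])

Note that with fit_generator the generator's batch size is the training batch size, so to mimic the inner batch_size=1024 from the loop you would pass batch_size=1024 to flow instead.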