I am learning PyTorch. In some code, such as this one, I have seen a snippet that I do not understand:
from torch_geometric.data import DataLoader, DenseDataLoader as DenseLoader
# Pick the loader class once: when the first training sample carries an 'adj'
# entry, DenseLoader is used for every split; otherwise the regular DataLoader.
# NOTE(review): 'adj' is presumably a dense adjacency matrix stored on each
# sample -- confirm against the dataset definition.
loader_cls = DenseLoader if 'adj' in train_dataset[0] else DataLoader

# Only the training split is shuffled; validation and test keep a fixed order.
train_loader = loader_cls(train_dataset, self.p.batch_size, shuffle=True)
val_loader = loader_cls(val_dataset, self.p.batch_size, shuffle=False)
test_loader = loader_cls(test_dataset, self.p.batch_size, shuffle=False)
First of all, what does `if 'adj' in train_dataset[0]` check, and secondly, what is the difference between DenseLoader and DataLoader?
Related
I'm trying to fix the class imbalance of my dataset for a classification task by adding augmented images. As the network didn't improve, I noticed that I'm transforming the whole dataset without keeping the original images.
What is the best method to fix that?
My training function looks like this (excerpt):
def train(mu, lr, batch_size, n_epochs, k, model, use_gpu, size_image, seed, num_workers, root):
    """Train ``model`` for ``n_epochs`` epochs and checkpoint the best accuracy.

    This is an excerpt: ``loss_train``, ``f1_train``, ``acc_train``,
    ``topk_acc_train``, ``acc_epoch_test`` and ``save_dir`` are not defined in
    the lines shown and must come from the omitted parts of the function or
    from the enclosing module.

    Args:
        mu: Passed to SGD as ``weight_decay``.
        lr: Initial learning rate passed to SGD.
        batch_size: Forwarded to ``get_data``.
        n_epochs: Number of iterations of the epoch loop.
        k: Forwarded to ``train_epoch``.
        model: Model whose parameters are optimized and checkpointed.
        use_gpu: Forwarded to ``set_seed`` and ``train_epoch``.
        size_image: Forwarded to ``get_data``.
        seed: Forwarded to ``set_seed``.
        num_workers: Forwarded to ``get_data``.
        root: Forwarded to ``get_data``.
    """
    set_seed(seed, use_gpu)
    train_loader, test_loader, dataset_attributes = get_data(size_image, root, batch_size, num_workers)

    criteria = CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=mu, nesterov=True)

    best_acc = 0.0
    for epoch in tqdm(range(n_epochs), desc='epoch', position=0):
        t = time.time()
        # The optimizer is rebuilt/adjusted every epoch from the dataset's
        # learning-rate schedule.
        optimizer = update_optimizer(optimizer, lr_schedule=dataset_attributes['lr_schedule'], epoch=epoch)

        loss_epoch_train, f1_epoch_train, acc_epoch_train, topk_acc_epoch_train = train_epoch(
            model, optimizer, train_loader,
            criteria, loss_train, f1_train, acc_train,
            topk_acc_train, k,
            dataset_attributes['n_train'],
            use_gpu)

        # NOTE(review): acc_epoch_test is presumably produced by an evaluation
        # step that is not part of this excerpt -- confirm.
        if acc_epoch_test > best_acc:
            best_acc = acc_epoch_test
            save(model, optimizer, epoch, os.path.join(save_dir, 'weights_best_acc.tar'))
This is an excerpt of my get_data function:
def get_data(size_image, root, batch_size, num_workers):
    """Build the train and test data loaders for the Plantnet image folders.

    This is an excerpt: ``weights``, ``num_samples`` and
    ``dataset_attributes`` are computed in the part elided by ``...``.

    Args:
        size_image: Target size passed to ``transforms.Resize``.
        root: Dataset root passed to ``Plantnet``.
        batch_size: Batch size of both loaders.
        num_workers: Worker count of both loaders.

    Returns:
        Tuple ``(trainloader, testloader, dataset_attributes)``.
    """
    # The same deterministic pipeline is applied to both splits.
    # NOTE(review): random torchvision transforms are re-sampled each time a
    # sample is loaded, so on-the-fly augmentation would need a separate
    # training pipeline rather than this shared one -- confirm before adding.
    transform = transforms.Compose(
        [MaxCenterCrop(),
         transforms.Resize(size_image),
         transforms.ToTensor()])

    trainset = Plantnet(root, 'images_train', transform=transform)
    testset = Plantnet(root, 'images_test', transform=transform)

    # Per-class instance counts for each split.
    train_class_to_num_instances = Counter(trainset.targets)
    test_class_to_num_instances = Counter(testset.targets)
    ...
    # Training batches are drawn through the weighted sampler; shuffle stays
    # False because the sampler already decides the order.
    sampler = WeightedRandomSampler(torch.DoubleTensor(weights), int(num_samples))
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              sampler=sampler,
                                              shuffle=False, num_workers=num_workers)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False, num_workers=num_workers)
    return trainloader, testloader, dataset_attributes
Now my idea for an easy fix would be to add a transformed dataset and concatenate it to the original one. But I think this idea would have a bad impact on performance and wouldn't really fix the problem of class imbalance.
I think that applying the transformation to each batch would make the most sense. But how do I add this to my code?
I am trying to overfit my model on a single batch to check model integrity. I am using Keras and TensorFlow for both the implementation of my model and the coding style of this project.
I know how to get a single batch and overfit the model in PyTorch, but I don't know how to do it in Keras.
To get a single batch in PyTorch I used:
# Fetch one batch up front and reuse it for every step, so the model only ever
# sees these samples.
# NOTE(review): assumes iterating train_dataset yields (inputs, labels)
# batches, e.g. it is a DataLoader -- confirm.
images, labels = next(iter(train_dataset))

# Move the batch to the device and flatten everything but the first dimension
# once, instead of on every iteration.
data = images.to(device)
data = data.reshape(data.shape[0], -1)
target = labels.to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

for epoch in range(epochs):
    print(f"Epoch [{epoch}/{epochs}]")

    # forward
    score = model(data)
    loss = criterion(score, target)
    print(f"Loss: {loss.item()}")

    # backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
How can I do this in Keras? Is there any helpful material?
Thank you everyone for coming here. I found a solution and here it is:
# Training image generator: multiplies pixel values by 1/255 and applies the
# random augmentations listed below.
# (preprocessing_function=preprocess_input is left disabled.)
_augmentation = {
    'rotation_range': 20,
    'zoom_range': 0.2,
    'width_shift_range': 0.05,
    'height_shift_range': 0.05,
    'shear_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': "nearest",
}
datagen = ImageDataGenerator(rescale=1 / 255.0, **_augmentation)

# Validation/testing image generator: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1. / 255)

# Options shared by the three directory iterators.
_flow_options = {
    'target_size': (512, 512),
    'color_mode': 'rgb',
    'batch_size': BATCH_SIZE,
    'class_mode': 'binary',
}

# Directory iterators for training, validation, and testing; only the
# training iterator shuffles.
train_gen = datagen.flow_from_directory(directory_train, shuffle=True, **_flow_options)
val_gen = test_datagen.flow_from_directory(directory_val, shuffle=False, **_flow_options)
test_gen = test_datagen.flow_from_directory(directory_test, shuffle=False, **_flow_options)
# Pull exactly one batch from each generator.
train_images, train_labels = next(iter(train_gen))
val_images, val_labels = next(iter(val_gen))
# The test batch must come from test_gen; drawing it from val_gen would hand
# back validation data under the test names.
test_images, test_labels = next(iter(test_gen))

# Check the shape of the selected batch
print(f"Length of Train images : {len(train_images)}")
print(f"shape of Train images : {train_images.shape}")
print(f"shape of Train labels : {train_labels.shape}")
Length of Train images : 32
shape of Train images : (32, 512, 512, 3)
shape of Train labels : (32,)
# Fit on the single in-memory training batch, using the single validation
# batch as validation data.
_fit_options = {
    'use_multiprocessing': True,
    'workers': 16,
    'epochs': 100,
    'class_weight': class_weights,
    'validation_data': (val_images, val_labels),
    'shuffle': True,
    'callbacks': call_backs,
}
history = model.fit(train_images, train_labels, **_fit_options)
I am trying to apply k-fold cross-validation to a CNN classification problem.
Let's say I have two classes, carA and carB,
so I made the subfolders
car/trainCross/fold0 car/trainCross/fold1
car/validCross/fold0 car/validCross/fold1
and wrote the following code:
model_path = '../carPrediction/model/' + 'saved.hdf5'

# One training run per fold directory (fold0, fold1).
# NOTE(review): every fold loads from and checkpoints to the same model_path,
# so fold i+1 starts from weights already trained on fold i. The folds are
# therefore not independent runs -- confirm this is intended.
for i in range(2):
    print('training->', i, ' split')
    train_generator = train_datagen.flow_from_directory(
        TRAIN_CROPPED_PATH + 'fold' + str(i),
        target_size=(image_size, image_size),
        batch_size=batch_size,
        class_mode='categorical',
        seed=2019,
        color_mode='rgb')

    print(VALID_CROPPED_PATH + 'fold' + str(i))
    validation_generator = valid_datagen.flow_from_directory(
        VALID_CROPPED_PATH + 'fold' + str(i),
        target_size=(image_size, image_size),
        batch_size=batch_size,
        class_mode='categorical',
        seed=2019,
        color_mode='rgb'
    )

    # Unlabelled test iterator (class_mode=None), kept in file order.
    test_generator = test_datagen.flow_from_dataframe(
        dataframe=df_test,
        directory=TEST_CROPPED_PATH,
        x_col='img_file',
        y_col=None,
        target_size=(image_size, image_size),
        color_mode='rgb',
        class_mode=None,
        batch_size=batch_size,
        shuffle=False
    )

    # Best effort: resume from the checkpoint when it can be loaded, otherwise
    # keep whatever `model` is already in scope. The failure is reported
    # instead of being swallowed silently.
    # NOTE(review): if no `model` was defined earlier, fit_generator below
    # fails with NameError on the first fold -- confirm.
    try:
        model = load_model(model_path, compile=True)
    except Exception as err:
        print('could not load', model_path, '->', err)

    patient = 2
    callbacks1 = [
        EarlyStopping(monitor='val_loss', patience=patient, mode='min', verbose=1),
        # Integer division keeps the patience an int (the original passed 1.0).
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=patient // 2,
                          min_lr=0.00001, verbose=1, mode='min'),
        ModelCheckpoint(filepath=model_path, monitor='val_loss', verbose=1,
                        save_best_only=True, mode='min'),
    ]

    history = model.fit_generator(
        train_generator,
        steps_per_epoch=get_steps(nb_train_sample, batch_size),
        epochs=2,
        validation_data=validation_generator,
        validation_steps=get_steps(nb_validation_sample, batch_size),
        verbose=1,
        callbacks=callbacks1
    )
but I am not sure whether this approach is correct.
Any thoughts?
Hello everyone, I started training a network and it got stuck; it did not finish the first epoch.
Here is the code I used:
# Paths and training configuration.
top_model_weights_path = '/data/fc_model.h5'
img_width, img_height = 150, 150
train_data_dir = '/data/train'
validation_data_dir = '/data/validation'
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 50
batch_size = 16

# Convolutional base: VGG16 with ImageNet weights and without its classifier.
model = applications.VGG16(weights='imagenet', include_top=False, input_shape=(150, 150, 3))
print('Model loaded.')

# Small classifier stacked on the base; its weights are loaded from disk.
top_model = Sequential()
top_model.add(Flatten(input_shape=model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(1, activation='sigmoid'))
top_model.load_weights(top_model_weights_path)

model = Model(inputs=model.input, outputs=top_model(model.output))

# Freeze the first layers so that they are not updated during fine-tuning.
# NOTE(review): a slice end beyond len(model.layers) freezes every layer --
# confirm that 25 is the intended cut-off for this model.
for layer in model.layers[:25]:
    layer.trainable = False

model.compile(loss='binary_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])

train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary')

# steps_per_epoch / validation_steps count batches, not samples. Passing the
# raw sample counts made each epoch 16x longer than intended, which looks
# like training being stuck in the first epoch.
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size)
I am using Transfer Learning. I followed this tutorial online :
Tutorial
Please help thank you.
I have used the data augmentation generators below to save memory.
Total number of test images = 400
Batch size = 128
When I check the accuracy on the test set using model.evaluate_generator, it differs from the final validation accuracy of the last epoch of my model.
Furthermore, the output of model.evaluate_generator changes when I repeat the call.
Below is my code.
Please help!
# Both generators only rescale pixel values by 1/255.
train_datagen = ImageDataGenerator(
    rescale=1./255,)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    color_mode="grayscale",
    target_size=(img_width, img_height),
    batch_size=128,
    class_mode='categorical',)
# shuffle=False: evaluation data is served in a fixed order, so repeated
# evaluations see the same samples.
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    color_mode="grayscale",
    target_size=(img_width, img_height),
    batch_size=128,
    class_mode='categorical',
    shuffle=False)
#%%
hist = model.fit_generator(
    train_generator,
    samples_per_epoch=nb_train_samples,
    nb_epoch=nb_epoch,
    validation_data=validation_generator,
    nb_val_samples=nb_validation_samples)

# Restart the iterator from its first batch; fit_generator has already
# advanced it, so evaluation would otherwise start mid-pass.
validation_generator.reset()
# NOTE(review): 400 is a sample count under the Keras 1 API used here. Under
# Keras 2 the second argument is a number of batches and should be
# ceil(400 / 128) instead -- confirm the installed version.
scoreSeg = model.evaluate_generator(validation_generator, 400)
print("Accuracy = ",scoreSeg[1])