How do I reduce memory usage for deep reinforcement learning algorithms? - python-3.x

I wrote a DQN script to play BreakoutDeterministic and ran it on my school's GPU server. However, the code seems to be taking up 97% of the total RAM (more than 100GB)!
I would like to know which part of the script demands such high RAM usage. I ran memory-profiler for 3 episodes on my laptop, and the memory requirement appears to grow linearly with each time step.
I wrote the script in PyCharm with Python 3.6. My laptop has 12GB of RAM and no GPU, while the school server runs Ubuntu with a P100 GPU.
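For reference, this is roughly how I hooked memory-profiler into the script (a sketch; the decorator and the mprof commands are memory-profiler's standard usage, and the script name is just a placeholder):

# pip install memory_profiler
from memory_profiler import profile

@profile  # prints a line-by-line memory report for the decorated function
def main(episodes):
    ...  # training loop as in the full script below

# For a memory-vs-time curve over the whole run:
#   mprof run dqn_breakout.py
#   mprof plot

The full script: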
import gym
import numpy as np
import random
from collections import deque
from keras.layers import Dense, Input, Lambda, convolutional, core
from keras.models import Model
from keras.optimizers import Adam
import matplotlib.pyplot as plt
import os
import time as dt
plt.switch_backend('agg')
def preprocess(state):
    process_state = np.mean(state, axis=2).astype(np.uint8)
    process_state = process_state[::2, ::2]
    process_state_size = list(process_state.shape)
    process_state_size.append(1)
    process_state = np.reshape(process_state, process_state_size)
    return process_state
class DQNAgent:
    def __init__(self, env):
        self.env = env
        self.action_size = env.action_space.n
        self.state_size = self.select_state_size()
        self.memory = deque(maxlen=1000000)  # specify memory size
        self.gamma = 0.99
        self.eps = 1.0
        self.eps_min = 0.01
        self.decay = 0.95
        self.lr = 0.00025
        self.start_life = 5  # get from environment
        self.tau = 0.125  # special since 2 models to be trained
        self.model = self.create_cnnmodel()
        self.target_model = self.create_cnnmodel()

    def select_state_size(self):
        process_state = preprocess(self.env.reset())
        state_size = process_state.shape
        return state_size

    def create_cnnmodel(self):
        data_input = Input(shape=self.state_size, name='data_input', dtype='int32')
        normalized = Lambda(lambda x: x/255)(data_input)
        conv1 = convolutional.Convolution2D(32, 8, strides=(4, 4), activation='relu')(normalized)
        conv2 = convolutional.Convolution2D(64, 4, strides=(2, 2), activation='relu')(conv1)
        conv3 = convolutional.Convolution2D(64, 3, strides=(1, 1), activation='relu')(conv2)
        conv_flatten = core.Flatten()(conv3)  # flatten conv output to feed the fully connected layers
        h4 = Dense(512, activation='relu')(conv_flatten)
        prediction_output = Dense(self.action_size, name='prediction_output', activation='linear')(h4)
        model = Model(inputs=data_input, outputs=prediction_output)
        model.compile(optimizer=Adam(lr=self.lr),
                      loss='mean_squared_error')
        return model
    def remember(self, state, action, reward, new_state, done):  # store past experience as a pre-defined table
        self.memory.append([state, action, reward, new_state, done])

    def replay(self, batch_size):
        if batch_size > len(self.memory):
            return
        all_states = []
        all_targets = []
        samples = random.sample(self.memory, batch_size)
        for sample in samples:
            state, action, reward, new_state, done = sample
            target = self.target_model.predict(state)
            if done:
                target[0][action] = reward
            else:
                target[0][action] = reward + self.gamma*np.max(self.target_model.predict(new_state)[0])
            all_states.append(state)
            all_targets.append(target)
        history = self.model.fit(np.vstack(all_states), np.vstack(all_targets), epochs=1, verbose=0)
        return history

    def act(self, state):
        self.eps *= self.decay
        self.eps = max(self.eps_min, self.eps)
        if np.random.random() < self.eps:
            return self.env.action_space.sample()
        return np.argmax(self.model.predict(state)[0])

    def train_target(self):
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = (1-self.tau)*target_weights[i] + self.tau*weights[i]
        self.target_model.set_weights(target_weights)
def main(episodes):
    env = gym.make('BreakoutDeterministic-v4')
    agent = DQNAgent(env)
    time = env._max_episode_steps
    batch_size = 32
    save_model = 'y'
    rend_env = 'n'  # whether to render the environment
    filepath = os.getcwd()
    date = dt.strftime('%d%m%Y')
    clock = dt.strftime('%H.%M.%S')
    print('++ Training started on {} at {} ++'.format(date, clock))
    start_time = dt.time()
    tot_r = []
    tot_loss = []
    it_r = []
    it_loss = []
    tot_frames = 0
    for e in range(episodes):
        r = []
        loss = []
        state = env.reset()
        state = preprocess(state)
        state = state[None, :]
        current_life = agent.start_life
        for t in range(time):
            if rend_env == 'y':
                env.render()
            action = agent.act(state)
            new_state, reward, terminal_life, life = env.step(action)
            new_state = preprocess(new_state)
            new_state = new_state[None, :]
            if life['ale.lives'] < current_life:
                reward = -1
                current_life = life['ale.lives']
            agent.remember(state, action, reward, new_state, terminal_life)
            hist = agent.replay(batch_size)
            agent.train_target()
            state = new_state
            r.append(reward)
            tot_frames += 1
            if hist is None:
                loss.append(0.0)
            else:
                loss.append(hist.history['loss'][0])
            if t % 20 == 0:
                print('Frame : {}, Cum Reward = {}, Avg Loss = {}, Curr Life: {}'.format(t,
                      np.sum(r),
                      round(np.mean(loss[-20:-1]), 3),
                      current_life))
                agent.model.save('{}/Mod_Fig/DQN_BO_model_{}.h5'.format(filepath, date))
                agent.model.save_weights('{}/Mod_Fig/DQN_BO_weights_{}.h5'.format(filepath, date))
            if current_life == 0 or terminal_life:
                print('Episode {} of {}, Cum Reward = {}, Avg Loss = {}'.format(e, episodes, np.sum(r), np.mean(loss)))
                break
        tot_r.append(np.sum(r))
        tot_loss.append(np.mean(loss))
        it_r.append(r)
        it_loss.append(loss)
    print('Training ended on {} at {}'.format(date, clock))
    run_time = dt.time() - start_time
    print('Total Training time: %d Hrs %d Mins %d s' % (run_time // 3600, (run_time % 3600) // 60,
          (run_time % 3600) % 60))
    if save_model == 'y':
        agent.model.save('{}/Mod_Fig/DQN_BO_finalmodel_{}_{}.h5'.format(filepath, date, clock))
        agent.model.save_weights('{}/Mod_Fig/DQN_BO_finalweights_{}_{}.h5'.format(filepath, date, clock))
    agent.model.summary()
    return tot_r, tot_loss, it_r, it_loss, tot_frames


if __name__ == '__main__':
    episodes = 3
    total_reward, total_loss, rewards_iter, loss_iter, frames_epi = main(episodes=episodes)
Would really appreciate your comments and help on writing memory- and speed-efficient deep RL code! I hope to train my DQN on Breakout for 5000 episodes, but the remote server only allows a maximum of 48 hours of training. Thanks in advance!

It sounds like you have a memory leak.
This line
agent.remember(state, action, reward, new_state, terminal_life)
gets called 5000 * env._max_episode_steps times, and each stored transition holds two preprocessed frame arrays (roughly (1, 105, 80, 1) uint8 each after your preprocess), plus the list overhead around them. The first thing to try would be to reduce maxlen in self.memory = deque(maxlen=1000000) to verify that the buffer is the sole cause.
If you really believe you need that much capacity, you should dump self.memory to disk and keep only a small subsample in memory.
Additionally, sampling from a deque is very slow: deque is implemented as a linked list, so drawing M random elements out of N is roughly O(N*M). You should consider implementing your own ring buffer for self.memory, as sketched below.
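A minimal sketch of such a ring buffer (my own illustration, not drop-in code for the script above):

import random

class RingBuffer:
    """Fixed-capacity replay buffer: O(1) append, O(k) sampling (with replacement, for simplicity)."""
    def __init__(self, capacity):
        self.capacity = capacity
        self.data = [None] * capacity
        self.index = 0   # next slot to overwrite
        self.size = 0    # number of filled slots

    def append(self, experience):
        self.data[self.index] = experience
        self.index = (self.index + 1) % self.capacity
        self.size = min(self.size + 1, self.capacity)

    def sample(self, batch_size):
        return [self.data[random.randrange(self.size)] for _ in range(batch_size)]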
Alternatively, you might consider a probabilistic buffer (I don't know the proper name; it is similar in spirit to reservoir sampling): each time you would append to a full buffer, you instead remove an element at random and append the new one. This means any (state, action, reward, ...) tuple that is encountered has a nonzero probability of still being in the buffer, with recent tuples more likely than older ones. A sketch follows below.
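A sketch of that idea (essentially random replacement once the buffer is full; treat it as an illustration rather than a tuned solution):

import random

class RandomReplacementBuffer:
    """When full, overwrite a uniformly random slot with the new experience."""
    def __init__(self, capacity):
        self.capacity = capacity
        self.data = []

    def append(self, experience):
        if len(self.data) < self.capacity:
            self.data.append(experience)
        else:
            # older items have had more chances to be evicted, so recent ones are more likely to survive
            self.data[random.randrange(self.capacity)] = experience

    def sample(self, batch_size):
        return random.sample(self.data, batch_size)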

I had similar problems with memory and I still do.
The main cause of the large memory consumption is the states. Here is what I did to make it better:
Step 1: Resize the frames to 84 x 84 using OpenCV (some people crop or downsample instead). Each state then has shape (84, 84, 3).
Step 2: Convert these frames to grayscale (basically, black and white). This changes the shape to (84, 84, 1).
Step 3: Use dtype=np.uint8 for storing states. It consumes minimal memory and is a perfect fit for pixel intensities in the 0-255 range.
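Putting the three steps together, here is a minimal sketch (the exact interpolation and whether you crop the score area first are up to you; the function name is mine):

import cv2
import numpy as np

def preprocess_frame(frame):
    # Step 1: resize the raw (210, 160, 3) Atari frame to 84 x 84
    resized = cv2.resize(frame, (84, 84), interpolation=cv2.INTER_AREA)
    # Step 2: convert to grayscale -> shape (84, 84)
    gray = cv2.cvtColor(resized, cv2.COLOR_RGB2GRAY)
    # Step 3: keep uint8 pixels and an explicit channel axis -> (84, 84, 1)
    return gray.astype(np.uint8)[..., None]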
Additional Info
I run my code on free Google Colab notebooks (a Tesla K80 GPU and 13GB of RAM), periodically saving the replay buffer to my Drive.
For steps 1 and 2, consider using the OpenAI Baselines Atari wrappers; there is no point in reinventing the wheel.
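If I remember correctly, the wrappers live in baselines.common.atari_wrappers; treat the exact import path and arguments as an assumption on my part:

from baselines.common.atari_wrappers import make_atari, wrap_deepmind

env = make_atari('BreakoutNoFrameskip-v4')
# wrap_deepmind handles grayscale, 84x84 resizing, frame skipping/stacking, reward clipping, etc.
env = wrap_deepmind(env, frame_stack=True, scale=False)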
You could also use this snippet to check the amount of RAM used by your own program at each step, like I did:
import os
import psutil

def show_RAM_usage():
    py = psutil.Process(os.getpid())
    print('RAM usage: {} GB'.format(py.memory_info()[0] / 2. ** 30))
This snippet is adapted from the original answer to fit my own program.

Related

How to run one (pre-trained lstm) model on CPU and second (pose estimation) model on GPU simultaneously using multithreading?

My problem description might be long, but I will try to make my case and the error clear to you.
**The definition of what I am doing:**
I am implementing socket transmission in Python. On the client side, object detection is performed, and the detected number of people plus the detected frames are sent to the server side. The server side consists of multiple threaded classes that handle the data from the client and perform pose estimation, while monitoring and logging GPU utilization. When I run the code, everything runs and GPU memory usage is logged at about 46%, as expected. Below I provide DataManager.py (which handles the data from the client) and ChildProcess.py (which gets the frame data from a queue and performs pose estimation); they are parts of the whole code.
DataManager.py
from threading import Thread
import pickle
import struct


class DataManagerThread(Thread):
    def __init__(self, queue, sock, index):
        super().__init__()
        self.image_queue = queue
        self.server_socket = sock
        self.index = index

    def run(self):
        data = b""
        payload_size = struct.calcsize("Q")
        while True:
            while len(data) < payload_size:
                packet = self.server_socket.recv(4*1024)  # The server_socket attribute is no longer None, so this should work
                if not packet:
                    break
                data += packet
            packed_msg_size = data[:payload_size]
            data = data[payload_size:]
            msg_size = struct.unpack("Q", packed_msg_size)[0]
            while len(data) < msg_size:
                data += self.server_socket.recv(4*1024)
            frame_data = data[:msg_size]
            data = data[msg_size:]
            data_dict = pickle.loads(frame_data)
            # extract frame and detection information from data dictionary
            img = data_dict['frame']
            people = data_dict['people']
            print(f'Detected number of people: {people}')
            # TODO: Passing data to the process manager thread as a queue
            self.put_data_to_queue(img)
        else:
            print("[System] end socket")
            self.put_data_to_queue("End")

    def put_data_to_queue(self, image):
        self.image_queue.put(image)
ChildProcess.py
import warnings
warnings.filterwarnings(action="ignore")
from multiprocessing import Process
from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path, model_wh
import argparse
import cv2
import logging
import time


def str2bool(v):
    return v.lower() in ("yes", "true", "t", "1")


def init_logger():
    logger = logging.getLogger('TfPoseEstimator-WebCam')
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
    ch.setFormatter(formatter)
    logger.addHandler(ch)


class ChildProcess(Process):
    def __init__(self, queue):
        self.start_time = time.time()
        super().__init__()
        self.image_queue = queue
        self.start_time = time.time()

    def __del__(self):
        pass

    def run(self):
        args, w, h, e = self.init_model()
        print("[Time]", time.time() - self.start_time)
        while True:
            print("[System] Run motion")
            image = self.image_queue.get()
            if type(image) is str:
                print("[System end process]")
                break
            else:
                self.motionTracking(args, e, w, h, image)

    def motionTracking(self, args, e, w, h, decimg):
        humans = e.inference(decimg, resize_to_default=(w > 0 and h > 0),
                             upsample_size=args.resize_out_ratio)
        y1 = [0.0]
        y = 0
        image = TfPoseEstimator.draw_humans(decimg, humans, imgcopy=False)
        for human in humans:
            for i in range(len(humans)):
                try:
                    a = human.body_parts[0]
                    x = a.x * image.shape[1]
                    y = a.y * image.shape[0]
                    y1.append(y)
                except:
                    pass
                if ((y - y1[len(y1) - 2]) > 30):
                    pass
        cv2.imshow('tf-pose-estimation result', image)
        _ = 0xFF & cv2.waitKey(1)

    def init_model(self):
        print("[System] model init")
        parser = argparse.ArgumentParser(description='tf-pose-estimation realtime webcam')
        parser.add_argument('--camera', type=int, default=0)
        parser.add_argument('--resize', type=str, default='0x0',
                            help='if provided, resize images before they are processed. default=0x0, Recommends : 432x368 or 656x368 or 1312x736 ')
        parser.add_argument('--resize-out-ratio', type=float, default=4.0,
                            help='if provided, resize heatmaps before they are post-processed. default=1.0')
        parser.add_argument('--model', type=str, default='mobilenet_thin',
                            help='cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small')
        parser.add_argument('--show-process', type=bool, default=False,
                            help='for debug purpose, if enabled, speed for inference is dropped.')
        parser.add_argument('--tensorrt', type=str, default="False",
                            help='for tensorrt process.')
        args = parser.parse_args()
        print('[System] initialization %s : %s' % (args.model, get_graph_path(args.model)))
        w, h = model_wh(args.resize)
        if w > 0 and h > 0:
            e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h), trt_bool=str2bool(args.tensorrt))
        else:
            e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368), trt_bool=str2bool(args.tensorrt))
        print("[System] End model")
        return args, w, h, e
**The definition of what I want to do and what error I am getting:**
Here, I want to add my LSTM code to predict people (people = data_dict['people']) in the DataManager.py file.
DataManager(added_lstm).py
# import some necessary libraries
config = tf.compat.v1.ConfigProto()
graph = tf.compat.v1.get_default_graph()
first_session = tf.compat.v1.Session(config=config)

with graph.as_default(), first_session.as_default():
    with graph.as_default():
        with tf.device('CPU:0'):
            model = tf.keras.models.load_model('/home/tf-pose-estimation/modules/lstm_model/model1.h5', compile=False)
            print(model.summary())


def make_prediction(m):
    WINDOW_SIZE, alpha, theta = 5, 0.9, 3
    forecast_ewma, forecast_values, theta_values, arr_of_num = [0], [], [], [1, 1, 1, 1, 1]
    arr_of_num.append(m)
    if len(arr_of_num) > WINDOW_SIZE:
        arr_of_num = arr_of_num[1:]
    if len(arr_of_num) == WINDOW_SIZE:
        actual = arr_of_num[-1]
        with graph.as_default(), first_session.as_default():
            forecast = model.predict(np.array(arr_of_num[-WINDOW_SIZE:]).reshape(1, WINDOW_SIZE, 1))[0][0]
        forecast_values.append(forecast)
        a = alpha * forecast + (1 - alpha) * forecast_ewma[-1]
        theta += 1 if a > 0.5 else -1
        theta = min(max(theta, 0), 2)
        theta_values.append(theta)
        forecast_ewma.append(a)
    return actual, forecast
class DataManagerThread(Thread):
    def __init__(self, queue, sock, index):
        super().__init__()
        self.image_queue = queue
        self.server_socket = sock
        self.index = index

    def run(self):
        data = b""
        payload_size = struct.calcsize("Q")
        while True:
            while len(data) < payload_size:
                packet = self.server_socket.recv(4*1024)  # The server_socket attribute is no longer None, so this should work
                if not packet:
                    break
                data += packet
            packed_msg_size = data[:payload_size]
            data = data[payload_size:]
            msg_size = struct.unpack("Q", packed_msg_size)[0]
            while len(data) < msg_size:
                data += self.server_socket.recv(4*1024)
            frame_data = data[:msg_size]
            data = data[msg_size:]
            data_dict = pickle.loads(frame_data)
            # extract frame and detection information from data dictionary
            img = data_dict['frame']
            people = data_dict['people']
            print(f'Detected number of people: {people}')
            self.put_data_to_queue(img)
            pred = make_prediction(people)  # Added for LSTM prediction
            print(f"Predictions: {pred}")

    def put_data_to_queue(self, image):
        self.image_queue.put(image)
Here, I only modified the DataManager.py file: loading the LSTM model, defining the make_prediction function, and calling pred = make_prediction(people) inside the DataManagerThread class to make the prediction. The rest of the file is unchanged.
When I run both models simultaneously, only the LSTM is predicting and the pose estimation is simply frozen. Also, even though the LSTM model is forced onto the CPU, about 84% of GPU memory is occupied; why, I do not know. My expectation is that the LSTM model should use the CPU, the pose estimation model should use the GPU, and both should run simultaneously.
When I run each model separately (i.e., the LSTM on CPU and the pose estimation on GPU), they work well. Specifically, I tested the LSTM model separately by feeding it random numbers, and it worked as expected. The LSTM model was trained in TensorFlow 2.5, and both models run in TensorFlow 2.5.
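For completeness, one thing I am aware of is that TensorFlow pre-allocates most of the GPU memory by default; a minimal sketch of turning that off (my assumption about what is relevant here, not something I have confirmed fixes the freeze):

import tensorflow as tf

# Allocate GPU memory on demand instead of reserving almost all of it up front.
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)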
Below are my PC and environment specifications:
GPU: NVIDIA GeForce RTX 2070 SUPER
Driver Version: 525
CUDA Version: 11.6
Python: 3.9.12
Tensorflow-gpu: 2.5.0
Is there a possible or relevant solution for running the LSTM model on the CPU and the pose estimation model on the GPU simultaneously using multithreading?
Any help appreciated!!!

Solving MountainCar-v0 by predicting future velocity? Why does this work?

I solved the MountainCar-v0 OpenAI Gym challenge by predicting future velocity based on the current action, using the absolute value of velocity as the reward function.
My question is: why does this work?
My hypothesis is that this works similarly to Q-learning, where we are building a map/table of all actions and their results, and the action picker simply chooses the highest predicted velocity. This works since velocity is linear, so picking the highest future velocity will very likely help you reach the next highest velocity.
Here is the code. It should take less than 3 minutes to reach its goal.
import numpy as np
import gym
from collections import deque, namedtuple
import random
import torch
import torch.nn as nn
import torch.optim as optim

EPSILON_DECAY = 0.99975
MIN_EPSILON = 0.001

env = gym.make("MountainCar-v0")


class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(2, 128)
        self.relu1 = nn.ReLU()
        self.layer2 = nn.Linear(128, 3)

    def forward(self, x):
        l1 = self.relu1(self.layer1(x))
        output = self.layer2(l1)
        return output


model = Model()
target = Model()
target.load_state_dict(model.state_dict())

mse = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

actionPoint = namedtuple("actionPoint", ["currentState", "action", "reward", "observation", "done"])
actionMap = deque()

epsilon = 1
for epoch in range(100000):
    observation, info = env.reset(return_info=True)
    currentState = observation
    totalReward = 0
    maxSpeed = 0
    done = False
    while not done:
        if np.random.random() > epsilon:
            action = model(torch.from_numpy(currentState))
            action = torch.argmax(action).item()
        else:
            action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        totalReward += reward
        maxSpeed = max(maxSpeed, abs(observation[1]))
        if done and totalReward != -200:
            print("reached goal", totalReward)
        actionMap.append(actionPoint(currentState, action, reward, observation, done))
        currentState = observation
        if len(actionMap) > 128:
            samples = random.sample(actionMap, 128)
            sampleReward = torch.FloatTensor([abs(s.observation[1]) for s in samples])
            sampleCurrentState = torch.from_numpy(np.array([s.currentState for s in samples]))
            futurePrediction = target(sampleCurrentState)
            for i, s in enumerate(samples):
                futurePrediction[i][s.action] = sampleReward[i]
            currentPrediction = model(sampleCurrentState)
            optimizer.zero_grad()
            loss = mse(currentPrediction, futurePrediction.detach())
            loss.backward()
            optimizer.step()
    print(maxSpeed)
    target.load_state_dict(model.state_dict())
    if epsilon > MIN_EPSILON:
        epsilon *= EPSILON_DECAY
        epsilon = max(epsilon, MIN_EPSILON)
env.close()

Evaluating Parzen window log-likelihood for GAN

I am trying to reimplement the original GAN paper by Ian Goodfellow et al., and I need to show that my implementation achieves the same or similar results as the authors. But I am not sure how to evaluate this metric. I took a look at their implementation, but I get some odd results: in the paper they report 225 +- 2 on MNIST for this metric, while the results I get are below -400000000. I thought that maybe the model is bad, but it generates really good images of MNIST digits.
Can someone tell me what I am doing wrong?
Below is the code I used; I copied part of it from the official implementation.
Note: the valid variable holds images taken from the MNIST dataset.
def get_nll(x, parzen, batch_size=10):
    """
    Credit: Yann N. Dauphin
    """
    inds = range(x.shape[0])
    n_batches = int(numpy.ceil(float(len(inds)) / batch_size))
    print("N batches:", n_batches)
    times = []
    nlls = []
    for i in range(n_batches):
        begin = time.time()
        nll = parzen(x[inds[i::n_batches]])
        end = time.time()
        times.append(end - begin)
        nlls.extend(nll)
        if i % 10 == 0:
            print(i, numpy.mean(times), numpy.mean(nlls))
    return numpy.array(nlls)


def log_mean_exp(a):
    """
    Credit: Yann N. Dauphin
    """
    max_ = a.max(1)
    return max_ + T.log(T.exp(a - max_.dimshuffle(0, 'x')).mean(1))


def cross_validate_sigma(samples, data, sigmas, batch_size):
    lls = []
    for sigma in sigmas:
        print("Sigma:", sigma)
        parzen = theano_parzen(samples, sigma)
        tmp = get_nll(data, parzen, batch_size=batch_size)
        lls.append(numpy.asarray(tmp).mean())
        del parzen
        gc.collect()
    ind = numpy.argmax(lls)
    print(max(lls))
    return sigmas[ind]


noise = torch.randn((10000, 100), device=device)
gen_model.eval()
gan_out = gen_model(noise)
sigma_range = numpy.logspace(-1., 0., num=10)
sigma = cross_validate_sigma(gan_out.reshape(10000, -1), valid[0:10000], sigma_range, 100)

Input shape in Keras

I am creating a deep neural network with Keras, using images from OpenAI's Gym library.
I tried to reshape the images with the following code:
def reshape_dimensions(observation):
    processed = np.mean(observation, 2, keepdims=False)
    cropped = processed[35:195]
    result = cropped[::2, ::2]
    return result
This gives me an image of shape (80, 80), but every time I try to use that shape as the input to the first layer of the Keras network, it doesn't work.
What shape should I use so I can further develop the network?
The whole code is attached:
PART I retrieves the training data
import gym
import random
import numpy as np
from statistics import mean, median
from collections import Counter

### GAME VARIABLE SETTINGS ###
env = gym.make('MsPacman-v0')
env.reset()
goal_steps = 2000
score_requirement = 250
initial_games = 200
print('Options to play: ', env.unwrapped.get_action_meanings())

### DEFINE FUNCTIONS ####
def reshape_dimensions(observation):
    processed = np.mean(observation, 2, keepdims=False)
    cropped = processed[35:195]
    result = cropped[::2, ::2]
    return result

def initial_population():
    training_data = []
    scores = []
    accepted_scores = []
    for _ in range(initial_games):
        score = 0
        game_memory = []
        prev_obvservation = []
        for _ in range(goal_steps):
            #env.render()
            action = env.action_space.sample()  # Take random action in the env
            observation, reward, done, info = env.step(action)
            reshape_observation = reshape_dimensions(observation)
            if len(prev_obvservation) > 0:
                game_memory.append([prev_obvservation, action])
            prev_obvservation = reshape_observation
            score = score + reward
            if done:
                break
        if score >= score_requirement:
            accepted_scores.append(score)
            for data in game_memory:
                if data[1] == 0:
                    output = [1,0,0,0,0,0,0,0,0]
                elif data[1] == 1:
                    output = [0,1,0,0,0,0,0,0,0]
                elif data[1] == 2:
                    output = [0,0,1,0,0,0,0,0,0]
                elif data[1] == 3:
                    output = [0,0,0,1,0,0,0,0,0]
                elif data[1] == 4:
                    output = [0,0,0,0,1,0,0,0,0]
                elif data[1] == 5:
                    output = [0,0,0,0,0,1,0,0,0]
                elif data[1] == 6:
                    output = [0,0,0,0,0,0,1,0,0]
                elif data[1] == 7:
                    output = [0,0,0,0,0,0,0,1,0]
                elif data[1] == 8:
                    output = [0,0,0,0,0,0,0,0,1]
                training_data.append([data[0], output])
        env.reset()
        scores.append(score)
    print('Average accepted scores:', mean(accepted_scores))
    print('Median accepted scores:', median(accepted_scores))
    print(Counter(accepted_scores))
    return training_data

### RUN CODE ###
training_data = initial_population()
np.save('data_for_training_200.npy', training_data)
PART II trains the model
import gym
import random
import numpy as np
import keras
from statistics import mean, median
from collections import Counter
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam

### LOAD DATA ###
raw_training_data = np.load("data_for_training_200.npy")
training_data = [i[0:2] for i in raw_training_data]
print(np.shape(training_data))

### DEFINE FUNCTIONS ###
def neural_network_model():
    network = Sequential()
    network.add(Dense(100, activation='relu', input_shape=(80, 80)))
    network.add(Dense(9, activation='softmax'))
    optimizer = Adam(lr=0.001)
    network.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return network

def train_model(training_data):
    X = [i[0] for i in training_data]
    y = [i[1] for i in training_data]
    #X = np.array([i[0] for i in training_data])
    #y = np.array([i[1] for i in training_data])
    print('shape of X: ', np.shape(X))
    print('shape of y: ', np.shape(y))
    early_stopping_monitor = EarlyStopping(patience=3)
    model = neural_network_model()
    model.fit(X, y, epochs=20, callbacks=[early_stopping_monitor])
    return model

train_model(training_data=training_data)
It seems like you are pre-processing the individual images correctly but putting them inside a list instead of an input tensor. From the error message, you have a list of 36859 arrays of shape (80, 80), while you want a single array of shape (36859, 80, 80). You already have the code that does this commented out, X = np.array([i[0] for i in training_data]); you just have to ensure that every i[0] has the same shape (80, 80) for it to work.
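A minimal sketch of that fix (the Flatten layer is my addition, since Dense expects a flat feature vector per sample; shapes assume the (80, 80) frames from the question):

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten

X = np.array([i[0] for i in training_data])   # -> shape (num_samples, 80, 80)
y = np.array([i[1] for i in training_data])   # -> shape (num_samples, 9)

network = Sequential()
network.add(Flatten(input_shape=(80, 80)))    # (80, 80) -> 6400 features per sample
network.add(Dense(100, activation='relu'))
network.add(Dense(9, activation='softmax'))
network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
network.fit(X, y, epochs=20)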

Incremental PCA

I've never used the incremental PCA that exists in sklearn, and I'm a bit confused about its parameters; I wasn't able to find a good explanation of them.
I see that there is a batch_size argument in the constructor, but also, when using the partial_fit method, you can again pass only a part of your data. I've found the following approach:
n = df.shape[0]
chunk_size = 100000
iterations = n // chunk_size

ipca = IncrementalPCA(n_components=40, batch_size=1000)

for i in range(0, iterations):
    ipca.partial_fit(df[i*chunk_size : (i+1)*chunk_size].values)

ipca.partial_fit(df[iterations*chunk_size : n].values)
Now, what I don't understand is the following: when using partial_fit, does batch_size play any role at all, and if so, how are the two related?
Moreover, if both are relevant, how should I change their values to increase precision at the cost of a larger memory footprint (and the other way around, to decrease memory consumption at the price of decreased accuracy)?
The docs say:
batch_size : int or None, (default=None)
The number of samples to use for each batch. Only used when calling fit...
This parameter is not used within partial_fit, where the batch size is controlled by the user.
Bigger batches increase memory consumption, smaller ones decrease it.
This is also written in the docs:
This algorithm has constant memory complexity, on the order of batch_size, enabling use of np.memmap files without loading the entire file into memory.
Despite some checks and parameter-heuristics, the whole fit-function looks like this:
for batch in gen_batches(n_samples, self.batch_size_):
    self.partial_fit(X[batch], check_input=False)
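A small sketch of what that enables (the file name and shapes are made up for illustration):

import numpy as np
from sklearn.decomposition import IncrementalPCA

# Memory-map a large on-disk array; fit() streams it in chunks of batch_size rows,
# so peak memory stays on the order of one batch.
X = np.memmap('big_matrix.dat', dtype=np.float32, mode='r', shape=(1000000, 200))
ipca = IncrementalPCA(n_components=40, batch_size=1000)
ipca.fit(X)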
Here is some incremental PCA code based on https://github.com/kevinhughes27/pyIPCA, which is an implementation of the CCIPCA method.
import scipy.sparse as sp
import numpy as np
from scipy import linalg as la
import scipy.sparse as sps
from sklearn import datasets


class CCIPCA:
    def __init__(self, n_components, n_features, amnesic=2.0, copy=True):
        self.n_components = n_components
        self.n_features = n_features
        self.copy = copy
        self.amnesic = amnesic
        self.iteration = 0
        self.mean_ = None
        self.components_ = None
        self.mean_ = np.zeros([self.n_features], np.float)
        self.components_ = np.ones((self.n_components, self.n_features)) / \
                           (self.n_features * self.n_components)

    def partial_fit(self, u):
        n = float(self.iteration)
        V = self.components_

        # amnesic learning params
        if n <= int(self.amnesic):
            w1 = float(n+2-1)/float(n+2)
            w2 = float(1)/float(n+2)
        else:
            w1 = float(n+2-self.amnesic)/float(n+2)
            w2 = float(1+self.amnesic)/float(n+2)

        # update mean
        self.mean_ = w1*self.mean_ + w2*u

        # mean center u
        u = u - self.mean_

        # update components
        for j in range(0, self.n_components):
            if j > n:
                pass
            elif j == n:
                V[j, :] = u
            else:
                # update the components
                V[j, :] = w1*V[j, :] + w2*np.dot(u, V[j, :])*u / la.norm(V[j, :])
                normedV = V[j, :] / la.norm(V[j, :])
                normedV = normedV.reshape((self.n_features, 1))
                u = u - np.dot(np.dot(u, normedV), normedV.T)

        self.iteration += 1
        self.components_ = V / la.norm(V)
        return

    def post_process(self):
        self.explained_variance_ratio_ = np.sqrt(np.sum(self.components_**2, axis=1))
        idx = np.argsort(-self.explained_variance_ratio_)
        self.explained_variance_ratio_ = self.explained_variance_ratio_[idx]
        self.components_ = self.components_[idx, :]
        self.explained_variance_ratio_ = (self.explained_variance_ratio_ /
                                          self.explained_variance_ratio_.sum())
        for r in range(0, self.components_.shape[0]):
            d = np.sqrt(np.dot(self.components_[r, :], self.components_[r, :]))
            self.components_[r, :] /= d
You can test it with
import pandas as pd
import numpy as np
import ccipca

df = pd.read_csv('iris.csv')
df = np.array(df)[:, :4].astype(float)
pca = ccipca.CCIPCA(n_components=2, n_features=4)
S = 10
print(df[0, :])
for i in range(150):
    pca.partial_fit(df[i, :])
pca.post_process()
The resulting eigenvectors / eigenvalues will not be exactly the same as with batch PCA. The results are approximate, but they are useful.
