python - tf.write_file does not work in tensorflow - python-3.x

I have code that trains an object-coordinate model for object detection.
I use a CNN whose output layer is a regression layer (called bound_box_output) that returns (x0, y0, height, width) for an object in an image. After this layer I try to save the cropped image directly, before the loss step.
i = 0
image_decoded = tf.image.decode_jpeg(tf.read_file('3.jpg'), channels=3)
cropped = tf.image.crop_to_bounding_box(
    image=image_decoded,
    offset_height=tf.cast(bound_box_output[i, 0], tf.int32),
    offset_width=tf.cast(bound_box_output[i, 1], tf.int32),
    target_height=tf.cast(bound_box_output[i, 2], tf.int32),
    target_width=tf.cast(bound_box_output[i, 3], tf.int32))
enc = tf.image.encode_jpeg(cropped)
fname = tf.constant('4.jpeg')
fwrite = tf.write_file(fname, enc)
and in a tf.train.SessionRunHook I run it:
def begin(self):
    self._step = -1
    self._start_time = time.time()

def before_run(self, run_context):
    self._step += 1
    return tf.train.SessionRunArgs(loss)

def after_run(self, run_context, run_values):
    if self._step % LOG_FREQUENCY == 0:
        current_time = time.time()
        duration = current_time - self._start_time
        self._start_time = current_time
        loss_value = run_values.results
        examples_per_sec = LOG_FREQUENCY * BATCH_SIZE / duration
        sec_per_batch = float(duration / LOG_FREQUENCY)
        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print(format_str % (datetime.now(), self._step, loss_value,
                            examples_per_sec, sec_per_batch))
    if self._step == MAX_STEPS - 1:
        loss_value = run_values.results
        print("The final value of loss is:: ")
        print(loss_value)
        print(fwrite)
        tf.train.SessionRunArgs(fwrite)
The problem is that it does not save the '4.jpeg' image to the specified folder.
Note: I use TensorFlow 1.1.3 and Python 3.5.

TLDR; Substitute tf.train.SessionRunArgs(fwrite) with run_context.session.run(fwrite).
SessionRunArgs doesn't actually run the supplied operation. SessionRunArgs are returned from the before_run() call. Their role is to add arguments to the next session.run() call.
if self._step == MAX_STEPS - 1:
    loss_value = run_values.results
    print("The final value of loss is:: ")
    print(loss_value)
    print(fwrite)
    tf.train.SessionRunArgs(fwrite)  # problematic line
You are attempting to run the fwrite operation at the end of after_run(); however, that line merely instantiates a SessionRunArgs object.
One option to achieve the desired behavior is to take advantage of the run_context argument supplied to after_run(). run_context is of type SessionRunContext, the type that contains a session reference.
run_context.session.run(fwrite) should do the trick for you.
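For reference, a minimal sketch of how the end of after_run() could look with this fix (names such as fwrite, MAX_STEPS, and run_values are taken from the question's code; the logging part is omitted):

def after_run(self, run_context, run_values):
    # ... logging code from the question stays unchanged ...
    if self._step == MAX_STEPS - 1:
        loss_value = run_values.results
        print("The final value of loss is::")
        print(loss_value)
        # Actually execute the write-file op on the hook's session,
        # instead of only constructing a SessionRunArgs object.
        run_context.session.run(fwrite)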

Related

How to run one (pre-trained lstm) model on CPU and second (pose estimation) model on GPU simultaneously using multithreading?

My problem definition might be long and boring, but I am trying to make my case and the error clear to you.
The definition of what I am doing:
I am implementing socket transmission in Python. On the client side, object detection is performed, and the number of detected people and the detected frames are sent to the server side. The server side consists of multiple threaded classes that handle the data from the client and perform pose estimation, while monitoring and logging GPU utilization. When I run the code, everything runs and GPU memory usage is logged at about 46%, as expected. Below I provide DataManager.py (which handles data from the client) and ChildProcess.py (which gets frame data from the queue and performs pose estimation); these are parts of the whole code.
DataManager.py
class DataManagerThread(Thread):
    def __init__(self, queue, sock, index):
        super().__init__()
        self.image_queue = queue
        self.server_socket = sock
        self.index = index

    def run(self):
        data = b""
        payload_size = struct.calcsize("Q")
        while True:
            while len(data) < payload_size:
                packet = self.server_socket.recv(4*1024)  # The server_socket attribute is no longer None, so this should work
                if not packet:
                    break
                data += packet
            packed_msg_size = data[:payload_size]
            data = data[payload_size:]
            msg_size = struct.unpack("Q", packed_msg_size)[0]
            while len(data) < msg_size:
                data += self.server_socket.recv(4*1024)
            frame_data = data[:msg_size]
            data = data[msg_size:]
            data_dict = pickle.loads(frame_data)
            # extract frame and detection information from data dictionary
            img = data_dict['frame']
            people = data_dict['people']
            print(f'Detected number of people: {people}')
            # TODO: Passing data to the process manager thread as a queue
            self.put_data_to_queue(img)
        else:
            print("[System] end socket")
            self.put_data_to_queue("End")

    def put_data_to_queue(self, image):
        self.image_queue.put(image)
ChildProcess.py
import warnings
warnings.filterwarnings(action="ignore")

from multiprocessing import Process
from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path, model_wh
import argparse
import cv2
import logging
import time


def str2bool(v):
    return v.lower() in ("yes", "true", "t", "1")


def init_logger():
    logger = logging.getLogger('TfPoseEstimator-WebCam')
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
    ch.setFormatter(formatter)
    logger.addHandler(ch)


class ChildProcess(Process):
    def __init__(self, queue):
        self.start_time = time.time()
        super().__init__()
        self.image_queue = queue
        self.start_time = time.time()

    def __del__(self):
        pass

    def run(self):
        args, w, h, e = self.init_model()
        print("[Time]", time.time() - self.start_time)
        while True:
            print("[System] Run motion")
            image = self.image_queue.get()
            if type(image) is str:
                print("[System end process]")
                break
            else:
                self.motionTracking(args, e, w, h, image)

    def motionTracking(self, args, e, w, h, decimg):
        humans = e.inference(decimg, resize_to_default=(w > 0 and h > 0),
                             upsample_size=args.resize_out_ratio)
        y1 = [0.0]
        y = 0
        image = TfPoseEstimator.draw_humans(decimg, humans, imgcopy=False)
        for human in humans:
            for i in range(len(humans)):
                try:
                    a = human.body_parts[0]
                    x = a.x * image.shape[1]
                    y = a.y * image.shape[0]
                    y1.append(y)
                except:
                    pass
            if ((y - y1[len(y1) - 2]) > 30):
                pass
        cv2.imshow('tf-pose-estimation result', image)
        _ = 0xFF & cv2.waitKey(1)

    def init_model(self):
        print("[System] model init")
        parser = argparse.ArgumentParser(description='tf-pose-estimation realtime webcam')
        parser.add_argument('--camera', type=int, default=0)
        parser.add_argument('--resize', type=str, default='0x0',
                            help='if provided, resize images before they are processed. default=0x0, Recommends : 432x368 or 656x368 or 1312x736 ')
        parser.add_argument('--resize-out-ratio', type=float, default=4.0,
                            help='if provided, resize heatmaps before they are post-processed. default=1.0')
        parser.add_argument('--model', type=str, default='mobilenet_thin',
                            help='cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small')
        parser.add_argument('--show-process', type=bool, default=False,
                            help='for debug purpose, if enabled, speed for inference is dropped.')
        parser.add_argument('--tensorrt', type=str, default="False",
                            help='for tensorrt process.')
        args = parser.parse_args()
        print('[System] initialization %s : %s' % (args.model, get_graph_path(args.model)))
        w, h = model_wh(args.resize)
        if w > 0 and h > 0:
            e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h), trt_bool=str2bool(args.tensorrt))
        else:
            e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368), trt_bool=str2bool(args.tensorrt))
        print("[System] End model")
        return args, w, h, e
The definition of what I want to do and the error I am getting:
Here, I want to add my LSTM code to predict on people = data_dict['people'] in the DataManager.py file.
DataManager(added_lstm).py
# import some necessary libraries
config = tf.compat.v1.ConfigProto()
graph = tf.compat.v1.get_default_graph()
first_session = tf.compat.v1.Session(config=config)

with graph.as_default(), first_session.as_default():
    with graph.as_default():
        with tf.device('CPU:0'):
            model = tf.keras.models.load_model('/home/tf-pose-estimation/modules/lstm_model/model1.h5', compile=False)
            print(model.summary())


def make_prediction(m):
    WINDOW_SIZE, alpha, theta = 5, 0.9, 3
    forecast_ewma, forecast_values, theta_values, arr_of_num = [0], [], [], [1, 1, 1, 1, 1]
    arr_of_num.append(m)
    if len(arr_of_num) > WINDOW_SIZE:
        arr_of_num = arr_of_num[1:]
    if len(arr_of_num) == WINDOW_SIZE:
        actual = arr_of_num[-1]
        with graph.as_default(), first_session.as_default():
            forecast = model.predict(np.array(arr_of_num[-WINDOW_SIZE:]).reshape(1, WINDOW_SIZE, 1))[0][0]
        forecast_values.append(forecast)
        a = alpha * forecast + (1 - alpha) * forecast_ewma[-1]
        theta += 1 if a > 0.5 else -1
        theta = min(max(theta, 0), 2)
        theta_values.append(theta)
        forecast_ewma.append(a)
    return actual, forecast


class DataManagerThread(Thread):
    def __init__(self, queue, sock, index):
        super().__init__()
        self.image_queue = queue
        self.server_socket = sock
        self.index = index

    def run(self):
        data = b""
        payload_size = struct.calcsize("Q")
        while True:
            while len(data) < payload_size:
                packet = self.server_socket.recv(4*1024)  # The server_socket attribute is no longer None, so this should work
                if not packet:
                    break
                data += packet
            packed_msg_size = data[:payload_size]
            data = data[payload_size:]
            msg_size = struct.unpack("Q", packed_msg_size)[0]
            while len(data) < msg_size:
                data += self.server_socket.recv(4*1024)
            frame_data = data[:msg_size]
            data = data[msg_size:]
            data_dict = pickle.loads(frame_data)
            # extract frame and detection information from data dictionary
            img = data_dict['frame']
            people = data_dict['people']
            print(f'Detected number of people: {people}')
            self.put_data_to_queue(img)
            pred = make_prediction(people)  # Added for lstm prediction
            print(f"Predictions: {pred}")

    def put_data_to_queue(self, image):
        self.image_queue.put(image)
Here, I only add to the DataManager.py file: loading the LSTM model, defining the make_prediction function, and calling pred = make_prediction(people) inside the DataManagerThread(Thread) class to make a prediction. The other code in this file remains unchanged.
When I run both models simultaneously, only the LSTM is predicting and the pose estimation is just frozen. Also, even though the LSTM model is forced to use the CPU, about 84% of GPU memory is occupied; I do not know why. My expectation is that the LSTM model should use the CPU, the pose estimation model should use the GPU, and both models should run simultaneously.
When I run each model separately (i.e., LSTM on CPU and pose estimation on GPU), they work pretty well. Specifically, I tested the LSTM model separately by generating random numbers, and it worked as expected. The LSTM model was trained in TensorFlow 2.5, and both models are running in TensorFlow 2.5.
Below are my PC and environment specifications:
GPU: NVIDIA GeForce RTX 2070 SUPER
Driver Version: 525
CUDA Version: 11.6
Python: 3.9.12
Tensorflow-gpu: 2.5.0
Is there a possible or relevant solution for running the LSTM model on the CPU and the pose estimation model on the GPU simultaneously using multithreading?
Any help is appreciated!
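For context on the 84% GPU memory observation (a note added here, not part of the original question): TensorFlow 2.x reserves most of the GPU memory by default as soon as the GPU is initialized, regardless of which device an individual model is placed on. A minimal sketch of how that allocation can be limited, assuming it runs before any model is loaded in the process:

import tensorflow as tf

# Ask TensorFlow to grow GPU memory on demand instead of
# grabbing (almost) all of it up front.
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

# Alternatively, hide the GPU from this process entirely so the LSTM
# really runs CPU-only (only useful if the pose-estimation model
# lives in a different process).
# tf.config.set_visible_devices([], 'GPU')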

Gradient is equal to 'None'

I have two networks. The output of the first network is the input to the other. To calculate the loss for the second network, I use vanilla policy gradient. I want to backpropagate this loss into the first network. After checking whether the gradients have changed, I see that they are all None.
I first load the first network (a pre-trained autoencoder) this way:
def load_checkpoint(filepath, model):
    checkpoint = torch.load(filepath)
    model.load_state_dict(checkpoint['state_dict'])
    for parameter in model.parameters():
        parameter.requires_grad = True
    model.train()
    return model
Then I define the optimizers for both networks this way:
class MultipleOptimizer(object):
    def __init__(self, *op):
        self.optimizers = op

    def zero_grad(self):
        for op in self.optimizers:
            op.zero_grad()

    def step(self):
        for op in self.optimizers:
            op.step()

opt = MultipleOptimizer(SGD(model.parameters(), lr=1, momentum=0.9), Adam(logits_net.parameters(), lr=lr))
The reward function is:
# Reward function
def reward(x, act):
    # print('action', act)
    # print('x type', type(x))
    km = KMeans(act, n_init=20, n_jobs=4)
    y_pred = km.fit_predict(x.detach().cpu().numpy())  # seems we can only get a centre from batch
    # print('k-means output type', type(y_pred))
    sil_score = sil(x.detach().cpu().numpy(), y_pred)
    # print('sil score', sil_score)
    return sil_score
The architecture of the second neural network, and an alternative (mlp2) to avoid logits = logits.mean(0):
def mlp(sizes, activation=nn.Tanh, output_activation=nn.Identity):
    # Build a feedforward neural network. outputs are the logits
    layers = []
    for j in range(len(sizes)-1):
        act = activation if j < len(sizes)-2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j+1]), act()]
    return nn.Sequential(*layers)

class mlp2(torch.nn.Module):
    def __init__(self):
        super(mlp2, self).__init__()
        self.linear1 = nn.Linear(10, 100)
        self.relu1 = nn.ReLU(inplace=True)
        self.linear2 = torch.nn.Linear(100, 100)
        self.linear3 = torch.nn.Linear(100, 20)
        self.linear4 = torch.nn.Linear(2000, 100)
        self.ident = nn.Identity()

    def forward(self, x):
        a = self.linear1(x)
        a = self.relu1(a)
        a = self.linear2(a)
        a = self.relu1(a)
        a = self.linear3(a)
        a = torch.flatten(a)
        a = self.linear4(a)
        a = self.relu1(a)
        a = self.linear3(a)
        out = self.ident(a)
        return out
Loss is calculated in the following order:
def get_policy(obs):
    logits = logits_net(obs)
    return Categorical(logits=logits.mean(0))

def get_action(obs):
    return get_policy(obs).sample().item()

def Logp(obs, act):
    logp = get_policy(obs).log_prob(act.cuda())
    return logp

def compute_loss(logp, weights):
    return -(logp * weights).mean()

def train_one_epoch():
    # make some empty lists for logging.
    batch_obs = []      # for observations
    batch_acts = []     # for actions
    batch_weights = []  # for R(tau) weighting in policy gradient
    batch_logp = []

    # reset episode-specific variables
    j = 1               # signal from environment that episode is over
    ep_rews = []        # list for rewards accrued throughout ep

    for i, data in enumerate(train_loader):
        # Create the mean image out of those 100 images
        x, label = data
        x = model(x.cuda())  # torch.Size([100, 10])
        obs = x.data.cpu().numpy()  # [100, 10] - a trajectory with only one state
        # Save obs
        batch_obs.append(obs.copy())
        # act in the environment
        # act = get_action(torch.as_tensor(obs, dtype=torch.float32))
        act = get_action(x)
        print('action type', type(act))
        # log probability
        # logp = Logp(torch.as_tensor(obs, dtype=torch.float32), act=torch.as_tensor(act, dtype=torch.int32))
        logp = Logp(x, act=torch.as_tensor(act, dtype=torch.int32))
        # rew = reward(obs, act+2)
        rew = reward(x, act+2)
        # save action, reward
        batch_acts.append(act)
        batch_weights.append(rew)  # episode rewards
        batch_logp.append(logp)

    opt.zero_grad()
    batch_logp = torch.stack(batch_logp, dim=0)
    batch_loss = compute_loss(logp=torch.as_tensor(batch_logp, dtype=torch.float32),
                              weights=torch.as_tensor(batch_weights, dtype=torch.float32))
    batch_loss.backward()  # does it return anything? gradients? print them!
    opt.step()
    for name, param in logits_net.named_parameters():
        print(name, param.grad)
I applied some changes under the assumption that recreating some of the tensors might be the issue:
Originally, the output of the first network, obs, was converted with obs = x.data.cpu().numpy() and then sent to the get_action function: act = get_action(torch.as_tensor(obs, dtype=torch.float32)). I changed this to act = get_action(x), so x is sent directly to that function. I also changed the arguments of Logp to logp = Logp(x, act=torch.as_tensor(act, dtype=torch.int32)).
After these changes, I still get None for the gradients. Is there any way to backpropagate the gradient when the loss is calculated this way? Are there any changes I can apply?
Any help is appreciated.
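As an illustration of the underlying mechanism (a minimal sketch, not taken from the question's code): once a value leaves the autograd graph, for example via a NumPy round-trip, any tensor rebuilt from it has no history, so a loss computed from it cannot reach upstream parameters and their .grad stays None.

import torch

net = torch.nn.Linear(4, 2)
x = torch.randn(3, 4)

# Staying inside the autograd graph: gradients flow back to net.
loss = net(x).mean()
loss.backward()
print(net.weight.grad is not None)   # True

# Leaving the graph (numpy round-trip / re-wrapping the values):
out = net(x)
rebuilt = torch.as_tensor(out.detach().cpu().numpy(), dtype=torch.float32)
print(rebuilt.requires_grad)         # False: the history is gone
# A loss built from `rebuilt` can no longer backpropagate into net,
# which is why upstream .grad entries stay None.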

RuntimeError: Error(s) in loading state_dict for Actor - torch.load()

I have created a custom environment in OpenAI Gym and I am facing an error while loading the weights. Could someone help me resolve the issue? I trained a TD3 network in the custom environment successfully, but I face this issue during inference.
class Actor(nn.Module):
    def __init__(self, state_dim, action_dim, max_action):
        super(Actor, self).__init__()
        self.layer_1 = nn.Linear(state_dim, 400)
        self.layer_2 = nn.Linear(400, 300)
        self.layer_3 = nn.Linear(300, action_dim)
        self.max_action = max_action

    def forward(self, x):
        x = F.relu(self.layer_1(x))
        x = F.relu(self.layer_2(x))
        x = self.max_action * torch.tanh(self.layer_3(x))
        return x

class Critic(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(Critic, self).__init__()
        # Defining the first Critic neural network
        self.layer_1 = nn.Linear(state_dim + action_dim, 400)
        self.layer_2 = nn.Linear(400, 300)
        self.layer_3 = nn.Linear(300, 1)
        # Defining the second Critic neural network
        self.layer_4 = nn.Linear(state_dim + action_dim, 400)
        self.layer_5 = nn.Linear(400, 300)
        self.layer_6 = nn.Linear(300, 1)

    def forward(self, x, u):
        xu = torch.cat([x, u], 1)
        # Forward-Propagation on the first Critic Neural Network
        x1 = F.relu(self.layer_1(xu))
        x1 = F.relu(self.layer_2(x1))
        x1 = self.layer_3(x1)
        # Forward-Propagation on the second Critic Neural Network
        x2 = F.relu(self.layer_4(xu))
        x2 = F.relu(self.layer_5(x2))
        x2 = self.layer_6(x2)
        return x1, x2

    def Q1(self, x, u):
        xu = torch.cat([x, u], 1)
        x1 = F.relu(self.layer_1(xu))
        x1 = F.relu(self.layer_2(x1))
        x1 = self.layer_3(x1)
        return x1
# Selecting the device (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Building the whole Training Process into a class
class TD3(object):
    def __init__(self, state_dim, action_dim, max_action):
        self.actor = Actor(state_dim, action_dim, max_action).to(device)
        self.actor_target = Actor(state_dim, action_dim, max_action).to(device)
        self.actor_target.load_state_dict(self.actor.state_dict())
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters())
        self.critic = Critic(state_dim, action_dim).to(device)
        self.critic_target = Critic(state_dim, action_dim).to(device)
        self.critic_target.load_state_dict(self.critic.state_dict())
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters())
        self.max_action = max_action

    def select_action(self, state):
        state = torch.Tensor(state.reshape(1, -1)).to(device)
        return self.actor(state).cpu().data.numpy().flatten()

    def train(self, replay_buffer, iterations, batch_size=100, discount=0.99, tau=0.005, policy_noise=0.2, noise_clip=0.5, policy_freq=2):
        for it in range(iterations):
            # Step 4: We sample a batch of transitions (s, s’, a, r) from the memory
            batch_states, batch_next_states, batch_actions, batch_rewards, batch_dones = replay_buffer.sample(batch_size)
            state = torch.Tensor(batch_states).to(device)
            next_state = torch.Tensor(batch_next_states).to(device)
            action = torch.Tensor(batch_actions).to(device)
            reward = torch.Tensor(batch_rewards).to(device)
            done = torch.Tensor(batch_dones).to(device)
            # Step 5: From the next state s’, the Actor target plays the next action a’
            next_action = self.actor_target(next_state)
            # Step 6: We add Gaussian noise to this next action a’ and we clamp it in a range of values supported by the environment
            noise = torch.Tensor(batch_actions).data.normal_(0, policy_noise).to(device)
            noise = noise.clamp(-noise_clip, noise_clip)
            next_action = (next_action + noise).clamp(-self.max_action, self.max_action)
            # Step 7: The two Critic targets take each the couple (s’, a’) as input and return two Q-values Qt1(s’,a’) and Qt2(s’,a’) as outputs
            target_Q1, target_Q2 = self.critic_target(next_state, next_action)
            # Step 8: We keep the minimum of these two Q-values: min(Qt1, Qt2)
            target_Q = torch.min(target_Q1, target_Q2)
            # Step 9: We get the final target of the two Critic models, which is: Qt = r + γ * min(Qt1, Qt2), where γ is the discount factor
            target_Q = reward + ((1 - done) * discount * target_Q).detach()
            # Step 10: The two Critic models take each the couple (s, a) as input and return two Q-values Q1(s,a) and Q2(s,a) as outputs
            current_Q1, current_Q2 = self.critic(state, action)
            # Step 11: We compute the loss coming from the two Critic models: Critic Loss = MSE_Loss(Q1(s,a), Qt) + MSE_Loss(Q2(s,a), Qt)
            critic_loss = F.mse_loss(current_Q1, target_Q) + F.mse_loss(current_Q2, target_Q)
            # Step 12: We backpropagate this Critic loss and update the parameters of the two Critic models with a SGD optimizer
            self.critic_optimizer.zero_grad()
            critic_loss.backward()
            self.critic_optimizer.step()
            # Step 13: Once every two iterations, we update our Actor model by performing gradient ascent on the output of the first Critic model
            if it % policy_freq == 0:
                actor_loss = -self.critic.Q1(state, self.actor(state)).mean()
                self.actor_optimizer.zero_grad()
                actor_loss.backward()
                self.actor_optimizer.step()
                # Step 14: Still once every two iterations, we update the weights of the Actor target by polyak averaging
                for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):
                    target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)
                # Step 15: Still once every two iterations, we update the weights of the Critic target by polyak averaging
                for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()):
                    target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)

    # Making a save method to save a trained model
    def save(self, filename, directory):
        torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, filename))
        torch.save(self.critic.state_dict(), '%s/%s_critic.pth' % (directory, filename))

    # Making a load method to load a pre-trained model
    def load(self, filename, directory):
        self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, filename)))
        self.critic.load_state_dict(torch.load('%s/%s_critic.pth' % (directory, filename)))
def evaluate_policy(policy, eval_episodes=10):
    avg_reward = 0.
    for _ in range(eval_episodes):
        obs = env.reset()
        done = False
        while not done:
            action = policy.select_action(np.array(obs))
            obs, reward, done, _ = env.step(action)
            avg_reward += reward
    avg_reward /= eval_episodes
    print("---------------------------------------")
    print("Average Reward over the Evaluation Step: %f" % (avg_reward))
    print("---------------------------------------")
    return avg_reward

env_name = "Pygame-v0"
seed = 0
file_name = "%s_%s_%s" % ("TD3", env_name, str(seed))
print("---------------------------------------")
print("Settings: %s" % (file_name))
print("---------------------------------------")
eval_episodes = 10
save_env_vid = True
env = gym.make(env_name)
max_episode_steps = env._max_episode_steps
if save_env_vid:
    env = wrappers.Monitor(env, monitor_dir, force=True)
    env.reset()
env.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])
policy = TD3(state_dim, action_dim, max_action)
# policy.load(file_name, './pytorch_models/')
policy.load(file_name, "/content/gdrive/My Drive/reinforce/gym_game/pytorch_models")
_ = evaluate_policy(policy, eval_episodes=eval_episodes)
Traceback:
I am facing a runtime error while loading the state_dict for the Actor model. I searched Google but couldn't find similar issues.
RuntimeError: Error(s) in loading state_dict for Actor:
Missing key(s) in state_dict: "layer_1.weight", "layer_1.bias", "layer_2.weight", "layer_2.bias", "layer_3.weight", "layer_3.bias".
Unexpected key(s) in state_dict: "encoder.0.weight", "encoder.0.bias", "encoder.2.weight", "encoder.2.bias", "encoder.2.running_mean", "encoder.2.running_var", "encoder.2.num_batches_tracked", "encoder.3.weight", "encoder.3.bias", "encoder.5.weight", "encoder.5.bias", "encoder.5.running_mean", "encoder.5.running_var", "encoder.5.num_batches_tracked", "encoder.6.weight", "encoder.6.bias", "encoder.8.weight", "encoder.8.bias", "encoder.8.running_mean", "encoder.8.running_var", "encoder.8.num_batches_tracked", "encoder.10.weight", "encoder.10.bias", "encoder.12.weight", "encoder.12.bias", "encoder.12.running_mean", "encoder.12.running_var", "encoder.12.num_batches_tracked", "encoder.13.weight", "encoder.13.bias", "encoder.15.weight", "encoder.15.bias", "encoder.15.running_mean", "encoder.15.running_var", "encoder.15.num_batches_tracked", "encoder.16.weight", "encoder.16.bias", "linear.0.weight", "linear.0.bias", "linear.2.weight", "linear.2.bias".
It was answered by #MicaelJungo:
The weights you saved were not from the model you are using here. Make sure to load the correct checkpoint, which was created when training this particular model.
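A quick way to confirm this kind of mismatch (a minimal sketch added here; the path is assumed to follow the question's naming scheme, and state_dim, action_dim, max_action come from the question's script) is to compare the keys stored in the checkpoint with the keys the current model expects:

import torch

# Hypothetical checkpoint path following the "%s/%s_actor.pth" scheme above.
checkpoint = torch.load("./pytorch_models/TD3_Pygame-v0_0_actor.pth", map_location="cpu")

print(sorted(checkpoint.keys()))  # keys actually stored in the file
print(sorted(Actor(state_dim, action_dim, max_action).state_dict().keys()))  # keys this Actor expects

# If the two sets differ (here: "encoder.*" / "linear.*" vs "layer_*"),
# the file was saved from a different architecture, not from this Actor.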

Inference time varies over different GPUs using Torch

I hit a bug when running the inference code below. The function recognize() takes 0.4 s to finish the prediction, but then another 3 s to return the result preds_str to the caller. I found that if I set gpu_id=0 in the config file, it returns instantly. How can I fix this bug? Thanks in advance.
def recognize(imgs, model, demo_loader):
    t = time()
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(config.device)
            # For max length prediction
            length_for_pred = torch.IntTensor([config.batch_max_length] * batch_size).to(config.device)
            text_for_pred = torch.LongTensor(batch_size, config.batch_max_length + 1).fill_(0).to(config.device)
            preds = model(image, text_for_pred, is_train=False)
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)
    print('time elapsed before return:', time() - t)  # 0.4s
    return preds_str

def main():
    model = Model()
    model.cuda(config.device)
    model = torch.nn.DataParallel(model, device_ids=[config.device], output_device=[config.device]).to(config.device)
    model.load_state_dict(torch.load(config.saved_model, map_location=config.device))
    AlignCollate_demo = AlignCollate(imgH=config.imgH, imgW=config.imgW, keep_ratio_with_pad=config.PAD)
    imgs_dataset = ImageDataset(imgs)
    demo_loader = torch.utils.data.DataLoader(imgs_dataset, batch_size=config.batch_size, shuffle=False, num_workers=int(config.workers), collate_fn=AlignCollate_demo, pin_memory=True)
    start_time = time()
    # imgs = [img1, img2, ....]
    preds_str = recognize(imgs, model, demo_loader)
    print('time elapsed after return', time() - start_time)  # 3.4s
Config file:
class ConfigWordRecognizer:
    gpu_id = 1  # troublesome line here
    device = torch.device('cuda:{}'.format(gpu_id) if torch.cuda.is_available() else 'cpu')
    imgH = 32
    imgW = 100
    batch_size = 80
    workers = 8
    batch_max_length = 25
I found the solution from this post.
I set CUDA_VISIBLE_DEVICES=1 and gpu_id=0. Then I removed
model = torch.nn.DataParallel(model, device_ids=[config.device], output_device=[config.device]).to(config.device)
and changed
model.load_state_dict(torch.load(config.saved_model, map_location=config.device))
to
model.load_state_dict(self.copyStateDict(torch.load(self.config.saved_model, map_location=self.config.device)))
Copy stateDict function:
def copyStateDict(self, state_dict):
    if list(state_dict.keys())[0].startswith("module"):
        start_idx = 1
    else:
        start_idx = 0
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = ".".join(k.split(".")[start_idx:])
        new_state_dict[name] = v
    return new_state_dict
The model now works well on gpu1, but I still don't understand why, if I set gpu_id=0, it works well on gpu0 without copyStateDict.
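One detail worth noting about the timing numbers themselves (an observation added here, not from the original post): CUDA kernels are launched asynchronously, so wall-clock timings taken around GPU code only become meaningful after an explicit synchronization. A minimal sketch of timing a GPU call this way:

import torch
from time import time

def timed_gpu_call(fn, *args):
    # Make sure previously queued kernels are done before starting the clock.
    torch.cuda.synchronize()
    t0 = time()
    out = fn(*args)
    # Wait for the work launched by fn() to actually finish on the GPU.
    torch.cuda.synchronize()
    return out, time() - t0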

Keras: merge two models with different inputs and use fit_generator to train merged model

I want to merge two models with different inputs and use fit_generator to train the merged model. The generators are written by myself.
This is one of the generators:
def image_generator(self, batch_size, train_test, data_type, concat=False):
    train, test = self.split_train_test()
    data = train if train_test == 'train' else test
    print("Creating %s generator with %d samples." % (train_test, len(data)))
    print("image_generator")
    while 1:
        X, y = [], []
        # Generate batch_size samples.
        for _ in range(batch_size):
            # Reset to be safe.
            sequence = None
            # Get a random sample.
            sample = random.choice(data)
            # Check to see if we've already saved this sequence.
            if data_type is "images":
                # Get and resample frames.
                frames = self.get_frames_for_sample(sample)
                frames = self.rescale_list(frames, self.seq_length)
                # Build the image sequence
                sequence = self.build_image_sequence(frames)
            else:
                # Get the sequence from disk.
                sequence = self.get_image_sequence(data_type, sample, train_test)
                if sequence is None:
                    print("Can't find sequence. Did you generate them?")
                    sys.exit()  # TODO this should raise
            if concat:
                # We want to pass the sequence back as a single array. This
                # is used to pass into an MLP rather than an RNN.
                sequence = np.concatenate(sequence).ravel()
            X.append(sequence)
            y.append(self.get_class_one_hot(sample[1]))
        yield np.array(X), np.array(y)
This is get_image_sequence:
def get_image_sequence(self, data_type, sample, train_test):
    """get the images shaped with array."""
    # train,ApplyEyeMakeup,v_ApplyEyeMakeup_g10_c02,99
    num = random.randint(1, int(sample[3]))
    path = glob.glob('./data/' + train_test + '/' + sample[1] + '/' + sample[2] + '-' + '*' + num + '.jpg')
    if os.path.isfile(path):
        img = Image.open(path)
        if img.size != target_size:
            img = img.resize(target_size)
        img = img_to_array(img)
        img = np.expand_dims(img, axis=0)
        img /= 255
        return img
    else:
        print("path is error" + path)
        return None
Now, merge and fit it:
modeltmp = merge([model1.output, model2.output], mode='concat', concat_axis=1)
modeltmp = BatchNormalization()(modeltmp)
modeltmp = Dense(1024, activation='relu')(modeltmp)
modeltmp = Dense(len(classes), activation='softmax')(modeltmp)
model = Model(input=[model1.input, model2.input], outputs=modeltmp)

# model1 --- generator
train_gen_1 = data.image_generator(batch_size, 'train', cnn_lstm_datatype, concat)
test_gen_1 = data.image_generator(batch_size, 'test', cnn_lstm_datatype, concat)

# model2 --- generator
train_gen_2 = data.frame_generator(batch_size=batch_size, train_test='train', data_type=cnn_lstm_datatype, concat=concat)
test_gen_2 = data.frame_generator(batch_size=batch_size, train_test='test', data_type=cnn_lstm_datatype, concat=concat)

model.fit_generator([train_gen_1, train_gen_2],
                    verbose=1,
                    steps_per_epoch=batch_size,
                    validation_steps=10,
                    epochs=10000,
                    callbacks=[checkpointer, tb, early_stopper, csv_logger],
                    validation_data=[test_gen_1, test_gen_2]
                    )
However, I get the error:
TypeError: Error when checking model input: data should be a Numpy array, or list/dict of Numpy arrays. Found: generator object image_generator at 0x12205df00 ...
How can I solve it? Thanks!
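For context on what the error message points at (a sketch added here, not from the original post): fit_generator expects a single generator that yields (inputs, targets), where inputs can be a list matching the model's two inputs; it cannot take a Python list of generators. Assuming both generators yield batches for the same samples in the same order, a wrapper generator could look like:

def combined_generator(gen_1, gen_2):
    # Yield ([input_for_model1, input_for_model2], labels) batches,
    # matching Model(input=[model1.input, model2.input], ...).
    while True:
        X1, y1 = next(gen_1)
        X2, y2 = next(gen_2)
        yield [X1, X2], y1

train_gen = combined_generator(train_gen_1, train_gen_2)
test_gen = combined_generator(test_gen_1, test_gen_2)

model.fit_generator(train_gen,
                    steps_per_epoch=batch_size,
                    epochs=10000,
                    validation_data=test_gen,
                    validation_steps=10)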
