reading textfile returning empty variable in tensorflow - text

I have a text file which has 110 rows and 1024 columns of float values. I am trying to load the textfile and it doesnt read any thing.
filename = '300_faults.txt'
filename_queue = tf.train.string_input_producer([filename])
reader = tf.TextLineReader()
_,a = reader.read(filename_queue)
#x = np.loadtxt('300_faults.txt') # working
#a = tf.constant(x,tf.float32) # working
model = tf.initialize_all_variables()
with tf.Session() as session:
session.run(model)
print(session.run(tf.shape(a)))
printing the shape of the variable returns [].

Firstly - tf.shape(a) == [] doesn't mean that variable is empty. All scalars and strings have shape [].
https://www.tensorflow.org/programmers_guide/dims_types
May be you can check "rank" instead - it would be 0 for scalars and strings.
Other than that it looks like string_input_producer is a queue and it needs additional wiring to make ti work.
Please try this
filename = '300_faults.txt'
filename_queue = tf.train.string_input_producer([filename])
reader = tf.TextLineReader()
_,a = reader.read(filename_queue)
#x = np.loadtxt('300_faults.txt') # working
#a = tf.constant(x,tf.float32) # working
model = tf.initialize_all_variables()
with tf.Session() as session:
session.run(model)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
print(session.run(tf.shape(a)))
print(session.run((a)))
coord.request_stop()
coord.join(threads)

Related

Why are albumentations Augmentations (Yolo / YoloV5) altering Bounding Boxes if no augmentations are being placed?

I was using the Albumentations library in order to perform some data augmentations on an object detection dataset that I intended to train a YoloV5 model on.
I have to perform the augmentations seperately and save the images locally to disk, but when I do I noticed that some of the output bounding boxes returned aren't generating properly.
I have my augmentations set up in a seperate aug.py file, shown below (augmentations purposefully removed in debugging attempts, see below) -
import albumentations as A
import cv2
PROB = 0.5
bbp = A.BboxParams(format="yolo")
horizontal_flip_transform = A.Compose([
], bbox_params = bbp)
vertical_flip_transform = A.Compose([
], bbp)
pixel_dropout_transform = A.Compose([
], bbox_params = bbp)
random_rotate = A.Compose([
], bbox_params = bbp )
#NOTE: THIS METHOD IMPLIES THAT THE IMAGE WIDTHS MUST BE AT LEAST 50 PIXELS
#Remove this aug to remove this constraint
random_crop = A.Compose([
], bbox_params = bbp)
augs = [horizontal_flip_transform, vertical_flip_transform, pixel_dropout_transform, random_rotate, random_crop]
def get_augmentations():
return augs
And the relevant parts of my implementation for performing the augmentations and saving them to disk is below:
def run_augments_on_image(img_name, bboxes, max_images_to_generate = 500):
ret = []
img = np.array(Image.open(img_name), dtype=np.uint8)
transforms = get_augmentations()
for i in range(min(len(transforms), max_images_to_generate)):
transformed = transforms[i](image=img, bboxes = bboxes)
ret.append((transformed["image"] , transformed["bboxes"]))
return ret
def run_and_save_augments_on_image_sets(batch_img_names, bboxes_urls, max_images_to_generate, dataset_dir, trainval):
num_images = 0
for i in range(len(batch_img_names)):
bboxes = []
with open(os.path.join(dataset_dir, trainval, 'labels', bboxes_urls[i]), 'r') as f:
for row in f:
x = row.strip().split(' ')
x.append(row[0])
x.pop(0)
x[0] = float(x[0])
x[1] = float(x[1])
x[2] = float(x[2])
x[3] = float(x[3])
bboxes.append(x)
trans = run_augments_on_image(os.path.join(dataset_dir, trainval, 'images', batch_img_names[i]), bboxes)
img_index = len(os.listdir(os.path.join(dataset_dir, 'train' , 'images'))) + len(os.listdir(os.path.join(dataset_dir, 'valid', 'images'))) + 1
for j in range(len(trans)):
img_trans, bboxes_trans = trans[j]
p = Image.fromarray(img_trans).save(os.path.join(dataset_dir, trainval, 'images' , f'image-{img_index}.{batch_img_names[j].split(".")[-1]}'))
with open(os.path.join(dataset_dir, trainval, 'labels', f'image-{img_index}.txt'), 'w') as f:
for boxs in bboxes_trans:
print(f'{boxs[-1]} {boxs[0]} {boxs[1]} {boxs[2]} {boxs[3]}', file=f)
num_images += 1
img_index += 1
if num_images >= max_images_to_generate:
break
if num_images >= max_images_to_generate:
break
For testing purposes (some of the bounding boxes were off), I removed all the actual augmentations, expecting the input image label (one augmented image example shown below) to be equal to augmented label since there were no augmentations. But, as you can see, the two labels are different.
img-original.txt
0 0.5662285714285714 0.2740066225165563 0.5297714285714286 0.4837913907284769
img-augmented.txt
0 0.51488 0.47173333333333334 0.6405099999999999 0.6527333333333334
(The labels above are in normalized xywh YOLO format)
Why is albumentations altering the labels? None of the augmentations in augs.py contain anything.

RuntimeError: Trying to backward through the graph a second time. Saved intermediate values of the graph are freed when you call .backward()

I am trying to train SRGAN from scratch. I have read solutions for this type of problem, but it would be great if someone could help me debug my code. The exact error is: "RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad()" Here is the snippet I am trying to train:
gen_model = Generator().to(device, non_blocking=True)
disc_model = Discriminator().to(device, non_blocking=True)
opt_gen = optim.Adam(gen_model.parameters(), lr=0.01)
opt_disc = optim.Adam(disc_model.parameters(), lr=0.01)
from torch.nn.modules.loss import BCELoss
def train_model(gen, disc):
for epoch in range(20):
run_loss_disc = 0
run_loss_gen = 0
for data in train:
low_res, high_res = data[0].to(device, non_blocking=True, dtype=torch.float).permute(0, 3, 1, 2),data[1].to(device, non_blocking=True, dtype=torch.float).permute(0, 3, 1, 2)
#--------Discriminator-----------------
gen_image = gen(low_res)
gen_image = gen_image.detach()
disc_gen = disc(gen_image)
disc_real = disc(high_res)
p=nn.BCEWithLogitsLoss()
loss_gen = p(disc_real, torch.ones_like(disc_real))
loss_real = p(disc_gen, torch.zeros_like(disc_gen))
loss_disc = loss_gen + loss_real
opt_disc.zero_grad()
loss_disc.backward()
run_loss_disc+=loss_disc
#---------Generator--------------------
cont_loss = vgg_loss(high_res, gen_image)
adv_loss = 1e-3*p(disc_gen, torch.ones_like(disc_gen))
gen_loss = cont_loss+(10^-3)*adv_loss
opt_gen.zero_grad()
gen_loss.backward()
opt_disc.step()
opt_gen.step()
run_loss_gen+=gen_loss
print("Run Loss Discriminator: %d", run_loss_disc)
print("Run Loss Generator: %d", run_loss_gen)
train_model(gen_model, disc_model)
Apparently your disc_gen value was discarded by the first backward() call, as it says.
It should work if you change the discriminator part a bit:
gen_image = gen(low_res)
disc_gen = disc(gen_image.detach())
and add this at the start of the generator part:
disc_gen = disc(gen_image)

getting TypeError: Expected int32, got None of type 'NoneType' instead

I have implemented sequence to sequence model with attention layer if I 300000 data points I'm not getting any error if I use all of my data points I'm getting following error model.fit
TypeError: Expected int32, got None of type 'NoneType' instead.
what would be the reason for this?
the code before model.fit is
class encoder_decoder(tf.keras.Model):
def __init__(self,embedding_size,encoder_inputs_length,output_length,vocab_size,output_vocab_size,score_fun,units):
super(encoder_decoder,self).__init__()
self.vocab_size = vocab_size
self.enc_units = units
self.embedding_size = embedding_size
self.encoder_inputs_length = encoder_inputs_length
self.output_length = output_length
self.lstm_output = 0
self.state_h = 0
self.state_c = 0
self.output_vocab_size = output_vocab_size
self.dec_units = units
self.score_fun = score_fun
self.att_units = units
self.encoder=Encoder(self.vocab_size,self.embedding_size,self.enc_units,self.encoder_inputs_length)
self.decoder = Decoder(self.output_vocab_size, self.embedding_size, self.output_length, self.dec_units ,self.score_fun ,self.att_units)
# self.dense = Dense(self.output_vocab_size,activation = "softmax")
def call(self,data):
input,output = data[0],data[1]
encoder_hidden = self.encoder.initialize_states(input.shape[0])
encoder_output,encoder_hidden,encoder_cell = self.encoder(input,encoder_hidden)
decoder_hidden = encoder_hidden
decoder_cell =encoder_cell
decoder_output = self.decoder(output,encoder_output,decoder_hidden,decoder_cell)
return decoder_output
Inside the call function I'm initializing states for the encoder where I'm getting
the number of rows from input using the following line of code
encoder_hidden = self.encoder.initialize_states(input.shape[0])
If I print input, I'm getting shape as (None,55)
That's the reason I'm getting this error.
Here my total number data points is 330614 when I use all my data I getting this
error, when I use only 330000 data points I'm getting this error,
if I print batch inside def method I'm getting shape as (64,55)
Please find my below code for creating dataset for my sequence to sequence model
the function to reprocess the data and the function to create the dataset
and a function the load the dataset
def preprocess_sentence(w):
# w = unicode_to_ascii(w.lower().strip())
w = re.sub(r"([?.!,¿])", r" \1 ", w)
w = re.sub(r'[" "]+', " ", w)
w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)
w = w.strip()
w = '<start> ' + w + ' <end>'
return w
def create_dataset(path, num_examples):
lines = io.open(path, encoding='UTF-8').read().strip().split('\n')
# lines1 = lines[330000:]
# lines = lines[0:323386]+lines1
word_pairs = [[preprocess_sentence(w) for w in l.split('\t')] for l in lines[:num_examples]]
word_pairs = [[i[0],i[1]] for i in word_pairs]
return zip(*word_pairs)
def tokenize(lang):
lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(
filters='')
lang_tokenizer.fit_on_texts(lang)
tensor = lang_tokenizer.texts_to_sequences(lang)
tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor,padding='post')
return tensor, lang_tokenizer
def load_dataset(path, num_examples=None):
# creating cleaned input, output pairs
targ_lang, inp_lang = create_dataset(path, num_examples)
input_tensor, inp_lang_tokenizer = tokenize(inp_lang)
target_tensor, targ_lang_tokenizer = tokenize(targ_lang)
return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer,targ_lang,inp_lang
# Try experimenting with the size of that dataset
num_examples = None
input_tensor, target_tensor, inp_lang, targ_lang,targ_lang_text,inp_lang_text = load_dataset(path, num_examples)
# Calculate max_length of the target tensors
max_length_targ, max_length_inp = target_tensor.shape[1], input_tensor.shape[1]
max_length_targ,max_length_inp
input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.2)
the shape of datasets as follows
shape of input train (269291, 55)
shape of target train (269291, 53)
shape of input test (67323, 55)
shape of target test (67323, 53)
You can share the code block before the model.fit.
NoneType error is indicating that the final array which is passed to the model is for some reason empty. You can add print statements at previous steps to understand where along the way your array became empty.
Compare the scenario to the case where you are taking all your data points so that you can understand where the array is changing and how it is handled prior to passing it through model.fit.

Python 3 Multiprocessing and openCV problem with dictionary sharing between processor

I would like to use multiprocessing to compute the SIFT extraction and SIFT matching for object detection.
For now, I have a problem with the return value of the function that does not insert data in the dictionary.
I'm using Manager class and image that are open inside the function. But does not work.
Finally, my idea is:
Computer the keypoint for every reference image, use this keypoint as a parameter of a second function that compares and match with the keypoint and descriptors of the test image.
My code is:
# %% Import Section
import cv2
import numpy as np
from matplotlib import pyplot as plt
import os
from datetime import datetime
from multiprocessing import Process, cpu_count, Manager, Lock
import argparse
# %% path section
tests_path = 'TestImages/'
references_path = 'ReferenceImages2/'
result_path = 'ResultParametrizer/'
#%% Number of processor
cpus = cpu_count()
# %% parameter section
eps = 1e-7
useTwo = False # using the m and n keypoint better with False
# good point parameters
distanca_coefficient = 0.75
# gms parameter
gms_thresholdFactor = 3
gms_withRotation = True
gms_withScale = True
# flann parameter
flann_trees = 5
flann_checks = 50
#%% Locker
lock = Lock()
# %% function definition
def keypointToDictionaries(keypoint):
x, y = keypoint.pt
pt = float(x), float(y)
angle = float(keypoint.angle) if keypoint.angle is not None else None
size = float(keypoint.size) if keypoint.size is not None else None
response = float(keypoint.response) if keypoint.response is not None else None
class_id = int(keypoint.class_id) if keypoint.class_id is not None else None
octave = int(keypoint.octave) if keypoint.octave is not None else None
return {
'point': pt,
'angle': angle,
'size': size,
'response': response,
'class_id': class_id,
'octave': octave
}
def dictionariesToKeypoint(dictionary):
kp = cv2.KeyPoint()
kp.pt = dictionary['pt']
kp.angle = dictionary['angle']
kp.size = dictionary['size']
kp.response = dictionary['response']
kp.octave = dictionary['octave']
kp.class_id = dictionary['class_id']
return kp
def rootSIFT(dictionary, image_name, image_path,eps=eps):
# SIFT init
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
sift = cv2.xfeatures2d.SIFT_create()
keypoints, descriptors = sift.detectAndCompute(image, None)
descriptors /= (descriptors.sum(axis=1, keepdims=True) + eps)
descriptors = np.sqrt(descriptors)
print('Finito di calcolare, PID: ', os.getpid())
lock.acquire()
dictionary[image_name]['keypoints'] = keypoints
dictionary[image_name]['descriptors'] = descriptors
lock.release()
def featureMatching(reference_image, reference_descriptors, reference_keypoints, test_image, test_descriptors,
test_keypoints, flann_trees=flann_trees, flann_checks=flann_checks):
# FLANN parameter
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=flann_trees)
search_params = dict(checks=flann_checks) # or pass empty dictionary
flann = cv2.FlannBasedMatcher(index_params, search_params)
flann_matches = flann.knnMatch(reference_descriptors, test_descriptors, k=2)
matches_copy = []
for i, (m, n) in enumerate(flann_matches):
if m.distance < distanca_coefficient * n.distance:
matches_copy.append(m)
gsm_matches = cv2.xfeatures2d.matchGMS(reference_image.shape, test_image.shape, keypoints1=reference_keypoints,
keypoints2=test_keypoints, matches1to2=matches_copy,
withRotation=gms_withRotation, withScale=gms_withScale,
thresholdFactor=gms_thresholdFactor)
#%% Starting reference list file creation
reference_init = datetime.now()
print('Start reference file list creation')
reference_image_process_list = []
manager = Manager()
reference_image_dictionary = manager.dict()
reference_image_list = manager.list()
for root, directories, files in os.walk(references_path):
for file in files:
if file.endswith('.DS_Store'):
continue
reference_image_path = os.path.join(root, file)
reference_name = file.split('.')[0]
image = cv2.imread(reference_image_path, cv2.IMREAD_GRAYSCALE)
reference_image_dictionary[reference_name] = {
'image': image,
'keypoints': None,
'descriptors': None
}
proc = Process(target=rootSIFT, args=(reference_image_list, reference_name, reference_image_path))
reference_image_process_list.append(proc)
proc.start()
for proc in reference_image_process_list:
proc.join()
reference_end = datetime.now()
reference_time = reference_end - reference_init
print('End reference file list creation, time required: ', reference_time)
I faced pretty much the same error. It seems that the code hangs at detectAndCompute in my case, not when creating the dictionary. For some reason, sift feature extraction is not multi-processing safe (to my understanding, it is the case in Macs but I am not totally sure.)
I found this in a github thread. Many people say it works but I couldn't get it worked. (Edit: I tried this later which works fine)
Instead I used multithreading which is pretty much the same code and works perfectly. Of course you need to take multithreading vs multiprocessing into account

AttributeError: 'DType' object has no attribute 'type' Tensorflow Serving

I am trying to use a function (from another module) inside tensorflow. The function accepts a numpy array and returns the changepoints. My main goal is to deploy this model on tensorflow serving. I am running into error
AttributeError: 'DType' object has no attribute 'type'
There are 2 functions, one is create_data() that creates a numpy array and returns it, another is change() which accepts numpy array and uses the before mentioned function to return changepoints. I have created a placeholder to accept input data, an operation to execute the function. Problem is, if i try to send data through placeholder, i run into error. If i send the data directly into the function, it runs. Following is my code.
def create_data():
np.random.seed(0)
size = 100
mean_a = 0.0
mean_b = 10.0
mean_c = 0
var = 0.1
data_a = np.random.normal(mean_a, var, size)
data_b = np.random.normal(mean_b, var, size)
data_c = np.random.normal(mean_c, var, size)
data = np.concatenate([data_a, data_b, data_c])
return data
def change(data):
# what else i tried
# data = np.array(data, dtype=np.float)
# above line gives another error mentioned after code
cpts = (pelt(normal_mean(x, np.var(x)), len(x)))
return cpts
sess = tf.Session()
x = tf.placeholder(tf.float32, shape=[300, ], name="myInput")
y = tf.convert_to_tensor(change(x),np.float32,name="myOutput")
z = sess.run(y,feed_dict={x:create_data()})
If i try the code data = np.array(data, dtype=np.float) in the function change(), it gives me error
ValueError: setting an array element with a sequence.
I also tried data = np.hstack((data)).astype(np.float) and data = np.vstack((data)).astype(np.float) but it runs into a separate error that says use tf.map_fn. I also tried to use tf.eval() to convert the numbers but i couldn't get them to run inside a function with placeholders.
But if i send in the output directly,
y = tf.convert_to_tensor(change(create_data()),np.float32,name="myOutput")
It works.
How should i send in the input to make it work?
EDIT: The function in question is this if anyone wants to know.
This error is raised when you try to pass a Tensor into a numpy function
You need to use tf.py_func to include python function into tensorflow graph
(also, your change() functin uses data as argument instead of x)
Here is the code that worked for me
import numpy as np
import tensorflow as tf
from changepy import pelt
from changepy.costs import normal_mean
def create_data():
np.random.seed(0)
size = 100
mean_a = 0.0
mean_b = 10.0
mean_c = 0
var = 0.1
data_a = np.random.normal(mean_a, var, size)
data_b = np.random.normal(mean_b, var, size)
data_c = np.random.normal(mean_c, var, size)
data = np.concatenate([data_a, data_b, data_c])
return data
def change(x):
# what else i tried
# data = np.array(data, dtype=np.float)
# above line gives another error mentioned after code
cpts = (pelt(normal_mean(x, np.var(x)), len(x)))
return cpts
sess = tf.Session()
x = tf.placeholder(tf.float32, shape=[300, ], name="myInput")
y = tf.convert_to_tensor(tf.compat.v1.py_func(change, [x], 3*[tf.int64]),np.float32,name="myOutput")
z = sess.run(y,feed_dict={x:create_data()})
print(z)

Resources