I have a set of rotation matrices Rs:
Rs.shape = [62x3x3]
and a set of translation components Js:
Js.shape = [62x3]
I have been trying to find an efficient way to combine them into a [62x4x4] tensor, i.e. 62 homogeneous transform matrices. Currently I am doing it with a naive for loop:
def make_A(R, t):
    R_homo = torch.cat([R, torch.zeros(1, 3).cuda()], dim=0)
    t_homo = torch.cat([t.view(3, 1), torch.ones(1, 1).cuda()], dim=0)
    return torch.cat([R_homo, t_homo], dim=1)

transforms = self.NUM_JOINTS * [None]
for idj in range(0, self.NUM_JOINTS):
    transforms[idj] = make_A(Rs[idj, :], Js[idj, :])
FinalMatrix = torch.stack(transforms, dim=0)
This is highly inefficient, and takes almost 10ms to form. How can I tensorize this?
Not sure if it helps efficiency, but this should vectorize your code:
def make_A(Rs, Js):
    # append a row of zeros under each rotation: [N, 3, 3] -> [N, 4, 3]
    # (create the constants on the inputs' device, since your tensors live on the GPU)
    R_homo = torch.cat((Rs, torch.zeros(Rs.shape[0], 1, 3, device=Rs.device)), dim=1)
    # append a one to each translation: [N, 3] -> [N, 4]
    t_homo = torch.cat((Js, torch.ones(Js.shape[0], 1, device=Js.device)), dim=1)
    # attach the translation as the last column: [N, 4, 4]
    return torch.cat((R_homo, t_homo.unsqueeze(2)), dim=2)
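As a quick sanity check (a sketch with random inputs; the shapes are all that matter here):

import torch

Rs = torch.randn(62, 3, 3)
Js = torch.randn(62, 3)
A = make_A(Rs, Js)
print(A.shape)   # torch.Size([62, 4, 4])
print(A[0, 3])   # tensor([0., 0., 0., 1.]) -- the homogeneous bottom row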
I am trying to make an audio Siamese network, but in the training loop I get a size mismatch in my tensors: stack expects each tensor to be equal size, but got [1, 128, 121] at entry 0 and [1, 128, 205] at entry 1.
I am unsure where I went wrong with my data, since while gathering it I made sure to pad all my audio clips to the same length with background audio. So I have to pad the audio clips some other way. I thought about padding clips to a static size larger than all my clips in my custom dataloader, but that still gives me the same error. Any ideas where I am messing up?
import os
import random
import re

import pandas as pd
from torch.utils import data

# getSpectrogram / getMELSpectrogram are helper functions defined elsewhere in the project

class OHDataset(data.Dataset):
    def __init__(self, audio_dir, audio_dataset, transform="mel_spectrogram"):
        self.audio_labels = pd.read_csv(audio_dataset)
        self.audio_dir = audio_dir
        self.output_format = transform

    def __len__(self):
        return len(self.audio_labels)

    def __getitem__(self, item, n_fft=200, hop_length=120):
        positive = self.audio_labels.iloc[item, 0]
        if not bool(re.search(r'\d', positive)):
            positive = self.audio_labels.iloc[item + 1, 0]
        anchor = re.sub(r'\d+', '', self.audio_labels.iloc[item, 0])
        # randint's upper bound is inclusive, so subtract 1 to stay in range
        negative = self.audio_labels.iloc[random.randint(0, len(self.audio_labels) - 1), 0]
        pos_audio_path = os.path.join(self.audio_dir, positive + ".wav")
        neg_audio_path = os.path.join(self.audio_dir, negative + ".wav")
        anchor_audio_path = os.path.join(self.audio_dir, anchor + ".wav")
        if self.output_format == "spectrogram":
            pos_spectrogram = getSpectrogram(pos_audio_path, n_fft, hop_length)
            neg_spectrogram = getSpectrogram(neg_audio_path, n_fft, hop_length)
            anchor_spectrogram = getSpectrogram(anchor_audio_path, n_fft, hop_length)
            return anchor_spectrogram, pos_spectrogram, neg_spectrogram
        elif self.output_format == "mel_spectrogram":
            pos_mel_spectrogram = getMELSpectrogram(pos_audio_path, n_fft, hop_length)
            neg_mel_spectrogram = getMELSpectrogram(neg_audio_path, n_fft, hop_length)
            anchor_mel_spectrogram = getMELSpectrogram(anchor_audio_path, n_fft, hop_length)
            return anchor_mel_spectrogram, pos_mel_spectrogram, neg_mel_spectrogram
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (A, P, N) in enumerate(dataloader):
        anchor = model(A).to(device)
        positive = model(P).to(device)
        negative = model(N).to(device)
        loss = loss_fn(anchor, positive, negative)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
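One possible fix (a sketch under my assumptions, since each __getitem__ triple element appears to be a [1, n_mels, time] tensor with a varying time dimension): pad within each batch via a custom collate_fn, so torch.stack never sees mismatched sizes:

import torch
import torch.nn.functional as F

def pad_collate(batch):
    # batch: list of (anchor, positive, negative) triples, each element
    # shaped [1, n_mels, time] with a varying time dimension
    max_t = max(t.shape[-1] for triple in batch for t in triple)
    # right-pad every spectrogram to the longest one in the batch
    padded = [tuple(F.pad(t, (0, max_t - t.shape[-1])) for t in triple)
              for triple in batch]
    # regroup into three stacked batches: anchors, positives, negatives
    return tuple(torch.stack(group) for group in zip(*padded))

# usage: data.DataLoader(dataset, batch_size=8, collate_fn=pad_collate)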
I have this model
import numpy as np
import pandas as pd
import pymc3 as pm  # the pm.* calls below match the PyMC3 API

alpha = np.empty(18, dtype=object)
beta = np.empty(18, dtype=object)
y_obs = np.empty((18, 18), dtype=object)
epsilon = np.empty((18, 18), dtype=object)
mu = np.empty((18, 18), dtype=object)

basic_model = pm.Model()
with basic_model:
    y = pd.read_excel('Book1.xlsx', index_col=0)
    _mu = pm.Normal('_mu', mu=0, sigma=50)
    sigma = pm.InverseGamma('sigma', alpha=0, beta=0)
    for i in range(0, 18):
        alpha[i] = pm.Normal(f'alpha_{i}', mu=0, sigma=50)
        beta[i] = pm.Normal(f'beta_{i}', mu=0, sigma=50)
    for i in range(0, 18):
        for j in range(0, 18 - i):
            mu[i][j] = _mu + alpha[i] + beta[j]
            epsilon[i][j] = pm.Normal(f'epsilon_{i}_{j}', sigma=sigma)
            y_obs[i][j] = pm.StudentT(f'y_obs{i}_{j}', mu=mu[i][j] + epsilon[i][j], nu=2.52, observed=y.iloc[i, j])
    # inference
    trace = pm.sample(tune=2000)
Judging by the graph output, the model is as I expect it to be, but when run I receive this error at the final (sampling) step:
error: bracket nesting level exceeded maximum of 256
I also receive this advice, but I am not sure how to implement it, nor am I sure it would help much:
note: use -fbracket-depth=N to increase maximum nesting level
Upon doing some reading, I am guessing the looping is causing some problems, but I am unsure of an alternative approach. Is there a way to rewrite this so that it preserves the model but eliminates the error?
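One alternative (a sketch under my assumptions, not a drop-in answer): replace the scalar-by-scalar loops with vector-valued variables and fancy indexing, so the graph contains a handful of array nodes instead of hundreds of scalar ones; it is the latter that pushes the generated C expression past the 256-bracket limit. Note that I swap the improper InverseGamma(alpha=0, beta=0) for a HalfNormal, since PyMC3 rejects non-positive InverseGamma parameters:

import numpy as np
import pandas as pd
import pymc3 as pm

y = pd.read_excel('Book1.xlsx', index_col=0)
# the (i, j) cells the original loops visited: j < 18 - i
ii, jj = np.array([(i, j) for i in range(18) for j in range(18 - i)]).T
y_flat = y.values[ii, jj]

with pm.Model() as basic_model:
    _mu = pm.Normal('_mu', mu=0, sigma=50)
    sigma = pm.HalfNormal('sigma', sigma=50)  # stand-in prior (an assumption)
    alpha = pm.Normal('alpha', mu=0, sigma=50, shape=18)
    beta = pm.Normal('beta', mu=0, sigma=50, shape=18)
    epsilon = pm.Normal('epsilon', sigma=sigma, shape=len(y_flat))
    mu = _mu + alpha[ii] + beta[jj] + epsilon
    y_obs = pm.StudentT('y_obs', mu=mu, nu=2.52, observed=y_flat)
    trace = pm.sample(tune=2000)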
I need help creating a custom metric callback that Keras can track during training. I'm running:
Windows 10
Python 3.6
scikit-learn==0.23.2
pandas==0.25.3
numpy==1.18.5
tensorflow==2.3.0
keras==2.4.3
The formula I want to use looks like this:
step_1 = (True_Positives - False_Positives) / Sum_of_y_true
result = (step_1 - (-1)) / (1 - (-1))  # for scaling the range (-1, 1) to (0, 1)
I know Keras offers the TruePositives() and FalsePositives() classes, so I'd like to take advantage of those in a custom function that can be used as a callback. The pseudo-code I imagine would look something like:
def custom_metric():
    Get True_Positives
    Get False_Positives
    Get Sum_of_y_true
    Perform the above formula
    Return that result in a "tensor"-friendly form that can be used as a callback
Or maybe this could be a one-liner return, I don't know. I'm unclear about how to make a custom metric "Keras friendly", as it doesn't appear to accept numpy arrays or plain Python floats.
Thanks!
UPDATE
What I've attempted so far looks like this. Not sure if it's correct but would like to know if I'm on the right track:
def custom_metric(y_true, y_pred):
    TP = np.logical_and(backend.eval(y_true) == 1, backend.eval(y_pred) == 1)
    FP = np.logical_and(backend.eval(y_true) == 0, backend.eval(y_pred) == 1)
    TP = backend.sum(backend.variable(TP))
    FP = backend.sum(backend.variable(FP))
    SUM_TRUES = backend.sum(backend.eval(y_true) == 1)
    # Need help with this part?
    result = (TP - FP) / SUM_TRUES
    result = (result - (-1)) / (1 - (-1))
    return result
Figured it out!
def custom_m(y_true, y_pred):
    true_positives = backend.sum(backend.round(backend.clip(y_true * y_pred, 0, 1)))
    predicted_positives = backend.sum(backend.round(backend.clip(y_pred, 0, 1)))
    false_positives = predicted_positives - true_positives
    possible_positives = backend.sum(backend.round(backend.clip(y_true, 0, 1)))
    step_1 = (true_positives - false_positives) / possible_positives
    result = (step_1 - (-1)) / (1 - (-1))  # scale (-1, 1) to (0, 1)
    return result
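For completeness, a minimal usage sketch (the model here is a made-up stand-in): custom metrics are passed to compile() like the built-ins, and Keras then logs the result as "custom_m" every epoch. One caveat: possible_positives can be zero for an all-negative batch, so adding backend.epsilon() to that denominator would guard against NaNs.

from tensorflow.keras import layers, models

model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10,)),
    layers.Dense(1, activation='sigmoid'),
])
# pass the function itself, not a string
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[custom_m])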
I am trying to implement Neural Style Transfer using Keras, trying to keep it as simple as possible. When I try to compute gradients with Keras's backend.gradients() function, it returns [None]. My code is as follows:
import cv2
import numpy as np
from keras import backend
from keras.applications.vgg16 import VGG16

content_image = cv2.imread("C:/Users/Max/Desktop/IMG_20170331_103755.jpg")
content_image = cv2.resize(content_image, (512, 512))
style_image = cv2.imread("C:/Users/Max/Desktop/starry.jpg")
style_image = cv2.resize(style_image, (512, 512))

content_array = np.asarray(content_image, dtype=np.float32)
content_array = np.expand_dims(content_array, axis=0)
style_array = np.asarray(style_image, dtype=np.float32)
style_array = np.expand_dims(style_array, axis=0)

# Constants:
epochs = 1
height = 512
width = 512
num_channels = 3
step_size = 10
w_c = 1.0  # content/style loss weights; values were not given in the original snippet
w_s = 1.0
content_layer = ['block2_conv2']
style_layer = ['block1_conv2', 'block2_conv2', 'block3_conv3', 'block4_conv3', 'block5_conv3']
loss_total = backend.variable(0.0)

# VGG16 Model:
model = VGG16(input_shape=[height, width, num_channels], weights='imagenet', include_top=False)
# Defining losses:
def content_loss(Content, Mixed):
    content_loss = backend.mean(backend.square(Mixed - Content))
    return content_loss

def gram(layer):
    flat = backend.reshape(layer, shape=[1, -1])
    gram = backend.dot(flat, backend.transpose(flat))
    return gram

def style_loss(Style, Mixed):
    S_G = gram(Style)
    M_G = gram(Mixed)
    size = height * width
    return backend.sum(backend.square(S_G - M_G)) / (4. * (num_channels ** 2) * (size ** 2))

'''
def denoise(Image):
    loss = backend.mean(backend.abs(Image[:, 1:, :, :] - Image[:, :-1, :, :]) + backend.abs(Image[:, :, 1:, :] - Image[:, :, :-1, :]))
    return loss
'''
# Backend Functions:
output_c = backend.function(inputs=[model.layers[0].input], outputs=[model.get_layer(content_layer[0]).output])
output_s = backend.function(inputs=[model.layers[0].input], outputs=[model.get_layer(layer).output for layer in style_layer])
content_output = output_c([content_array])
style_output = output_s([style_array])

# Randomly generated image:
Mixed = np.random.uniform(0, 255, [1, height, width, 3]) - 128

# Loop:
for i in range(epochs):
    mixed_c = output_c([Mixed])
    mixed_c = mixed_c[0]
    loss_c = content_loss(content_output[0], mixed_c)
    total = []
    mixed_s = output_s([Mixed])
    for j in range(len(style_layer)):  # renamed from i to avoid shadowing the epoch counter
        style = style_loss(style_output[j], mixed_s[j])
        total.append(style)
    loss_s = backend.sum(total)
    # loss_d = denoise(Mixed)
    loss_total = w_c * loss_c + w_s * loss_s  # + w_d * loss_d
    gradient = backend.gradients(loss_total, Mixed)
    gradient = np.squeeze(gradient)
    step_size = step_size / (np.std(gradient) + 1e-8)
    Mixed -= gradient * step_size
What changes should I make to get the gradients working properly? I am clueless as to what went wrong.
Thanks!
You're taking the gradient of Mixed, which is a numpy array and not a variable. You need to define a tensor which will then hold the value of Mixed.
From Keras documentation:
gradients
keras.backend.gradients(loss, variables)
Returns the gradients of variables w.r.t. loss.
Arguments
loss: Scalar tensor to minimize.
variables: List of variables.
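A minimal sketch of that fix under the same setup (content loss only, to keep it short): build the model on a symbolic placeholder so the loss stays connected to the image tensor, then compile one backend.function that returns both the loss and the gradient:

from keras import backend
from keras.applications.vgg16 import VGG16

mixed = backend.placeholder((1, height, width, 3))   # symbolic image tensor
model = VGG16(input_tensor=mixed, weights='imagenet', include_top=False)
layer_out = model.get_layer('block2_conv2').output
loss = backend.mean(backend.square(layer_out - backend.constant(content_output[0])))

grads = backend.gradients(loss, mixed)[0]            # a tensor now, not [None]
step = backend.function([mixed], [loss, grads])

for i in range(epochs):
    loss_val, grad_val = step([Mixed])               # Mixed stays a numpy array
    Mixed -= grad_val * (step_size / (np.std(grad_val) + 1e-8))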
Hi, I am following the http://deeplearning.net/tutorial/code/convolutional_mlp.py code to implement a convolutional neural net. I have input images where the channels matter, and hence I want a 3-channel feature map as the layer 0 input.
So I need something like this
layer0_input = x.reshape((batch_size, 3, 240, 135)) # width 240, height 135, 3 channels
instead of
layer0_input = x.reshape((batch_size, 1, 28, 28)) # 28*28 normalized MNIST gray scale images
which will be used here
layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=(batch_size, 3, 240, 135),
    filter_shape=(nkerns[0], 1, 5, 5),
    poolsize=(2, 2)
)
where that x is provided to theano as
train_model = theano.function(
    [index],
    cost,
    updates=updates,
    givens={
        x: train_set_x[index * batch_size: (index + 1) * batch_size],
        y: train_set_y[index * batch_size: (index + 1) * batch_size]
    }
)
So, my question is: how should I create (shape) that train_set_x?
With grayscale intensity (i.e. a single channel), train_set_x is created as
shared_x = theano.shared(numpy.asarray(data_x,
                                       dtype=theano.config.floatX))
where data_x is a flattened numpy array of length 784 (for 28*28 pixels).
Thanks a lot for any advice.
I was able to get it working. I am pasting some code here which might help someone. Not very elegant, but it works.
import os
import random

import cPickle
import cv2
import numpy as np

def shuffle_in_unison(a, b):
    # courtesy http://stackoverflow.com/users/190280/josh-bleecher-snyder
    assert len(a) == len(b)
    shuffled_a = np.empty(a.shape, dtype=a.dtype)
    shuffled_b = np.empty(b.shape, dtype=b.dtype)
    permutation = np.random.permutation(len(a))
    for old_index, new_index in enumerate(permutation):
        shuffled_a[new_index] = a[old_index]
        shuffled_b[new_index] = b[old_index]
    return shuffled_a, shuffled_b

def createDataSet(imagefolder):
    os.chdir(imagefolder)

    # total number of files
    number_of_files = len([item for item in os.listdir('.') if os.path.isfile(os.path.join('.', item))])

    # get a shuffled list: I needed this because my image names were of the format n_x_<some details>.jpg
    # where n was my target and x was a number from 0 to m-1, where m was the number of samples
    # of the target value n. So I needed to shuffle and iterate while putting images in the train,
    # test and validate arrays
    image_index_array = range(0, number_of_files)
    random.seed(12)
    random.shuffle(image_index_array)

    # split 80/10/10 - train/test/val
    trainsize = int(number_of_files * .8)
    testsize = int(number_of_files * .1)
    valsize = number_of_files - trainsize - testsize

    # create the random index arrays of train/test/val by slicing the total image index array
    train_index_array = image_index_array[0:trainsize]
    test_index_array = image_index_array[trainsize:trainsize + testsize]
    validate_index_array = image_index_array[trainsize + testsize:]

    # initialize the data structures
    dataset = {'train': [[], []], 'test': [[], []], 'validate': [[], []]}
    i_counter = 0
    train_X = []
    train_y = []
    test_X = []
    test_y = []
    val_X = []
    val_y = []

    for item in os.listdir('.'):
        if not os.path.isfile(os.path.join('.', item)):
            continue
        if item.endswith('.pkl'):
            continue
        print 'Processing item ' + item
        item_y = item.split('_')[0]
        item_x = cv2.imread(item)
        height, width = item_x.shape[:2]

        # this was my requirement - skip it if you do not need it
        if height != 135 or width != 240:
            continue

        # get 3 channels
        b, g, r = cv2.split(item_x)
        item_x = [b, g, r]
        item_x = np.array(item_x)
        item_x = item_x.reshape(3, 135 * 240)

        if i_counter in test_index_array:
            test_X.append(item_x)
            test_y.append(item_y)
        elif i_counter in validate_index_array:
            val_X.append(item_x)
            val_y.append(item_y)
        else:
            train_X.append(item_x)
            train_y.append(item_y)
        i_counter = i_counter + 1

    # fix the dimensions: flatten out the channel and intensity dimensions
    train_X = np.array(train_X)
    train_X = train_X.reshape(train_X.shape[0], train_X.shape[1] * train_X.shape[2])
    test_X = np.array(test_X)
    test_X = test_X.reshape(test_X.shape[0], test_X.shape[1] * test_X.shape[2])
    val_X = np.array(val_X)
    val_X = val_X.reshape(val_X.shape[0], val_X.shape[1] * val_X.shape[2])
    train_y = np.array(train_y)
    test_y = np.array(test_y)
    val_y = np.array(val_y)

    # shuffle the train and test arrays in unison
    train_X, train_y = shuffle_in_unison(train_X, train_y)
    test_X, test_y = shuffle_in_unison(test_X, test_y)

    # pickle them
    dataset['train'] = [train_X, train_y]
    dataset['test'] = [test_X, test_y]
    dataset['validate'] = [val_X, val_y]
    output = open('pcount.pkl', 'wb')
    cPickle.dump(dataset, output)
    output.close()
Once you have this pickle file, you can use it in convolutional_mlp.py like this.
layer0_input = x.reshape((batch_size, 3, 135, 240))

# Construct the first convolutional pooling layer:
# filtering reduces the image size to (135-8+1, 240-5+1) = (128, 236)
# maxpooling reduces this further to (128/2, 236/2) = (64, 118)
# 4D output tensor is thus of shape (batch_size, nkerns[0], 64, 118)
layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=(batch_size, 3, 135, 240),
    filter_shape=(nkerns[0], 3, 8, 5),
    poolsize=(2, 2)
)
The load_data function in logistic_sgd.py will need a small change as below
f = open(dataset, 'rb')
dump = cPickle.load(f)
train_set = dump['train']
valid_set = dump['validate']
test_set = dump['test']
f.close()
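For context, a sketch of what follows in the tutorial's load_data (written from memory of logistic_sgd.py, so verify against your copy): each of the three sets is wrapped in shared variables exactly as before, and the rest of the pipeline is unchanged.

import numpy
import theano
import theano.tensor as T

def shared_dataset(data_xy, borrow=True):
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
    # labels are used as indices, so cast the float shared variable to int
    return shared_x, T.cast(shared_y, 'int32')

test_set_x, test_set_y = shared_dataset(test_set)
valid_set_x, valid_set_y = shared_dataset(valid_set)
train_set_x, train_set_y = shared_dataset(train_set)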
Hope this helps