Calculate transformation based on anchor in image (OpenCV / Python 3)

I would like to calculate a transformation matrix (rotation, scaling and translation) according to an anchor in an image.
My image is a picture of a label, which will always contain a Data Matrix.
I use a third-party library to detect the Data Matrix.
Then, I get its size, orientation (using the result of cv2.minAreaRect(dm_contour)), and position.
I build what I call my "anchor" with those parameters.
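For illustration, a minimal sketch of how such an anchor could be built from cv2.minAreaRect (the build_anchor helper and the (x, y, orientation, size) layout are assumptions for this sketch, not necessarily the exact code used here):

import cv2

def build_anchor(dm_contour):
    """Hypothetical helper: build an (x, y, orientation, size) anchor from a Data Matrix contour."""
    (cx, cy), (w, h), angle = cv2.minAreaRect(dm_contour)  # center, (width, height), rotation in degrees
    size = max(w, h)                                        # use the longer side as the anchor size
    return (cx, cy, angle, size)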
In a second step I get what I call a job, which is composed of ROIs defined by the user and the anchor of the picture on which the user defined the ROIs.
With these few steps I can correctly place my ROIs according to the new label context if it is only translated (shifted left, right, top or bottom).
But as soon as I try to place ROIs on a rotated label, it doesn't work.
I think my issue is with my rotation matrix and the whole "translate to origin and back to position" process, but I can't find what I'm doing wrong...
My code to transform the ROI positions looks like this:
import cv2
import numpy as np

def process_job(anchor, img, job, file_path):
    """
    Process job file on current picture
    #param anchor = Current scene anchor
    #param img = Current picture
    #param job = Job object
    #param file_path = Job file path
    """
    print("Processing job " + file_path)
    """ Unpack detected anchor """
    a_x, a_y = (anchor[0], anchor[1])
    rotation = anchor[2]
    anchor_size = int(anchor[3])
    for item_i in job:
        item = job[item_i]
        if 'anchor' in item:
            """ Apply size rate """
            size_rate = anchor_size / int(item['anchor']['size'])
            """ Item anchor pos """
            i_a_x, i_a_y = int(item['anchor']['x']), int(item['anchor']['y'])
            """ Calculate transformation """
            """ Scaling """
            S = np.array([
                [size_rate, 0, 0],
                [0, size_rate, 0],
                [0, 0, 1]
            ])
            """ Rotation """
            angle = rotation - int(item['anchor']['o'])
            theta = np.radians(angle)
            c, s = np.cos(theta), np.sin(theta)
            R = np.array((
                (c, s, 0),
                (-s, c, 0),
                (0, 0, 1)
            ))
            """ Translation """
            x_scale = a_x - i_a_x
            y_scale = a_y - i_a_y
            T = np.array([
                [1, 0, x_scale],
                [0, 1, y_scale],
                [0, 0, 1]
            ])
            """ Shear """
            shx_factor = 0
            Shx = np.array([
                [1, shx_factor, 0],
                [0, 1, 0],
                [0, 0, 1]
            ])
            shy_factor = 0
            Shy = np.array([
                [1, 0, 0],
                [shy_factor, 1, 0],
                [0, 0, 1]
            ])
            print("Scaling: " + str(size_rate) + " Rotation:" + str(angle) + " Translation:" + str((x_scale, y_scale)))
            if 'rect' in item:
                """ Unpack rectangle """
                """ (r_x1, r_y1) top-left corner """
                """ (r_x2, r_y2) bottom-right corner """
                r_x1, r_y1, r_x2, r_y2 = (int(item['rect']['x1']), int(item['rect']['y1']), int(item['rect']['x2']), int(item['rect']['y2']))
                """ As np arrays """
                rect_1 = np.array([r_x1, r_y1, 1])
                rect_2 = np.array([r_x2, r_y2, 1])
                """ Translate to origin """
                T_c_1 = np.array([
                    [1, 0, -r_x1],
                    [0, 1, -r_y1],
                    [0, 0, 1]
                ])
                """ Translate to origin """
                T_c_2 = np.array([
                    [1, 0, -r_x2],
                    [0, 1, -r_y2],
                    [0, 0, 1]
                ])
                """ Back to position """
                T_r1 = np.array([
                    [1, 0, r_x1],
                    [0, 1, r_y1],
                    [0, 0, 1]
                ])
                """ Back to position """
                T_r2 = np.array([
                    [1, 0, r_x2],
                    [0, 1, r_y2],
                    [0, 0, 1]
                ])
                """ Apply transformations """
                final_1 = T @ T_r1 @ R @ T_c_1 @ S @ rect_1
                final_2 = T @ T_r2 @ R @ T_c_2 @ S @ rect_2
                x1, y1, x2, y2 = final_1[0], final_1[1], final_2[0], final_2[1]
                print("From " + str((r_x1, r_y1, r_x2, r_y2)))
                print("To " + str((int(x1), int(y1), int(x2), int(y2))))
                cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 0), 2)
    cv2.imwrite('./output/job.png', img)
And here are a few samples of my images:
Thanks in advance for your help.

So,
I don't even know if someone took the time to read my question, but if it can be of any help, here is what I did.
In my first code version, I tried to calculate the following transformation matrices:
Translation 'T'
Rotation 'R'
Scaling 'S'
But I was missing two of them:
Shear X 'ShX'
Shear Y 'ShY'
My second version looked like roi_pos = ShX @ ShY @ S @ T @ T_to_pos @ R @ T_to_origin @ item_roi.
Results were very clumsy and the ROIs I defined with my model were not correctly located on my test samples. But the rotation was right and somehow the ROIs would fall near the expected positions.
Then I thought about optimizing my Data Matrix detection, so I went through all the trouble of implementing my own Python/NumPy/OpenCV version of a DM detection algorithm.
A sharper DM detection helped me better evaluate my orientation and scale parameters, but the ROIs were still off.
So I discovered homography, which does exactly what I want.
It takes points in a known plane and the same points in a destination plane. It then calculates the transformation that occurred between the two planes.
With this matrix 'H', I can now do roi_pos = H @ item_roi, which is much more accurate.
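For example, a minimal sketch of that homography step (assuming the four corners of the Data Matrix are available both in the reference image where the ROIs were defined and in the current image; variable names are illustrative, not the exact code):

import cv2
import numpy as np

# Four corresponding points: DM corners in the reference image and in the new image.
src_pts = np.array(job_dm_corners, dtype=np.float32)   # corners when the ROIs were defined
dst_pts = np.array(new_dm_corners, dtype=np.float32)   # corners detected on the current label

H, _ = cv2.findHomography(src_pts, dst_pts)             # 3x3 transformation between the two planes

# Re-project an ROI corner stored as homogeneous coordinates [x, y, 1].
item_roi = np.array([r_x1, r_y1, 1.0])
roi_pos = H @ item_roi
roi_pos /= roi_pos[2]                                   # normalize back to [x, y, 1]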
That's it, hope it helps,

Related

PyTorch: how to use torchvision.transforms.AugMix with torch.float32?

I am trying to apply data augmentation to an image dataset by using torchvision.transforms.AugMix, but I get the following error: TypeError: Only torch.uint8 image tensors are supported, but found torch.float32.
I tried to convert it to int, but I get another error.
My code where I am trying to use the AugMix function:
transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize((224, 224)),  # resize to 224*224
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),  # normalization
        torchvision.transforms.AugMix()
    ]
)
to_tensor = torchvision.transforms.ToTensor()
Image.MAX_IMAGE_PIXELS = None

class BreastDataset(torch.utils.data.Dataset):
    def __init__(self, json_path, data_dir_path='./dataset', clinical_data_path=None, is_preloading=True):
        self.data_dir_path = data_dir_path
        self.is_preloading = is_preloading
        with open(json_path) as f:
            print(f"load data from {json_path}")
            self.json_data = json.load(f)

    def __len__(self):
        return len(self.json_data)

    def __getitem__(self, index):
        label = int(self.json_data[index]["label"])
        patient_id = self.json_data[index]["id"]
        patch_paths = self.json_data[index]["patch_paths"]
        data = {}
        if self.is_preloading:
            data["bag_tensor"] = self.bag_tensor_list[index]
        else:
            data["bag_tensor"] = self.load_bag_tensor([os.path.join(self.data_dir_path, p_path) for p_path in patch_paths])
        data["label"] = label
        data["patient_id"] = patient_id
        data["patch_paths"] = patch_paths
        return data

    def load_bag_tensor(self, patch_paths):
        """Load a bag data as tensor with shape [N, C, H, W]"""
        patch_tensor_list = []
        for p_path in patch_paths:
            patch = Image.open(p_path).convert("RGB")
            patch_tensor = transform(patch)  # [C, H, W]
            patch_tensor = torch.unsqueeze(patch_tensor, dim=0)  # [1, C, H, W]
            patch_tensor_list.append(patch_tensor)
        bag_tensor = torch.cat(patch_tensor_list, dim=0)  # [N, C, H, W]
        return bag_tensor
Any help is appreciated! Thank you in advance!
For me, applying AugMix first and then ToTensor() worked:
transformation = transforms.Compose([
    transforms.AugMix(severity=6, mixture_width=2),
    transforms.ToTensor(),
    transforms.RandomErasing(),
    transforms.RandomGrayscale(p=0.35)
])
torchvision.transforms.AugMix takes images in uint8 format. It means that every pixel is 1 (grayscale) or 3 (RGB) numbers between 0 and 255, which is the classic image format.
torch.Tensor.type(torch.float32) casts a uint8 tensor to float32, but it is likely not the only transformation that was applied to your image. float32 images are often normalized to the range [-1, 1] or [0, 1]. The common ways to do so are:
img = img.type(torch.float32) / 128.0 - 1.0 # [-1, 1]
img = img.type(torch.float32) / 255.0 # [0, 1]
Once you know which case you are in, you can recast to uint8:
img = (img + 1.0) * 128.0 # case [-1, 1]
img = img * 255.0 # case [0, 1]
img = torch.clip(img, 0.0, 255.0)
img = img.type(torch.uint8)
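Putting both answers together, one way to adapt the original pipeline would be to run AugMix on the PIL/uint8 image before ToTensor() and Normalize() (a sketch, not tested against the asker's dataset):

import torchvision

transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((224, 224)),
    torchvision.transforms.AugMix(),        # operates on PIL images / uint8 tensors
    torchvision.transforms.ToTensor(),      # converts to float32 in [0, 1]
    torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])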

Google Foobar Challenge: "Prepare the Bunnies' Escape" - Dijkstra's Algorithm Implementation

I'm having a go at the Google Foobar Challenge, currently on the Prepare the Bunnies' Escape problem.
Problem definition:
You're awfully close to destroying the LAMBCHOP doomsday device and
freeing Commander Lambda's bunny workers, but once they're free of the
work duties the bunnies are going to need to escape Lambda's space
station via the escape pods as quickly as possible. Unfortunately, the
halls of the space station are a maze of corridors and dead ends that
will be a deathtrap for the escaping bunnies. Fortunately, Commander
Lambda has put you in charge of a remodeling project that will give
you the opportunity to make things a little easier for the bunnies.
Unfortunately (again), you can't just remove all obstacles between the
bunnies and the escape pods - at most you can remove one wall per
escape pod path, both to maintain structural integrity of the station
and to avoid arousing Commander Lambda's suspicions.
You have maps of parts of the space station, each starting at a work
area exit and ending at the door to an escape pod. The map is
represented as a matrix of 0s and 1s, where 0s are passable space and
1s are impassable walls. The door out of the station is at the top
left (0,0) and the door into an escape pod is at the bottom right
(w-1,h-1).
Write a function solution(map) that generates the length of the
shortest path from the station door to the escape pod, where you are
allowed to remove one wall as part of your remodeling plans. The path
length is the total number of nodes you pass through, counting both
the entrance and exit nodes. The starting and ending positions are
always passable (0). The map will always be solvable, though you may
or may not need to remove a wall. The height and width of the map can
be from 2 to 20. Moves can only be made in cardinal directions; no
diagonal moves are allowed.
I thought I would have a go at solving the problem using Dijkstra's algorithm and managed to get all the test cases passing except for one hidden one.
Fortunately I have been able to come up with a test case that can replicate the situation.
My implementation is below:
#!/usr/bin/env python2.7
from typing import List, Tuple, Dict, Optional
from Queue import PriorityQueue

def neighbors(map, coord):
    # type: (List[List[int]], Tuple[int, int]) -> List[Tuple[int, int]]
    adjacent = []
    if coord[0] > 0:
        adjacent.append((coord[0] - 1, coord[1]))
    if coord[0] < len(map) - 1:
        adjacent.append((coord[0] + 1, coord[1]))
    if coord[1] > 0:
        adjacent.append((coord[0], coord[1] - 1))
    if coord[1] < len(map[coord[0]]) - 1:
        adjacent.append((coord[0], coord[1] + 1))
    return adjacent

def heuristic(a, b):
    # type: (Tuple[int, int], Tuple[int, int]) -> float
    # Manhattan Distance heuristic
    return abs(b[0] - a[0]) + abs(b[1] - a[1])

def reconstruct_path(came_from, current):
    # type: (Tuple[int, int], Tuple[int, int]) -> List[Tuple[int, int]]
    # Backtrack from 'current' back up to the start
    path = [current]
    while current in came_from:
        current = came_from[current]
        if current is not None:
            path.insert(0, current)
    return path

def dijkstras_algo(map, start, end, num_removable_walls):
    # type: (List[List[int]], Tuple[int, int], Tuple[int, int], int) -> int
    open = PriorityQueue()
    open.put((0, start))
    # num_removable_walls[n] is the remaining number of walls we could remove when we landed on the square
    num_removable_walls = { start: num_removable_walls }
    # came_from[n] is the node immediately preceding it on the cheapest path from 'start'
    came_from = { start: None }  # type: Dict[Tuple[int, int], Optional[Tuple[int, int]]]
    # cost_so_far[n] is the cost to travel from 'start' to n
    cost_so_far = { start: 0 }  # type: Dict[Tuple[int, int], int]
    while not open.empty():
        (cost, current) = open.get()
        if current == end:
            return reconstruct_path(came_from, current)
        for next in neighbors(map, current):
            if map[next[0]][next[1]] == 1 and num_removable_walls[current] <= 0:
                continue
            # For this problem, distance between any two neighbouring cells is _always_ 1
            new_cost = cost_so_far[current] + 1
            # The problem is in this check, for the failing test case, we stop revisiting cells
            if (next not in cost_so_far or new_cost <= cost_so_far[next]):
                num_removable_walls[next] = num_removable_walls[current] - (1 if map[next[0]][next[1]] == 1 else 0)
                cost_so_far[next] = new_cost
                open.put((new_cost + heuristic(next, end), next))
                came_from[next] = current

def solution(map):
    # type: (List[List[int]]) -> int
    num_rows = len(map)
    num_cols = len(map[0])
    start = (0, 0)
    end = (num_rows - 1, len(map[num_rows - 1]) - 1)
    path = dijkstras_algo(map, start, end, 1)
    return None if path == None else len(path)

assert solution([[0, 1, 0, 0, 0, 1],
                 [0, 0, 0, 1, 0, 0]]) == 7, '2x6 failed!'
assert solution([[0, 1, 0, 0, 0],
                 [0, 1, 0, 1, 0],
                 [0, 0, 0, 1, 1],
                 [0, 0, 1, 1, 0]]) == 12, '4x5 failed!'
assert solution([[0, 1, 0, 0, 0],
                 [0, 0, 0, 1, 0],
                 [0, 0, 1, 1, 0],
                 [0, 1, 1, 0, 0],
                 [0, 1, 1, 0, 0]]) == 9, '5x5 failed!'
Running the code above will show the 4x5 array test failing.
I have ascertained that it is because the algorithm is not revisiting cells to continue searching. This is because of the if-statement if (next not in cost_so_far or new_cost <= cost_so_far[next]):. The algorithm initially finds a low-cost route to the wall cells at (0, 1) and (1, 1), and uses up its one-time ability to remove a wall on those squares immediately. This cost is then recorded, and because it is low, the aforementioned check will not revisit the surrounding cells, as the algorithm believes it has already found the shortest path to that cell. This is a problem because we can see that we need to save our one-time ability to remove a wall until we encounter cell (2, 4) in order to reach the end.
Although I know where the problem lies, I am having a hard time coming up with a way to incorporate exploring the map while taking the wall-removing ability into account. Any ideas would be greatly appreciated.
I successfully implemented Dijkstra's algorithm to explore the grid, not passing through walls as per the vanilla implementation. However I haven't been successful incorporating the possibility of finding the shortest path, given that you can remove one wall from the map/maze.
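For reference, the usual way to handle this is to make the number of walls already removed part of the search state, so each cell can be visited once per "walls used" value. A minimal BFS sketch of that idea (not the original code; all edge costs are 1, so plain BFS suffices):

from collections import deque

def solution(map):
    rows, cols = len(map), len(map[0])
    start, end = (0, 0), (rows - 1, cols - 1)
    # State: (row, col, walls removed so far); visit each state at most once.
    visited = {(start[0], start[1], 0)}
    queue = deque([(start[0], start[1], 0, 1)])  # row, col, walls used, path length (counting the start)
    while queue:
        r, c, used, dist = queue.popleft()
        if (r, c) == end:
            return dist
        for nr, nc in ((r - 1, c), (r + 1, c), (r, c - 1), (r, c + 1)):
            if 0 <= nr < rows and 0 <= nc < cols:
                nused = used + map[nr][nc]   # stepping onto a wall consumes the one removal
                if nused <= 1 and (nr, nc, nused) not in visited:
                    visited.add((nr, nc, nused))
                    queue.append((nr, nc, nused, dist + 1))
    return None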

vtk: how to obtain the image pixel index from a world point

If I pick a world point from an image, how can I convert the world coordinate to an image index?
import vtk
import numpy as np
from vtk.util.numpy_support import numpy_to_vtk

def numpyToVTK(data, multi_component=False, type='float'):
    if type == 'float':
        data_type = vtk.VTK_FLOAT
    elif type == 'char':
        data_type = vtk.VTK_UNSIGNED_CHAR
    else:
        raise RuntimeError('unknown type')
    if multi_component == False:
        if len(data.shape) == 2:
            data = data[:, :, np.newaxis]
        flat_data_array = data.transpose(2, 1, 0).flatten()
        vtk_data = numpy_to_vtk(num_array=flat_data_array, deep=True, array_type=data_type)
        shape = data.shape
    else:
        assert len(data.shape) == 3, 'only test for 2D RGB'
        flat_data_array = data.transpose(1, 0, 2)
        flat_data_array = np.reshape(flat_data_array, newshape=[-1, data.shape[2]])
        vtk_data = numpy_to_vtk(num_array=flat_data_array, deep=True, array_type=data_type)
        shape = [data.shape[0], data.shape[1], 1]
    img = vtk.vtkImageData()
    img.GetPointData().SetScalars(vtk_data)
    img.SetDimensions(shape[0], shape[1], shape[2])
    return img

global sphereActor, textActor
sphereActor = None
textActor = None

def mouseMoveEvent(iren, event):
    x, y = iren.GetEventPosition()
    picker = vtk.vtkWorldPointPicker()
    picker.Pick(x, y, 0, render)
    worldPoint = picker.GetPickPosition()
    ##############################################
    ## convert world point to image index
    ##############################################
    sphere = vtk.vtkSphereSource()
    sphere.SetCenter(worldPoint[0], worldPoint[1], worldPoint[2])
    sphere.SetRadius(2)
    sphere.Update()
    sphereMapper = vtk.vtkPolyDataMapper()
    sphereMapper.SetInputData(sphere.GetOutput())
    global sphereActor, textActor
    if sphereActor != None:
        render.RemoveActor(sphereActor)
    sphereActor = vtk.vtkActor()
    sphereActor.SetMapper(sphereMapper)
    sphereActor.GetProperty().SetColor(255, 0, 0)
    render.AddActor(sphereActor)
    render.Render()
    if textActor != None:
        render.RemoveActor(textActor)
    textActor = vtk.vtkTextActor()
    textActor.SetInput('world coordinate: (%.2f, %.2f, %.2f)' % (worldPoint[0], worldPoint[1], worldPoint[2]))
    textActor.GetTextProperty().SetColor(1, 0, 0)
    textActor.GetTextProperty().SetFontSize(15)
    render.AddActor(textActor)

img = np.zeros(shape=[128, 128])
for i in range(128):
    for j in range(128):
        img[i, j] = i + j
vtkImg = numpyToVTK(img)

imgActor = vtk.vtkImageActor()
imgActor.SetInputData(vtkImg)

render = vtk.vtkRenderer()
render.AddActor(imgActor)
# render.Render()

renWin = vtk.vtkRenderWindow()
renWin.AddRenderer(render)
renWin.Render()

iren = vtk.vtkRenderWindowInteractor()
iren.SetRenderWindow(renWin)
iren.SetInteractorStyle(vtk.vtkInteractorStyleTrackballCamera())
iren.Initialize()
iren.AddObserver('MouseMoveEvent', mouseMoveEvent)
iren.Start()
In the above code, if I don't rotate the image, the world point is (x, y, 0):
And it agrees with what I know. For the world point (x, y, z) and the image index (i, j, k), the conversion should be:
worldPoint (x,y,z) = i*spacingX*directionX + j*spacingY*directionY + k*spacingZ*directionZ + originPoint
In the above code, the image is converted from numpy, thus:
directionX = [1, 0, 0]
directionY = [0, 1, 0]
directionZ = [0, 0, 1]
originPoint=[0, 0, 0]
spacingX=1
spacingY=1
spacingZ=1
In this way, x=i, y=j, z=k. Since this is a 2D image, k should be 0 and z should also be 0.
Then, when I rotate the image, z is not 0, as in the following picture.
I don't know why z is -0.24.
It means the conversion above is wrong. So how can I obtain the image index from the world point?
worldPoint (x,y,z) = i*spacingX*directionX + j*spacingY*directionY + k*spacingZ*directionZ + originPoint
Any suggestion is appreciated!
vtkImageData has the method TransformPhysicalPointToContinuousIndex for going from world space to image space and TransformIndexToPhysicalPoint to go the other way.
I don't think the computation you're doing is right, since the direction is a 3x3 rotation matrix.
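For example, inside mouseMoveEvent the picked world point could be mapped back to a (continuous) voxel index roughly like this (a sketch; vtkImg is the image created above, and the method is available on vtkImageData in recent VTK versions):

# Map the picked world coordinate to a continuous (i, j, k) index of the image.
ijk = [0.0, 0.0, 0.0]
vtkImg.TransformPhysicalPointToContinuousIndex(worldPoint, ijk)

# Round and clamp to get a discrete pixel index.
dims = vtkImg.GetDimensions()
i, j, k = [min(max(int(round(v)), 0), d - 1) for v, d in zip(ijk, dims)]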

When I compile YOLOv3 I get warnings

"""YOLO v3 output
"""
import numpy as np
import keras.backend as K
from keras.models import load_model
import os
class YOLO:
def __init__(self, obj_threshold, nms_threshold):
"""Init.
# Arguments
obj_threshold: Integer, threshold for object.
nms_threshold: Integer, threshold for box.
"""
self._t1 = obj_threshold
self._t2 = nms_threshold
self._yolo = load_model('data/yolo.h5')
def _process_feats(self, out, anchors, mask):
"""process output features.
# Arguments
out: Tensor (N, N, 3, 4 + 1 +80), output feature map of yolo.
anchors: List, anchors for box.
mask: List, mask for anchors.
# Returns
boxes: ndarray (N, N, 3, 4), x,y,w,h for per box.
box_confidence: ndarray (N, N, 3, 1), confidence for per box.
box_class_probs: ndarray (N, N, 3, 80), class probs for per box.
"""
grid_h, grid_w, num_boxes = map(int, out.shape[1: 4])
anchors = [anchors[i] for i in mask]
# Reshape to batch, height, width, num_anchors, box_params.
anchors_tensor = K.reshape(K.variable(anchors),
[1, 1,len(anchors), 2])
out = out[0]
box_xy = K.get_value(K.sigmoid(out[..., :2]))
box_wh = K.get_value(K.exp(out[..., 2:4]) * anchors_tensor)
box_confidence = K.get_value(K.sigmoid(out[..., 4]))
box_confidence = np.expand_dims(box_confidence, axis=-1)
box_class_probs = K.get_value(K.sigmoid(out[..., 5:]))
col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)
col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
grid = np.concatenate((col, row), axis=-1)
box_xy += grid
box_xy /= (grid_w, grid_h)
box_wh /= (416, 416)
box_xy -= (box_wh / 2.)
boxes = np.concatenate((box_xy, box_wh), axis=-1)
return boxes, box_confidence, box_class_probs
def _filter_boxes(self, boxes, box_confidences, box_class_probs):
"""Filter boxes with object threshold.
# Arguments
boxes: ndarray, boxes of objects.
box_confidences: ndarray, confidences of objects.
box_class_probs: ndarray, class_probs of objects.
# Returns
boxes: ndarray, filtered boxes.
classes: ndarray, classes for boxes.
scores: ndarray, scores for boxes.
"""
box_scores = box_confidences * box_class_probs
box_classes = np.argmax(box_scores, axis=-1)
box_class_scores = np.max(box_scores, axis=-1)
pos = np.where(box_class_scores >= self._t1)
boxes = boxes[pos]
classes = box_classes[pos]
scores = box_class_scores[pos]
return boxes, classes, scores
def _nms_boxes(self, boxes, scores):
"""Suppress non-maximal boxes.
# Arguments
boxes: ndarray, boxes of objects.
scores: ndarray, scores of objects.
# Returns
keep: ndarray, index of effective boxes.
"""
x = boxes[:, 0]
y = boxes[:, 1]
w = boxes[:, 2]
h = boxes[:, 3]
areas = w * h
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x[i], x[order[1:]])
yy1 = np.maximum(y[i], y[order[1:]])
xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])
w1 = np.maximum(0.0, xx2 - xx1 + 1)
h1 = np.maximum(0.0, yy2 - yy1 + 1)
inter = w1 * h1
ovr = inter / (areas[i] + areas[order[1:]] - inter)
inds = np.where(ovr <= self._t2)[0]
order = order[inds + 1]
keep = np.array(keep)
return keep
def _yolo_out(self, outs, shape):
"""Process output of yolo base net.
# Argument:
outs: output of yolo base net.
shape: shape of original image.
# Returns:
boxes: ndarray, boxes of objects.
classes: ndarray, classes of objects.
scores: ndarray, scores of objects.
"""
masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
[59, 119], [116, 90], [156, 198], [373, 326]]
boxes, classes, scores = [], [], []
for out, mask in zip(outs, masks):
b, c, s = self._process_feats(out, anchors, mask)
b, c, s = self._filter_boxes(b, c, s)
boxes.append(b)
classes.append(c)
scores.append(s)
boxes = np.concatenate(boxes)
classes = np.concatenate(classes)
scores = np.concatenate(scores)
# Scale boxes back to original image shape.
width, height = shape[1], shape[0]
image_dims = [width, height, width, height]
boxes = boxes * image_dims
nboxes, nclasses, nscores = [], [], []
for c in set(classes):
inds = np.where(classes == c)
b = boxes[inds]
c = classes[inds]
s = scores[inds]
keep = self._nms_boxes(b, s)
nboxes.append(b[keep])
nclasses.append(c[keep])
nscores.append(s[keep])
if not nclasses and not nscores:
return None, None, None
boxes = np.concatenate(nboxes)
classes = np.concatenate(nclasses)
scores = np.concatenate(nscores)
return boxes, classes, scores
def predict(self, image, shape):
"""Detect the objects with yolo.
# Arguments
image: ndarray, processed input image.
shape: shape of original image.
# Returns
boxes: ndarray, boxes of objects.
classes: ndarray, classes of objects.
scores: ndarray, scores of objects.
"""
outs = self._yolo.predict(image)
boxes, classes, scores = self._yolo_out(outs, shape)
return boxes, classes, scores
This is the YOLOv3 code, and when I run the main program I get this error:
InvalidArgumentError: Incompatible shapes: [13,13,2] vs. [1,1,3,2] [Op:Mul]
The main part is:
import cv2
import numpy as np
from yolo_model import YOLO

yolo = YOLO(0.6, 0.5)

file = "data/coco_classes.txt"
with open(file) as f:
    class_name = f.readlines()
all_classes = [c.strip() for c in class_name]
print("A")

f = "dog_cat.jpg"
path = "images/" + f
image = cv2.imread(path)
cv2.imshow("image", image)

pimage = cv2.resize(image, (416, 416))
pimage = np.array(pimage, dtype="float32")
pimage /= 255.0
pimage = np.expand_dims(pimage, axis=0)

# yolo
boxes, classes, scores = yolo.predict(pimage, image.shape)

for box, score, cl in zip(boxes, scores, classes):
    x, y, w, h = box
    top = max(0, np.floor(x + 0.5).astype(int))
    left = max(0, np.floor(y + 0.5).astype(int))
    right = max(0, np.floor(x + w + 0.5).astype(int))
    bottom = max(0, np.floor(y + h + 0.5).astype(int))
    cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)
    cv2.putText(image, "{} {}".format(all_classes[cl], score), (top, left - 6),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1, cv2.LINE_AA)
cv2.imshow("yolo", image)
The problem occurs at box_wh = K.get_value(K.exp(out[..., 2:4]) * anchors_tensor). Is the multiplication necessary? And what does box_wh do?
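The multiplication is how box_wh is formed in YOLO: the exponentiated raw width/height predictions are scaled by the anchor sizes. The shape error [13,13,2] vs [1,1,3,2] suggests that out[..., 2:4] has shape (13, 13, 2) rather than (13, 13, 3, 2), i.e. the feature map was never reshaped to separate the 3 anchors per cell. A hedged sketch of the kind of reshape that is usually done before slicing, assuming the loaded model returns (1, 13, 13, 255) with 255 = 3 * (4 + 1 + 80); this is an assumed fix, not the original code:

# Hypothetical fix inside _process_feats, before computing box_xy / box_wh:
grid_h, grid_w = int(out.shape[1]), int(out.shape[2])
out = out[0].reshape(grid_h, grid_w, 3, -1)                  # (13, 13, 255) -> (13, 13, 3, 85)
box_wh = K.get_value(K.exp(out[..., 2:4]) * anchors_tensor)  # (13,13,3,2) * (1,1,3,2) broadcasts cleanly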

Cellular automata - repeated simulation imshow()

I've got a problem with repeating the animation. I want to use matplotlib in order to visualise each simulation step on the matrix. Here is the code for the simulation:
import numpy as np
from matplotlib import pyplot as plt

s = np.array([[1, 1, 1], [1, 10, 1], [1, 1, 1]], dtype=np.int8)  # Matrix of weights
e = np.zeros((19,), dtype=np.int8)  # Vector of rules
e[3] = 1
e[12] = 1
e[13] = 1
mama = np.array([[1, 0, 1], [0, 1, 0], [0, 0, 1]], dtype=np.int8)  # The matrix to be tested

def simulation(ma):  # Simulation on a given matrix
    n, m = ma.shape
    p = np.zeros((n+2, m+2), dtype=np.int8)  # Extended matrix, avoiding conflicts at the edges of the initial matrix. Here I construct a torus
    p[1:-1, 1:-1] = ma  # middle
    p[0, 1:-1] = ma[n-1]  # the first row of p, the last of ma
    p[-1, 1:-1] = ma[0]  # the last row of p, the first of ma
    p[1:-1, 0] = ma[0:, -1]  # left col of p, right of ma
    p[1:-1, -1] = ma[0:, 0]  # right col of p, left of ma
    p[-1, 0] = ma[0, -1]  # left bottom corner
    p[-1, -1] = ma[0, 0]  # right bottom corner
    p[0, 0] = ma[-1, -1]  # left upper corner
    p[0, -1] = ma[-1, 0]  # right upper corner
    new = np.zeros(ma.shape, dtype=np.int8)
    v, c = p.shape  # rows and columns
    for i in range(1, v):
        for j in range(1, c):
            if p[i-1:i+2, j-1:j+2].shape == (3, 3):
                new[i-1, j-1] = e[np.sum(p[i-1:i+2, j-1:j+2]*s)]
    return new
However, I want to run the simulation for a specified number of repetitions and visualise each step, so I have tried this code:
def rep(fun, mac, ti):  # function, matrix, repetitions (time)
    if ti == 1:
        plt.imshow(fun(mac))
        plt.title("Cellular automaton")
        plt.show()
    else:
        f = fun(rep(fun, mac, ti-1))
        plt.imshow(f)
        plt.title("Cellular automaton")
        plt.show()
I get an error:
n, m = ma.shape
AttributeError: 'NoneType' object has no attribute 'shape'
Please, could you help me? I got really tired of my inability to visualise my work.
ADDITIONALLY:
I have substituted rep with:
def shoow(fig):
    plt.imshow(fig)
    plt.title("Cellular automaton")
    plt.show()

def repet(fun, mac, ti):
    c1 = mac
    for i in range(ti):
        f = fun(c1)
        shoow(f)
        c1 = f
However, it creates a new figure each time. How can I get a continuous simulation?
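One possible approach is to create the image artist once and just update its data on every step, for example with plt.pause (a sketch; matplotlib.animation.FuncAnimation would also work):

import matplotlib.pyplot as plt

def animate(fun, mac, ti, delay=0.2):
    """Run 'ti' steps of 'fun' starting from 'mac', updating a single figure in place."""
    fig, ax = plt.subplots()
    im = ax.imshow(mac)
    ax.set_title("Cellular automaton")
    current = mac
    for _ in range(ti):
        current = fun(current)
        im.set_data(current)   # update the existing image instead of opening a new figure
        plt.pause(delay)       # redraw and wait a bit between steps
    plt.show()

It could then be called as animate(simulation, mama, 20).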
