How to load YOLOv7 using torch.hub - PyTorch

How can I load a YOLOv7 model using torch.hub to make predictions?
I used the torch.hub.load method the same way as for YOLOv5, but it didn't work.
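For reference, a minimal sketch of the YOLOv5-style call that works for the ultralytics repo but does not transfer directly to YOLOv7's repo layout:

import torch

# Standard YOLOv5 hub call; YOLOv7's repo needs one of the approaches shown below instead
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)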

The answer is:
First method
# Download YOLOv7 code
!git clone https://github.com/WongKinYiu/yolov7
%cd yolov7
from hubconf import custom
model = custom(path_or_model='yolov7.pt') # custom example
# model = create(name='yolov7', pretrained=True, channels=3, classes=80, autoshape=True) # pretrained example
# Verify inference
import numpy as np
from PIL import Image
imgs = [np.zeros((640, 480, 3))]
results = model(imgs) # batched inference
results.print()
results.save()
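To sanity-check on a real image instead of a zero array, the autoshaped model also accepts PIL images and file paths; a minimal sketch (the sample image path below is the one bundled with the YOLOv7 repo, adjust if needed):

from PIL import Image

img = Image.open('inference/images/horses.jpg')  # sample image shipped in the repo
results = model([img])  # list in, batched inference out
results.print()
results.save()  # annotated copies are written to a runs/ subdirectory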
Second method
Load YOLOv7 using torch.hub
# Download YOLOv7 code
!git clone https://github.com/WongKinYiu/yolov7
%cd yolov7
from pathlib import Path
import torch
from models.yolo import Model
from utils.general import check_requirements, set_logging
from utils.google_utils import attempt_download
from utils.torch_utils import select_device
dependencies = ['torch', 'yaml']
check_requirements(Path("/content/yolov7/").parent / 'requirements.txt', exclude=('pycocotools', 'thop'))
set_logging()
def custom(path_or_model='path/to/model.pt', autoshape=True):
    """custom mode
    Arguments (3 options):
        path_or_model (str): 'path/to/model.pt'
        path_or_model (dict): torch.load('path/to/model.pt')
        path_or_model (nn.Module): torch.load('path/to/model.pt')['model']
    Returns:
        pytorch model
    """
    model = torch.load(path_or_model, map_location=torch.device('cpu')) if isinstance(path_or_model, str) else path_or_model  # load checkpoint
    if isinstance(model, dict):
        model = model['ema' if model.get('ema') else 'model']  # load model
    hub_model = Model(model.yaml).to(next(model.parameters()).device)  # create
    hub_model.load_state_dict(model.float().state_dict())  # load state_dict
    hub_model.names = model.names  # class names
    if autoshape:
        hub_model = hub_model.autoshape()  # for file/URI/PIL/cv2/np inputs and NMS
    device = select_device('0' if torch.cuda.is_available() else 'cpu')  # default to GPU if available
    return hub_model.to(device)
model = custom(path_or_model='yolov7.pt') # custom example
# model = create(name='yolov7', pretrained=True, channels=3, classes=80, autoshape=True) # pretrained example
# Verify inference
import numpy as np
from PIL import Image
imgs = [np.zeros((640, 480, 3))]
results = model(imgs) # batched inference
results.print()
results.save()
df_prediction = results.pandas().xyxy
df_prediction
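Note that results.pandas().xyxy is a list with one DataFrame per input image; a minimal sketch of filtering the first image's detections by confidence (the 0.5 threshold is an arbitrary choice):

df = results.pandas().xyxy[0]  # DataFrame for the first image
confident = df[df['confidence'] > 0.5]  # keep only high-confidence boxes
print(confident[['xmin', 'ymin', 'xmax', 'ymax', 'name']])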
Full code on Google Colab.

The YOLOv7 model was fine-tuned on a custom dataset with this repo: https://github.com/WongKinYiu/yolov7
import torch
import os
import cv2
# Load fine-tuned custom model
model = torch.hub.load('WongKinYiu/yolov7', 'custom', '/path/to/custom_model.pt',
                       force_reload=True, trust_repo=True)
# Declaring some variables
TABLE_CONFIDENCE = 0.50
CELL_CONFIDENCE = 0.50
OUTPUT_DIR = 'output'
# Bounding Boxes color scheme
ALPHA = 0.2
TABLE_BORDER = (0, 0, 255)
CELL_FILL = (0, 0, 200)
CELL_BORDER = (0, 0, 255)
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Run the Inference and draw predicted bboxes
image_path = '/path/to/image.jpg'  # set this to your input image
results = model(image_path)
df = results.pandas().xyxy[0]
table_bboxes = []
cell_bboxes = []
for _, row in df.iterrows():
    if row['class'] == 0 and row['confidence'] > TABLE_CONFIDENCE:
        table_bboxes.append([int(row['xmin']), int(row['ymin']),
                             int(row['xmax']), int(row['ymax'])])
    if row['class'] == 1 and row['confidence'] > CELL_CONFIDENCE:
        cell_bboxes.append([int(row['xmin']), int(row['ymin']),
                            int(row['xmax']), int(row['ymax'])])
image = cv2.imread(image_path)
overlay = image.copy()
for table_bbox in table_bboxes:
    cv2.rectangle(image, (table_bbox[0], table_bbox[1]),
                  (table_bbox[2], table_bbox[3]), TABLE_BORDER, 1)
for cell_bbox in cell_bboxes:
    cv2.rectangle(overlay, (cell_bbox[0], cell_bbox[1]),
                  (cell_bbox[2], cell_bbox[3]), CELL_FILL, -1)
    cv2.rectangle(image, (cell_bbox[0], cell_bbox[1]),
                  (cell_bbox[2], cell_bbox[3]), CELL_BORDER, 1)
image_new = cv2.addWeighted(overlay, ALPHA, image, 1-ALPHA, 0)
image_filename = image_path.split('/')[-1]
cv2.imwrite(f'{OUTPUT_DIR}/{image_filename}', image_new)

Related

YOLOv8: how can I predict and save the image with boxes on the objects with PyTorch

import torch
import glob
import os
import pathlib
from ultralytics import YOLO
model_name='MyBest.pt'
model = torch.hub.load(<?>, 'custom', source='local', path = model_name, force_reload = True)
results = model('person.png') # predict on an image
results.save()
What should I write instead of <?>?
I'm trying to get an image with a box drawn on every object; I want the code to use both YOLOv8 and PyTorch.
According to the official Python usage source, release 8.0.20:
from ultralytics.yolo.engine.model import YOLO
model = YOLO("yolov8s.pt")
results = model.predict(source='ultralytics/assets', save=True, save_txt=True)
You can use this code for more details:
for result in results:
    boxes = result.boxes  # Boxes object for bbox outputs
    masks = result.masks  # Masks object for segmentation masks outputs
    probs = result.probs  # Class probabilities
    print(boxes)
    print(masks)
    print(probs)
boxes = results[0].boxes
box = boxes[0] # returns one box
box.xyxy
boxes.xyxy # box with xyxy format, (N, 4)
boxes.xywh # box with xywh format, (N, 4)
boxes.xyxyn # box with xyxy format but normalized, (N, 4)
boxes.xywhn # box with xywh format but normalized, (N, 4)
boxes.conf # confidence score, (N, 1)
boxes.cls # cls, (N, 1)
boxes.data # raw bboxes tensor, (N, 6), also accessible as boxes.boxes
You can go to the docs.ultralytics page for more information
https://docs.ultralytics.com/predict/
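If you want the same pandas-style table that the YOLOv5/YOLOv7 hub models return, a minimal sketch that builds one from a v8 Results object (assumes the attribute layout listed above and that result.names maps class ids to names):

import pandas as pd

def boxes_to_dataframe(result):
    # Flatten one ultralytics Results object into a pandas DataFrame
    boxes = result.boxes
    df = pd.DataFrame(boxes.xyxy.cpu().numpy(), columns=['xmin', 'ymin', 'xmax', 'ymax'])
    df['confidence'] = boxes.conf.cpu().numpy()
    df['class'] = boxes.cls.cpu().numpy().astype(int)
    df['name'] = [result.names[c] for c in df['class']]
    return df

df = boxes_to_dataframe(results[0])
print(df)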

DuplicateWidgetID: with st.button widgets with key='predict' for an image classification problem in Streamlit

I'm trying to write a web app that lets the user upload an image, select a deep learning model, and click the 'Get a prediction' button to classify the image as 'human detected' or 'no human detected' (it is a binary classification problem).
I've got this error.
Below is my code with Streamlit, which gives me the error.
import streamlit as st
import pandas as pd
import numpy as np
from PIL import Image
import tensorflow as tf
from tensorflow.keras.models import load_model
st.title("Binary Human Detection Web App")
# loading images
def load_image(uploaded_file):
    image = uploaded_file.resize((224,224))
    im_array = np.array(image)/255  # a normalised 2D array
    im_array = im_array.reshape(-1, 224, 224, 3)  # to shape as (1, 224, 224, 3)
    return im_array
st.sidebar.subheader("Select a Model")
model_name = st.sidebar.selectbox("Model", ("CNN", "ResNet50"))
if st.button("Try with the Default Image"):
    image = Image.open('C:/Users/maria/Jupiter_Notebooks/Dataset_Thermal_Project/Camera_videos/Images_3sec_newdata_v2/oneman/image21.jpg')
    st.subheader("Human is detected")
    st.image(image)
# predicting images
if model_name == 'CNN':
    st.write("Try out the CNN model with the default image or upload an image")
    if st.sidebar.button("Get prediction", key='predict'):
        st.subheader("Upload an image file")
        uploaded_file = st.file_uploader("Upload a JPG image file", type=["jpg", "jpeg"])
        if uploaded_file is not None:
            image = load_image(Image.open(uploaded_file))
            st.image(image)
            st.subheader("CNN Results")
            model_cnn = load_model("C:/Users/.../Camera_videos/Saved_models/cnn_model.h5")
            model_cnn_ = tf.keras.models.Model(model_cnn.inputs, model_cnn.outputs)
            pred_label = model_cnn_.predict(image)[0]
#if model_name == 'ResNet50':
else:
    st.write("Try out the ResNet50 model with the default image or upload an image")
    if st.sidebar.button("Get prediction", key='predict'):
        st.subheader("Upload an image file")
        uploaded_file = st.file_uploader("Upload a JPG image file", type=["jpg", "jpeg"])
        if uploaded_file is not None:
            image = load_image(Image.open(uploaded_file))
            st.image(image)
            st.subheader("ResNet50 Results")
            model_resnet = load_model("C:/Users/.../Camera_videos/Saved_models/model_resnet.h5")
            model_resnet_ = tf.keras.models.Model(model_resnet.inputs, model_resnet.outputs)
            pred_label = model_resnet_.predict(image)[0]
st.write('Human is detected') if pred_label>0.5 else st.write('No human is detected')
I have also tried writing this code in a different way, but I don't know how to pass the 'uploaded_file', i.e. the file supplied by the user, into my initialize_model() function.
...
# loading images
def load_image(uploaded_file):
    image = uploaded_file.resize((224,224))
    im_array = np.array(image)/255  # a normalised 2D array
    im_array = im_array.reshape(-1, 224, 224, 3)  # to shape as (1, 224, 224, 3)
    return im_array
st.sidebar.subheader("Select a NN Model")
model_name = st.sidebar.selectbox("Model", ("CNN", "ResNet50", "VGG16"))
# predicting images
def initialize_model(model_name, image):
    if model_name == 'CNN':
        st.write("Try out the CNN model with the default image or upload an image")
        if st.sidebar.button("Get prediction", key='predict'):
            st.subheader("CNN Results")
            model_cnn = load_model("C:/Users/.../Camera_videos/Saved_models/cnn_model.h5")
            model_cnn_ = tf.keras.models.Model(model_cnn.inputs, model_cnn.outputs)
            # image = load_image(Image.open(image))
            pred_label = model_cnn_.predict(image)[0]
    if model_name == 'ResNet50':
        if st.sidebar.button("Get prediction", key='predict'):
            st.subheader("ResNet50 Results")
            model_resnet = load_model("C:/Users/.../Camera_videos/Saved_models/model_resnet.h5")
            model_resnet_ = tf.keras.models.Model(model_resnet.inputs, model_resnet.outputs)
            #image = load_image(uploaded_file)
            pred_label = model_resnet_.predict(image)[0]
if st.button("Try with the Default Image"):
    d_image = Image.open('C:/Users/.../oneman/image21.jpg')
    st.image(d_image)
    st.subheader("Human is detected")
    st.image(initialize_model(model_name, d_image))
st.subheader("Upload an image file")
uploaded_file = st.file_uploader("Upload a JPG image file", type=["jpg", "jpeg"])
if uploaded_file is not None:
    sel_image = load_image(Image.open(uploaded_file))
    st.image(sel_image)
Can anyone help me with this problem? Thank you for reading.
Every button should have a unique key value, and you have multiple buttons with the same key value.
if st.sidebar.button("Get prediction", key='predict_btn1'):  # make the key value unique
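A minimal sketch of the fix, giving each branch its own key (the key names here are illustrative):

import streamlit as st

model_name = st.sidebar.selectbox("Model", ("CNN", "ResNet50"))
# Every widget needs its own unique key, even in mutually exclusive branches
if model_name == 'CNN':
    if st.sidebar.button("Get prediction", key='predict_cnn'):
        st.write("Running the CNN model...")
else:
    if st.sidebar.button("Get prediction", key='predict_resnet'):
        st.write("Running the ResNet50 model...")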

How to crop segmented objects from an RCNN?

I'm trying to crop segmented objects output by a Mask R-CNN. The only problem is that when I do the cropping, I get the segments with the mask colors and not their original colors.
Here's the output image with the segments:
and here's one segment (there are 17 segments in this image):
As you can see, we have the segment with the mask color and not the original color.
Here's the code that I'm using:
from mrcnn.config import Config
from mrcnn import model as modellib
from mrcnn import visualize
import numpy as np
import colorsys
import argparse
import imutils
import random
import cv2
import os
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-w", "--weights", required=True,
                help="path to Mask R-CNN model weights pre-trained on COCO")
ap.add_argument("-l", "--labels", required=True,
                help="path to class labels file")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="minimum probability to filter weak detections")
ap.add_argument("-i", "--image", required=True,
                help="path to input image to apply Mask R-CNN to")
args = vars(ap.parse_args())
# load the class label names from disk, one label per line
CLASS_NAMES = open(args["labels"]).read().strip().split("\n")
# generate random (but visually distinct) colors for each class label
# (thanks to Matterport Mask R-CNN for the method!)
hsv = [(i / len(CLASS_NAMES), 1, 1.0) for i in range(len(CLASS_NAMES))]
COLORS = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv))
random.seed(42)
random.shuffle(COLORS)
class SimpleConfig(Config):
    # give the configuration a recognizable name
    NAME = "fashion"
    # set the number of GPUs to use along with the number of images per GPU
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    NUM_CLASSES = 1 + 3
    # skip detections below the confidence threshold passed on the command line
    DETECTION_MIN_CONFIDENCE = args["confidence"]
# initialize the inference configuration
config = SimpleConfig()
# initialize the Mask R-CNN model for inference and then load the
# weights
print("[INFO] loading Mask R-CNN model...")
model = modellib.MaskRCNN(mode="inference", config=config,
                          model_dir=os.getcwd())
model.load_weights(args["weights"], by_name=True)
# load the input image, convert it from BGR to RGB channel
# ordering, and resize the image
# default value 512 form the width
image = cv2.imread(args["image"])
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = imutils.resize(image, width=1150)
# perform a forward pass of the network to obtain the results
print("[INFO] making predictions with Mask R-CNN...")
r = model.detect([image], verbose=1)[0]
image = visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                                    ['BG', 'top', 'boots', 'bag'], r['scores'],
                                    title="")
# get and then save the segmented objects
mask = r["masks"]
for i in range(mask.shape[2]):
    image = cv2.imread(args["image"])
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = imutils.resize(image, width=1150)
    for j in range(image.shape[2]):
        image[:, :, j] = image[:, :, j] * mask[:, :, i]
    filename = "Output/segment_%d.jpg" % i
    cv2.imwrite(filename, image)
Any help on how to resolve this issue would be much appreciated, thank you.
I think you need to change this line in visualize.display_instances and change facecolor from 'none' to None.
I think it is creating random colors even if you don't specify them explicitly.
I found the error. As was suggested to me on GitHub, I had to remove the
`image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)`
line, because my image was already converted to RGB.
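For completeness, a minimal sketch of the save loop with that fix applied: read the image once in BGR and write it back in BGR, since cv2.imwrite expects BGR channel order (r and args come from the question's code):

import cv2
import imutils

masks = r["masks"]  # boolean array of shape (H, W, num_instances)
for i in range(masks.shape[2]):
    image = cv2.imread(args["image"])          # keep BGR; no cvtColor here
    image = imutils.resize(image, width=1150)  # same size the masks were computed at
    segment = image * masks[:, :, i:i + 1]     # zero out pixels outside the mask
    cv2.imwrite("Output/segment_%d.jpg" % i, segment)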

Why does my little Flask app with Keras and a TensorFlow backend crash on subsequent image uploads?

For an assignment, we did image classification with Keras and TF that classifies dogs vs. cats. I wrote a simple Flask app that demos uploading a picture and displays whether the uploaded image scores dog or cat.
I can run a single upload and it displays nicely, but if I upload another image, I get:
TypeError: Cannot interpret feed_dict key as Tensor: Tensor Tensor("Placeholder:0", shape=(3, 3, 3, 32), dtype=float32) is not an element of this graph.
with the line:
model = load_model('/Volumes/T5_500G/Capstone/v2/flask/models/model_weights.h5')
highlighted in the Flask debug page.
I'm new to this, so I'm confused about how to fix it. Here is my code:
@app.route('/upload_image', methods=['GET', 'POST'])
def upload_image():
    prediction_results = {}
    animals = ['% Cat Score', '% Dog Score']
    animals_scores = []
    imageUrl = ""
    if request.method == 'POST':
        if request.files:
            import keras
            from keras.models import load_model
            from keras import backend as K
            import numpy as np
            model = load_model('/Volumes/T5_500G/Capstone/v2/flask/models/model_weights.h5')
            print(request.files)
            print(request.files['image'].filename)
            imageUrl = "/static/uploads/" + request.files['image'].filename
            image2 = app.config['IMAGE_UPLOADS'] + "/" + request.files['image'].filename
            request.files['image'].save(image2)
            img_path = image2
            img = keras.preprocessing.image.load_img(img_path, target_size=(224,224))
            img_array = keras.preprocessing.image.img_to_array(img)
            expanded_img_array = np.expand_dims(img_array, axis=0)
            preprocessed_img = expanded_img_array / 255.  # Preprocess the image
            prediction = model.predict(preprocessed_img)
            pred_list = prediction.tolist()
            animals_scores.append(pred_list[0][0])
            animals_scores.append(pred_list[0][1])
            print(prediction_results)
            imageUrl = "/static/uploads/" + request.files['image'].filename
    return render_template('/upload_image.html', imageUrl=imageUrl, animals=animals, animals_scores=animals_scores)
The upload paths are paths on my laptop. I'm hoping to fix this and push to Heroku or similar.
I've read some posts about clearing the Keras session. Is this the issue?
Thanks for any assistance.
I've changed your code a bit according to my comment; it should be working now.
import keras
from keras.models import load_model
from keras import backend as K
import numpy as np
model = load_model('/Volumes/T5_500G/Capstone/v2/flask/models/model_weights.h5')
@app.route('/upload_image', methods=['GET', 'POST'])
def upload_image():
    prediction_results = {}
    animals = ['% Cat Score', '% Dog Score']
    animals_scores = []
    imageUrl = ""
    if request.method == 'POST':
        if request.files:
            print(request.files)
            print(request.files['image'].filename)
            imageUrl = "/static/uploads/" + request.files['image'].filename
            image2 = app.config['IMAGE_UPLOADS'] + "/" + request.files['image'].filename
            request.files['image'].save(image2)
            img_path = image2
            img = keras.preprocessing.image.load_img(img_path, target_size=(224,224))
            img_array = keras.preprocessing.image.img_to_array(img)
            expanded_img_array = np.expand_dims(img_array, axis=0)
            preprocessed_img = expanded_img_array / 255.  # Preprocess the image
            prediction = model.predict(preprocessed_img)
            pred_list = prediction.tolist()
            animals_scores.append(pred_list[0][0])
            animals_scores.append(pred_list[0][1])
            print(prediction_results)
            imageUrl = "/static/uploads/" + request.files['image'].filename
    return render_template('/upload_image.html', imageUrl=imageUrl, animals=animals, animals_scores=animals_scores)
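If you prefer to keep loading the model lazily inside the request handler, another common workaround on TF1-era Keras is to capture the default graph at load time and re-enter it on every predict call, since Flask may serve requests from different threads; a hedged sketch under that assumption:

import tensorflow as tf
from keras.models import load_model

model = load_model('/Volumes/T5_500G/Capstone/v2/flask/models/model_weights.h5')
graph = tf.get_default_graph()  # the graph the model's tensors live in

def predict(preprocessed_img):
    # Re-enter the original graph; other threads get a fresh default graph,
    # which is what triggers the "not an element of this graph" error.
    with graph.as_default():
        return model.predict(preprocessed_img)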

Using Multithreading for Inference in Tensorflow with OpenCV

I know using multithreading is useful when training a DNN with TensorFlow.
But does it make any sense to use it for inference? For example, if you are using Google's Object Detection API for real-time object detection in video streams?
And if yes, how is it implemented?
I created a GitHub repo (https://github.com/GustavZ/realtime_object_detection) that allows easy real-time object detection, but I am not satisfied with the generated FPS, so I thought about using multithreading to speed it up.
Does anybody have experience with this, or could help me implement it in my code?
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Dec 21 12:01:40 2017
@author: GustavZ
"""
import numpy as np
import os
import six.moves.urllib as urllib
import tarfile
import tensorflow as tf
import cv2
# Protobuf Compilation (once necessary)
os.system('protoc object_detection/protos/*.proto --python_out=.')
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
from stuff.helper import FPS2
# Define Video Input
# Must be OpenCV readable
# 0 = Default Camera
video_input = 0
width = 640
height = 480
fps_interval = 3
# Model preparation
# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = 'models/' + MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
LABEL_MAP = 'mscoco_label_map.pbtxt'
PATH_TO_LABELS = 'object_detection/data/' + LABEL_MAP
NUM_CLASSES = 90
# Download Model
if not os.path.isfile(PATH_TO_CKPT):
    print('Model not found. Downloading it now.')
    opener = urllib.request.URLopener()
    opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
    tar_file = tarfile.open(MODEL_FILE)
    for file in tar_file.getmembers():
        file_name = os.path.basename(file.name)
        if 'frozen_inference_graph.pb' in file_name:
            tar_file.extract(file, os.getcwd())
    os.remove('../' + MODEL_FILE)
else:
    print('Model found. Proceed.')
# Load a (frozen) Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# Start Video Stream
video_stream = cv2.VideoCapture(video_input)
video_stream.set(cv2.CAP_PROP_FRAME_WIDTH, width)
video_stream.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
# Detection
print ("Press 'q' to Exit")
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:  # config=tf.ConfigProto(log_device_placement=True)
        # Define input and output Tensors for detection_graph
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # Score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        # fps calculation
        fps = FPS2(fps_interval).start()
        while video_stream.isOpened():
            ret_val, image_np = video_stream.read()
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            cv2.imshow('object_detection', image_np)
            # Exit Option
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            fps.update()
# End everything
video_stream.release()
cv2.destroyAllWindows()
fps.stop()
print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
It makes sense only if you run it on a device where your computing capacity is limited.
Basically, you would run the image processing and the inference in different threads.
The result would be a smooth video display, and your inference would lag behind without affecting your display framerate.
You can see in this file an example (just a draft, not tested yet) of what the multithreading would look like.
I load my model and start my session, then loop over the captured video, feeding my prediction queue whenever I have the capacity to run inference.
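A minimal, framework-agnostic sketch of that producer/consumer pattern; the infer callback stands in for the sess.run(...) call in the script above and is a hypothetical wrapper:

import threading
import queue

import cv2

frame_queue = queue.Queue(maxsize=1)  # hold only the freshest frame

def capture_loop(source=0):
    # Producer: grab frames as fast as the camera delivers them
    stream = cv2.VideoCapture(source)
    while stream.isOpened():
        ret, frame = stream.read()
        if not ret:
            break
        if frame_queue.full():
            try:
                frame_queue.get_nowait()  # drop the stale frame
            except queue.Empty:
                pass
        frame_queue.put(frame)
    stream.release()

def inference_loop(infer):
    # Consumer: run the (slow) model on whichever frame is newest
    while True:
        frame = frame_queue.get()
        infer(frame)  # e.g. a wrapper around the sess.run(...) call above

threading.Thread(target=capture_loop, daemon=True).start()
# inference_loop(my_infer_fn)  # my_infer_fn is a hypothetical inference wrapper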
