Fixing bugs on yolo update

Author: Elijah Harmon, 2022-09-25 20:27:52 -04:00
Commit: 65cb7a2111
Parent: 115a454637
64 changed files with 3697 additions and 459 deletions

.gitignore (3 additions)

@@ -8,3 +8,6 @@ yolov5s.engine
 yolov5
 tensorrt-8.4.1.5-cp39-none-win_amd64.whl
 tensorrt-8.4.1.5-cp310-none-win_amd64.whl
+__pycache__
+utils/__pycache__
+models/__pycache__

main.py (36 changes)

@@ -5,12 +5,13 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 import dxcam

 def main():
     # Window title of the game, don't need the entire name
     videoGameWindowTitle = "Counter"
@@ -63,8 +64,10 @@ def main():
     # sctArea = {"mon": 1, "top": 0, "left": 0, "width": 1920, "height": 1080}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + screenShotWidth, top + screenShotHeight
     region = (left, top, right, bottom)
@@ -81,7 +84,8 @@ def main():
     sTime = time.time()

     # Loading Yolo5 Small AI Model
-    model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, force_reload=True)
+    model = torch.hub.load('ultralytics/yolov5', 'yolov5s',
+                           pretrained=True, force_reload=True)
     stride, names, pt = model.stride, model.names, model.pt
     model.half()
@@ -109,8 +113,8 @@ def main():
         results = model(im, size=screenShotHeight)

         # Suppressing results that dont meet thresholds
-        pred = non_max_suppression(results, 0.25, 0.25, 0, False, max_det=1000)
+        pred = non_max_suppression(
+            results, 0.25, 0.25, 0, False, max_det=1000)

         # Converting output to usable cords
         targets = []
@@ -123,9 +127,11 @@ def main():
                     s += f"{n} {names[int(c)]}, "  # add to string

                 for *xyxy, conf, cls in reversed(det):
-                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                        1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -134,7 +140,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -151,7 +158,8 @@ def main():
             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]
         else:
@@ -165,7 +173,8 @@ def main():
                 halfH = round(targets["height"][i] / 2)
                 midX = targets['current_mid_x'][i]
                 midY = targets['current_mid_y'][i]
-                (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+                (startX, startY, endX, endY) = int(
+                    midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)

                 confidence = .5
@@ -197,5 +206,6 @@ def main():
                 exit()
     camera.stop()

 if __name__ == "__main__":
     main()
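The recentring arithmetic above is identical in every capture script, so a small standalone sketch may help; the window numbers below are made up for illustration, not taken from the commit.

# Sketch of the capture-region math used above (hypothetical window values).
def capture_region(win_left, win_right, win_top, win_height,
                   shot_width, shot_height, right_shift=0):
    # Horizontal: center the capture box on the window's midpoint, then shift.
    left = right_shift + ((win_left + win_right) // 2) - (shot_width // 2)
    # Vertical: center the capture box inside the window.
    top = win_top + (win_height - shot_height) // 2
    return (left, top, left + shot_width, top + shot_height)

# Example: a 1920x1080 window at the origin, capturing a 320x320 box.
print(capture_region(0, 1920, 0, 1080, 320, 320))  # (800, 380, 1120, 700)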

Another changed file (name not captured):

@@ -5,13 +5,14 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 import dxcam
 import torch

 def main():
     # Window title to go after and the height of the screenshots
     videoGameWindowTitle = "Counter"
@@ -61,8 +62,10 @@ def main():
                "height": screenShotHeight}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + 320, top + 320
     region = (left, top, right, bottom)
@@ -82,7 +85,8 @@ def main():
     so = ort.SessionOptions()
     so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-    ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=['CUDAExecutionProvider'])
+    ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=[
+                                    'CUDAExecutionProvider'])

     # Used for colors drawn on bounding boxes
     COLORS = np.random.uniform(0, 255, size=(1500, 3))
@@ -99,7 +103,8 @@ def main():
         im = torch.from_numpy(outputs[0]).to('cpu')

-        pred = non_max_suppression(im, confidence, confidence, 0, False, max_det=10)
+        pred = non_max_suppression(
+            im, confidence, confidence, 0, False, max_det=10)

         targets = []
         for i, det in enumerate(pred):
@@ -111,9 +116,11 @@ def main():
                     s += f"{n} {int(c)}, "  # add to string

                 for *xyxy, conf, cls in reversed(det):
-                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                        1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -122,7 +129,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -139,7 +147,8 @@ def main():
             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]
         else:
@@ -153,7 +162,8 @@ def main():
                 halfH = round(targets["height"][i] / 2)
                 midX = targets['current_mid_x'][i]
                 midY = targets['current_mid_y'][i]
-                (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+                (startX, startY, endX, endY) = int(midX + halfW), int(midY +
+                                                                      halfH), int(midX - halfW), int(midY - halfH)

                 idx = 0
                 # draw the bounding box and label on the frame
@@ -183,5 +193,6 @@ def main():
     camera.stop()

 if __name__ == "__main__":
     main()
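For reference, a minimal sketch of the ONNX Runtime session setup that the reformatted call above corresponds to; the model path and the CPU fallback provider are assumptions for illustration, not part of the commit.

import onnxruntime as ort

# Minimal sketch: build a session with full graph optimizations and a GPU-first
# provider list that falls back to CPU when CUDA is unavailable (assumption).
so = ort.SessionOptions()
so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=providers)
print([i.name for i in ort_sess.get_inputs()])   # model input names
print([o.name for o in ort_sess.get_outputs()])  # model output names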

Another changed file (name not captured):

@@ -6,13 +6,14 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 import dxcam
 import torch

 def main():
     # Window title to go after and the height of the screenshots
     videoGameWindowTitle = "Counter"
@@ -62,8 +63,10 @@ def main():
                "height": screenShotHeight}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + 320, top + 320
     region = (left, top, right, bottom)
@@ -81,7 +84,8 @@ def main():
     so = ort.SessionOptions()
     so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-    ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=['CUDAExecutionProvider'])
+    ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=[
+                                    'CUDAExecutionProvider'])

     # Used for colors drawn on bounding boxes
     COLORS = np.random.uniform(0, 255, size=(1500, 3))
@@ -98,7 +102,8 @@ def main():
         im = torch.from_numpy(outputs[0]).to('cpu')

-        pred = non_max_suppression(im, confidence, confidence, 0, False, max_det=10)
+        pred = non_max_suppression(
+            im, confidence, confidence, 0, False, max_det=10)

         targets = []
         for i, det in enumerate(pred):
@@ -110,9 +115,11 @@ def main():
                     s += f"{n} {int(c)}, "  # add to string

                 for *xyxy, conf, cls in reversed(det):
-                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                        1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -121,7 +128,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -138,7 +146,8 @@ def main():
             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]
         else:
@@ -152,7 +161,8 @@ def main():
                 halfH = round(targets["height"][i] / 2)
                 midX = targets['current_mid_x'][i]
                 midY = targets['current_mid_y'][i]
-                (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+                (startX, startY, endX, endY) = int(midX + halfW), int(midY +
+                                                                      halfH), int(midX - halfW), int(midY - halfH)

                 idx = 0
                 # draw the bounding box and label on the frame
@@ -181,5 +191,6 @@ def main():
                 exit()
     camera.stop()

 if __name__ == "__main__":
     main()
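Each script normalizes the xyxy detections into xywh fractions of the frame before putting them in the DataFrame; a small sketch of that conversion with a made-up box and frame size (it mirrors YOLOv5's xyxy2xywh divided by the gain tensor, restated locally so it runs on its own).

# Sketch of the normalization done in the detection loop above.
def to_normalized_xywh(xyxy, frame_w, frame_h):
    x1, y1, x2, y2 = xyxy
    return [((x1 + x2) / 2) / frame_w,   # current_mid_x
            ((y1 + y2) / 2) / frame_h,   # current_mid_y
            (x2 - x1) / frame_w,         # width
            (y2 - y1) / frame_h]         # height

# Hypothetical 320x320 frame and one pixel-space box.
print(to_normalized_xywh([100.0, 80.0, 180.0, 240.0], 320, 320))
# [0.4375, 0.5, 0.25, 0.5] -> center x/y, width, height as fractions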

Another changed file (name not captured):

@@ -5,14 +5,15 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 from models.common import DetectMultiBackend
 import dxcam
 import cupy as cp

 def main():
     # Window title to go after and the height of the screenshots
     videoGameWindowTitle = "Counter"
@@ -62,8 +63,10 @@ def main():
                "height": screenShotHeight}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + screenShotWidth, top + screenShotHeight
     region = (left, top, right, bottom)
@@ -80,7 +83,8 @@ def main():
     sTime = time.time()

     # Loading Yolo5 Small AI Model
-    model = DetectMultiBackend('yolov5s320Half.engine', device=torch.device('cuda'), dnn=False, data='', fp16=True)
+    model = DetectMultiBackend('yolov5s320Half.engine', device=torch.device(
+        'cuda'), dnn=False, data='', fp16=True)
     stride, names, pt = model.stride, model.names, model.pt

     # Used for colors drawn on bounding boxes
@@ -100,12 +104,14 @@ def main():
         # Converting to numpy for visuals
         im0 = im[0].permute(1, 2, 0) * 255
         im0 = im0.cpu().numpy().astype(np.uint8)
-        im0 = cv2.cvtColor(im0, cv2.COLOR_RGB2BGR)  # Image has to be in BGR for visualization
+        # Image has to be in BGR for visualization
+        im0 = cv2.cvtColor(im0, cv2.COLOR_RGB2BGR)

         # Detecting all the objects
         results = model(im)

-        pred = non_max_suppression(results, confidence, confidence, 0, False, max_det=10)
+        pred = non_max_suppression(
+            results, confidence, confidence, 0, False, max_det=10)

         targets = []
         for i, det in enumerate(pred):
@@ -117,9 +123,11 @@ def main():
                     s += f"{n} {names[int(c)]}, "  # add to string

                 for *xyxy, conf, cls in reversed(det):
-                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                        1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -128,7 +136,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -145,7 +154,8 @@ def main():
             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]
         else:
@@ -159,7 +169,8 @@ def main():
                 halfH = round(targets["height"][i] / 2)
                 midX = targets['current_mid_x'][i]
                 midY = targets['current_mid_y'][i]
-                (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+                (startX, startY, endX, endY) = int(
+                    midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)

                 idx = 0
                 # draw the bounding box and label on the frame
@@ -188,5 +199,6 @@ def main():
                 exit()
     camera.stop()

 if __name__ == "__main__":
     main()
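The tracking step in all of these scripts measures the Euclidean distance from each detection's midpoint to the previously aimed-at midpoint and sorts on it; a short pandas sketch with fabricated rows (note that sort_values returns a new frame, so the sketch assigns the result so the ordering actually sticks, and it sorts ascending so the nearest row comes first; those two details are choices made here, not copied from the diff).

import numpy as np
import pandas as pd

# Fabricated detections: normalized mid-x, mid-y, width, height.
targets = pd.DataFrame([[0.40, 0.50, 0.10, 0.30],
                        [0.70, 0.20, 0.05, 0.15]],
                       columns=['current_mid_x', 'current_mid_y', 'width', 'height'])
last_mid_coord = [0.65, 0.25]  # where the crosshair was last frame (assumed)

targets['last_mid_x'] = last_mid_coord[0]
targets['last_mid_y'] = last_mid_coord[1]
targets['dist'] = np.linalg.norm(
    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]].values, axis=1)
targets = targets.sort_values(by='dist', ascending=True)  # nearest target first
print(targets[['current_mid_x', 'current_mid_y', 'dist']].head(1))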

Another changed file (name not captured):

@@ -5,12 +5,13 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 import dxcam

 def main():
     # Window title to go after and the height of the screenshots
     videoGameWindowTitle = "Counter"
@@ -60,8 +61,10 @@ def main():
                "height": screenShotHeight}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + screenShotWidth, top + screenShotHeight
     region = (left, top, right, bottom)
@@ -78,7 +81,8 @@ def main():
     sTime = time.time()

     # Loading Yolo5 Small AI Model
-    model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, force_reload=True)
+    model = torch.hub.load('ultralytics/yolov5', 'yolov5s',
+                           pretrained=True, force_reload=True)
     stride, names, pt = model.stride, model.names, model.pt
     model.half()
@@ -106,7 +110,8 @@ def main():
         results = model(im, size=screenShotHeight)

         # Suppressing results that dont meet thresholds
-        pred = non_max_suppression(results, confidence, confidence, 0, False, max_det=10)
+        pred = non_max_suppression(
+            results, confidence, confidence, 0, False, max_det=10)

         # Converting output to usable cords
         targets = []
@@ -119,9 +124,11 @@ def main():
                     s += f"{n} {names[int(c)]}, "  # add to string

                 for *xyxy, conf, cls in reversed(det):
-                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                        1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -130,7 +137,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -147,7 +155,8 @@ def main():
             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]
         else:
@@ -161,7 +170,8 @@ def main():
                 halfH = round(targets["height"][i] / 2)
                 midX = targets['current_mid_x'][i]
                 midY = targets['current_mid_y'][i]
-                (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+                (startX, startY, endX, endY) = int(
+                    midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)

                 idx = 0
                 # draw the bounding box and label on the frame
@@ -190,5 +200,6 @@ def main():
                 exit()
     camera.stop()

 if __name__ == "__main__":
     main()
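All of the capture scripts gate the aim nudge on win32api.GetKeyState(0x14) (the Caps Lock toggle) and send a relative MOUSEEVENTF_MOVE; a minimal Windows-only sketch of that call, with the offsets and multiplier invented for illustration.

import win32api
import win32con

# Windows-only sketch: nudge the cursor by a scaled relative offset while the
# Caps Lock toggle (virtual key 0x14) is on. Offsets and multiplier are made up.
aaMovementAmp = 0.8
mouseMove = [12, -5]  # pixels toward the chosen target (hypothetical)

if win32api.GetKeyState(0x14):  # non-zero low-order bit when Caps Lock is toggled
    win32api.mouse_event(win32con.MOUSEEVENTF_MOVE,
                         int(mouseMove[0] * aaMovementAmp),
                         int(mouseMove[1] * aaMovementAmp), 0, 0)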

Another changed file (name not captured):

@@ -10,6 +10,7 @@ import warnings
 from collections import OrderedDict, namedtuple
 from copy import copy
 from pathlib import Path
+from urllib.parse import urlparse

 import cv2
 import numpy as np
@@ -22,26 +23,51 @@ from torch.cuda import amp
 from utils.dataloaders import exif_transpose, letterbox
 from utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr,
-                           increment_path, make_divisible, non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh,
+                           increment_path, make_divisible, non_max_suppression, scale_boxes, xywh2xyxy, xyxy2xywh,
                            yaml_load)
 from utils.plots import Annotator, colors, save_one_box
 from utils.torch_utils import copy_attr, smart_inference_mode

-def autopad(k, p=None):  # kernel, padding
-    # Pad to 'same'
+def export_formats():
+    # YOLOv5 export formats
+    x = [
+        ['PyTorch', '-', '.pt', True, True],
+        ['TorchScript', 'torchscript', '.torchscript', True, True],
+        ['ONNX', 'onnx', '.onnx', True, True],
+        ['OpenVINO', 'openvino', '_openvino_model', True, False],
+        ['TensorRT', 'engine', '.engine', False, True],
+        ['CoreML', 'coreml', '.mlmodel', True, False],
+        ['TensorFlow SavedModel', 'saved_model', '_saved_model', True, True],
+        ['TensorFlow GraphDef', 'pb', '.pb', True, True],
+        ['TensorFlow Lite', 'tflite', '.tflite', True, False],
+        ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False, False],
+        ['TensorFlow.js', 'tfjs', '_web_model', False, False],
+        ['PaddlePaddle', 'paddle', '_paddle_model', True, True], ]
+    return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU'])
+
+
+def autopad(k, p=None, d=1):  # kernel, padding, dilation
+    # Pad to 'same' shape outputs
+    if d > 1:
+        k = d * (k - 1) + 1 if isinstance(k,
+                                          int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
     if p is None:
         p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
     return p


 class Conv(nn.Module):
-    # Standard convolution
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+    # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
+    default_act = nn.SiLU()  # default activation
+
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
         super().__init__()
-        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
+        self.conv = nn.Conv2d(c1, c2, k, s, autopad(
+            k, p, d), groups=g, dilation=d, bias=False)
         self.bn = nn.BatchNorm2d(c2)
-        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
+        self.act = self.default_act if act is True else act if isinstance(
+            act, nn.Module) else nn.Identity()

     def forward(self, x):
         return self.act(self.bn(self.conv(x)))
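The new autopad accounts for dilation by first computing the effective kernel size; a quick standalone restatement of the arithmetic above so the numbers can be checked without importing the repo.

# Standalone restatement of the autopad logic shown above, for checking values.
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]
    return p

print(autopad(3))        # 1 -> 'same' padding for a plain 3x3 kernel
print(autopad(3, d=2))   # 2 -> effective kernel 5, so padding 2 keeps the shape
print(autopad([3, 5]))   # [1, 2] for per-dimension kernel sizes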
@@ -51,14 +77,16 @@ class Conv(nn.Module):

 class DWConv(Conv):
-    # Depth-wise convolution class
-    def __init__(self, c1, c2, k=1, s=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
-        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
+    # Depth-wise convolution
+    # ch_in, ch_out, kernel, stride, dilation, activation
+    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):
+        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)


 class DWConvTranspose2d(nn.ConvTranspose2d):
-    # Depth-wise transpose convolution class
-    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
+    # Depth-wise transpose convolution
+    # ch_in, ch_out, kernel, stride, padding, padding_out
+    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):
         super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
@@ -87,7 +115,8 @@ class TransformerBlock(nn.Module):
         if c1 != c2:
             self.conv = Conv(c1, c2)
         self.linear = nn.Linear(c2, c2)  # learnable position embedding
-        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
+        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads)
+                                  for _ in range(num_layers)))
         self.c2 = c2

     def forward(self, x):
@@ -100,7 +129,8 @@ class TransformerBlock(nn.Module):

 class Bottleneck(nn.Module):
     # Standard bottleneck
-    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
+    # ch_in, ch_out, shortcut, groups, expansion
+    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
class BottleneckCSP(nn.Module): class BottleneckCSP(nn.Module):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion # ch_in, ch_out, number, shortcut, groups, expansion
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__() super().__init__()
c_ = int(c2 * e) # hidden channels c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1) self.cv1 = Conv(c1, c_, 1, 1)
@ -122,7 +153,8 @@ class BottleneckCSP(nn.Module):
self.cv4 = Conv(2 * c_, c2, 1, 1) self.cv4 = Conv(2 * c_, c2, 1, 1)
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
self.act = nn.SiLU() self.act = nn.SiLU()
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) self.m = nn.Sequential(
*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
def forward(self, x): def forward(self, x):
y1 = self.cv3(self.m(self.cv1(x))) y1 = self.cv3(self.m(self.cv1(x)))
@@ -146,13 +178,15 @@ class CrossConv(nn.Module):

 class C3(nn.Module):
     # CSP Bottleneck with 3 convolutions
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+    # ch_in, ch_out, number, shortcut, groups, expansion
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
         self.cv2 = Conv(c1, c_, 1, 1)
         self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
-        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
+        self.m = nn.Sequential(
+            *(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

     def forward(self, x):
         return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
@@ -163,7 +197,8 @@ class C3x(C3):
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         super().__init__(c1, c2, n, shortcut, g, e)
         c_ = int(c2 * e)
-        self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
+        self.m = nn.Sequential(
+            *(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))


 class C3TR(C3):
@@ -197,12 +232,14 @@ class SPP(nn.Module):
         c_ = c1 // 2  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
         self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
-        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
+        self.m = nn.ModuleList(
+            [nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

     def forward(self, x):
         x = self.cv1(x)
         with warnings.catch_warnings():
-            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
+            # suppress torch 1.9.0 max_pool2d() warning
+            warnings.simplefilter('ignore')
             return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
@@ -218,7 +255,8 @@ class SPPF(nn.Module):
     def forward(self, x):
         x = self.cv1(x)
         with warnings.catch_warnings():
-            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
+            # suppress torch 1.9.0 max_pool2d() warning
+            warnings.simplefilter('ignore')
             y1 = self.m(x)
             y2 = self.m(y1)
             return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
@@ -226,9 +264,10 @@ class SPPF(nn.Module):

 class Focus(nn.Module):
     # Focus wh information into c-space
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+    # ch_in, ch_out, kernel, stride, padding, groups
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
         super().__init__()
-        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
+        self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
         # self.contract = Contract(gain=2)

     def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
@@ -238,11 +277,12 @@ class Focus(nn.Module):

 class GhostConv(nn.Module):
     # Ghost Convolution https://github.com/huawei-noah/ghostnet
-    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
+    # ch_in, ch_out, kernel, stride, groups
+    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
         super().__init__()
         c_ = c2 // 2  # hidden channels
-        self.cv1 = Conv(c1, c_, k, s, None, g, act)
-        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
+        self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
+        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)

     def forward(self, x):
         y = self.cv1(x)
@@ -310,7 +350,7 @@ class DetectMultiBackend(nn.Module):
         #   PyTorch:          weights = *.pt
         #   TorchScript:                *.torchscript
        #   ONNX Runtime:               *.onnx
-        #   ONNX OpenCV DNN:            *.onnx with --dnn
+        #   ONNX OpenCV DNN:            *.onnx --dnn
         #   OpenVINO:                   *.xml
         #   CoreML:                     *.mlmodel
         #   TensorRT:                   *.engine
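DetectMultiBackend picks a backend from the weight file's suffix; a rough sketch of that dispatch for a few of the formats listed above. The helper name and the abbreviated suffix table here are inventions for illustration; the real _model_type() further down derives its suffix list from export_formats().

from pathlib import Path

# Rough sketch (invented helper): map a weights path to a backend label,
# mirroring a handful of the suffixes listed in the comment block above.
def guess_backend(weights):
    suffix = Path(weights).suffix.lower()
    table = {'.pt': 'PyTorch', '.torchscript': 'TorchScript', '.onnx': 'ONNX',
             '.engine': 'TensorRT', '.mlmodel': 'CoreML', '.tflite': 'TensorFlow Lite'}
    return table.get(suffix, 'unknown')

print(guess_backend('yolov5s320Half.engine'))  # TensorRT
print(guess_backend('yolov5s320.onnx'))        # ONNX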
@@ -318,25 +358,35 @@ class DetectMultiBackend(nn.Module):
         #   TensorFlow GraphDef:        *.pb
         #   TensorFlow Lite:            *.tflite
         #   TensorFlow Edge TPU:        *_edgetpu.tflite
-        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import
+        #   PaddlePaddle:               *_paddle_model
+        # scoped to avoid circular import
+        from models.experimental import attempt_download, attempt_load
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
-        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self._model_type(w)  # get backend
-        w = attempt_download(w)  # download if not local
+        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(
+            w)
         fp16 &= pt or jit or onnx or engine  # FP16
+        # BHWC formats (vs torch BCWH)
+        nhwc = coreml or saved_model or pb or tflite or edgetpu
         stride = 32  # default stride
+        cuda = torch.cuda.is_available() and device.type != 'cpu'  # use CUDA
+        if not (pt or triton):
+            w = attempt_download(w)  # download if not local

         if pt:  # PyTorch
-            model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
+            model = attempt_load(weights if isinstance(
+                weights, list) else w, device=device, inplace=True, fuse=fuse)
             stride = max(int(model.stride.max()), 32)  # model stride
-            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+            names = model.module.names if hasattr(
+                model, 'module') else model.names  # get class names
             model.half() if fp16 else model.float()
             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
         elif jit:  # TorchScript
             LOGGER.info(f'Loading {w} for TorchScript inference...')
             extra_files = {'config.txt': ''}  # model metadata
-            model = torch.jit.load(w, _extra_files=extra_files)
+            model = torch.jit.load(
+                w, _extra_files=extra_files, map_location=device)
             model.half() if fp16 else model.float()
             if extra_files['config.txt']:  # load metadata dict
                 d = json.loads(extra_files['config.txt'],
@@ -345,14 +395,15 @@ class DetectMultiBackend(nn.Module):
                 stride, names = int(d['stride']), d['names']
         elif dnn:  # ONNX OpenCV DNN
             LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
-            check_requirements(('opencv-python>=4.5.4',))
+            check_requirements('opencv-python>=4.5.4')
             net = cv2.dnn.readNetFromONNX(w)
         elif onnx:  # ONNX Runtime
             LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
-            cuda = torch.cuda.is_available() and device.type != 'cpu'
-            check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
+            check_requirements(
+                ('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
             import onnxruntime
-            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
+            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else [
+                'CPUExecutionProvider']
             session = onnxruntime.InferenceSession(w, providers=providers)
             output_names = [x.name for x in session.get_outputs()]
             meta = session.get_modelmeta().custom_metadata_map  # metadata
@@ -360,100 +411,149 @@ class DetectMultiBackend(nn.Module):
                 stride, names = int(meta['stride']), eval(meta['names'])
         elif xml:  # OpenVINO
             LOGGER.info(f'Loading {w} for OpenVINO inference...')
-            check_requirements(('openvino',))  # requires openvino-dev: https://pypi.org/project/openvino-dev/
+            # requires openvino-dev: https://pypi.org/project/openvino-dev/
+            check_requirements('openvino')
             from openvino.runtime import Core, Layout, get_batch
             ie = Core()
             if not Path(w).is_file():  # if not *.xml
-                w = next(Path(w).glob('*.xml'))  # get *.xml file from *_openvino_model dir
-            network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
+                # get *.xml file from *_openvino_model dir
+                w = next(Path(w).glob('*.xml'))
+            network = ie.read_model(
+                model=w, weights=Path(w).with_suffix('.bin'))
             if network.get_parameters()[0].get_layout().empty:
                 network.get_parameters()[0].set_layout(Layout("NCHW"))
             batch_dim = get_batch(network)
             if batch_dim.is_static:
                 batch_size = batch_dim.get_length()
-            executable_network = ie.compile_model(network, device_name="CPU")  # device_name="MYRIAD" for Intel NCS2
-            output_layer = next(iter(executable_network.outputs))
-            stride, names = self._load_metadata(Path(w).with_suffix('.yaml'))  # load metadata
+            # device_name="MYRIAD" for Intel NCS2
+            executable_network = ie.compile_model(network, device_name="CPU")
+            stride, names = self._load_metadata(
+                Path(w).with_suffix('.yaml'))  # load metadata
         elif engine:  # TensorRT
             LOGGER.info(f'Loading {w} for TensorRT inference...')
             import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
-            check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
+            # require tensorrt>=7.0.0
+            check_version(trt.__version__, '7.0.0', hard=True)
             if device.type == 'cpu':
                 device = torch.device('cuda:0')
-            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
+            Binding = namedtuple(
+                'Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
             logger = trt.Logger(trt.Logger.INFO)
             with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
                 model = runtime.deserialize_cuda_engine(f.read())
             context = model.create_execution_context()
             bindings = OrderedDict()
+            output_names = []
             fp16 = False  # default updated below
             dynamic = False
-            for index in range(model.num_bindings):
-                name = model.get_binding_name(index)
-                dtype = trt.nptype(model.get_binding_dtype(index))
-                if model.binding_is_input(index):
-                    if -1 in tuple(model.get_binding_shape(index)):  # dynamic
+            for i in range(model.num_bindings):
+                name = model.get_binding_name(i)
+                dtype = trt.nptype(model.get_binding_dtype(i))
+                if model.binding_is_input(i):
+                    if -1 in tuple(model.get_binding_shape(i)):  # dynamic
                         dynamic = True
-                        context.set_binding_shape(index, tuple(model.get_profile_shape(0, index)[2]))
+                        context.set_binding_shape(
+                            i, tuple(model.get_profile_shape(0, i)[2]))
                     if dtype == np.float16:
                         fp16 = True
-                shape = tuple(context.get_binding_shape(index))
+                else:  # output
+                    output_names.append(name)
+                shape = tuple(context.get_binding_shape(i))
                 im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
-                bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
-            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
-            batch_size = bindings['images'].shape[0]  # if dynamic, this is instead max batch size
+                bindings[name] = Binding(
+                    name, dtype, shape, im, int(im.data_ptr()))
+            binding_addrs = OrderedDict((n, d.ptr)
+                                        for n, d in bindings.items())
+            # if dynamic, this is instead max batch size
+            batch_size = bindings['images'].shape[0]
         elif coreml:  # CoreML
             LOGGER.info(f'Loading {w} for CoreML inference...')
             import coremltools as ct
             model = ct.models.MLModel(w)
-        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
-            if saved_model:  # SavedModel
+        elif saved_model:  # TF SavedModel
             LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
             import tensorflow as tf
             keras = False  # assume TF1 saved_model
-            model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
+            model = tf.keras.models.load_model(
+                w) if keras else tf.saved_model.load(w)
         elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
             LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
             import tensorflow as tf

             def wrap_frozen_graph(gd, inputs, outputs):
-                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
+                x = tf.compat.v1.wrap_function(
+                    lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                 ge = x.graph.as_graph_element
                 return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))

-            gd = tf.Graph().as_graph_def()  # graph_def
+            def gd_outputs(gd):
+                name_list, input_list = [], []
+                for node in gd.node:  # tensorflow.core.framework.node_def_pb2.NodeDef
+                    name_list.append(node.name)
+                    input_list.extend(node.input)
+                return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp'))
+
+            gd = tf.Graph().as_graph_def()  # TF GraphDef
             with open(w, 'rb') as f:
                 gd.ParseFromString(f.read())
-            frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs="Identity:0")
+            frozen_func = wrap_frozen_graph(
+                gd, inputs="x:0", outputs=gd_outputs(gd))
         elif tflite or edgetpu:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
             try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                 from tflite_runtime.interpreter import Interpreter, load_delegate
             except ImportError:
                 import tensorflow as tf
                 Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,
-            if edgetpu:  # Edge TPU https://coral.ai/software/#edgetpu-runtime
-                LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
+            if edgetpu:  # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
+                LOGGER.info(
+                    f'Loading {w} for TensorFlow Lite Edge TPU inference...')
                 delegate = {
                     'Linux': 'libedgetpu.so.1',
                     'Darwin': 'libedgetpu.1.dylib',
                     'Windows': 'edgetpu.dll'}[platform.system()]
-                interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
-            else:  # Lite
+                interpreter = Interpreter(model_path=w, experimental_delegates=[
+                    load_delegate(delegate)])
+            else:  # TFLite
                 LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
                 interpreter = Interpreter(model_path=w)  # load TFLite model
             interpreter.allocate_tensors()  # allocate
             input_details = interpreter.get_input_details()  # inputs
             output_details = interpreter.get_output_details()  # outputs
-        elif tfjs:
-            raise NotImplementedError('ERROR: YOLOv5 TF.js inference is not supported')
+        elif tfjs:  # TF.js
+            raise NotImplementedError(
+                'ERROR: YOLOv5 TF.js inference is not supported')
+        elif paddle:  # PaddlePaddle
+            LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
+            check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
+            import paddle.inference as pdi
+            if not Path(w).is_file():  # if not *.pdmodel
+                # get *.xml file from *_openvino_model dir
+                w = next(Path(w).rglob('*.pdmodel'))
+            weights = Path(w).with_suffix('.pdiparams')
+            config = pdi.Config(str(w), str(weights))
+            if cuda:
+                config.enable_use_gpu(
+                    memory_pool_init_size_mb=2048, device_id=0)
+            predictor = pdi.create_predictor(config)
+            input_handle = predictor.get_input_handle(
+                predictor.get_input_names()[0])
+            output_names = predictor.get_output_names()
+        elif triton:  # NVIDIA Triton Inference Server
+            LOGGER.info(f'Using {w} as Triton Inference Server...')
+            check_requirements('tritonclient[all]')
+            from utils.triton import TritonRemoteModel
+            model = TritonRemoteModel(url=w)
+            nhwc = model.runtime.startswith("tensorflow")
         else:
             raise NotImplementedError(f'ERROR: {w} is not a supported format')

         # class names
         if 'names' not in locals():
-            names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)}
+            names = yaml_load(data)['names'] if data else {
+                i: f'class{i}' for i in range(999)}
         if names[0] == 'n01440764' and len(names) == 1000:  # ImageNet
-            names = yaml_load(ROOT / 'data/ImageNet.yaml')['names']  # human-readable names
+            # human-readable names
+            names = yaml_load(ROOT / 'data/ImageNet.yaml')['names']

         self.__dict__.update(locals())  # assign all variables to self
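The TensorRT branch now records every output binding name instead of assuming a single binding called 'output'; a hedged sketch of that enumeration pattern against the TensorRT 8.x Python API (it expects an already-deserialized engine, which is only indicated in a comment here).

import tensorrt as trt  # requires an NVIDIA TensorRT install

# Sketch only: walk a deserialized engine's bindings and split them into the
# inputs and the list of output names, as the reworked __init__ above does.
def split_bindings(engine):
    inputs, outputs = [], []
    for i in range(engine.num_bindings):
        name = engine.get_binding_name(i)
        dtype = trt.nptype(engine.get_binding_dtype(i))
        (inputs if engine.binding_is_input(i) else outputs).append((name, dtype))
    return inputs, outputs

# Usage (assuming 'engine' came from runtime.deserialize_cuda_engine(...)):
# ins, outs = split_bindings(engine)
# print(ins, [name for name, _ in outs])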
@@ -462,9 +562,13 @@ class DetectMultiBackend(nn.Module):
         b, ch, h, w = im.shape  # batch, channel, height, width
         if self.fp16 and im.dtype != torch.float16:
             im = im.half()  # to FP16
+        if self.nhwc:
+            # torch BCHW to numpy BHWC shape(1,320,192,3)
+            im = im.permute(0, 2, 3, 1)

         if self.pt:  # PyTorch
-            y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
+            y = self.model(
+                im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
         elif self.jit:  # TorchScript
             y = self.model(im)
         elif self.dnn:  # ONNX OpenCV DNN
@ -473,52 +577,77 @@ class DetectMultiBackend(nn.Module):
            y = self.net.forward()
        elif self.onnx:  # ONNX Runtime
            im = im.cpu().numpy()  # torch to numpy
            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
        elif self.xml:  # OpenVINO
            im = im.cpu().numpy()  # FP32
            y = list(self.executable_network([im]).values())
        elif self.engine:  # TensorRT
            if self.dynamic and im.shape != self.bindings['images'].shape:
                i = self.model.get_binding_index('images')
                self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
                self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
                for name in self.output_names:
                    i = self.model.get_binding_index(name)
                    self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
            s = self.bindings['images'].shape
            assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
            self.binding_addrs['images'] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = [self.bindings[x].data for x in sorted(self.output_names)]
        elif self.coreml:  # CoreML
            im = im.cpu().numpy()
            im = Image.fromarray((im[0] * 255).astype('uint8'))
            # im = im.resize((192, 320), Image.ANTIALIAS)
            # coordinates are xywh normalized
            y = self.model.predict({'image': im})
            if 'confidence' in y:
                box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
                conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
                y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
            else:
                y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
        elif self.paddle:  # PaddlePaddle
            im = im.cpu().numpy().astype(np.float32)
            self.input_handle.copy_from_cpu(im)
            self.predictor.run()
            y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
        elif self.triton:  # NVIDIA Triton Inference Server
            y = self.model(im)
        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
            im = im.cpu().numpy()
            if self.saved_model:  # SavedModel
                y = self.model(im, training=False) if self.keras else self.model(im)
            elif self.pb:  # GraphDef
                y = self.frozen_func(x=self.tf.constant(im))
            else:  # Lite or Edge TPU
                input = self.input_details[0]
                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
                if int8:
                    scale, zero_point = input['quantization']
                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                self.interpreter.set_tensor(input['index'], im)
                self.interpreter.invoke()
                y = []
                for output in self.output_details:
                    x = self.interpreter.get_tensor(output['index'])
                    if int8:
                        scale, zero_point = output['quantization']
                        x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                    y.append(x)
            y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
            y[0][..., :4] *= [w, h, w, h]  # xywh normalized to pixels

        if isinstance(y, (list, tuple)):
            return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
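For reference, a minimal self-contained sketch of the TFLite int8 round trip used in the Lite/Edge TPU branch above; the scale and zero_point values here are made-up examples, not taken from a real model:

import numpy as np

# Hypothetical quantization parameters, as reported by
# interpreter.get_input_details()[0]['quantization'] for an int8 model.
scale, zero_point = 0.0039, 0

im = np.random.rand(1, 320, 192, 3).astype(np.float32)   # normalized 0-1 input
q = (im / scale + zero_point).astype(np.uint8)            # de-scale: float -> uint8 for the interpreter
deq = (q.astype(np.float32) - zero_point) * scale         # re-scale: uint8 output -> float
print(np.abs(im - deq).max())                             # quantization error, roughly bounded by the scale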
@ -530,23 +659,27 @@ class DetectMultiBackend(nn.Module):
    def warmup(self, imgsz=(1, 3, 640, 640)):
        # Warmup model by running inference once
        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
        if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
            im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
            for _ in range(2 if self.jit else 1):
                self.forward(im)  # warmup

    @staticmethod
    def _model_type(p='path/to/model.pt'):
        # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
        # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
        from utils.downloads import is_url
        sf = list(export_formats().Suffix)  # export suffixes
        if not is_url(p, check=False):
            check_suffix(p, sf)  # checks
        url = urlparse(p)  # if url may be Triton inference server
        types = [s in Path(p).name for s in sf]
        types[8] &= not types[9]  # tflite &= not edgetpu
        triton = not any(types) and all([any(s in url.scheme for s in ["http", "grpc"]), url.netloc])
        return types + [triton]
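A rough illustration of the Triton check above; looks_like_triton is a hypothetical helper written only for this sketch, not a function in the repo:

from urllib.parse import urlparse

def looks_like_triton(source: str) -> bool:
    # An http/grpc URL with a host and no recognised export suffix is treated
    # as a Triton Inference Server endpoint rather than a local weights file.
    url = urlparse(source)
    return any(s in url.scheme for s in ("http", "grpc")) and bool(url.netloc)

print(looks_like_triton("yolov5s.onnx"))              # False - a local model file
print(looks_like_triton("http://localhost:8000/v2"))  # True  - candidate Triton URL
print(looks_like_triton("grpc://192.168.1.10:8001"))  # True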
    @staticmethod
    def _load_metadata(f=Path('path/to/meta.yaml')):
@ -563,7 +696,8 @@ class AutoShape(nn.Module):
    iou = 0.45  # NMS IoU threshold
    agnostic = False  # NMS class-agnostic
    multi_label = False  # NMS multiple labels per box
    # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    classes = None
    max_det = 1000  # maximum number of detections per image
    amp = False  # Automatic Mixed Precision (AMP) inference
@ -571,19 +705,24 @@ class AutoShape(nn.Module):
        super().__init__()
        if verbose:
            LOGGER.info('Adding AutoShape... ')
        copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())  # copy attributes
        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
        self.pt = not self.dmb or model.pt  # PyTorch model
        self.model = model.eval()
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.inplace = False  # Detect.inplace=False for safe multithread inference
            m.export = True  # do not output loss values
    def _apply(self, fn):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
@ -605,40 +744,52 @@ class AutoShape(nn.Module):
        with dt[0]:
            if isinstance(size, int):  # expand
                size = (size, size)
            p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device)  # param
            autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
            if isinstance(ims, torch.Tensor):  # torch
                with amp.autocast(autocast):
                    return self.model(ims.to(p.device).type_as(p), augment=augment)  # inference

            # Pre-process
            n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims])  # number, list of images
            shape0, shape1, files = [], [], []  # image and inference shapes, filenames
            for i, im in enumerate(ims):
                f = f'image{i}'  # filename
                if isinstance(im, (str, Path)):  # filename or uri
                    im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
                    im = np.asarray(exif_transpose(im))
                elif isinstance(im, Image.Image):  # PIL Image
                    im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
                files.append(Path(f).with_suffix('.jpg').name)
                if im.shape[0] < 5:  # image in CHW
                    im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
                im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)  # enforce 3ch input
                s = im.shape[:2]  # HWC
                shape0.append(s)  # image shape
                g = max(size) / max(s)  # gain
                shape1.append([y * g for y in s])
                ims[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
            shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] if self.pt else size  # inf shape
            x = [letterbox(im, shape1, auto=False)[0] for im in ims]  # pad
            x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2)))  # stack and BHWC to BCHW
            x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32

        with amp.autocast(autocast):
            # Inference
            with dt[1]:
                y = self.model(x, augment=augment)  # forward
            # Post-process
            with dt[2]:
@ -650,7 +801,7 @@ class AutoShape(nn.Module):
                                        self.multi_label,
                                        max_det=self.max_det)  # NMS
                for i in range(n):
                    scale_boxes(shape1, y[i][:, :4], shape0[i])

            return Detections(ims, y, files, dt, self.names, x.shape)
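A hedged usage sketch for the forward pass above: AutoShape accepts file paths, PIL images, numpy arrays or a ready torch tensor in one call. `model` is assumed to be an AutoShape-wrapped detector and the file names are placeholders:

import numpy as np
from PIL import Image

im1 = 'data/images/zidane.jpg'                  # filename (str or Path)
im2 = Image.open('data/images/bus.jpg')         # PIL image
im3 = np.zeros((640, 480, 3), dtype=np.uint8)   # HWC numpy array
results = model([im1, im2, im3], size=640)      # one batched forward pass, returns a Detections object
results.print()                                 # per-image detection summary and timing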
@ -660,7 +811,8 @@ class Detections:
    def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
        super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims]  # normalizations
        self.ims = ims  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
@ -672,22 +824,28 @@ class Detections:
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple(x.t / self.n * 1E3 for x in times)  # timestamps (ms)
        self.s = tuple(shape)  # inference BCHW shape
    def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
        s, crops = '', []
        for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
            s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                s = s.rstrip(', ')
                if show or save or render or crop:
                    annotator = Annotator(im, example=str(self.names))
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
                            crops.append({
                                'box': box,
                                'conf': conf,
@ -695,45 +853,48 @@ class Detections:
                                'label': label,
                                'im': save_one_box(box, im, file=file, save=save)})
                        else:  # all others
                            annotator.box_label(box, label if labels else '', color=colors(cls))
                    im = annotator.im
            else:
                s += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if show:
                im.show(self.files[i])  # show
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
            if render:
                self.ims[i] = np.asarray(im)
        if pprint:
            s = s.lstrip('\n')
            return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
        if crop:
            if save:
                LOGGER.info(f'Saved results to {save_dir}\n')
            return crops
    def show(self, labels=True):
        self._run(show=True, labels=labels)  # show results

    def save(self, labels=True, save_dir='runs/detect/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self._run(save=True, labels=labels, save_dir=save_dir)  # save results

    def crop(self, save=True, save_dir='runs/detect/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None
        return self._run(crop=True, save=save, save_dir=save_dir)  # crop results

    def render(self, labels=True):
        self._run(render=True, labels=labels)  # render results
        return self.ims
    def pandas(self):
@ -742,30 +903,51 @@ class Detections:
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        r = range(self.n)  # iterable
        x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
        # for d in x:
        #    for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
        #        setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def print(self):
        LOGGER.info(self.__str__())

    def __len__(self):  # override len(results)
        return self.n

    def __str__(self):  # override print(results)
        return self._run(pprint=True)  # print results

    def __repr__(self):
        return f'YOLOv5 {self.__class__} instance\n' + self.__str__()
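A short sketch of how the Detections helpers above fit together, continuing the hypothetical `results` object from an AutoShape call:

df = results.pandas().xyxy[0]          # first image's boxes as a DataFrame (xmin ... name columns)
people = df[df['name'] == 'person']    # filter rows by class name
for r in results.tolist():             # one single-image Detections per element
    print(len(r), r.files)             # __len__ is the number of images (here 1)
crops = results.crop(save=False)       # list of dicts with 'box', 'conf', 'label', 'im' entries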
class Proto(nn.Module):
# YOLOv5 mask Proto module for segmentation models
def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks
super().__init__()
self.cv1 = Conv(c1, c_, k=3)
self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
self.cv2 = Conv(c_, c_, k=3)
self.cv3 = Conv(c_, c2)
def forward(self, x):
return self.cv3(self.cv2(self.upsample(self.cv1(x))))
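Proto only produces prototype masks; combining them with per-detection mask coefficients happens outside this file, so the sketch below is an assumption about that step, not code from this commit:

import torch

nm, h, w = 32, 160, 160
protos = torch.randn(nm, h, w)          # Proto output for one image, shape (nm, H/4, W/4)
coeffs = torch.randn(5, nm)             # hypothetical per-detection mask coefficients from the Segment head
masks = (coeffs @ protos.view(nm, -1)).sigmoid().view(-1, h, w)  # (num_det, H/4, W/4) soft masks
binary = masks > 0.5                    # threshold to boolean instance masks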
class Classify(nn.Module):
    # YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        c_ = 1280  # efficientnet_b0 size
        self.conv = Conv(c1, c_, k, s, autopad(k, p), g)


@ -0,0 +1,49 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
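The activation: entry above is read by parse_model (further down in this commit) and eval'd into Conv.default_act, so every Conv built afterwards uses it. A rough sketch of that mechanism, with a simplified stand-in for models.common.Conv:

import torch.nn as nn

class Conv(nn.Module):          # simplified stand-in, not the real models.common.Conv
    default_act = nn.SiLU()     # default activation, overridable per-model

act = 'nn.LeakyReLU(0.1)'       # value read from the model YAML
Conv.default_act = eval(act)    # redefine the default for every Conv built afterwards
print(Conv.default_act)         # LeakyReLU(negative_slope=0.1)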


@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]


@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]


@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]


@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.5 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]


@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]


@ -30,7 +30,7 @@ from tensorflow import keras
from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv, from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv,
DWConvTranspose2d, Focus, autopad) DWConvTranspose2d, Focus, autopad)
from models.experimental import MixConv2d, attempt_load from models.experimental import MixConv2d, attempt_load
from models.yolo import Detect from models.yolo import Detect, Segment
from utils.activations import SiLU from utils.activations import SiLU
from utils.general import LOGGER, make_divisible, print_args from utils.general import LOGGER, make_divisible, print_args
@ -299,18 +299,18 @@ class TFDetect(keras.layers.Layer):
x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no]) x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
if not self.training: # inference if not self.training: # inference
y = tf.sigmoid(x[i]) y = x[i]
grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5 grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4 anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
xy = (y[..., 0:2] * 2 + grid) * self.stride[i] # xy xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i] # xy
wh = y[..., 2:4] ** 2 * anchor_grid wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
# Normalize xywh to 0-1 to reduce calibration error # Normalize xywh to 0-1 to reduce calibration error
xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
y = tf.concat([xy, wh, y[..., 4:]], -1) y = tf.concat([xy, wh, tf.sigmoid(y[..., 4:5 + self.nc]), y[..., 5 + self.nc:]], -1)
z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no])) z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1), x) return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),)
@staticmethod @staticmethod
def _make_grid(nx=20, ny=20): def _make_grid(nx=20, ny=20):
@ -320,6 +320,37 @@ class TFDetect(keras.layers.Layer):
return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32) return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
class TFSegment(TFDetect):
# YOLOv5 Segment head for segmentation models
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
super().__init__(nc, anchors, ch, imgsz, w)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv
self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos
self.detect = TFDetect.call
def call(self, x):
p = self.proto(x[0])
p = tf.transpose(p, [0, 3, 1, 2]) # from shape(1,160,160,32) to shape(1,32,160,160)
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p)
class TFProto(keras.layers.Layer):
def __init__(self, c1, c_=256, c2=32, w=None):
super().__init__()
self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
self.upsample = TFUpsample(None, scale_factor=2, mode='nearest')
self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
self.cv3 = TFConv(c_, c2, w=w.cv3)
def call(self, inputs):
return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))
class TFUpsample(keras.layers.Layer): class TFUpsample(keras.layers.Layer):
# TF version of torch.nn.Upsample() # TF version of torch.nn.Upsample()
def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w' def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w'
@ -377,10 +408,12 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
args = [ch[f]] args = [ch[f]]
elif m is Concat: elif m is Concat:
c2 = sum(ch[-1 if x == -1 else x + 1] for x in f) c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
elif m is Detect: elif m in [Detect, Segment]:
args.append([ch[x + 1] for x in f]) args.append([ch[x + 1] for x in f])
if isinstance(args[1], int): # number of anchors if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f) args[1] = [list(range(args[1] * 2))] * len(f)
if m is Segment:
args[3] = make_divisible(args[3] * gw, 8)
args.append(imgsz) args.append(imgsz)
else: else:
c2 = ch[f] c2 = ch[f]
@ -452,9 +485,9 @@ class TFModel:
iou_thres, iou_thres,
conf_thres, conf_thres,
clip_boxes=False) clip_boxes=False)
return nms, x[1] return (nms,)
return x[0] # output only first tensor [1,6300,85] = [xywh, conf, class0, class1, ...] return x # output [1,6300,85] = [xywh, conf, class0, class1, ...]
# x = x[0][0] # [x(1,6300,85), ...] to x(6300,85) # x = x[0] # [x(1,6300,85), ...] to x(6300,85)
# xywh = x[..., :4] # x(6300,4) boxes # xywh = x[..., :4] # x(6300,4) boxes
# conf = x[..., 4:5] # x(6300,1) confidences # conf = x[..., 4:5] # x(6300,1) confidences
# cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes


@ -36,6 +36,7 @@ except ImportError:
class Detect(nn.Module): class Detect(nn.Module):
# YOLOv5 Detect head for detection models
stride = None # strides computed during build stride = None # strides computed during build
dynamic = False # force grid reconstruction dynamic = False # force grid reconstruction
export = False # export mode export = False # export mode
@ -46,8 +47,8 @@ class Detect(nn.Module):
self.no = nc + 5 # number of outputs per anchor self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors self.na = len(anchors[0]) // 2 # number of anchors
        self.grid = [torch.empty(0) for _ in range(self.nl)]  # init grid
        self.anchor_grid = [torch.empty(0) for _ in range(self.nl)]  # init anchor grid
self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2) self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.inplace = inplace # use inplace ops (e.g. slice assignment) self.inplace = inplace # use inplace ops (e.g. slice assignment)
@ -63,16 +64,17 @@ class Detect(nn.Module):
            if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

            if isinstance(self, Segment):  # (boxes + masks)
                xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
            else:  # Detect (boxes only)
                xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                y = torch.cat((xy, wh, conf), 4)
            z.append(y.view(bs, self.na * nx * ny, self.no))

        return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
@ -87,6 +89,23 @@ class Detect(nn.Module):
return grid, anchor_grid return grid, anchor_grid
class Segment(Detect):
# YOLOv5 Segment head for segmentation models
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
super().__init__(nc, anchors, ch, inplace)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.proto = Proto(ch[0], self.npr, self.nm) # protos
self.detect = Detect.forward
def forward(self, x):
p = self.proto(x[0])
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
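For reference, a tiny numeric sketch of the box decoding used in Detect.forward above; the stride, anchor and raw values are arbitrary, and the grid cell already carries the -0.5 offset that _make_grid bakes in:

import torch

stride, anchor = 8.0, torch.tensor([10.0, 13.0])   # P3 stride and one anchor (w, h)
grid_cell = torch.tensor([4.0, 7.0]) - 0.5          # grid offset for cell (4, 7)
raw = torch.tensor([0.2, -0.1, 0.3, 0.4])           # raw tx, ty, tw, th before sigmoid

xy = (raw[:2].sigmoid() * 2 + grid_cell) * stride    # box center in pixels
wh = (raw[2:].sigmoid() * 2) ** 2 * anchor           # box width/height in pixels
print(xy, wh)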
class BaseModel(nn.Module): class BaseModel(nn.Module):
# YOLOv5 base model # YOLOv5 base model
def forward(self, x, profile=False, visualize=False): def forward(self, x, profile=False, visualize=False):
@ -135,7 +154,7 @@ class BaseModel(nn.Module):
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
self = super()._apply(fn) self = super()._apply(fn)
m = self.model[-1] # Detect() m = self.model[-1] # Detect()
if isinstance(m, Detect): if isinstance(m, (Detect, Segment)):
m.stride = fn(m.stride) m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid)) m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list): if isinstance(m.anchor_grid, list):
@ -169,11 +188,12 @@ class DetectionModel(BaseModel):
# Build strides, anchors # Build strides, anchors
m = self.model[-1] # Detect() m = self.model[-1] # Detect()
        if isinstance(m, (Detect, Segment)):
            s = 256  # 2x min stride
            m.inplace = self.inplace
            forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
            m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
            check_anchor_order(m)
m.anchors /= m.stride.view(-1, 1, 1) m.anchors /= m.stride.view(-1, 1, 1)
self.stride = m.stride self.stride = m.stride
self._initialize_biases() # only run once self._initialize_biases() # only run once
@ -235,15 +255,21 @@ class DetectionModel(BaseModel):
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module m = self.model[-1] # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility
class SegmentationModel(DetectionModel):
# YOLOv5 segmentation model
def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
super().__init__(cfg, ch, nc, anchors)
class ClassificationModel(BaseModel): class ClassificationModel(BaseModel):
# YOLOv5 classification model # YOLOv5 classification model
def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index
@ -271,8 +297,12 @@ class ClassificationModel(BaseModel):
def parse_model(d, ch): # model_dict, input_channels(3) def parse_model(d, ch): # model_dict, input_channels(3)
# Parse a YOLOv5 model.yaml dictionary
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
if act:
Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU()
LOGGER.info(f"{colorstr('activation:')} {act}") # print
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5) no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
@ -284,24 +314,28 @@ def parse_model(d, ch): # model_dict, input_channels(3)
args[j] = eval(a) if isinstance(a, str) else a # eval strings args[j] = eval(a) if isinstance(a, str) else a # eval strings
n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, if m in {
BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x): Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}:
c1, c2 = ch[f], args[0] c1, c2 = ch[f], args[0]
if c2 != no: # if not output if c2 != no: # if not output
c2 = make_divisible(c2 * gw, 8) c2 = make_divisible(c2 * gw, 8)
args = [c1, c2, *args[1:]] args = [c1, c2, *args[1:]]
if m in [BottleneckCSP, C3, C3TR, C3Ghost, C3x]: if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
args.insert(2, n) # number of repeats args.insert(2, n) # number of repeats
n = 1 n = 1
elif m is nn.BatchNorm2d: elif m is nn.BatchNorm2d:
args = [ch[f]] args = [ch[f]]
elif m is Concat: elif m is Concat:
c2 = sum(ch[x] for x in f) c2 = sum(ch[x] for x in f)
elif m is Detect: # TODO: channel, gw, gd
elif m in {Detect, Segment}:
args.append([ch[x] for x in f]) args.append([ch[x] for x in f])
if isinstance(args[1], int): # number of anchors if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f) args[1] = [list(range(args[1] * 2))] * len(f)
if m is Segment:
args[3] = make_divisible(args[3] * gw, 8)
elif m is Contract: elif m is Contract:
c2 = ch[f] * args[0] ** 2 c2 = ch[f] * args[0] ** 2
elif m is Expand: elif m is Expand:


@ -4,9 +4,15 @@ utils/initialization
""" """
import contextlib import contextlib
import platform
import threading import threading
def emojis(str=''):
# Return platform-dependent emoji-safe version of string
return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
class TryExcept(contextlib.ContextDecorator): class TryExcept(contextlib.ContextDecorator):
# YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager # YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager
def __init__(self, msg=''): def __init__(self, msg=''):
@ -17,7 +23,7 @@ class TryExcept(contextlib.ContextDecorator):
    def __exit__(self, exc_type, value, traceback):
        if value:
            print(emojis(f'{self.msg}{value}'))
        return True
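TryExcept works both as a decorator and as a context manager; a small usage sketch, assuming the class above is importable:

@TryExcept('plotting failed: ')
def plot_something():
    raise ValueError('bad data')   # swallowed; the message and error are printed instead

plot_something()                   # prints "plotting failed: bad data" and execution continues

with TryExcept('cleanup: '):
    1 / 0                          # ZeroDivisionError is caught and reported, not raised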
@ -38,7 +44,7 @@ def notebook_init(verbose=True):
import os import os
import shutil import shutil
from utils.general import check_font, check_requirements, emojis, is_colab from utils.general import check_font, check_requirements, is_colab
from utils.torch_utils import select_device # imports from utils.torch_utils import select_device # imports
check_requirements(('psutil', 'IPython')) check_requirements(('psutil', 'IPython'))


@ -12,7 +12,7 @@ import torch
import torchvision.transforms as T import torchvision.transforms as T
import torchvision.transforms.functional as TF import torchvision.transforms.functional as TF
from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy
from utils.metrics import bbox_ioa from utils.metrics import bbox_ioa
IMAGENET_MEAN = 0.485, 0.456, 0.406 # RGB mean IMAGENET_MEAN = 0.485, 0.456, 0.406 # RGB mean
@ -21,7 +21,7 @@ IMAGENET_STD = 0.229, 0.224, 0.225 # RGB standard deviation
class Albumentations: class Albumentations:
# YOLOv5 Albumentations class (optional, only used if package is installed) # YOLOv5 Albumentations class (optional, only used if package is installed)
def __init__(self): def __init__(self, size=640):
self.transform = None self.transform = None
prefix = colorstr('albumentations: ') prefix = colorstr('albumentations: ')
try: try:
@ -29,6 +29,7 @@ class Albumentations:
check_version(A.__version__, '1.0.3', hard=True) # version requirement check_version(A.__version__, '1.0.3', hard=True) # version requirement
T = [ T = [
A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
A.Blur(p=0.01), A.Blur(p=0.01),
A.MedianBlur(p=0.01), A.MedianBlur(p=0.01),
A.ToGray(p=0.01), A.ToGray(p=0.01),
@ -281,7 +282,7 @@ def cutout(im, labels, p=0.5):
# return unobscured labels # return unobscured labels
if len(labels) and s > 0.03: if len(labels) and s > 0.03:
box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h)) # intersection over area
labels = labels[ioa < 0.60] # remove >60% obscured labels labels = labels[ioa < 0.60] # remove >60% obscured labels
return labels return labels
@ -303,9 +304,11 @@ def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates
def classify_albumentations(
        augment=True,
        size=224,
        scale=(0.08, 1.0),
        ratio=(0.75, 1.0 / 0.75),  # 0.75, 1.33
        hflip=0.5,
        vflip=0.0,
        jitter=0.4,
@ -319,7 +322,7 @@ def classify_albumentations(augment=True,
from albumentations.pytorch import ToTensorV2 from albumentations.pytorch import ToTensorV2
check_version(A.__version__, '1.0.3', hard=True) # version requirement check_version(A.__version__, '1.0.3', hard=True) # version requirement
if augment: # Resize and crop if augment: # Resize and crop
T = [A.RandomResizedCrop(height=size, width=size, scale=scale)] T = [A.RandomResizedCrop(height=size, width=size, scale=scale, ratio=ratio)]
if auto_aug: if auto_aug:
# TODO: implement AugMix, AutoAug & RandAug in albumentation # TODO: implement AugMix, AutoAug & RandAug in albumentation
LOGGER.info(f'{prefix}auto augmentations are currently not supported') LOGGER.info(f'{prefix}auto augmentations are currently not supported')
@ -338,7 +341,7 @@ def classify_albumentations(augment=True,
return A.Compose(T) return A.Compose(T)
except ImportError: # package not installed, skip except ImportError: # package not installed, skip
        LOGGER.warning(f'{prefix}⚠️ not found, install with `pip install albumentations` (recommended)')
except Exception as e: except Exception as e:
LOGGER.info(f'{prefix}{e}') LOGGER.info(f'{prefix}{e}')


@ -122,7 +122,7 @@ def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen
# Filter # Filter
i = (wh0 < 3.0).any(1).sum() i = (wh0 < 3.0).any(1).sum()
if i: if i:
LOGGER.info(f'{PREFIX}WARNING: Extremely small objects found: {i} of {len(wh0)} labels are < 3 pixels in size') LOGGER.info(f'{PREFIX}WARNING ⚠️ Extremely small objects found: {i} of {len(wh0)} labels are <3 pixels in size')
wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32) # filter > 2 pixels wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32) # filter > 2 pixels
# wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1
@ -134,7 +134,7 @@ def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen
k = kmeans(wh / s, n, iter=30)[0] * s # points k = kmeans(wh / s, n, iter=30)[0] * s # points
assert n == len(k) # kmeans may return fewer points than requested if wh is insufficient or too similar assert n == len(k) # kmeans may return fewer points than requested if wh is insufficient or too similar
except Exception: except Exception:
LOGGER.warning(f'{PREFIX}WARNING: switching strategies from kmeans to random init') LOGGER.warning(f'{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init')
k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init
wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0)) wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0))
k = print_results(k, verbose=False) k = print_results(k, verbose=False)


@ -19,7 +19,7 @@ def check_train_batch_size(model, imgsz=640, amp=True):
def autobatch(model, imgsz=640, fraction=0.8, batch_size=16): def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
# Automatically estimate best batch size to use `fraction` of available CUDA memory # Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory
# Usage: # Usage:
# import torch # import torch
# from utils.autobatch import autobatch # from utils.autobatch import autobatch
@ -33,6 +33,9 @@ def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
if device.type == 'cpu': if device.type == 'cpu':
LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}') LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
return batch_size return batch_size
if torch.backends.cudnn.benchmark:
LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}')
return batch_size
# Inspect CUDA memory # Inspect CUDA memory
gb = 1 << 30 # bytes to GiB (1024 ** 3) gb = 1 << 30 # bytes to GiB (1024 ** 3)
@ -62,8 +65,8 @@ def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
b = batch_sizes[max(i - 1, 0)] # select prior safe point b = batch_sizes[max(i - 1, 0)] # select prior safe point
if b < 1 or b > 1024: # b outside of safe range if b < 1 or b > 1024: # b outside of safe range
b = batch_size b = batch_size
        LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.')
    fraction = (np.polyval(p, b) + r + a) / t  # actual fraction predicted
    LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅')
    return b
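The batch-size choice above boils down to fitting measured memory against probe batch sizes and solving for the batch that hits the target fraction; a stripped-down sketch of that idea with invented numbers (a simple linear fit here, not the exact fit used in the function):

import numpy as np

batch_sizes = [1, 2, 4, 8, 16]
mem_gb = [1.1, 1.5, 2.3, 4.0, 7.6]     # hypothetical measured CUDA memory for each probe batch
t, fraction = 10.0, 0.8                # total GiB on the card and target utilisation

p = np.polyfit(batch_sizes, mem_gb, deg=1)      # memory ~= p[0] * batch + p[1]
b = int((t * fraction - p[1]) / p[0])           # batch size that lands near 80% of memory
print(b, np.polyval(p, b))                      # chosen batch and its predicted memory use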


@ -40,6 +40,7 @@ IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp',
VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes
BAR_FORMAT = '{l_bar}{bar:10}{r_bar}{bar:-10b}' # tqdm bar format BAR_FORMAT = '{l_bar}{bar:10}{r_bar}{bar:-10b}' # tqdm bar format
LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html
RANK = int(os.getenv('RANK', -1))
PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders
# Get orientation exif tag # Get orientation exif tag
@ -116,7 +117,7 @@ def create_dataloader(path,
prefix='', prefix='',
shuffle=False): shuffle=False):
if rect and shuffle: if rect and shuffle:
LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False') LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
shuffle = False shuffle = False
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
dataset = LoadImagesAndLabels( dataset = LoadImagesAndLabels(
@ -139,7 +140,7 @@ def create_dataloader(path,
sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates
generator = torch.Generator() generator = torch.Generator()
generator.manual_seed(0) generator.manual_seed(6148914691236517205 + RANK)
return loader(dataset, return loader(dataset,
batch_size=batch_size, batch_size=batch_size,
shuffle=shuffle and sampler is None, shuffle=shuffle and sampler is None,
@ -185,6 +186,55 @@ class _RepeatSampler:
yield from iter(self.sampler) yield from iter(self.sampler)
class LoadScreenshots:
# YOLOv5 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"`
def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None):
# source = [screen_number left top width height] (pixels)
check_requirements('mss')
import mss
source, *params = source.split()
self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0
if len(params) == 1:
self.screen = int(params[0])
elif len(params) == 4:
left, top, width, height = (int(x) for x in params)
elif len(params) == 5:
self.screen, left, top, width, height = (int(x) for x in params)
self.img_size = img_size
self.stride = stride
self.transforms = transforms
self.auto = auto
self.mode = 'stream'
self.frame = 0
self.sct = mss.mss()
# Parse monitor shape
monitor = self.sct.monitors[self.screen]
self.top = monitor["top"] if top is None else (monitor["top"] + top)
self.left = monitor["left"] if left is None else (monitor["left"] + left)
self.width = width or monitor["width"]
self.height = height or monitor["height"]
self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}
def __iter__(self):
return self
def __next__(self):
# mss screen capture: get raw pixels from the screen as np array
im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR
s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "
if self.transforms:
im = self.transforms(im0) # transforms
else:
im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize
im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
im = np.ascontiguousarray(im) # contiguous
self.frame += 1
return str(self.screen), im, im0, None, s # screen, img, original img, im0s, s
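Hedged usage sketch for the new screenshot source above (requires the mss package and a live display; the region numbers are arbitrary):

# CLI: python detect.py --source "screen 0 100 100 512 256"   # screen 0, left 100, top 100, 512x256 px

# Or iterate over frames directly:
dataset = LoadScreenshots("screen 0 100 100 512 256", img_size=640, stride=32, auto=True)
for source, im, im0, _, s in dataset:
    print(s, im.shape)   # letterboxed CHW frame ready for the model
    break                # the loader streams forever, so stop after one frame here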
class LoadImages: class LoadImages:
# YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4` # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1): def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
@ -232,8 +282,9 @@ class LoadImages:
if self.video_flag[self.count]: if self.video_flag[self.count]:
# Read video # Read video
self.mode = 'video' self.mode = 'video'
ret_val, im0 = self.cap.read() for _ in range(self.vid_stride):
self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.vid_stride * (self.frame + 1)) # read at vid_stride self.cap.grab()
ret_val, im0 = self.cap.retrieve()
while not ret_val: while not ret_val:
self.count += 1 self.count += 1
self.cap.release() self.cap.release()
@ -328,7 +379,7 @@ class LoadStreams:
self.auto = auto and self.rect self.auto = auto and self.rect
self.transforms = transforms # optional self.transforms = transforms # optional
if not self.rect: if not self.rect:
LOGGER.warning('WARNING: Stream shapes differ. For optimal performance supply similarly-shaped streams.') LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.')
def update(self, i, cap, stream): def update(self, i, cap, stream):
# Read stream `i` frames in daemon thread # Read stream `i` frames in daemon thread
@ -341,7 +392,7 @@ class LoadStreams:
if success: if success:
self.imgs[i] = im self.imgs[i] = im
else: else:
LOGGER.warning('WARNING: Video stream unresponsive, please check your IP camera connection.') LOGGER.warning('WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.')
self.imgs[i] = np.zeros_like(self.imgs[i]) self.imgs[i] = np.zeros_like(self.imgs[i])
cap.open(stream) # re-open stream if signal was lost cap.open(stream) # re-open stream if signal was lost
time.sleep(0.0) # wait time time.sleep(0.0) # wait time
@ -403,7 +454,7 @@ class LoadImagesAndLabels(Dataset):
self.mosaic_border = [-img_size // 2, -img_size // 2] self.mosaic_border = [-img_size // 2, -img_size // 2]
self.stride = stride self.stride = stride
self.path = path self.path = path
self.albumentations = Albumentations() if augment else None self.albumentations = Albumentations(size=img_size) if augment else None
try: try:
f = [] # image files f = [] # image files
@ -455,7 +506,7 @@ class LoadImagesAndLabels(Dataset):
self.im_files = list(cache.keys()) # update self.im_files = list(cache.keys()) # update
self.label_files = img2label_paths(cache.keys()) # update self.label_files = img2label_paths(cache.keys()) # update
n = len(shapes) # number of images n = len(shapes) # number of images
bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index bi = np.floor(np.arange(n) / batch_size).astype(int) # batch index
nb = bi[-1] + 1 # number of batches nb = bi[-1] + 1 # number of batches
self.batch = bi # batch index of image self.batch = bi # batch index of image
self.n = n self.n = n
@ -484,6 +535,7 @@ class LoadImagesAndLabels(Dataset):
self.im_files = [self.im_files[i] for i in irect] self.im_files = [self.im_files[i] for i in irect]
self.label_files = [self.label_files[i] for i in irect] self.label_files = [self.label_files[i] for i in irect]
self.labels = [self.labels[i] for i in irect] self.labels = [self.labels[i] for i in irect]
self.segments = [self.segments[i] for i in irect]
self.shapes = s[irect] # wh self.shapes = s[irect] # wh
ar = ar[irect] ar = ar[irect]
@ -497,7 +549,7 @@ class LoadImagesAndLabels(Dataset):
elif mini > 1: elif mini > 1:
shapes[i] = [1, 1 / mini] shapes[i] = [1, 1 / mini]
self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
# Cache images into RAM/disk for faster training (WARNING: large datasets may exceed system resources) # Cache images into RAM/disk for faster training (WARNING: large datasets may exceed system resources)
self.ims = [None] * n self.ims = [None] * n
@ -542,7 +594,7 @@ class LoadImagesAndLabels(Dataset):
if msgs: if msgs:
LOGGER.info('\n'.join(msgs)) LOGGER.info('\n'.join(msgs))
if nf == 0: if nf == 0:
LOGGER.warning(f'{prefix}WARNING: No labels found in {path}. {HELP_URL}') LOGGER.warning(f'{prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}')
x['hash'] = get_hash(self.label_files + self.im_files) x['hash'] = get_hash(self.label_files + self.im_files)
x['results'] = nf, nm, ne, nc, len(self.im_files) x['results'] = nf, nm, ne, nc, len(self.im_files)
x['msgs'] = msgs # warnings x['msgs'] = msgs # warnings
@ -552,7 +604,7 @@ class LoadImagesAndLabels(Dataset):
path.with_suffix('.cache.npy').rename(path) # remove .npy suffix path.with_suffix('.cache.npy').rename(path) # remove .npy suffix
LOGGER.info(f'{prefix}New cache created: {path}') LOGGER.info(f'{prefix}New cache created: {path}')
except Exception as e: except Exception as e:
LOGGER.warning(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}') # not writeable LOGGER.warning(f'{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable: {e}') # not writeable
return x return x
def __len__(self): def __len__(self):
@ -867,7 +919,7 @@ def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.dataloaders impo
b = x[1:] * [w, h, w, h] # box b = x[1:] * [w, h, w, h] # box
# b[2:] = b[2:].max() # rectangle to square # b[2:] = b[2:].max() # rectangle to square
b[2:] = b[2:] * 1.2 + 3 # pad b[2:] = b[2:] * 1.2 + 3 # pad
b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)
b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
b[[1, 3]] = np.clip(b[[1, 3]], 0, h) b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
@ -916,7 +968,7 @@ def verify_image_label(args):
f.seek(-2, 2) f.seek(-2, 2)
if f.read() != b'\xff\xd9': # corrupt JPEG if f.read() != b'\xff\xd9': # corrupt JPEG
ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100) ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100)
msg = f'{prefix}WARNING: {im_file}: corrupt JPEG restored and saved' msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved'
# verify labels # verify labels
if os.path.isfile(lb_file): if os.path.isfile(lb_file):
@ -938,7 +990,7 @@ def verify_image_label(args):
lb = lb[i] # remove duplicates lb = lb[i] # remove duplicates
if segments: if segments:
segments = [segments[x] for x in i] segments = [segments[x] for x in i]
msg = f'{prefix}WARNING: {im_file}: {nl - len(i)} duplicate labels removed' msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed'
else: else:
ne = 1 # label empty ne = 1 # label empty
lb = np.zeros((0, 5), dtype=np.float32) lb = np.zeros((0, 5), dtype=np.float32)
@ -948,7 +1000,7 @@ def verify_image_label(args):
return im_file, lb, shape, segments, nm, nf, ne, nc, msg return im_file, lb, shape, segments, nm, nf, ne, nc, msg
except Exception as e: except Exception as e:
nc = 1 nc = 1
msg = f'{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}' msg = f'{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}'
return [None, None, None, None, nm, nf, ne, nc, msg] return [None, None, None, None, nm, nf, ne, nc, msg]
@ -1011,7 +1063,7 @@ class HUBDatasetStats():
im = im.resize((int(im.width * r), int(im.height * r))) im = im.resize((int(im.width * r), int(im.height * r)))
im.save(f_new, 'JPEG', quality=50, optimize=True) # save im.save(f_new, 'JPEG', quality=50, optimize=True) # save
except Exception as e: # use OpenCV except Exception as e: # use OpenCV
print(f'WARNING: HUB ops PIL failure {f}: {e}') LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}')
im = cv2.imread(f) im = cv2.imread(f)
im_height, im_width = im.shape[:2] im_height, im_width = im.shape[:2]
r = max_dim / max(im_height, im_width) # ratio r = max_dim / max(im_height, im_width) # ratio
@ -1118,7 +1170,7 @@ def create_classification_dataloader(path,
nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])
sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
generator = torch.Generator() generator = torch.Generator()
generator.manual_seed(0) generator.manual_seed(6148914691236517205 + RANK)
return InfiniteDataLoader(dataset, return InfiniteDataLoader(dataset,
batch_size=batch_size, batch_size=batch_size,
shuffle=shuffle and sampler is None, shuffle=shuffle and sampler is None,


@ -3,7 +3,7 @@
# Image is CUDA-optimized for YOLOv5 single/multi-GPU training and inference # Image is CUDA-optimized for YOLOv5 single/multi-GPU training and inference
# Start FROM NVIDIA PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch # Start FROM NVIDIA PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
FROM nvcr.io/nvidia/pytorch:22.07-py3 FROM nvcr.io/nvidia/pytorch:22.08-py3
RUN rm -rf /opt/pytorch # remove 1.2GB dir RUN rm -rf /opt/pytorch # remove 1.2GB dir
# Downloads to user config dir # Downloads to user config dir


@ -16,13 +16,13 @@ import requests
import torch import torch
def is_url(url, check_online=True): def is_url(url, check=True):
# Check if online file exists # Check if string is URL and check if URL exists
try: try:
url = str(url) url = str(url)
result = urllib.parse.urlparse(url) result = urllib.parse.urlparse(url)
assert all([result.scheme, result.netloc, result.path]) # check if is url assert all([result.scheme, result.netloc, result.path]) # check if is url
return (urllib.request.urlopen(url).getcode() == 200) if check_online else True # check if exists online return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online
except (AssertionError, urllib.request.HTTPError): except (AssertionError, urllib.request.HTTPError):
return False return False
@ -87,9 +87,7 @@ def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'):
return file return file
# GitHub assets # GitHub assets
assets = [ assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default
'yolov5n.pt', 'yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov5n6.pt', 'yolov5s6.pt',
'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt']
try: try:
tag, assets = github_assets(repo, release) tag, assets = github_assets(repo, release)
except Exception: except Exception:
@ -107,7 +105,6 @@ def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'):
safe_download( safe_download(
file, file,
url=f'https://github.com/{repo}/releases/download/{tag}/{name}', url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
url2=f'https://storage.googleapis.com/{repo}/{tag}/{name}', # backup url (optional)
min_bytes=1E5, min_bytes=1E5,
error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}') error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')
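The hard-coded asset list in `attempt_download` is replaced by a comprehension; for reference, a quick sketch of what it expands to, now covering the classification and segmentation checkpoints as well:

```python
# Sketch: default GitHub asset names produced by the new comprehension
assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')]
print(len(assets))  # 20
print(assets[:4])   # ['yolov5n.pt', 'yolov5n6.pt', 'yolov5n-cls.pt', 'yolov5n-seg.pt']
```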


@ -17,6 +17,7 @@ import signal
import sys import sys
import time import time
import urllib import urllib
from copy import deepcopy
from datetime import datetime from datetime import datetime
from itertools import repeat from itertools import repeat
from multiprocessing.pool import ThreadPool from multiprocessing.pool import ThreadPool
@ -33,7 +34,7 @@ import torch
import torchvision import torchvision
import yaml import yaml
from utils import TryExcept from utils import TryExcept, emojis
from utils.downloads import gsutil_getsize from utils.downloads import gsutil_getsize
from utils.metrics import box_iou, fitness from utils.metrics import box_iou, fitness
@ -42,8 +43,8 @@ ROOT = FILE.parents[1] # YOLOv5 root directory
RANK = int(os.getenv('RANK', -1)) RANK = int(os.getenv('RANK', -1))
# Settings # Settings
DATASETS_DIR = ROOT.parent / 'datasets' # YOLOv5 datasets directory
NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads
DATASETS_DIR = Path(os.getenv('YOLOv5_DATASETS_DIR', ROOT.parent / 'datasets')) # global datasets directory
AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true' # global auto-install mode AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true' # global auto-install mode
VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode
FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf
@ -222,7 +223,7 @@ def init_seeds(seed=0, deterministic=False):
torch.manual_seed(seed) torch.manual_seed(seed)
torch.cuda.manual_seed(seed) torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed) # for Multi-GPU, exception safe torch.cuda.manual_seed_all(seed) # for Multi-GPU, exception safe
torch.backends.cudnn.benchmark = True # for faster training # torch.backends.cudnn.benchmark = True # AutoBatch problem https://github.com/ultralytics/yolov5/issues/9287
if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213 if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213
torch.use_deterministic_algorithms(True) torch.use_deterministic_algorithms(True)
torch.backends.cudnn.deterministic = True torch.backends.cudnn.deterministic = True
@ -247,11 +248,6 @@ def get_latest_run(search_dir='.'):
return max(last_list, key=os.path.getctime) if last_list else '' return max(last_list, key=os.path.getctime) if last_list else ''
def emojis(str=''):
# Return platform-dependent emoji-safe version of string
return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
def file_age(path=__file__): def file_age(path=__file__):
# Return days since last file update # Return days since last file update
dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta
@ -332,7 +328,7 @@ def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=Fals
# Check version vs. required version # Check version vs. required version
current, minimum = (pkg.parse_version(x) for x in (current, minimum)) current, minimum = (pkg.parse_version(x) for x in (current, minimum))
result = (current == minimum) if pinned else (current >= minimum) # bool result = (current == minimum) if pinned else (current >= minimum) # bool
s = f'WARNING: ⚠️ {name}{minimum} is required by YOLOv5, but {name}{current} is currently installed' # string s = f'WARNING ⚠️ {name}{minimum} is required by YOLOv5, but {name}{current} is currently installed' # string
if hard: if hard:
assert result, emojis(s) # assert min requirements met assert result, emojis(s) # assert min requirements met
if verbose and not result: if verbose and not result:
@ -341,40 +337,38 @@ def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=Fals
@TryExcept() @TryExcept()
def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=()): def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=''):
# Check installed dependencies meet YOLOv5 requirements (pass *.txt file or list of packages) # Check installed dependencies meet YOLOv5 requirements (pass *.txt file or list of packages or single package str)
prefix = colorstr('red', 'bold', 'requirements:') prefix = colorstr('red', 'bold', 'requirements:')
check_python() # check python version check_python() # check python version
if isinstance(requirements, (str, Path)): # requirements.txt file if isinstance(requirements, Path): # requirements.txt file
file = Path(requirements) file = requirements.resolve()
assert file.exists(), f"{prefix} {file.resolve()} not found, check failed." assert file.exists(), f"{prefix} {file} not found, check failed."
with file.open() as f: with file.open() as f:
requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude] requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude]
else: # list or tuple of packages elif isinstance(requirements, str):
requirements = [x for x in requirements if x not in exclude] requirements = [requirements]
n = 0 # number of packages updates s = ''
for i, r in enumerate(requirements): n = 0
for r in requirements:
try: try:
pkg.require(r) pkg.require(r)
except Exception: # DistributionNotFound or VersionConflict if requirements not met except (pkg.VersionConflict, pkg.DistributionNotFound): # exception if requirements not met
s = f"{prefix} {r} not found and is required by YOLOv5" s += f'"{r}" '
if install and AUTOINSTALL: # check environment variable
LOGGER.info(f"{s}, attempting auto-update...")
try:
assert check_online(), f"'pip install {r}' skipped (offline)"
LOGGER.info(check_output(f'pip install "{r}" {cmds[i] if cmds else ""}', shell=True).decode())
n += 1 n += 1
except Exception as e:
LOGGER.warning(f'{prefix} {e}')
else:
LOGGER.info(f'{s}. Please install and rerun your command.')
if n: # if packages updated if s and install and AUTOINSTALL: # check environment variable
source = file.resolve() if 'file' in locals() else requirements LOGGER.info(f"{prefix} YOLOv5 requirement{'s' * (n > 1)} {s}not found, attempting AutoUpdate...")
try:
assert check_online(), "AutoUpdate skipped (offline)"
LOGGER.info(check_output(f'pip install {s} {cmds}', shell=True).decode())
source = file if 'file' in locals() else requirements
s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \ s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \
f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n" f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n"
LOGGER.info(s) LOGGER.info(s)
except Exception as e:
LOGGER.warning(f'{prefix}{e}')
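Note the behavioral change in the refactored `check_requirements`: only a `Path` now triggers parsing of a requirements file, a plain string is treated as a single package, and all missing packages are collected into one `pip install` call. A rough usage sketch under those assumptions (package names are illustrative):

```python
# Rough usage sketch of the refactored checker
from pathlib import Path
from utils.general import check_requirements

check_requirements(Path('requirements.txt'))                 # Path -> parse the file
check_requirements('onnx>=1.12.0')                           # str  -> single package
check_requirements(('coremltools', 'onnx'), install=False)   # iterable -> checked as given
```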
def check_img_size(imgsz, s=32, floor=0): def check_img_size(imgsz, s=32, floor=0):
@ -385,7 +379,7 @@ def check_img_size(imgsz, s=32, floor=0):
imgsz = list(imgsz) # convert to list if tuple imgsz = list(imgsz) # convert to list if tuple
new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz] new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz]
if new_size != imgsz: if new_size != imgsz:
LOGGER.warning(f'WARNING: --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}') LOGGER.warning(f'WARNING ⚠️ --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}')
return new_size return new_size
@ -400,7 +394,7 @@ def check_imshow():
cv2.waitKey(1) cv2.waitKey(1)
return True return True
except Exception as e: except Exception as e:
LOGGER.warning(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}') LOGGER.warning(f'WARNING ⚠️ Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}')
return False return False
@ -470,8 +464,7 @@ def check_dataset(data, autodownload=True):
# Read yaml (optional) # Read yaml (optional)
if isinstance(data, (str, Path)): if isinstance(data, (str, Path)):
with open(data, errors='ignore') as f: data = yaml_load(data) # dictionary
data = yaml.safe_load(f) # dictionary
# Checks # Checks
for k in 'train', 'val', 'names': for k in 'train', 'val', 'names':
@ -486,7 +479,13 @@ def check_dataset(data, autodownload=True):
path = (ROOT / path).resolve() path = (ROOT / path).resolve()
for k in 'train', 'val', 'test': for k in 'train', 'val', 'test':
if data.get(k): # prepend path if data.get(k): # prepend path
data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]] if isinstance(data[k], str):
x = (path / data[k]).resolve()
if not x.exists() and data[k].startswith('../'):
x = (path / data[k][3:]).resolve()
data[k] = str(x)
else:
data[k] = [str((path / x).resolve()) for x in data[k]]
# Parse yaml # Parse yaml
train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download')) train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download'))
@ -497,13 +496,12 @@ def check_dataset(data, autodownload=True):
if not s or not autodownload: if not s or not autodownload:
raise Exception('Dataset not found ❌') raise Exception('Dataset not found ❌')
t = time.time() t = time.time()
root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
if s.startswith('http') and s.endswith('.zip'): # URL if s.startswith('http') and s.endswith('.zip'): # URL
f = Path(s).name # filename f = Path(s).name # filename
LOGGER.info(f'Downloading {s} to {f}...') LOGGER.info(f'Downloading {s} to {f}...')
torch.hub.download_url_to_file(s, f) torch.hub.download_url_to_file(s, f)
Path(root).mkdir(parents=True, exist_ok=True) # create root Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True) # create root
ZipFile(f).extractall(path=root) # unzip ZipFile(f).extractall(path=DATASETS_DIR) # unzip
Path(f).unlink() # remove zip Path(f).unlink() # remove zip
r = None # success r = None # success
elif s.startswith('bash '): # bash script elif s.startswith('bash '): # bash script
@ -512,7 +510,7 @@ def check_dataset(data, autodownload=True):
else: # python script else: # python script
r = exec(s, {'yaml': data}) # return None r = exec(s, {'yaml': data}) # return None
dt = f'({round(time.time() - t, 1)}s)' dt = f'({round(time.time() - t, 1)}s)'
s = f"success ✅ {dt}, saved to {colorstr('bold', root)}" if r in (0, None) else f"failure {dt}" s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f"failure {dt}"
LOGGER.info(f"Dataset download {s}") LOGGER.info(f"Dataset download {s}")
check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True) # download fonts check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True) # download fonts
return data # dictionary return data # dictionary
@ -537,7 +535,7 @@ def check_amp(model):
f = ROOT / 'data' / 'images' / 'bus.jpg' # image to check f = ROOT / 'data' / 'images' / 'bus.jpg' # image to check
im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if check_online() else np.ones((640, 640, 3)) im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if check_online() else np.ones((640, 640, 3))
try: try:
assert amp_allclose(model, im) or amp_allclose(DetectMultiBackend('yolov5n.pt', device), im) assert amp_allclose(deepcopy(model), im) or amp_allclose(DetectMultiBackend('yolov5n.pt', device), im)
LOGGER.info(f'{prefix}checks passed ✅') LOGGER.info(f'{prefix}checks passed ✅')
return True return True
except Exception: except Exception:
@ -569,10 +567,10 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry
def download_one(url, dir): def download_one(url, dir):
# Download 1 file # Download 1 file
success = True success = True
f = dir / Path(url).name # filename if Path(url).is_file():
if Path(url).is_file(): # exists in current path f = Path(url) # filename
Path(url).rename(f) # move to dir else: # does not exist
elif not f.exists(): f = dir / Path(url).name
LOGGER.info(f'Downloading {url} to {f}...') LOGGER.info(f'Downloading {url} to {f}...')
for i in range(retry + 1): for i in range(retry + 1):
if curl: if curl:
@ -586,9 +584,9 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry
if success: if success:
break break
elif i < retry: elif i < retry:
LOGGER.warning(f'Download failure, retrying {i + 1}/{retry} {url}...') LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')
else: else:
LOGGER.warning(f'Failed to download {url}...') LOGGER.warning(f'Failed to download {url}...')
if unzip and success and f.suffix in ('.zip', '.tar', '.gz'): if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
LOGGER.info(f'Unzipping {f}...') LOGGER.info(f'Unzipping {f}...')
@ -727,7 +725,7 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
if clip: if clip:
clip_coords(x, (h - eps, w - eps)) # warning: inplace clip clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center
y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center
@ -771,7 +769,23 @@ def resample_segments(segments, n=1000):
return segments return segments
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
# Rescale boxes (xyxy) from img1_shape to img0_shape
if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
boxes[:, [0, 2]] -= pad[0] # x padding
boxes[:, [1, 3]] -= pad[1] # y padding
boxes[:, :4] /= gain
clip_boxes(boxes, img0_shape)
return boxes
def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None):
# Rescale coords (xyxy) from img1_shape to img0_shape # Rescale coords (xyxy) from img1_shape to img0_shape
if ratio_pad is None: # calculate from img0_shape if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
@ -780,15 +794,15 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
gain = ratio_pad[0][0] gain = ratio_pad[0][0]
pad = ratio_pad[1] pad = ratio_pad[1]
coords[:, [0, 2]] -= pad[0] # x padding segments[:, 0] -= pad[0] # x padding
coords[:, [1, 3]] -= pad[1] # y padding segments[:, 1] -= pad[1] # y padding
coords[:, :4] /= gain segments /= gain
clip_coords(coords, img0_shape) clip_segments(segments, img0_shape)
return coords return segments
def clip_coords(boxes, shape): def clip_boxes(boxes, shape):
# Clip bounding xyxy bounding boxes to image shape (height, width) # Clip boxes (xyxy) to image shape (height, width)
if isinstance(boxes, torch.Tensor): # faster individually if isinstance(boxes, torch.Tensor): # faster individually
boxes[:, 0].clamp_(0, shape[1]) # x1 boxes[:, 0].clamp_(0, shape[1]) # x1
boxes[:, 1].clamp_(0, shape[0]) # y1 boxes[:, 1].clamp_(0, shape[0]) # y1
@ -799,15 +813,28 @@ def clip_coords(boxes, shape):
boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
def non_max_suppression(prediction, def clip_segments(boxes, shape):
# Clip segments (xy1,xy2,...) to image shape (height, width)
if isinstance(boxes, torch.Tensor): # faster individually
boxes[:, 0].clamp_(0, shape[1]) # x
boxes[:, 1].clamp_(0, shape[0]) # y
else: # np.array (faster grouped)
boxes[:, 0] = boxes[:, 0].clip(0, shape[1]) # x
boxes[:, 1] = boxes[:, 1].clip(0, shape[0]) # y
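`scale_coords` becomes `scale_boxes`, with `scale_segments`/`clip_segments` added alongside for the segmentation path. A short sketch of the usual detect-time mapping from the letterboxed input shape back to the original frame, using example shapes:

```python
# Sketch: rescale xyxy boxes from a 384x640 letterboxed input to a 1080x1920 frame
import torch
from utils.general import scale_boxes  # formerly scale_coords

boxes = torch.tensor([[10., 20., 200., 300.]])         # xyxy on the 384x640 model input
boxes = scale_boxes((384, 640), boxes, (1080, 1920))   # remove padding, undo gain, clip
print(boxes)                                           # boxes in original-image pixels
```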
def non_max_suppression(
prediction,
conf_thres=0.25, conf_thres=0.25,
iou_thres=0.45, iou_thres=0.45,
classes=None, classes=None,
agnostic=False, agnostic=False,
multi_label=False, multi_label=False,
labels=(), labels=(),
max_det=300): max_det=300,
"""Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes nm=0, # number of masks
):
"""Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
Returns: Returns:
list of detections, on (n,6) tensor per image [xyxy, conf, cls] list of detections, on (n,6) tensor per image [xyxy, conf, cls]
@ -817,7 +844,7 @@ def non_max_suppression(prediction,
prediction = prediction[0] # select only inference output prediction = prediction[0] # select only inference output
bs = prediction.shape[0] # batch size bs = prediction.shape[0] # batch size
nc = prediction.shape[2] - 5 # number of classes nc = prediction.shape[2] - nm - 5 # number of classes
xc = prediction[..., 4] > conf_thres # candidates xc = prediction[..., 4] > conf_thres # candidates
# Checks # Checks
@ -828,13 +855,14 @@ def non_max_suppression(prediction,
# min_wh = 2 # (pixels) minimum box width and height # min_wh = 2 # (pixels) minimum box width and height
max_wh = 7680 # (pixels) maximum box width and height max_wh = 7680 # (pixels) maximum box width and height
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
time_limit = 0.3 + 0.03 * bs # seconds to quit after time_limit = 0.5 + 0.05 * bs # seconds to quit after
redundant = True # require redundant detections redundant = True # require redundant detections
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
merge = False # use merge-NMS merge = False # use merge-NMS
t = time.time() t = time.time()
output = [torch.zeros((0, 6), device=prediction.device)] * bs mi = 5 + nc # mask start index
output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
for xi, x in enumerate(prediction): # image index, image inference for xi, x in enumerate(prediction): # image index, image inference
# Apply constraints # Apply constraints
# x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
@ -843,7 +871,7 @@ def non_max_suppression(prediction,
# Cat apriori labels if autolabelling # Cat apriori labels if autolabelling
if labels and len(labels[xi]): if labels and len(labels[xi]):
lb = labels[xi] lb = labels[xi]
v = torch.zeros((len(lb), nc + 5), device=x.device) v = torch.zeros((len(lb), nc + nm + 5), device=x.device)
v[:, :4] = lb[:, 1:5] # box v[:, :4] = lb[:, 1:5] # box
v[:, 4] = 1.0 # conf v[:, 4] = 1.0 # conf
v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls
@ -856,16 +884,17 @@ def non_max_suppression(prediction,
# Compute conf # Compute conf
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
# Box (center x, center y, width, height) to (x1, y1, x2, y2) # Box/Mask
box = xywh2xyxy(x[:, :4]) box = xywh2xyxy(x[:, :4]) # center_x, center_y, width, height) to (x1, y1, x2, y2)
mask = x[:, mi:] # zero columns if no masks
# Detections matrix nx6 (xyxy, conf, cls) # Detections matrix nx6 (xyxy, conf, cls)
if multi_label: if multi_label:
i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T i, j = (x[:, 5:mi] > conf_thres).nonzero(as_tuple=False).T
x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) x = torch.cat((box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1)
else: # best class only else: # best class only
conf, j = x[:, 5:].max(1, keepdim=True) conf, j = x[:, 5:mi].max(1, keepdim=True)
x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
# Filter by class # Filter by class
if classes is not None: if classes is not None:
@ -881,6 +910,8 @@ def non_max_suppression(prediction,
continue continue
elif n > max_nms: # excess boxes elif n > max_nms: # excess boxes
x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
else:
x = x[x[:, 4].argsort(descending=True)] # sort by confidence
# Batched NMS # Batched NMS
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
@ -898,7 +929,7 @@ def non_max_suppression(prediction,
output[xi] = x[i] output[xi] = x[i]
if (time.time() - t) > time_limit: if (time.time() - t) > time_limit:
LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded') LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
break # time limit exceeded break # time limit exceeded
return output return output
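The new `nm` argument reserves trailing mask columns so the same NMS routine also serves the segmentation heads; detection callers are unaffected since `nm` defaults to 0. A hedged sketch with dummy tensors (random values, 80 classes) just to show the output shapes:

```python
# Sketch: the updated NMS on dummy detection and segmentation outputs
import torch
from utils.general import non_max_suppression

pred = torch.rand(1, 1000, 85)        # detection head: 4 box + 1 obj + 80 classes
det = non_max_suppression(pred, 0.25, 0.45, max_det=300)
print(det[0].shape)                   # (n, 6): xyxy, conf, cls

pred_seg = torch.rand(1, 1000, 117)   # seg head: 85 + 32 mask coefficients
seg = non_max_suppression(pred_seg, 0.25, 0.45, nm=32)
print(seg[0].shape)                   # (n, 38): xyxy, conf, cls, 32 coefficients
```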
@ -975,7 +1006,7 @@ def apply_classifier(x, model, img, im0):
d[:, :4] = xywh2xyxy(b).long() d[:, :4] = xywh2xyxy(b).long()
# Rescale boxes from img_size to im0 size # Rescale boxes from img_size to im0 size
scale_coords(img.shape[2:], d[:, :4], im0[i].shape) scale_boxes(img.shape[2:], d[:, :4], im0[i].shape)
# Classes # Classes
pred_cls1 = d[:, 5].long() pred_cls1 = d[:, 5].long()


@ -11,13 +11,13 @@ import pkg_resources as pkg
import torch import torch
from torch.utils.tensorboard import SummaryWriter from torch.utils.tensorboard import SummaryWriter
from utils.general import colorstr, cv2 from utils.general import LOGGER, colorstr, cv2
from utils.loggers.clearml.clearml_utils import ClearmlLogger from utils.loggers.clearml.clearml_utils import ClearmlLogger
from utils.loggers.wandb.wandb_utils import WandbLogger from utils.loggers.wandb.wandb_utils import WandbLogger
from utils.plots import plot_images, plot_labels, plot_results from utils.plots import plot_images, plot_labels, plot_results
from utils.torch_utils import de_parallel from utils.torch_utils import de_parallel
LOGGERS = ('csv', 'tb', 'wandb', 'clearml') # *.csv, TensorBoard, Weights & Biases, ClearML LOGGERS = ('csv', 'tb', 'wandb', 'clearml', 'comet') # *.csv, TensorBoard, Weights & Biases, ClearML
RANK = int(os.getenv('RANK', -1)) RANK = int(os.getenv('RANK', -1))
try: try:
@ -41,6 +41,18 @@ try:
except (ImportError, AssertionError): except (ImportError, AssertionError):
clearml = None clearml = None
try:
if RANK not in [0, -1]:
comet_ml = None
else:
import comet_ml
assert hasattr(comet_ml, '__version__') # verify package import not local dir
from utils.loggers.comet import CometLogger
except (ModuleNotFoundError, ImportError, AssertionError):
comet_ml = None
class Loggers(): class Loggers():
# YOLOv5 Loggers class # YOLOv5 Loggers class
@ -80,7 +92,10 @@ class Loggers():
prefix = colorstr('ClearML: ') prefix = colorstr('ClearML: ')
s = f"{prefix}run 'pip install clearml' to automatically track, visualize and remotely train YOLOv5 🚀 in ClearML" s = f"{prefix}run 'pip install clearml' to automatically track, visualize and remotely train YOLOv5 🚀 in ClearML"
self.logger.info(s) self.logger.info(s)
if not comet_ml:
prefix = colorstr('Comet: ')
s = f"{prefix}run 'pip install comet_ml' to automatically track and visualize YOLOv5 🚀 runs in Comet"
self.logger.info(s)
# TensorBoard # TensorBoard
s = self.save_dir s = self.save_dir
if 'tb' in self.include and not self.opt.evolve: if 'tb' in self.include and not self.opt.evolve:
@ -107,6 +122,18 @@ class Loggers():
else: else:
self.clearml = None self.clearml = None
# Comet
if comet_ml and 'comet' in self.include:
if isinstance(self.opt.resume, str) and self.opt.resume.startswith("comet://"):
run_id = self.opt.resume.split("/")[-1]
self.comet_logger = CometLogger(self.opt, self.hyp, run_id=run_id)
else:
self.comet_logger = CometLogger(self.opt, self.hyp)
else:
self.comet_logger = None
@property @property
def remote_dataset(self): def remote_dataset(self):
# Get data_dict if custom dataset artifact link is provided # Get data_dict if custom dataset artifact link is provided
@ -115,12 +142,18 @@ class Loggers():
data_dict = self.clearml.data_dict data_dict = self.clearml.data_dict
if self.wandb: if self.wandb:
data_dict = self.wandb.data_dict data_dict = self.wandb.data_dict
if self.comet_logger:
data_dict = self.comet_logger.data_dict
return data_dict return data_dict
def on_train_start(self): def on_train_start(self):
# Callback runs on train start if self.comet_logger:
pass self.comet_logger.on_train_start()
def on_pretrain_routine_start(self):
if self.comet_logger:
self.comet_logger.on_pretrain_routine_start()
def on_pretrain_routine_end(self, labels, names): def on_pretrain_routine_end(self, labels, names):
# Callback runs on pre-train routine end # Callback runs on pre-train routine end
@ -131,8 +164,11 @@ class Loggers():
self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]}) self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]})
# if self.clearml: # if self.clearml:
# pass # ClearML saves these images automatically using hooks # pass # ClearML saves these images automatically using hooks
if self.comet_logger:
self.comet_logger.on_pretrain_routine_end(paths)
def on_train_batch_end(self, model, ni, imgs, targets, paths): def on_train_batch_end(self, model, ni, imgs, targets, paths, vals):
log_dict = dict(zip(self.keys[0:3], vals))
# Callback runs on train batch end # Callback runs on train batch end
# ni: number integrated batches (since train start) # ni: number integrated batches (since train start)
if self.plots: if self.plots:
@ -148,11 +184,21 @@ class Loggers():
if self.clearml: if self.clearml:
self.clearml.log_debug_samples(files, title='Mosaics') self.clearml.log_debug_samples(files, title='Mosaics')
if self.comet_logger:
self.comet_logger.on_train_batch_end(log_dict, step=ni)
def on_train_epoch_end(self, epoch): def on_train_epoch_end(self, epoch):
# Callback runs on train epoch end # Callback runs on train epoch end
if self.wandb: if self.wandb:
self.wandb.current_epoch = epoch + 1 self.wandb.current_epoch = epoch + 1
if self.comet_logger:
self.comet_logger.on_train_epoch_end(epoch)
def on_val_start(self):
if self.comet_logger:
self.comet_logger.on_val_start()
def on_val_image_end(self, pred, predn, path, names, im): def on_val_image_end(self, pred, predn, path, names, im):
# Callback runs on val image end # Callback runs on val image end
if self.wandb: if self.wandb:
@ -160,7 +206,11 @@ class Loggers():
if self.clearml: if self.clearml:
self.clearml.log_image_with_boxes(path, pred, names, im) self.clearml.log_image_with_boxes(path, pred, names, im)
def on_val_end(self): def on_val_batch_end(self, batch_i, im, targets, paths, shapes, out):
if self.comet_logger:
self.comet_logger.on_val_batch_end(batch_i, im, targets, paths, shapes, out)
def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
# Callback runs on val end # Callback runs on val end
if self.wandb or self.clearml: if self.wandb or self.clearml:
files = sorted(self.save_dir.glob('val*.jpg')) files = sorted(self.save_dir.glob('val*.jpg'))
@ -169,6 +219,9 @@ class Loggers():
if self.clearml: if self.clearml:
self.clearml.log_debug_samples(files, title='Validation') self.clearml.log_debug_samples(files, title='Validation')
if self.comet_logger:
self.comet_logger.on_val_end(nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix)
def on_fit_epoch_end(self, vals, epoch, best_fitness, fi): def on_fit_epoch_end(self, vals, epoch, best_fitness, fi):
# Callback runs at the end of each fit (train+val) epoch # Callback runs at the end of each fit (train+val) epoch
x = dict(zip(self.keys, vals)) x = dict(zip(self.keys, vals))
@ -199,6 +252,9 @@ class Loggers():
self.clearml.current_epoch_logged_images = set() # reset epoch image limit self.clearml.current_epoch_logged_images = set() # reset epoch image limit
self.clearml.current_epoch += 1 self.clearml.current_epoch += 1
if self.comet_logger:
self.comet_logger.on_fit_epoch_end(x, epoch=epoch)
def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
# Callback runs on model save event # Callback runs on model save event
if (epoch + 1) % self.opt.save_period == 0 and not final_epoch and self.opt.save_period != -1: if (epoch + 1) % self.opt.save_period == 0 and not final_epoch and self.opt.save_period != -1:
@ -209,6 +265,9 @@ class Loggers():
model_name='Latest Model', model_name='Latest Model',
auto_delete_file=False) auto_delete_file=False)
if self.comet_logger:
self.comet_logger.on_model_save(last, epoch, final_epoch, best_fitness, fi)
def on_train_end(self, last, best, epoch, results): def on_train_end(self, last, best, epoch, results):
# Callback runs on training end, i.e. saving best model # Callback runs on training end, i.e. saving best model
if self.plots: if self.plots:
@ -237,10 +296,16 @@ class Loggers():
name='Best Model', name='Best Model',
auto_delete_file=False) auto_delete_file=False)
if self.comet_logger:
final_results = dict(zip(self.keys[3:10], results))
self.comet_logger.on_train_end(files, self.save_dir, last, best, epoch, final_results)
def on_params_update(self, params: dict): def on_params_update(self, params: dict):
# Update hyperparams or configs of the experiment # Update hyperparams or configs of the experiment
if self.wandb: if self.wandb:
self.wandb.wandb_run.config.update(params, allow_val_change=True) self.wandb.wandb_run.config.update(params, allow_val_change=True)
if self.comet_logger:
self.comet_logger.on_params_update(params)
class GenericLogger: class GenericLogger:
@ -328,7 +393,7 @@ def log_tensorboard_graph(tb, model, imgsz=(640, 640)):
warnings.simplefilter('ignore') # suppress jit trace warning warnings.simplefilter('ignore') # suppress jit trace warning
tb.add_graph(torch.jit.trace(de_parallel(model), im, strict=False), []) tb.add_graph(torch.jit.trace(de_parallel(model), im, strict=False), [])
except Exception as e: except Exception as e:
print(f'WARNING: TensorBoard graph visualization failure {e}') LOGGER.warning(f'WARNING ⚠️ TensorBoard graph visualization failure {e}')
def web_project_name(project): def web_project_name(project):


@ -11,6 +11,7 @@ from utils.plots import Annotator, colors
try: try:
import clearml import clearml
from clearml import Dataset, Task from clearml import Dataset, Task
assert hasattr(clearml, '__version__') # verify package import not local dir assert hasattr(clearml, '__version__') # verify package import not local dir
except (ImportError, AssertionError): except (ImportError, AssertionError):
clearml = None clearml = None


@ -0,0 +1,256 @@
<img src="https://cdn.comet.ml/img/notebook_logo.png">
# YOLOv5 with Comet
This guide will cover how to use YOLOv5 with [Comet](https://bit.ly/yolov5-readme-comet)
# About Comet
Comet builds tools that help data scientists, engineers, and team leaders accelerate and optimize machine learning and deep learning models.
Track and visualize model metrics in real time, save your hyperparameters, datasets, and model checkpoints, and visualize your model predictions with [Comet Custom Panels](https://bit.ly/yolov5-colab-comet-panels)!
Comet makes sure you never lose track of your work and makes it easy to share results and collaborate across teams of all sizes!
# Getting Started
## Install Comet
```shell
pip install comet_ml
```
## Configure Comet Credentials
There are two ways to configure Comet with YOLOv5.
You can either set your credentials through environment variables
**Environment Variables**
```shell
export COMET_API_KEY=<Your Comet API Key>
export COMET_PROJECT_NAME=<Your Comet Project Name> # This will default to 'yolov5'
```
Or create a `.comet.config` file in your working directory and set your credentials there.
**Comet Configuration File**
```
[comet]
api_key=<Your Comet API Key>
project_name=<Your Comet Project Name> # This will default to 'yolov5'
```
## Run the Training Script
```shell
# Train YOLOv5s on COCO128 for 5 epochs
python train.py --img 640 --batch 16 --epochs 5 --data coco128.yaml --weights yolov5s.pt
```
That's it! Comet will automatically log your hyperparameters, command line arguments, and training and validation metrics. You can visualize and analyze your runs in the Comet UI.
<img width="1920" alt="yolo-ui" src="https://user-images.githubusercontent.com/7529846/187608607-ff89c3d5-1b8b-4743-a974-9275301b0524.png">
# Try out an Example!
Check out an example of a [completed run here](https://www.comet.com/examples/comet-example-yolov5/a0e29e0e9b984e4a822db2a62d0cb357?experiment-tab=chart&showOutliers=true&smoothing=0&transformY=smoothing&xAxis=step&ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration)
Or better yet, try it out yourself in this Colab Notebook
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1RG0WOQyxlDlo5Km8GogJpIEJlg_5lyYO?usp=sharing)
# Log automatically
By default, Comet will log the following items
## Metrics
- Box Loss, Object Loss, Classification Loss for the training and validation data
- mAP_0.5, mAP_0.5:0.95 metrics for the validation data.
- Precision and Recall for the validation data
## Parameters
- Model Hyperparameters
- All parameters passed through the command line options
## Visualizations
- Confusion Matrix of the model predictions on the validation data
- Plots for the PR and F1 curves across all classes
- Correlogram of the Class Labels
# Configure Comet Logging
Comet can be configured to log additional data either through command line flags passed to the training script
or through environment variables.
```shell
export COMET_MODE=online # Set whether to run Comet in 'online' or 'offline' mode. Defaults to online
export COMET_MODEL_NAME=<your model name> #Set the name for the saved model. Defaults to yolov5
export COMET_LOG_CONFUSION_MATRIX=false # Set to disable logging a Comet Confusion Matrix. Defaults to true
export COMET_MAX_IMAGE_UPLOADS=<number of allowed images to upload to Comet> # Controls how many total image predictions to log to Comet. Defaults to 100.
export COMET_LOG_PER_CLASS_METRICS=true # Set to log evaluation metrics for each detected class at the end of training. Defaults to false
export COMET_DEFAULT_CHECKPOINT_FILENAME=<your checkpoint filename> # Set this if you would like to resume training from a different checkpoint. Defaults to 'last.pt'
export COMET_LOG_BATCH_LEVEL_METRICS=true # Set this if you would like to log training metrics at the batch level. Defaults to false.
export COMET_LOG_PREDICTIONS=true # Set this to false to disable logging model predictions
```
## Logging Checkpoints with Comet
Logging Models to Comet is disabled by default. To enable it, pass the `save-period` argument to the training script. This will save the
logged checkpoints to Comet based on the interval value provided by `save-period`
```shell
python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt \
--save-period 1
```
## Logging Model Predictions
By default, model predictions (images, ground truth labels and bounding boxes) will be logged to Comet.
You can control the frequency of logged predictions and the associated images by passing the `bbox_interval` command line argument. Predictions can be visualized using Comet's Object Detection Custom Panel. This frequency corresponds to every Nth batch of data per epoch. In the example below, we are logging every 2nd batch of data for each epoch.
**Note:** The YOLOv5 validation dataloader will default to a batch size of 32, so you will have to set the logging frequency accordingly.
Here is an [example project using the Panel](https://www.comet.com/examples/comet-example-yolov5?shareable=YcwMiJaZSXfcEXpGOHDD12vA1&ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration)
```shell
python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt \
--bbox_interval 2
```
### Controlling the number of Prediction Images logged to Comet
When logging predictions from YOLOv5, Comet will log the images associated with each set of predictions. By default a maximum of 100 validation images are logged. You can increase or decrease this number using the `COMET_MAX_IMAGE_UPLOADS` environment variable.
```shell
env COMET_MAX_IMAGE_UPLOADS=200 python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt \
--bbox_interval 1
```
### Logging Class Level Metrics
Use the `COMET_LOG_PER_CLASS_METRICS` environment variable to log mAP, precision, recall, f1 for each class.
```shell
env COMET_LOG_PER_CLASS_METRICS=true python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt
```
## Uploading a Dataset to Comet Artifacts
If you would like to store your data using [Comet Artifacts](https://www.comet.com/docs/v2/guides/data-management/using-artifacts/#learn-more?ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration), you can do so using the `upload_dataset` flag.
The dataset should be organized in the way described in the [YOLOv5 documentation](https://docs.ultralytics.com/tutorials/train-custom-datasets/#3-organize-directories). The dataset config `yaml` file must follow the same format as that of the `coco128.yaml` file.
```shell
python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt \
--upload_dataset
```
You can find the uploaded dataset in the Artifacts tab in your Comet Workspace
<img width="1073" alt="artifact-1" src="https://user-images.githubusercontent.com/7529846/186929193-162718bf-ec7b-4eb9-8c3b-86b3763ef8ea.png">
You can preview the data directly in the Comet UI.
<img width="1082" alt="artifact-2" src="https://user-images.githubusercontent.com/7529846/186929215-432c36a9-c109-4eb0-944b-84c2786590d6.png">
Artifacts are versioned and also support adding metadata about the dataset. Comet will automatically log the metadata from your dataset `yaml` file
<img width="963" alt="artifact-3" src="https://user-images.githubusercontent.com/7529846/186929256-9d44d6eb-1a19-42de-889a-bcbca3018f2e.png">
### Using a saved Artifact
If you would like to use a dataset from Comet Artifacts, set the `path` variable in your dataset `yaml` file to point to the following Artifact resource URL.
```
# contents of artifact.yaml file
path: "comet://<workspace name>/<artifact name>:<artifact version or alias>"
```
Then pass this file to your training script in the following way
```shell
python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data artifact.yaml \
--weights yolov5s.pt
```
Artifacts also allow you to track the lineage of data as it flows through your Experimentation workflow. Here you can see a graph that shows you all the experiments that have used your uploaded dataset.
<img width="1391" alt="artifact-4" src="https://user-images.githubusercontent.com/7529846/186929264-4c4014fa-fe51-4f3c-a5c5-f6d24649b1b4.png">
## Resuming a Training Run
If your training run is interrupted for any reason, e.g. disrupted internet connection, you can resume the run using the `resume` flag and the Comet Run Path.
The Run Path has the following format `comet://<your workspace name>/<your project name>/<experiment id>`.
This will restore the run to its state before the interruption, which includes restoring the model from a checkpoint, restoring all hyperparameters and training arguments, and downloading Comet dataset Artifacts if they were used in the original run. The resumed run will continue logging to the existing Experiment in the Comet UI.
```shell
python train.py \
--resume "comet://<your run path>"
```
## Hyperparameter Search with the Comet Optimizer
YOLOv5 is also integrated with Comet's Optimizer, making it simple to visualize hyperparameter sweeps in the Comet UI.
### Configuring an Optimizer Sweep
To configure the Comet Optimizer, you will have to create a JSON file with the information about the sweep. An example file has been provided in `utils/loggers/comet/optimizer_config.json`
```shell
python utils/loggers/comet/hpo.py \
--comet_optimizer_config "utils/loggers/comet/optimizer_config.json"
```
The `hpo.py` script accepts the same arguments as `train.py`. If you wish to pass additional arguments to your sweep, simply add them after the script.
```shell
python utils/loggers/comet/hpo.py \
--comet_optimizer_config "utils/loggers/comet/optimizer_config.json" \
--save-period 1 \
--bbox_interval 1
```
### Running a Sweep in Parallel
```shell
comet optimizer -j <set number of workers> utils/loggers/comet/hpo.py \
  "utils/loggers/comet/optimizer_config.json"
```
### Visualizing Results
Comet provides a number of ways to visualize the results of your sweep. Take a look at a [project with a completed sweep here](https://www.comet.com/examples/comet-example-yolov5/view/PrlArHGuuhDTKC1UuBmTtOSXD/panels?ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration)
<img width="1626" alt="hyperparameter-yolo" src="https://user-images.githubusercontent.com/7529846/186914869-7dc1de14-583f-4323-967b-c9a66a29e495.png">


@ -0,0 +1,501 @@
import glob
import json
import logging
import os
import sys
from pathlib import Path
logger = logging.getLogger(__name__)
FILE = Path(__file__).resolve()
ROOT = FILE.parents[3] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
try:
import comet_ml
# Project Configuration
config = comet_ml.config.get_config()
COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5")
except (ModuleNotFoundError, ImportError):
comet_ml = None
COMET_PROJECT_NAME = None
import PIL
import torch
import torchvision.transforms as T
import yaml
from utils.dataloaders import img2label_paths
from utils.general import check_dataset, scale_boxes, xywh2xyxy
from utils.metrics import box_iou
COMET_PREFIX = "comet://"
COMET_MODE = os.getenv("COMET_MODE", "online")
# Model Saving Settings
COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5")
# Dataset Artifact Settings
COMET_UPLOAD_DATASET = os.getenv("COMET_UPLOAD_DATASET", "false").lower() == "true"
# Evaluation Settings
COMET_LOG_CONFUSION_MATRIX = os.getenv("COMET_LOG_CONFUSION_MATRIX", "true").lower() == "true"
COMET_LOG_PREDICTIONS = os.getenv("COMET_LOG_PREDICTIONS", "true").lower() == "true"
COMET_MAX_IMAGE_UPLOADS = int(os.getenv("COMET_MAX_IMAGE_UPLOADS", 100))
# Confusion Matrix Settings
CONF_THRES = float(os.getenv("CONF_THRES", 0.001))
IOU_THRES = float(os.getenv("IOU_THRES", 0.6))
# Batch Logging Settings
COMET_LOG_BATCH_METRICS = os.getenv("COMET_LOG_BATCH_METRICS", "false").lower() == "true"
COMET_BATCH_LOGGING_INTERVAL = os.getenv("COMET_BATCH_LOGGING_INTERVAL", 1)
COMET_PREDICTION_LOGGING_INTERVAL = os.getenv("COMET_PREDICTION_LOGGING_INTERVAL", 1)
COMET_LOG_PER_CLASS_METRICS = os.getenv("COMET_LOG_PER_CLASS_METRICS", "false").lower() == "true"
RANK = int(os.getenv("RANK", -1))
to_pil = T.ToPILImage()
class CometLogger:
"""Log metrics, parameters, source code, models and much more
with Comet
"""
def __init__(self, opt, hyp, run_id=None, job_type="Training", **experiment_kwargs) -> None:
self.job_type = job_type
self.opt = opt
self.hyp = hyp
# Comet Flags
self.comet_mode = COMET_MODE
self.save_model = opt.save_period > -1
self.model_name = COMET_MODEL_NAME
# Batch Logging Settings
self.log_batch_metrics = COMET_LOG_BATCH_METRICS
self.comet_log_batch_interval = COMET_BATCH_LOGGING_INTERVAL
# Dataset Artifact Settings
self.upload_dataset = self.opt.upload_dataset if self.opt.upload_dataset else COMET_UPLOAD_DATASET
self.resume = self.opt.resume
# Default parameters to pass to Experiment objects
self.default_experiment_kwargs = {
"log_code": False,
"log_env_gpu": True,
"log_env_cpu": True,
"project_name": COMET_PROJECT_NAME,}
self.default_experiment_kwargs.update(experiment_kwargs)
self.experiment = self._get_experiment(self.comet_mode, run_id)
self.data_dict = self.check_dataset(self.opt.data)
self.class_names = self.data_dict["names"]
self.num_classes = self.data_dict["nc"]
self.logged_images_count = 0
self.max_images = COMET_MAX_IMAGE_UPLOADS
if run_id is None:
self.experiment.log_other("Created from", "YOLOv5")
if not isinstance(self.experiment, comet_ml.OfflineExperiment):
workspace, project_name, experiment_id = self.experiment.url.split("/")[-3:]
self.experiment.log_other(
"Run Path",
f"{workspace}/{project_name}/{experiment_id}",
)
self.log_parameters(vars(opt))
self.log_parameters(self.opt.hyp)
self.log_asset_data(
self.opt.hyp,
name="hyperparameters.json",
metadata={"type": "hyp-config-file"},
)
self.log_asset(
f"{self.opt.save_dir}/opt.yaml",
metadata={"type": "opt-config-file"},
)
self.comet_log_confusion_matrix = COMET_LOG_CONFUSION_MATRIX
if hasattr(self.opt, "conf_thres"):
self.conf_thres = self.opt.conf_thres
else:
self.conf_thres = CONF_THRES
if hasattr(self.opt, "iou_thres"):
self.iou_thres = self.opt.iou_thres
else:
self.iou_thres = IOU_THRES
self.log_parameters({"val_iou_threshold": self.iou_thres, "val_conf_threshold": self.conf_thres})
self.comet_log_predictions = COMET_LOG_PREDICTIONS
if self.opt.bbox_interval == -1:
self.comet_log_prediction_interval = 1 if self.opt.epochs < 10 else self.opt.epochs // 10
else:
self.comet_log_prediction_interval = self.opt.bbox_interval
if self.comet_log_predictions:
self.metadata_dict = {}
self.logged_image_names = []
self.comet_log_per_class_metrics = COMET_LOG_PER_CLASS_METRICS
self.experiment.log_others({
"comet_mode": COMET_MODE,
"comet_max_image_uploads": COMET_MAX_IMAGE_UPLOADS,
"comet_log_per_class_metrics": COMET_LOG_PER_CLASS_METRICS,
"comet_log_batch_metrics": COMET_LOG_BATCH_METRICS,
"comet_log_confusion_matrix": COMET_LOG_CONFUSION_MATRIX,
"comet_model_name": COMET_MODEL_NAME,})
# Check if running the Experiment with the Comet Optimizer
if hasattr(self.opt, "comet_optimizer_id"):
self.experiment.log_other("optimizer_id", self.opt.comet_optimizer_id)
self.experiment.log_other("optimizer_objective", self.opt.comet_optimizer_objective)
self.experiment.log_other("optimizer_metric", self.opt.comet_optimizer_metric)
self.experiment.log_other("optimizer_parameters", json.dumps(self.hyp))
def _get_experiment(self, mode, experiment_id=None):
if mode == "offline":
if experiment_id is not None:
return comet_ml.ExistingOfflineExperiment(
previous_experiment=experiment_id,
**self.default_experiment_kwargs,
)
return comet_ml.OfflineExperiment(**self.default_experiment_kwargs,)
else:
try:
if experiment_id is not None:
return comet_ml.ExistingExperiment(
previous_experiment=experiment_id,
**self.default_experiment_kwargs,
)
return comet_ml.Experiment(**self.default_experiment_kwargs)
except ValueError:
logger.warning("COMET WARNING: "
"Comet credentials have not been set. "
"Comet will default to offline logging. "
"Please set your credentials to enable online logging.")
return self._get_experiment("offline", experiment_id)
return
def log_metrics(self, log_dict, **kwargs):
self.experiment.log_metrics(log_dict, **kwargs)
def log_parameters(self, log_dict, **kwargs):
self.experiment.log_parameters(log_dict, **kwargs)
def log_asset(self, asset_path, **kwargs):
self.experiment.log_asset(asset_path, **kwargs)
def log_asset_data(self, asset, **kwargs):
self.experiment.log_asset_data(asset, **kwargs)
def log_image(self, img, **kwargs):
self.experiment.log_image(img, **kwargs)
def log_model(self, path, opt, epoch, fitness_score, best_model=False):
if not self.save_model:
return
model_metadata = {
"fitness_score": fitness_score[-1],
"epochs_trained": epoch + 1,
"save_period": opt.save_period,
"total_epochs": opt.epochs,}
model_files = glob.glob(f"{path}/*.pt")
for model_path in model_files:
name = Path(model_path).name
self.experiment.log_model(
self.model_name,
file_or_folder=model_path,
file_name=name,
metadata=model_metadata,
overwrite=True,
)
def check_dataset(self, data_file):
with open(data_file) as f:
data_config = yaml.safe_load(f)
if data_config['path'].startswith(COMET_PREFIX):
path = data_config['path'].replace(COMET_PREFIX, "")
data_dict = self.download_dataset_artifact(path)
return data_dict
self.log_asset(self.opt.data, metadata={"type": "data-config-file"})
return check_dataset(data_file)
def log_predictions(self, image, labelsn, path, shape, predn):
if self.logged_images_count >= self.max_images:
return
detections = predn[predn[:, 4] > self.conf_thres]
iou = box_iou(labelsn[:, 1:], detections[:, :4])
label_idx, detection_idx = torch.where(iou > self.iou_thres)  # rows index labels, columns index detections
if len(label_idx) == 0:
return
filtered_detections = detections[detection_idx]
filtered_labels = labelsn[label_idx]
image_id = path.split("/")[-1].split(".")[0]
image_name = f"{image_id}_curr_epoch_{self.experiment.curr_epoch}"
if image_name not in self.logged_image_names:
native_scale_image = PIL.Image.open(path)
self.log_image(native_scale_image, name=image_name)
self.logged_image_names.append(image_name)
metadata = []
for cls, *xyxy in filtered_labels.tolist():
metadata.append({
"label": f"{self.class_names[int(cls)]}-gt",
"score": 100,
"box": {
"x": xyxy[0],
"y": xyxy[1],
"x2": xyxy[2],
"y2": xyxy[3]},})
for *xyxy, conf, cls in filtered_detections.tolist():
metadata.append({
"label": f"{self.class_names[int(cls)]}",
"score": conf * 100,
"box": {
"x": xyxy[0],
"y": xyxy[1],
"x2": xyxy[2],
"y2": xyxy[3]},})
self.metadata_dict[image_name] = metadata
self.logged_images_count += 1
return
def preprocess_prediction(self, image, labels, shape, pred):
nl, _ = labels.shape[0], pred.shape[0]
# Predictions
if self.opt.single_cls:
pred[:, 5] = 0
predn = pred.clone()
scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1])
labelsn = None
if nl:
tbox = xywh2xyxy(labels[:, 1:5]) # target boxes
scale_boxes(image.shape[1:], tbox, shape[0], shape[1]) # native-space labels
labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels
scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1]) # native-space pred
return predn, labelsn
def add_assets_to_artifact(self, artifact, path, asset_path, split):
img_paths = sorted(glob.glob(f"{asset_path}/*"))
label_paths = img2label_paths(img_paths)
for image_file, label_file in zip(img_paths, label_paths):
image_logical_path, label_logical_path = map(lambda x: os.path.relpath(x, path), [image_file, label_file])
try:
artifact.add(image_file, logical_path=image_logical_path, metadata={"split": split})
artifact.add(label_file, logical_path=label_logical_path, metadata={"split": split})
except ValueError as e:
logger.error('COMET ERROR: Error adding file to Artifact. Skipping file.')
logger.error(f"COMET ERROR: {e}")
continue
return artifact
def upload_dataset_artifact(self):
dataset_name = self.data_dict.get("dataset_name", "yolov5-dataset")
path = str((ROOT / Path(self.data_dict["path"])).resolve())
metadata = self.data_dict.copy()
for key in ["train", "val", "test"]:
split_path = metadata.get(key)
if split_path is not None:
metadata[key] = split_path.replace(path, "")
artifact = comet_ml.Artifact(name=dataset_name, artifact_type="dataset", metadata=metadata)
for key in metadata.keys():
if key in ["train", "val", "test"]:
if isinstance(self.upload_dataset, str) and (key != self.upload_dataset):
continue
asset_path = self.data_dict.get(key)
if asset_path is not None:
artifact = self.add_assets_to_artifact(artifact, path, asset_path, key)
self.experiment.log_artifact(artifact)
return
def download_dataset_artifact(self, artifact_path):
logged_artifact = self.experiment.get_artifact(artifact_path)
artifact_save_dir = str(Path(self.opt.save_dir) / logged_artifact.name)
logged_artifact.download(artifact_save_dir)
metadata = logged_artifact.metadata
data_dict = metadata.copy()
data_dict["path"] = artifact_save_dir
data_dict["names"] = {int(k): v for k, v in metadata.get("names").items()}
data_dict = self.update_data_paths(data_dict)
return data_dict
def update_data_paths(self, data_dict):
path = data_dict.get("path", "")
for split in ["train", "val", "test"]:
if data_dict.get(split):
split_path = data_dict.get(split)
data_dict[split] = (f"{path}/{split_path}" if isinstance(split_path, str) else [
f"{path}/{x}" for x in split_path])
return data_dict
def on_pretrain_routine_end(self, paths):
if self.opt.resume:
return
for path in paths:
self.log_asset(str(path))
if self.upload_dataset:
if not self.resume:
self.upload_dataset_artifact()
return
def on_train_start(self):
self.log_parameters(self.hyp)
def on_train_epoch_start(self):
return
def on_train_epoch_end(self, epoch):
self.experiment.curr_epoch = epoch
return
def on_train_batch_start(self):
return
def on_train_batch_end(self, log_dict, step):
self.experiment.curr_step = step
if self.log_batch_metrics and (step % self.comet_log_batch_interval == 0):
self.log_metrics(log_dict, step=step)
return
def on_train_end(self, files, save_dir, last, best, epoch, results):
if self.comet_log_predictions:
curr_epoch = self.experiment.curr_epoch
self.experiment.log_asset_data(self.metadata_dict, "image-metadata.json", epoch=curr_epoch)
for f in files:
self.log_asset(f, metadata={"epoch": epoch})
self.log_asset(f"{save_dir}/results.csv", metadata={"epoch": epoch})
if not self.opt.evolve:
model_path = str(best if best.exists() else last)
name = Path(model_path).name
if self.save_model:
self.experiment.log_model(
self.model_name,
file_or_folder=model_path,
file_name=name,
overwrite=True,
)
# Check if running Experiment with Comet Optimizer
if hasattr(self.opt, 'comet_optimizer_id'):
metric = results.get(self.opt.comet_optimizer_metric)
self.experiment.log_other('optimizer_metric_value', metric)
self.finish_run()
def on_val_start(self):
return
def on_val_batch_start(self):
return
def on_val_batch_end(self, batch_i, images, targets, paths, shapes, outputs):
if not (self.comet_log_predictions and ((batch_i + 1) % self.comet_log_prediction_interval == 0)):
return
for si, pred in enumerate(outputs):
if len(pred) == 0:
continue
image = images[si]
labels = targets[targets[:, 0] == si, 1:]
shape = shapes[si]
path = paths[si]
predn, labelsn = self.preprocess_prediction(image, labels, shape, pred)
if labelsn is not None:
self.log_predictions(image, labelsn, path, shape, predn)
return
def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
if self.comet_log_per_class_metrics:
if self.num_classes > 1:
for i, c in enumerate(ap_class):
class_name = self.class_names[c]
self.experiment.log_metrics(
{
'mAP@.5': ap50[i],
'mAP@.5:.95': ap[i],
'precision': p[i],
'recall': r[i],
'f1': f1[i],
'true_positives': tp[i],
'false_positives': fp[i],
'support': nt[c]},
prefix=class_name)
if self.comet_log_confusion_matrix:
epoch = self.experiment.curr_epoch
class_names = list(self.class_names.values())
class_names.append("background")
num_classes = len(class_names)
self.experiment.log_confusion_matrix(
matrix=confusion_matrix.matrix,
max_categories=num_classes,
labels=class_names,
epoch=epoch,
column_label='Actual Category',
row_label='Predicted Category',
file_name=f"confusion-matrix-epoch-{epoch}.json",
)
def on_fit_epoch_end(self, result, epoch):
self.log_metrics(result, epoch=epoch)
def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1:
self.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
def on_params_update(self, params):
self.log_parameters(params)
def finish_run(self):
self.experiment.end()
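Editor's note, not part of the commit: every setting of the logger above is read from environment variables at import time, so a run can be reconfigured without touching this file. A minimal hedged sketch (the chosen values are illustrative assumptions, not defaults from this commit):

import os

os.environ["COMET_MODE"] = "offline"                # buffer the experiment locally instead of streaming it
os.environ["COMET_MAX_IMAGE_UPLOADS"] = "50"        # cap the number of logged prediction images
os.environ["COMET_LOG_PER_CLASS_METRICS"] = "true"  # per-class P/R/mAP on validation
# train.py can then be launched as usual; this module picks the values up when it is imported.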

View File

@ -0,0 +1,150 @@
import logging
import os
from urllib.parse import urlparse
try:
import comet_ml
except (ModuleNotFoundError, ImportError):
comet_ml = None
import yaml
logger = logging.getLogger(__name__)
COMET_PREFIX = "comet://"
COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5")
COMET_DEFAULT_CHECKPOINT_FILENAME = os.getenv("COMET_DEFAULT_CHECKPOINT_FILENAME", "last.pt")
def download_model_checkpoint(opt, experiment):
model_dir = f"{opt.project}/{experiment.name}"
os.makedirs(model_dir, exist_ok=True)
model_name = COMET_MODEL_NAME
model_asset_list = experiment.get_model_asset_list(model_name)
if len(model_asset_list) == 0:
logger.error(f"COMET ERROR: No checkpoints found for model name : {model_name}")
return
model_asset_list = sorted(
model_asset_list,
key=lambda x: x["step"],
reverse=True,
)
logged_checkpoint_map = {asset["fileName"]: asset["assetId"] for asset in model_asset_list}
resource_url = urlparse(opt.weights)
checkpoint_filename = resource_url.query
if checkpoint_filename:
asset_id = logged_checkpoint_map.get(checkpoint_filename)
else:
asset_id = logged_checkpoint_map.get(COMET_DEFAULT_CHECKPOINT_FILENAME)
checkpoint_filename = COMET_DEFAULT_CHECKPOINT_FILENAME
if asset_id is None:
logger.error(f"COMET ERROR: Checkpoint {checkpoint_filename} not found in the given Experiment")
return
try:
logger.info(f"COMET INFO: Downloading checkpoint {checkpoint_filename}")
asset_filename = checkpoint_filename
model_binary = experiment.get_asset(asset_id, return_type="binary", stream=False)
model_download_path = f"{model_dir}/{asset_filename}"
with open(model_download_path, "wb") as f:
f.write(model_binary)
opt.weights = model_download_path
except Exception as e:
logger.warning("COMET WARNING: Unable to download checkpoint from Comet")
logger.exception(e)
def set_opt_parameters(opt, experiment):
"""Update the opts Namespace with parameters
from Comet's ExistingExperiment when resuming a run
Args:
opt (argparse.Namespace): Namespace of command line options
experiment (comet_ml.APIExperiment): Comet API Experiment object
"""
asset_list = experiment.get_asset_list()
resume_string = opt.resume
for asset in asset_list:
if asset["fileName"] == "opt.yaml":
asset_id = asset["assetId"]
asset_binary = experiment.get_asset(asset_id, return_type="binary", stream=False)
opt_dict = yaml.safe_load(asset_binary)
for key, value in opt_dict.items():
setattr(opt, key, value)
opt.resume = resume_string
# Save hyperparameters to YAML file
# Necessary to pass checks in training script
save_dir = f"{opt.project}/{experiment.name}"
os.makedirs(save_dir, exist_ok=True)
hyp_yaml_path = f"{save_dir}/hyp.yaml"
with open(hyp_yaml_path, "w") as f:
yaml.dump(opt.hyp, f)
opt.hyp = hyp_yaml_path
def check_comet_weights(opt):
"""Downloads model weights from Comet and updates the
weights path to point to saved weights location
Args:
opt (argparse.Namespace): Command Line arguments passed
to YOLOv5 training script
Returns:
None/bool: Return True if weights are successfully downloaded
else return None
"""
if comet_ml is None:
return
if isinstance(opt.weights, str):
if opt.weights.startswith(COMET_PREFIX):
api = comet_ml.API()
resource = urlparse(opt.weights)
experiment_path = f"{resource.netloc}{resource.path}"
experiment = api.get(experiment_path)
download_model_checkpoint(opt, experiment)
return True
return None
def check_comet_resume(opt):
"""Restores run parameters to its original state based on the model checkpoint
and logged Experiment parameters.
Args:
opt (argparse.Namespace): Command Line arguments passed
to YOLOv5 training script
Returns:
None/bool: Return True if the run is restored successfully
else return None
"""
if comet_ml is None:
return
if isinstance(opt.resume, str):
if opt.resume.startswith(COMET_PREFIX):
api = comet_ml.API()
resource = urlparse(opt.resume)
experiment_path = f"{resource.netloc}{resource.path}"
experiment = api.get(experiment_path)
set_opt_parameters(opt, experiment)
download_model_checkpoint(opt, experiment)
return True
return None
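Editor's sketch (an assumption, not part of the commit) of exercising the comet:// weights handling above directly. The module path utils/loggers/comet/comet_utils.py and the run path are placeholders, and valid Comet API credentials are required:

import argparse

from utils.loggers.comet.comet_utils import check_comet_weights  # assumed module path

opt = argparse.Namespace(
    weights="comet://workspace/project/experiment_id?best.pt",  # placeholder run path
    project="runs/train",                                       # used to build the local download dir
)
if check_comet_weights(opt):
    print(f"checkpoint downloaded to {opt.weights}")  # opt.weights now points at the local file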

118
utils/loggers/comet/hpo.py Normal file
View File

@ -0,0 +1,118 @@
import argparse
import json
import logging
import os
import sys
from pathlib import Path
import comet_ml
logger = logging.getLogger(__name__)
FILE = Path(__file__).resolve()
ROOT = FILE.parents[3] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
from train import train
from utils.callbacks import Callbacks
from utils.general import increment_path
from utils.torch_utils import select_device
# Project Configuration
config = comet_ml.config.get_config()
COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5")
def get_args(known=False):
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
parser.add_argument('--epochs', type=int, default=300, help='total training epochs')
parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
parser.add_argument('--rect', action='store_true', help='rectangular training')
parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
parser.add_argument('--noval', action='store_true', help='only validate final epoch')
parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
parser.add_argument('--noplots', action='store_true', help='save no plot files')
parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
parser.add_argument('--name', default='exp', help='save to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--quad', action='store_true', help='quad dataloader')
parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
parser.add_argument('--seed', type=int, default=0, help='Global training seed')
parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
# Weights & Biases arguments
parser.add_argument('--entity', default=None, help='W&B: Entity')
parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
# Comet Arguments
parser.add_argument("--comet_optimizer_config", type=str, help="Comet: Path to a Comet Optimizer Config File.")
parser.add_argument("--comet_optimizer_id", type=str, help="Comet: ID of the Comet Optimizer sweep.")
parser.add_argument("--comet_optimizer_objective", type=str, help="Comet: Set to 'minimize' or 'maximize'.")
parser.add_argument("--comet_optimizer_metric", type=str, help="Comet: Metric to Optimize.")
parser.add_argument("--comet_optimizer_workers",
type=int,
default=1,
help="Comet: Number of Parallel Workers to use with the Comet Optimizer.")
return parser.parse_known_args()[0] if known else parser.parse_args()
def run(parameters, opt):
hyp_dict = {k: v for k, v in parameters.items() if k not in ["epochs", "batch_size"]}
opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve))
opt.batch_size = parameters.get("batch_size")
opt.epochs = parameters.get("epochs")
device = select_device(opt.device, batch_size=opt.batch_size)
train(hyp_dict, opt, device, callbacks=Callbacks())
if __name__ == "__main__":
opt = get_args(known=True)
opt.weights = str(opt.weights)
opt.cfg = str(opt.cfg)
opt.data = str(opt.data)
opt.project = str(opt.project)
optimizer_id = os.getenv("COMET_OPTIMIZER_ID")
if optimizer_id is None:
with open(opt.comet_optimizer_config) as f:
optimizer_config = json.load(f)
optimizer = comet_ml.Optimizer(optimizer_config)
else:
optimizer = comet_ml.Optimizer(optimizer_id)
opt.comet_optimizer_id = optimizer.id
status = optimizer.status()
opt.comet_optimizer_objective = status["spec"]["objective"]
opt.comet_optimizer_metric = status["spec"]["metric"]
logger.info("COMET INFO: Starting Hyperparameter Sweep")
for parameter in optimizer.get_parameters():
run(parameter["parameters"], opt)
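Editor's sketch of driving the script above: a sweep is created once from a config dict, and its id is what additional workers consume through the COMET_OPTIMIZER_ID environment variable. The config path is an assumption matching the next file in this commit:

import json

import comet_ml

with open("utils/loggers/comet/optimizer_config.json") as f:  # assumed path
    optimizer = comet_ml.Optimizer(json.load(f))
print("sweep id:", optimizer.id)  # export as COMET_OPTIMIZER_ID so extra workers can join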

View File

@ -0,0 +1,209 @@
{
"algorithm": "random",
"parameters": {
"anchor_t": {
"type": "discrete",
"values": [
2,
8
]
},
"batch_size": {
"type": "discrete",
"values": [
16,
32,
64
]
},
"box": {
"type": "discrete",
"values": [
0.02,
0.2
]
},
"cls": {
"type": "discrete",
"values": [
0.2
]
},
"cls_pw": {
"type": "discrete",
"values": [
0.5
]
},
"copy_paste": {
"type": "discrete",
"values": [
1
]
},
"degrees": {
"type": "discrete",
"values": [
0,
45
]
},
"epochs": {
"type": "discrete",
"values": [
5
]
},
"fl_gamma": {
"type": "discrete",
"values": [
0
]
},
"fliplr": {
"type": "discrete",
"values": [
0
]
},
"flipud": {
"type": "discrete",
"values": [
0
]
},
"hsv_h": {
"type": "discrete",
"values": [
0
]
},
"hsv_s": {
"type": "discrete",
"values": [
0
]
},
"hsv_v": {
"type": "discrete",
"values": [
0
]
},
"iou_t": {
"type": "discrete",
"values": [
0.7
]
},
"lr0": {
"type": "discrete",
"values": [
1e-05,
0.1
]
},
"lrf": {
"type": "discrete",
"values": [
0.01,
1
]
},
"mixup": {
"type": "discrete",
"values": [
1
]
},
"momentum": {
"type": "discrete",
"values": [
0.6
]
},
"mosaic": {
"type": "discrete",
"values": [
0
]
},
"obj": {
"type": "discrete",
"values": [
0.2
]
},
"obj_pw": {
"type": "discrete",
"values": [
0.5
]
},
"optimizer": {
"type": "categorical",
"values": [
"SGD",
"Adam",
"AdamW"
]
},
"perspective": {
"type": "discrete",
"values": [
0
]
},
"scale": {
"type": "discrete",
"values": [
0
]
},
"shear": {
"type": "discrete",
"values": [
0
]
},
"translate": {
"type": "discrete",
"values": [
0
]
},
"warmup_bias_lr": {
"type": "discrete",
"values": [
0,
0.2
]
},
"warmup_epochs": {
"type": "discrete",
"values": [
5
]
},
"warmup_momentum": {
"type": "discrete",
"values": [
0,
0.95
]
},
"weight_decay": {
"type": "discrete",
"values": [
0,
0.001
]
}
},
"spec": {
"maxCombo": 0,
"metric": "metrics/mAP_0.5",
"objective": "maximize"
},
"trials": 1
}
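Editor's note: this random-search config enumerates discrete values for each hyperparameter and asks the sweep to maximize metrics/mAP_0.5. A hedged sketch of attaching another worker to an already-created sweep (the id is a placeholder; hpo.py above reads it from COMET_OPTIMIZER_ID):

import os
import subprocess

os.environ["COMET_OPTIMIZER_ID"] = "replace-with-sweep-id"  # placeholder sweep id
# hypothetical invocation; assumes the repository root as the working directory
subprocess.run(["python", "utils/loggers/comet/hpo.py"], check=True)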

View File

@ -28,7 +28,7 @@ def smooth(y, f=0.05):
return np.convolve(yp, np.ones(nf) / nf, mode='valid')  # y-smoothed
-def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16):
+def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16, prefix=""):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
# Arguments
@ -83,10 +83,10 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names
names = [v for k, v in names.items() if k in unique_classes]  # list: only classes that have data
names = dict(enumerate(names))  # to dict
if plot:
-plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names)
-plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1')
-plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision')
-plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall')
+plot_pr_curve(px, py, ap, Path(save_dir) / f'{prefix}PR_curve.png', names)
+plot_mc_curve(px, f1, Path(save_dir) / f'{prefix}F1_curve.png', names, ylabel='F1')
+plot_mc_curve(px, p, Path(save_dir) / f'{prefix}P_curve.png', names, ylabel='Precision')
+plot_mc_curve(px, r, Path(save_dir) / f'{prefix}R_curve.png', names, ylabel='Recall')
i = smooth(f1.mean(0), 0.1).argmax()  # max F1 index
p, r, f1 = p[:, i], r[:, i], f1[:, i]
@ -170,12 +170,12 @@ class ConfusionMatrix:
if n and sum(j) == 1:
self.matrix[detection_classes[m1[j]], gc] += 1  # correct
else:
-self.matrix[self.nc, gc] += 1  # background FP
+self.matrix[self.nc, gc] += 1  # true background
if n:
for i, dc in enumerate(detection_classes):
if not any(m1 == i):
-self.matrix[dc, self.nc] += 1  # background FN
+self.matrix[dc, self.nc] += 1  # predicted background
def matrix(self):
return self.matrix
@ -186,7 +186,7 @@ class ConfusionMatrix:
# fn = self.matrix.sum(0) - tp  # false negatives (missed detections)
return tp[:-1], fp[:-1]  # remove background class
-@TryExcept('WARNING: ConfusionMatrix plot failure: ')
+@TryExcept('WARNING ⚠️ ConfusionMatrix plot failure: ')
def plot(self, normalize=True, save_dir='', names=()):
import seaborn as sn
@ -197,6 +197,7 @@ class ConfusionMatrix:
nc, nn = self.nc, len(names)  # number of classes, names
sn.set(font_scale=1.0 if nc < 50 else 0.8)  # for label size
labels = (0 < nn < 99) and (nn == nc)  # apply names to ticklabels
+ticklabels = (names + ['background']) if labels else "auto"
with warnings.catch_warnings():
warnings.simplefilter('ignore')  # suppress empty matrix RuntimeWarning: All-NaN slice encountered
sn.heatmap(array,
@ -208,8 +209,8 @@ class ConfusionMatrix:
fmt='.2f',
square=True,
vmin=0.0,
-xticklabels=names + ['background FP'] if labels else "auto",
-yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1))
+xticklabels=ticklabels,
+yticklabels=ticklabels).set_facecolor((1, 1, 1))
ax.set_ylabel('True')
ax.set_ylabel('Predicted')
ax.set_title('Confusion Matrix')

View File

@ -20,9 +20,10 @@ import torch
from PIL import Image, ImageDraw, ImageFont
from utils import TryExcept, threaded
-from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_coords, increment_path,
+from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_boxes, increment_path,
is_ascii, xywh2xyxy, xyxy2xywh)
from utils.metrics import fitness
+from utils.segment.general import scale_image
# Settings
RANK = int(os.getenv('RANK', -1))
@ -113,6 +114,52 @@ class Annotator:
thickness=tf,
lineType=cv2.LINE_AA)
def masks(self, masks, colors, im_gpu=None, alpha=0.5):
"""Plot masks at once.
Args:
masks (tensor): predicted masks on cuda, shape: [n, h, w]
colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n]
im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1]
alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque
"""
if self.pil:
# convert to numpy first
self.im = np.asarray(self.im).copy()
if im_gpu is None:
# Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...)
if len(masks) == 0:
return
if isinstance(masks, torch.Tensor):
masks = torch.as_tensor(masks, dtype=torch.uint8)
masks = masks.permute(1, 2, 0).contiguous()
masks = masks.cpu().numpy()
# masks = np.ascontiguousarray(masks.transpose(1, 2, 0))
masks = scale_image(masks.shape[:2], masks, self.im.shape)
masks = np.asarray(masks, dtype=np.float32)
colors = np.asarray(colors, dtype=np.float32) # shape(n,3)
s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together
masks = (masks @ colors).clip(0, 255) # (h,w,n) @ (n,3) = (h,w,3)
self.im[:] = masks * alpha + self.im * (1 - s * alpha)
else:
if len(masks) == 0:
self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0
colors = colors[:, None, None] # shape(n,1,1,3)
masks = masks.unsqueeze(3) # shape(n,h,w,1)
masks_color = masks * (colors * alpha) # shape(n,h,w,3)
inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1)
mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3)
im_gpu = im_gpu.flip(dims=[0]) # flip channel
im_gpu = im_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3)
im_gpu = im_gpu * inv_alph_masks[-1] + mcs
im_mask = (im_gpu * 255).byte().cpu().numpy()
self.im[:] = scale_image(im_gpu.shape, im_mask, self.im.shape)
if self.pil:
# convert im back to PIL and update draw
self.fromarray(self.im)
def rectangle(self, xy, fill=None, outline=None, width=1):
# Add rectangle to image (PIL-only)
self.draw.rectangle(xy, fill, outline, width)
@ -124,6 +171,11 @@ class Annotator:
xy[1] += 1 - h
self.draw.text(xy, text, fill=txt_color, font=self.font)
def fromarray(self, im):
# Update self.im from a numpy array
self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
self.draw = ImageDraw.Draw(self.im)
def result(self):
# Return annotated image as array
return np.asarray(self.im)
@ -152,7 +204,6 @@ def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detec
ax[i].axis('off')
LOGGER.info(f'Saving {f}... ({n}/{channels})')
-plt.title('Features')
plt.savefig(f, dpi=300, bbox_inches='tight')
plt.close()
np.save(str(f.with_suffix('.npy')), x[0].cpu().numpy())  # npy save
@ -180,26 +231,31 @@ def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
return filtfilt(b, a, data)  # forward-backward filter
-def output_to_target(output):
-# Convert model output to target format [batch_id, class_id, x, y, w, h, conf]
+def output_to_target(output, max_det=300):
+# Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting
targets = []
for i, o in enumerate(output):
-targets.extend([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf] for *box, conf, cls in o.cpu().numpy())
-return np.array(targets)
+box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1)
+j = torch.full((conf.shape[0], 1), i)
+targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1))
+return torch.cat(targets, 0).numpy()
@threaded
-def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=1920, max_subplots=16):
+def plot_images(images, targets, paths=None, fname='images.jpg', names=None):
# Plot image grid with labels
if isinstance(images, torch.Tensor):
images = images.cpu().float().numpy()
if isinstance(targets, torch.Tensor):
targets = targets.cpu().numpy()
-if np.max(images[0]) <= 1:
-images *= 255  # de-normalise (optional)
+max_size = 1920  # max image size
+max_subplots = 16  # max image subplots, i.e. 4x4
bs, _, h, w = images.shape  # batch size, _, height, width
bs = min(bs, max_subplots)  # limit plot images
ns = np.ceil(bs ** 0.5)  # number of subplots (square)
+if np.max(images[0]) <= 1:
+images *= 255  # de-normalise (optional)
# Build Image
mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
@ -364,7 +420,7 @@ def plot_labels(labels, names=(), save_dir=Path('')):
ax[0].set_ylabel('instances')
if 0 < len(names) < 30:
ax[0].set_xticks(range(len(names)))
-ax[0].set_xticklabels(names, rotation=90, fontsize=10)
+ax[0].set_xticklabels(list(names.values()), rotation=90, fontsize=10)
else:
ax[0].set_xlabel('classes')
sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
@ -509,7 +565,7 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False,
b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
xyxy = xywh2xyxy(b).long()
-clip_coords(xyxy, im.shape)
+clip_boxes(xyxy, im.shape)
crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
if save:
file.parent.mkdir(parents=True, exist_ok=True)  # make directory
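Editor's sketch (not part of the diff) of the new Annotator.masks() overlay above, run on synthetic data; it assumes the repository root is importable and that utils.plots exposes Annotator and the colors palette as in upstream YOLOv5:

import numpy as np
import torch

from utils.plots import Annotator, colors  # assumed imports

im = np.zeros((640, 640, 3), dtype=np.uint8)            # dummy BGR image
masks = torch.zeros((2, 640, 640))                      # two dummy instance masks
masks[0, 100:200, 100:200] = 1.0
masks[1, 300:400, 300:400] = 1.0
im_gpu = torch.zeros((3, 640, 640))                     # image tensor in [0, 1], CHW
annotator = Annotator(im, line_width=2)
annotator.masks(masks, colors=[colors(i, True) for i in range(2)], im_gpu=im_gpu, alpha=0.5)
out = annotator.result()                                # annotated HxWx3 array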

View File

View File

@ -0,0 +1,104 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Image augmentation functions
"""
import math
import random
import cv2
import numpy as np
from ..augmentations import box_candidates
from ..general import resample_segments, segment2box
def mixup(im, labels, segments, im2, labels2, segments2):
# Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
im = (im * r + im2 * (1 - r)).astype(np.uint8)
labels = np.concatenate((labels, labels2), 0)
segments = np.concatenate((segments, segments2), 0)
return im, labels, segments
def random_perspective(im,
targets=(),
segments=(),
degrees=10,
translate=.1,
scale=.1,
shear=10,
perspective=0.0,
border=(0, 0)):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
height = im.shape[0] + border[0] * 2 # shape(h,w,c)
width = im.shape[1] + border[1] * 2
# Center
C = np.eye(3)
C[0, 2] = -im.shape[1] / 2 # x translation (pixels)
C[1, 2] = -im.shape[0] / 2 # y translation (pixels)
# Perspective
P = np.eye(3)
P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
# Rotation and Scale
R = np.eye(3)
a = random.uniform(-degrees, degrees)
# a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
s = random.uniform(1 - scale, 1 + scale)
# s = 2 ** random.uniform(-scale, scale)
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
# Shear
S = np.eye(3)
S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
# Translation
T = np.eye(3)
T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels)
T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels)
# Combined rotation matrix
M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
if perspective:
im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
else: # affine
im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
# Visualize
# import matplotlib.pyplot as plt
# ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
# ax[0].imshow(im[:, :, ::-1]) # base
# ax[1].imshow(im2[:, :, ::-1]) # warped
# Transform label coordinates
n = len(targets)
new_segments = []
if n:
new = np.zeros((n, 4))
segments = resample_segments(segments) # upsample
for i, segment in enumerate(segments):
xy = np.ones((len(segment), 3))
xy[:, :2] = segment
xy = xy @ M.T # transform
xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine
# clip
new[i] = segment2box(xy, width, height)
new_segments.append(xy)
# filter candidates
i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01)
targets = targets[i]
targets[:, 1:5] = new[i]
new_segments = np.array(new_segments)[i]
return im, targets, new_segments
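Editor's sketch (assumes the repository root on sys.path plus numpy and OpenCV): one synthetic box with a matching square segment pushed through random_perspective() above, to show that the class column, boxes and segments stay aligned:

import numpy as np

from utils.segment.augmentations import random_perspective

im = np.full((640, 640, 3), 114, dtype=np.uint8)                        # grey canvas
targets = np.array([[0, 100, 100, 300, 300]], dtype=np.float32)         # [cls, x1, y1, x2, y2]
segments = [np.array([[100, 100], [300, 100], [300, 300], [100, 300]], dtype=np.float32)]
im_out, targets_out, segments_out = random_perspective(im, targets, segments, degrees=10, scale=0.1)
print(im_out.shape, targets_out.shape, segments_out.shape)              # boxes are rebuilt from the warped segments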

View File

@ -0,0 +1,330 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Dataloaders
"""
import os
import random
import cv2
import numpy as np
import torch
from torch.utils.data import DataLoader, distributed
from ..augmentations import augment_hsv, copy_paste, letterbox
from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker
from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn
from ..torch_utils import torch_distributed_zero_first
from .augmentations import mixup, random_perspective
RANK = int(os.getenv('RANK', -1))
def create_dataloader(path,
imgsz,
batch_size,
stride,
single_cls=False,
hyp=None,
augment=False,
cache=False,
pad=0.0,
rect=False,
rank=-1,
workers=8,
image_weights=False,
quad=False,
prefix='',
shuffle=False,
mask_downsample_ratio=1,
overlap_mask=False):
if rect and shuffle:
LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
shuffle = False
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
dataset = LoadImagesAndLabelsAndMasks(
path,
imgsz,
batch_size,
augment=augment, # augmentation
hyp=hyp, # hyperparameters
rect=rect, # rectangular batches
cache_images=cache,
single_cls=single_cls,
stride=int(stride),
pad=pad,
image_weights=image_weights,
prefix=prefix,
downsample_ratio=mask_downsample_ratio,
overlap=overlap_mask)
batch_size = min(batch_size, len(dataset))
nd = torch.cuda.device_count() # number of CUDA devices
nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers
sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates
generator = torch.Generator()
generator.manual_seed(6148914691236517205 + RANK)
return loader(
dataset,
batch_size=batch_size,
shuffle=shuffle and sampler is None,
num_workers=nw,
sampler=sampler,
pin_memory=True,
collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn,
worker_init_fn=seed_worker,
generator=generator,
), dataset
class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing
def __init__(
self,
path,
img_size=640,
batch_size=16,
augment=False,
hyp=None,
rect=False,
image_weights=False,
cache_images=False,
single_cls=False,
stride=32,
pad=0,
prefix="",
downsample_ratio=1,
overlap=False,
):
super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls,
stride, pad, prefix)
self.downsample_ratio = downsample_ratio
self.overlap = overlap
def __getitem__(self, index):
index = self.indices[index] # linear, shuffled, or image_weights
hyp = self.hyp
mosaic = self.mosaic and random.random() < hyp['mosaic']
masks = []
if mosaic:
# Load mosaic
img, labels, segments = self.load_mosaic(index)
shapes = None
# MixUp augmentation
if random.random() < hyp["mixup"]:
img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1)))
else:
# Load image
img, (h0, w0), (h, w) = self.load_image(index)
# Letterbox
shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
labels = self.labels[index].copy()
# [array, array, ....], array.shape=(num_points, 2), xyxyxyxy
segments = self.segments[index].copy()
if len(segments):
for i_s in range(len(segments)):
segments[i_s] = xyn2xy(
segments[i_s],
ratio[0] * w,
ratio[1] * h,
padw=pad[0],
padh=pad[1],
)
if labels.size: # normalized xywh to pixel xyxy format
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])
if self.augment:
img, labels, segments = random_perspective(img,
labels,
segments=segments,
degrees=hyp["degrees"],
translate=hyp["translate"],
scale=hyp["scale"],
shear=hyp["shear"],
perspective=hyp["perspective"])
nl = len(labels) # number of labels
if nl:
labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3)
if self.overlap:
masks, sorted_idx = polygons2masks_overlap(img.shape[:2],
segments,
downsample_ratio=self.downsample_ratio)
masks = masks[None] # (640, 640) -> (1, 640, 640)
labels = labels[sorted_idx]
else:
masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio)
masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] //
self.downsample_ratio, img.shape[1] //
self.downsample_ratio))
# TODO: albumentations support
if self.augment:
# Albumentations
# some augmentations do not change boxes or masks, so they are applied as-is for now
img, labels = self.albumentations(img, labels)
nl = len(labels) # update after albumentations
# HSV color-space
augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"])
# Flip up-down
if random.random() < hyp["flipud"]:
img = np.flipud(img)
if nl:
labels[:, 2] = 1 - labels[:, 2]
masks = torch.flip(masks, dims=[1])
# Flip left-right
if random.random() < hyp["fliplr"]:
img = np.fliplr(img)
if nl:
labels[:, 1] = 1 - labels[:, 1]
masks = torch.flip(masks, dims=[2])
# Cutouts # labels = cutout(img, labels, p=0.5)
labels_out = torch.zeros((nl, 6))
if nl:
labels_out[:, 1:] = torch.from_numpy(labels)
# Convert
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks)
def load_mosaic(self, index):
# YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
labels4, segments4 = [], []
s = self.img_size
yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y
# 3 additional image indices
indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices
for i, index in enumerate(indices):
# Load image
img, _, (h, w) = self.load_image(index)
# place img in img4
if i == 0: # top left
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
padw = x1a - x1b
padh = y1a - y1b
labels, segments = self.labels[index].copy(), self.segments[index].copy()
if labels.size:
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format
segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
labels4.append(labels)
segments4.extend(segments)
# Concat/clip labels
labels4 = np.concatenate(labels4, 0)
for x in (labels4[:, 1:], *segments4):
np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
# img4, labels4 = replicate(img4, labels4) # replicate
# Augment
img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"])
img4, labels4, segments4 = random_perspective(img4,
labels4,
segments4,
degrees=self.hyp["degrees"],
translate=self.hyp["translate"],
scale=self.hyp["scale"],
shear=self.hyp["shear"],
perspective=self.hyp["perspective"],
border=self.mosaic_border) # border to remove
return img4, labels4, segments4
@staticmethod
def collate_fn(batch):
img, label, path, shapes, masks = zip(*batch) # transposed
batched_masks = torch.cat(masks, 0)
for i, l in enumerate(label):
l[:, 0] = i # add target image index for build_targets()
return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks
def polygon2mask(img_size, polygons, color=1, downsample_ratio=1):
"""
Args:
img_size (tuple): The image size.
polygons (np.ndarray): [N, M], N is the number of polygons,
M is the number of flattened xy coordinates (must be divisible by 2).
"""
mask = np.zeros(img_size, dtype=np.uint8)
polygons = np.asarray(polygons)
polygons = polygons.astype(np.int32)
shape = polygons.shape
polygons = polygons.reshape(shape[0], -1, 2)
cv2.fillPoly(mask, polygons, color=color)
nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio)
# NOTE: filling the polygon first and then resizing keeps the loss calculation
# consistent with the mask-ratio=1 case.
mask = cv2.resize(mask, (nw, nh))
return mask
def polygons2masks(img_size, polygons, color, downsample_ratio=1):
"""
Args:
img_size (tuple): The image size.
polygons (list[np.ndarray]): list of polygons; each polygon is an array of
flattened xy coordinates (length divisible by 2).
"""
masks = []
for si in range(len(polygons)):
mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio)
masks.append(mask)
return np.array(masks)
def polygons2masks_overlap(img_size, segments, downsample_ratio=1):
"""Return a (640, 640) overlap mask."""
masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio),
dtype=np.int32 if len(segments) > 255 else np.uint8)
areas = []
ms = []
for si in range(len(segments)):
mask = polygon2mask(
img_size,
[segments[si].reshape(-1)],
downsample_ratio=downsample_ratio,
color=1,
)
ms.append(mask)
areas.append(mask.sum())
areas = np.asarray(areas)
index = np.argsort(-areas)
ms = np.array(ms)[index]
for i in range(len(segments)):
mask = ms[i] * (i + 1)
masks = masks + mask
masks = np.clip(masks, a_min=0, a_max=i + 1)
return masks, index
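Editor's sketch of the rasterisation helpers above with a single synthetic polygon (assumes numpy, OpenCV and an importable repository root):

import numpy as np

from utils.segment.dataloaders import polygon2mask, polygons2masks_overlap

square = np.array([[100, 100], [300, 100], [300, 300], [100, 300]], dtype=np.float32)
mask = polygon2mask((640, 640), [square.reshape(-1)], color=1, downsample_ratio=4)
print(mask.shape, int(mask.sum()))          # (160, 160), roughly (200 / 4) ** 2 foreground pixels

masks, order = polygons2masks_overlap((640, 640), [square], downsample_ratio=4)
print(masks.shape, order)                   # single overlap mask plus the area sort order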

134
utils/segment/general.py Normal file
View File

@ -0,0 +1,134 @@
import cv2
import numpy as np
import torch
import torch.nn.functional as F
def crop_mask(masks, boxes):
"""
"Crop" predicted masks by zeroing out everything not in the predicted bbox.
Vectorized by Chong (thanks Chong).
Args:
- masks should be a size [n, h, w] tensor of masks
- boxes should be a size [n, 4] tensor of bbox coords in relative point form
"""
n, h, w = masks.shape
x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n)
r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # x coordinates, shape(1,1,w)
c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # y coordinates, shape(1,h,1)
return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
def process_mask_upsample(protos, masks_in, bboxes, shape):
"""
Crop after upsample.
protos: [mask_dim, mask_h, mask_w]
masks_in: [n, mask_dim], n is the number of masks after NMS
bboxes: [n, 4], n is the number of masks after NMS
shape: input image size, (h, w)
return: [n, h, w] binary masks
"""
c, mh, mw = protos.shape # CHW
masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW
masks = crop_mask(masks, bboxes) # CHW
return masks.gt_(0.5)
def process_mask(protos, masks_in, bboxes, shape, upsample=False):
"""
Crop before upsample.
protos: [mask_dim, mask_h, mask_w]
masks_in: [n, mask_dim], n is the number of masks after NMS
bboxes: [n, 4], n is the number of masks after NMS
shape: input image size, (h, w)
return: [n, h, w] binary masks
"""
c, mh, mw = protos.shape # CHW
ih, iw = shape
masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW
downsampled_bboxes = bboxes.clone()
downsampled_bboxes[:, 0] *= mw / iw
downsampled_bboxes[:, 2] *= mw / iw
downsampled_bboxes[:, 3] *= mh / ih
downsampled_bboxes[:, 1] *= mh / ih
masks = crop_mask(masks, downsampled_bboxes) # CHW
if upsample:
masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW
return masks.gt_(0.5)
def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
"""
im1_shape: model input shape, [h, w]
im0_shape: original image shape, [h, w, 3]
masks: [h, w, num]
"""
# Rescale coordinates (xyxy) from im1_shape to im0_shape
if ratio_pad is None: # calculate from im0_shape
gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new
pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding
else:
pad = ratio_pad[1]
top, left = int(pad[1]), int(pad[0]) # y, x
bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])
if len(masks.shape) < 2:
raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
masks = masks[top:bottom, left:right]
# masks = masks.permute(2, 0, 1).contiguous()
# masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0]
# masks = masks.permute(1, 2, 0).contiguous()
masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))
if len(masks.shape) == 2:
masks = masks[:, :, None]
return masks
def mask_iou(mask1, mask2, eps=1e-7):
"""
mask1: [N, n], N is the number of predicted objects
mask2: [M, n], M is the number of ground-truth objects
Note: n = image_w x image_h
return: mask IoU matrix, [N, M]
"""
intersection = torch.matmul(mask1, mask2.t()).clamp(0)
union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection # (area1 + area2) - intersection
return intersection / (union + eps)
def masks_iou(mask1, mask2, eps=1e-7):
"""
mask1: [N, n], N is the number of predicted objects
mask2: [N, n], N is the number of ground-truth objects (paired one-to-one with mask1)
Note: n = image_w x image_h
return: per-pair mask IoU, (N, )
"""
intersection = (mask1 * mask2).sum(1).clamp(0) # (N, )
union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection
return intersection / (union + eps)
def masks2segments(masks, strategy='largest'):
# Convert masks(n,160,160) into segments(n,xy)
segments = []
for x in masks.int().numpy().astype('uint8'):
c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
if strategy == 'concat': # concatenate all segments
c = np.concatenate([x.reshape(-1, 2) for x in c])
elif strategy == 'largest': # select largest segment
c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
segments.append(c.astype('float32'))
return segments
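Editor's sketch of process_mask() above with random prototypes and coefficients; shapes follow the docstrings, the values are meaningless, so the snippet only demonstrates the shape contract:

import torch

from utils.segment.general import process_mask

protos = torch.randn(32, 160, 160)                               # [mask_dim, mask_h, mask_w]
masks_in = torch.randn(5, 32)                                    # mask coefficients for 5 detections kept after NMS
bboxes = torch.tensor([[100., 100., 300., 300.]]).repeat(5, 1)   # xyxy in input-image pixels
masks = process_mask(protos, masks_in, bboxes, shape=(640, 640), upsample=True)
print(masks.shape)                                               # torch.Size([5, 640, 640]) binary masks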

186
utils/segment/loss.py Normal file
View File

@ -0,0 +1,186 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..general import xywh2xyxy
from ..loss import FocalLoss, smooth_BCE
from ..metrics import bbox_iou
from ..torch_utils import de_parallel
from .general import crop_mask
class ComputeLoss:
# Compute losses
def __init__(self, model, autobalance=False, overlap=False):
self.sort_obj_iou = False
self.overlap = overlap
device = next(model.parameters()).device # get model device
h = model.hyp # hyperparameters
self.device = device
# Define criteria
BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device))
BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))
# Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets
# Focal loss
g = h['fl_gamma'] # focal loss gamma
if g > 0:
BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
m = de_parallel(model).model[-1] # Detect() module
self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7
self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index
self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
self.na = m.na # number of anchors
self.nc = m.nc # number of classes
self.nl = m.nl # number of layers
self.nm = m.nm # number of masks
self.anchors = m.anchors
self.device = device
def __call__(self, preds, targets, masks):  # predictions, targets, masks
p, proto = preds
bs, nm, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width
lcls = torch.zeros(1, device=self.device)
lbox = torch.zeros(1, device=self.device)
lobj = torch.zeros(1, device=self.device)
lseg = torch.zeros(1, device=self.device)
tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets) # targets
# Losses
for i, pi in enumerate(p): # layer index, layer predictions
b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj
n = b.shape[0] # number of targets
if n:
pxy, pwh, _, pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1, self.nc, nm), 1) # subset of predictions
# Box regression
pxy = pxy.sigmoid() * 2 - 0.5
pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
pbox = torch.cat((pxy, pwh), 1) # predicted box
iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target)
lbox += (1.0 - iou).mean() # iou loss
# Objectness
iou = iou.detach().clamp(0).type(tobj.dtype)
if self.sort_obj_iou:
j = iou.argsort()
b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j]
if self.gr < 1:
iou = (1.0 - self.gr) + self.gr * iou
tobj[b, a, gj, gi] = iou # iou ratio
# Classification
if self.nc > 1: # cls loss (only if multiple classes)
t = torch.full_like(pcls, self.cn, device=self.device) # targets
t[range(n), tcls[i]] = self.cp
lcls += self.BCEcls(pcls, t) # BCE
# Mask regression
if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample
masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0]
marea = xywhn[i][:, 2:].prod(1) # mask width, height normalized
mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device))
for bi in b.unique():
j = b == bi # matching index
if self.overlap:
mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0)
else:
mask_gti = masks[tidxs[i]][j]
lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j])
obji = self.BCEobj(pi[..., 4], tobj)
lobj += obji * self.balance[i] # obj loss
if self.autobalance:
self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
if self.autobalance:
self.balance = [x / self.balance[self.ssi] for x in self.balance]
lbox *= self.hyp["box"]
lobj *= self.hyp["obj"]
lcls *= self.hyp["cls"]
lseg *= self.hyp["box"] / bs
loss = lbox + lobj + lcls + lseg
return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach()
def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
# Mask loss for one image
pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80)
loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")
return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean()
def build_targets(self, p, targets):
# Build targets for compute_loss(), input targets(image,class,x,y,w,h)
na, nt = self.na, targets.shape[0] # number of anchors, targets
tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], []
gain = torch.ones(8, device=self.device) # normalized to gridspace gain
ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
if self.overlap:
batch = p[0].shape[0]
ti = []
for i in range(batch):
num = (targets[:, 0] == i).sum() # find number of targets of each image
ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1) # (na, num)
ti = torch.cat(ti, 1) # (na, nt)
else:
ti = torch.arange(nt, device=self.device).float().view(1, nt).repeat(na, 1)
targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2) # append anchor indices
g = 0.5 # bias
off = torch.tensor(
[
[0, 0],
[1, 0],
[0, 1],
[-1, 0],
[0, -1], # j,k,l,m
# [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
],
device=self.device).float() * g # offsets
for i in range(self.nl):
anchors, shape = self.anchors[i], p[i].shape
gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain
# Match targets to anchors
t = targets * gain # shape(3,n,7)
if nt:
# Matches
r = t[..., 4:6] / anchors[:, None] # wh ratio
j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t'] # compare
# j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
t = t[j] # filter
# Offsets
gxy = t[:, 2:4] # grid xy
gxi = gain[[2, 3]] - gxy # inverse
j, k = ((gxy % 1 < g) & (gxy > 1)).T
l, m = ((gxi % 1 < g) & (gxi > 1)).T
j = torch.stack((torch.ones_like(j), j, k, l, m))
t = t.repeat((5, 1, 1))[j]
offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
else:
t = targets[0]
offsets = 0
# Define
bc, gxy, gwh, at = t.chunk(4, 1)  # (image, class), grid xy, grid wh, (anchor, target index)
(a, tidx), (b, c) = at.long().T, bc.long().T # anchors, image, class
gij = (gxy - offsets).long()
gi, gj = gij.T # grid indices
# Append
indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid
tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
anch.append(anchors[a]) # anchors
tcls.append(c) # class
tidxs.append(tidx)
xywhn.append(torch.cat((gxy, gwh), 1) / gain[2:6]) # xywh normalized
return tcls, tbox, indices, anch, tidxs, xywhn
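For reference, a small self-contained sketch of the "overlap" mask encoding assumed by `__call__` and `build_targets` above: each image carries a single (H, W) mask whose pixel values are 1-based instance indices, and per-instance binary masks are recovered with the same `torch.where` comparison used in the loss. The tensor values below are invented for illustration.
import torch

mask = torch.tensor([[0, 1, 1],
                     [0, 2, 2],
                     [0, 0, 2]]).float()   # one image, two instances encoded in a single mask
tidx = torch.tensor([1., 2.])              # 1-based target indices for this image (cf. tidxs)
per_instance = torch.where(mask[None] == tidx.view(-1, 1, 1), 1.0, 0.0)
print(per_instance.shape)                  # torch.Size([2, 3, 3]) -> one binary mask per instance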

210
utils/segment/metrics.py Normal file
View File

@ -0,0 +1,210 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Model validation metrics
"""
import numpy as np
from ..metrics import ap_per_class
def fitness(x):
# Model fitness as a weighted combination of metrics
w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9]
return (x[:, :8] * w).sum(1)
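A quick hedged example of the weighting: the row below is in the order [P(B), R(B), mAP@0.5(B), mAP@0.5:0.95(B), P(M), R(M), mAP@0.5(M), mAP@0.5:0.95(M)], and the values are invented.
import numpy as np

x = np.array([[0.80, 0.70, 0.65, 0.45, 0.75, 0.66, 0.60, 0.40]])  # invented metrics row
w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9]
print((x[:, :8] * w).sum(1))  # [0.89] -> mAP@0.5:0.95 dominates for both boxes and masks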
def ap_per_class_box_and_mask(
tp_m,
tp_b,
conf,
pred_cls,
target_cls,
plot=False,
save_dir=".",
names=(),
):
"""
Args:
tp_m: true positives for masks.
tp_b: true positives for boxes.
For the remaining arguments, see `ap_per_class`.
"""
results_boxes = ap_per_class(tp_b,
conf,
pred_cls,
target_cls,
plot=plot,
save_dir=save_dir,
names=names,
prefix="Box")[2:]
results_masks = ap_per_class(tp_m,
conf,
pred_cls,
target_cls,
plot=plot,
save_dir=save_dir,
names=names,
prefix="Mask")[2:]
results = {
"boxes": {
"p": results_boxes[0],
"r": results_boxes[1],
"ap": results_boxes[3],
"f1": results_boxes[2],
"ap_class": results_boxes[4]},
"masks": {
"p": results_masks[0],
"r": results_masks[1],
"ap": results_masks[3],
"f1": results_masks[2],
"ap_class": results_masks[4]}}
return results
class Metric:
def __init__(self) -> None:
self.p = [] # (nc, )
self.r = [] # (nc, )
self.f1 = [] # (nc, )
self.all_ap = [] # (nc, 10)
self.ap_class_index = [] # (nc, )
@property
def ap50(self):
"""AP@0.5 of all classes.
Return:
(nc, ) or [].
"""
return self.all_ap[:, 0] if len(self.all_ap) else []
@property
def ap(self):
"""AP@0.5:0.95
Return:
(nc, ) or [].
"""
return self.all_ap.mean(1) if len(self.all_ap) else []
@property
def mp(self):
"""mean precision of all classes.
Return:
float.
"""
return self.p.mean() if len(self.p) else 0.0
@property
def mr(self):
"""mean recall of all classes.
Return:
float.
"""
return self.r.mean() if len(self.r) else 0.0
@property
def map50(self):
"""Mean AP@0.5 of all classes.
Return:
float.
"""
return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0
@property
def map(self):
"""Mean AP@0.5:0.95 of all classes.
Return:
float.
"""
return self.all_ap.mean() if len(self.all_ap) else 0.0
def mean_results(self):
"""Mean of results, return mp, mr, map50, map"""
return (self.mp, self.mr, self.map50, self.map)
def class_result(self, i):
"""class-aware result, return p[i], r[i], ap50[i], ap[i]"""
return (self.p[i], self.r[i], self.ap50[i], self.ap[i])
def get_maps(self, nc):
maps = np.zeros(nc) + self.map
for i, c in enumerate(self.ap_class_index):
maps[c] = self.ap[i]
return maps
def update(self, results):
"""
Args:
results: tuple(p, r, ap, f1, ap_class)
"""
p, r, all_ap, f1, ap_class_index = results
self.p = p
self.r = r
self.all_ap = all_ap
self.f1 = f1
self.ap_class_index = ap_class_index
class Metrics:
"""Metric for boxes and masks."""
def __init__(self) -> None:
self.metric_box = Metric()
self.metric_mask = Metric()
def update(self, results):
"""
Args:
results: Dict{'boxes': Dict{}, 'masks': Dict{}}
"""
self.metric_box.update(list(results["boxes"].values()))
self.metric_mask.update(list(results["masks"].values()))
def mean_results(self):
return self.metric_box.mean_results() + self.metric_mask.mean_results()
def class_result(self, i):
return self.metric_box.class_result(i) + self.metric_mask.class_result(i)
def get_maps(self, nc):
return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc)
@property
def ap_class_index(self):
# boxes and masks have the same ap_class_index
return self.metric_box.ap_class_index
KEYS = [
"train/box_loss",
"train/seg_loss", # train loss
"train/obj_loss",
"train/cls_loss",
"metrics/precision(B)",
"metrics/recall(B)",
"metrics/mAP_0.5(B)",
"metrics/mAP_0.5:0.95(B)", # metrics
"metrics/precision(M)",
"metrics/recall(M)",
"metrics/mAP_0.5(M)",
"metrics/mAP_0.5:0.95(M)", # metrics
"val/box_loss",
"val/seg_loss", # val loss
"val/obj_loss",
"val/cls_loss",
"x/lr0",
"x/lr1",
"x/lr2",]
BEST_KEYS = [
"best/epoch",
"best/precision(B)",
"best/recall(B)",
"best/mAP_0.5(B)",
"best/mAP_0.5:0.95(B)",
"best/precision(M)",
"best/recall(M)",
"best/mAP_0.5(M)",
"best/mAP_0.5:0.95(M)",]

143
utils/segment/plots.py Normal file
View File

@ -0,0 +1,143 @@
import contextlib
import math
from pathlib import Path
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from .. import threaded
from ..general import xywh2xyxy
from ..plots import Annotator, colors
@threaded
def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None):
# Plot image grid with labels
if isinstance(images, torch.Tensor):
images = images.cpu().float().numpy()
if isinstance(targets, torch.Tensor):
targets = targets.cpu().numpy()
if isinstance(masks, torch.Tensor):
masks = masks.cpu().numpy().astype(int)
max_size = 1920 # max image size
max_subplots = 16 # max image subplots, i.e. 4x4
bs, _, h, w = images.shape # batch size, _, height, width
bs = min(bs, max_subplots) # limit plot images
ns = np.ceil(bs ** 0.5) # number of subplots (square)
if np.max(images[0]) <= 1:
images *= 255 # de-normalise (optional)
# Build Image
mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init
for i, im in enumerate(images):
if i == max_subplots: # if last batch has fewer images than we expect
break
x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin
im = im.transpose(1, 2, 0)
mosaic[y:y + h, x:x + w, :] = im
# Resize (optional)
scale = max_size / ns / max(h, w)
if scale < 1:
h = math.ceil(scale * h)
w = math.ceil(scale * w)
mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))
# Annotate
fs = int((h + w) * ns * 0.01) # font size
annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
for i in range(i + 1):
x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin
annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders
if paths:
annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames
if len(targets) > 0:
idx = targets[:, 0] == i
ti = targets[idx] # image targets
boxes = xywh2xyxy(ti[:, 2:6]).T
classes = ti[:, 1].astype('int')
labels = ti.shape[1] == 6 # labels if no conf column
conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred)
if boxes.shape[1]:
if boxes.max() <= 1.01: # if normalized with tolerance 0.01
boxes[[0, 2]] *= w # scale to pixels
boxes[[1, 3]] *= h
elif scale < 1: # absolute coords need scale if image scales
boxes *= scale
boxes[[0, 2]] += x
boxes[[1, 3]] += y
for j, box in enumerate(boxes.T.tolist()):
cls = classes[j]
color = colors(cls)
cls = names[cls] if names else cls
if labels or conf[j] > 0.25: # 0.25 conf thresh
label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}'
annotator.box_label(box, label, color=color)
# Plot masks
if len(masks):
if masks.max() > 1.0:  # overlap format: pixel values are 1-based instance indices
image_masks = masks[[i]] # (1, 640, 640)
nl = len(ti)
index = np.arange(nl).reshape(nl, 1, 1) + 1
image_masks = np.repeat(image_masks, nl, axis=0)
image_masks = np.where(image_masks == index, 1.0, 0.0)
else:
image_masks = masks[idx]
im = np.asarray(annotator.im).copy()
for j, box in enumerate(boxes.T.tolist()):
if labels or conf[j] > 0.25: # 0.25 conf thresh
color = colors(classes[j])
mh, mw = image_masks[j].shape
if mh != h or mw != w:
mask = image_masks[j].astype(np.uint8)
mask = cv2.resize(mask, (w, h))
mask = mask.astype(bool)
else:
mask = image_masks[j].astype(bool)
with contextlib.suppress(Exception):
im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6
annotator.fromarray(im)
annotator.im.save(fname) # save
def plot_results_with_masks(file="path/to/results.csv", dir="", best=True):
# Plot training results.csv. Usage: from utils.segment.plots import *; plot_results_with_masks('path/to/results.csv')
save_dir = Path(file).parent if file else Path(dir)
fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True)
ax = ax.ravel()
files = list(save_dir.glob("results*.csv"))
assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot."
for f in files:
try:
data = pd.read_csv(f)
index = np.argmax(0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] +
0.1 * data.values[:, 11])
s = [x.strip() for x in data.columns]
x = data.values[:, 0]
for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]):
y = data.values[:, j]
# y[y == 0] = np.nan # don't show zero values
ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2)
if best:
# best
ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3)
ax[i].set_title(s[j] + f"\n{round(y[index], 5)}")
else:
# last
ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3)
ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}")
# if j in [8, 9, 10]: # share train and val loss y axes
# ax[i].get_shared_y_axes().join(ax[i], ax[i - 5])
except Exception as e:
print(f"Warning: Plotting error for {f}: {e}")
ax[1].legend()
fig.savefig(save_dir / "results.png", dpi=200)
plt.close()
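A hedged usage sketch; the run directory below is an assumption, not a path this commit creates.
from utils.segment.plots import plot_results_with_masks

plot_results_with_masks(file="runs/train-seg/exp/results.csv", best=True)  # hypothetical path
# writes results.png next to the CSV, starring the epoch with the best weighted box/mask mAP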

utils/torch_utils.py
View File

@ -47,7 +47,7 @@ def smartCrossEntropyLoss(label_smoothing=0.0):
if check_version(torch.__version__, '1.10.0'):
return nn.CrossEntropyLoss(label_smoothing=label_smoothing)
if label_smoothing > 0:
LOGGER.warning(f'WARNING ⚠️ label smoothing {label_smoothing} requires torch>=1.10.0')
return nn.CrossEntropyLoss()
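A brief sketch of the behavior this hunk touches: with torch>=1.10.0 the smoothing argument is passed through; on older versions the function warns (now with the ⚠️ prefix) and falls back to plain CrossEntropyLoss. The tensors below are arbitrary.
import torch

criterion = smartCrossEntropyLoss(label_smoothing=0.1)            # function from this file
loss = criterion(torch.randn(4, 10), torch.randint(0, 10, (4,)))  # (logits, class indices)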
@ -251,6 +251,7 @@ def fuse_conv_and_bn(conv, bn):
kernel_size=conv.kernel_size,
stride=conv.stride,
padding=conv.padding,
dilation=conv.dilation,
groups=conv.groups,
bias=True).requires_grad_(False).to(conv.weight.device)
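The added dilation=conv.dilation matters because the fused Conv2d must reproduce the original conv's geometry; without it, fusing a dilated conv changes the output. A hedged sanity check (tensor sizes are arbitrary):
import torch
import torch.nn as nn

conv = nn.Conv2d(3, 8, 3, padding=2, dilation=2, bias=False).eval()  # dilated conv, the case this hunk fixes
bn = nn.BatchNorm2d(8).eval()
x = torch.randn(1, 3, 32, 32)
fused = fuse_conv_and_bn(conv, bn)                                   # function from this file
print(torch.allclose(bn(conv(x)), fused(x), atol=1e-5))              # expected: True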

85
utils/triton.py Normal file
View File

@ -0,0 +1,85 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
""" Utils to interact with the Triton Inference Server
"""
import typing
from urllib.parse import urlparse
import torch
class TritonRemoteModel:
""" A wrapper over a model served by the Triton Inference Server. It can
be configured to communicate over GRPC or HTTP. It accepts Torch Tensors
as input and returns them as outputs.
"""
def __init__(self, url: str):
"""
Keyword arguments:
url: Fully qualified address of the Triton server, e.g. grpc://localhost:8000
"""
parsed_url = urlparse(url)
if parsed_url.scheme == "grpc":
from tritonclient.grpc import InferenceServerClient, InferInput
self.client = InferenceServerClient(parsed_url.netloc) # Triton GRPC client
model_repository = self.client.get_model_repository_index()
self.model_name = model_repository.models[0].name
self.metadata = self.client.get_model_metadata(self.model_name, as_json=True)
def create_input_placeholders() -> typing.List[InferInput]:
return [
InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']]
else:
from tritonclient.http import InferenceServerClient, InferInput
self.client = InferenceServerClient(parsed_url.netloc) # Triton HTTP client
model_repository = self.client.get_model_repository_index()
self.model_name = model_repository[0]['name']
self.metadata = self.client.get_model_metadata(self.model_name)
def create_input_placeholders() -> typing.List[InferInput]:
return [
InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']]
self._create_input_placeholders_fn = create_input_placeholders
@property
def runtime(self):
"""Returns the model runtime"""
return self.metadata.get("backend", self.metadata.get("platform"))
def __call__(self, *args, **kwargs) -> typing.Union[torch.Tensor, typing.Tuple[torch.Tensor, ...]]:
""" Invokes the model. Parameters can be provided via args or kwargs.
args, if provided, are assumed to match the order of inputs of the model.
kwargs are matched with the model input names.
"""
inputs = self._create_inputs(*args, **kwargs)
response = self.client.infer(model_name=self.model_name, inputs=inputs)
result = []
for output in self.metadata['outputs']:
tensor = torch.as_tensor(response.as_numpy(output['name']))
result.append(tensor)
return result[0] if len(result) == 1 else result
def _create_inputs(self, *args, **kwargs):
args_len, kwargs_len = len(args), len(kwargs)
if not args_len and not kwargs_len:
raise RuntimeError("No inputs provided.")
if args_len and kwargs_len:
raise RuntimeError("Cannot specify args and kwargs at the same time")
placeholders = self._create_input_placeholders_fn()
if args_len:
if args_len != len(placeholders):
raise RuntimeError(f"Expected {len(placeholders)} inputs, got {args_len}.")
for input, value in zip(placeholders, args):
input.set_data_from_numpy(value.cpu().numpy())
else:
for input in placeholders:
value = kwargs[input.name]
input.set_data_from_numpy(value.cpu().numpy())
return placeholders
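A hedged usage sketch; the server URL, reported runtime and input shape are assumptions about whatever model happens to be deployed.
import torch

model = TritonRemoteModel("http://localhost:8000")    # or a GRPC endpoint such as "grpc://localhost:8001"
print(model.runtime)                                   # e.g. "onnxruntime_onnx" or "tensorrt_plan"
y = model(torch.zeros(1, 3, 640, 640))                 # positional args; kwargs keyed by input name also work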