Fixing bugs on yolo update

Author: Elijah Harmon, 2022-09-25 20:27:52 -04:00
Commit: 65cb7a2111
Parent: 115a454637
64 changed files with 3697 additions and 459 deletions

.gitignore (3 additions)

@@ -8,3 +8,6 @@ yolov5s.engine
 yolov5
 tensorrt-8.4.1.5-cp39-none-win_amd64.whl
 tensorrt-8.4.1.5-cp310-none-win_amd64.whl
+__pycache__
+utils/__pycache__
+models/__pycache__

main.py (36 changes)

@@ -5,12 +5,13 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 import dxcam

 def main():
     # Window title of the game, don't need the entire name
     videoGameWindowTitle = "Counter"
@@ -63,8 +64,10 @@ def main():
     # sctArea = {"mon": 1, "top": 0, "left": 0, "width": 1920, "height": 1080}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + screenShotWidth, top + screenShotHeight
     region = (left, top, right, bottom)
@@ -81,7 +84,8 @@ def main():
     sTime = time.time()

     # Loading Yolo5 Small AI Model
-    model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, force_reload=True)
+    model = torch.hub.load('ultralytics/yolov5', 'yolov5s',
+                           pretrained=True, force_reload=True)
     stride, names, pt = model.stride, model.names, model.pt
     model.half()
@@ -109,8 +113,8 @@ def main():
         results = model(im, size=screenShotHeight)

         # Suppressing results that dont meet thresholds
-        pred = non_max_suppression(results, 0.25, 0.25, 0, False, max_det=1000)
+        pred = non_max_suppression(
+            results, 0.25, 0.25, 0, False, max_det=1000)

         # Converting output to usable cords
         targets = []
@@ -123,9 +127,11 @@ def main():
                     s += f"{n} {names[int(c)]}, "  # add to string

                 for *xyxy, conf, cls in reversed(det):
-                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                        1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -134,7 +140,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -151,7 +158,8 @@ def main():
             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]
         else:
@@ -165,7 +173,8 @@ def main():
                 halfH = round(targets["height"][i] / 2)
                 midX = targets['current_mid_x'][i]
                 midY = targets['current_mid_y'][i]
-                (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+                (startX, startY, endX, endY) = int(
+                    midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)

                 confidence = .5
@@ -197,5 +206,6 @@ def main():
                 exit()
     camera.stop()

 if __name__ == "__main__":
     main()
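The recentring arithmetic above is identical in every capture script, so a small standalone sketch may help; the window numbers below are made up for illustration, not taken from the commit.

# Sketch of the capture-region math used above (hypothetical window values).
def capture_region(win_left, win_right, win_top, win_height,
                   shot_width, shot_height, right_shift=0):
    # Horizontal: center the capture box on the window's midpoint, then shift.
    left = right_shift + ((win_left + win_right) // 2) - (shot_width // 2)
    # Vertical: center the capture box inside the window.
    top = win_top + (win_height - shot_height) // 2
    return (left, top, left + shot_width, top + shot_height)

# Example: a 1920x1080 window at the origin, capturing a 320x320 box.
print(capture_region(0, 1920, 0, 1080, 320, 320))  # (800, 380, 1120, 700)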

Another changed file (name not captured):

@@ -5,13 +5,14 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 import dxcam
 import torch

 def main():
     # Window title to go after and the height of the screenshots
     videoGameWindowTitle = "Counter"
@@ -61,8 +62,10 @@ def main():
                "height": screenShotHeight}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + 320, top + 320
     region = (left, top, right, bottom)
@@ -82,7 +85,8 @@ def main():
     so = ort.SessionOptions()
     so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-    ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=['CUDAExecutionProvider'])
+    ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=[
+                                    'CUDAExecutionProvider'])

     # Used for colors drawn on bounding boxes
     COLORS = np.random.uniform(0, 255, size=(1500, 3))
@@ -99,7 +103,8 @@ def main():
         im = torch.from_numpy(outputs[0]).to('cpu')

-        pred = non_max_suppression(im, confidence, confidence, 0, False, max_det=10)
+        pred = non_max_suppression(
+            im, confidence, confidence, 0, False, max_det=10)

         targets = []
         for i, det in enumerate(pred):
@@ -111,9 +116,11 @@ def main():
                     s += f"{n} {int(c)}, "  # add to string

                 for *xyxy, conf, cls in reversed(det):
-                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                        1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -122,7 +129,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -139,7 +147,8 @@ def main():
             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]
         else:
@@ -153,7 +162,8 @@ def main():
                 halfH = round(targets["height"][i] / 2)
                 midX = targets['current_mid_x'][i]
                 midY = targets['current_mid_y'][i]
-                (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+                (startX, startY, endX, endY) = int(midX + halfW), int(midY +
+                                                                      halfH), int(midX - halfW), int(midY - halfH)

                 idx = 0
                 # draw the bounding box and label on the frame
@@ -183,5 +193,6 @@ def main():
     camera.stop()

 if __name__ == "__main__":
     main()
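For reference, a minimal sketch of the ONNX Runtime session setup that the reformatted call above corresponds to; the model path and the CPU fallback provider are assumptions for illustration, not part of the commit.

import onnxruntime as ort

# Minimal sketch: build a session with full graph optimizations and a GPU-first
# provider list that falls back to CPU when CUDA is unavailable (assumption).
so = ort.SessionOptions()
so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=providers)
print([i.name for i in ort_sess.get_inputs()])   # model input names
print([o.name for o in ort_sess.get_outputs()])  # model output names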

Another changed file (name not captured):

@@ -6,13 +6,14 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 import dxcam
 import torch

 def main():
     # Window title to go after and the height of the screenshots
     videoGameWindowTitle = "Counter"
@@ -62,8 +63,10 @@ def main():
                "height": screenShotHeight}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + 320, top + 320
     region = (left, top, right, bottom)
@@ -81,7 +84,8 @@ def main():
     so = ort.SessionOptions()
     so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-    ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=['CUDAExecutionProvider'])
+    ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=[
+                                    'CUDAExecutionProvider'])

     # Used for colors drawn on bounding boxes
     COLORS = np.random.uniform(0, 255, size=(1500, 3))
@@ -98,7 +102,8 @@ def main():
         im = torch.from_numpy(outputs[0]).to('cpu')

-        pred = non_max_suppression(im, confidence, confidence, 0, False, max_det=10)
+        pred = non_max_suppression(
+            im, confidence, confidence, 0, False, max_det=10)

         targets = []
         for i, det in enumerate(pred):
@@ -110,9 +115,11 @@ def main():
                     s += f"{n} {int(c)}, "  # add to string

                 for *xyxy, conf, cls in reversed(det):
-                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                        1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -121,7 +128,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -138,7 +146,8 @@ def main():
             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]
         else:
@@ -152,7 +161,8 @@ def main():
                 halfH = round(targets["height"][i] / 2)
                 midX = targets['current_mid_x'][i]
                 midY = targets['current_mid_y'][i]
-                (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+                (startX, startY, endX, endY) = int(midX + halfW), int(midY +
+                                                                      halfH), int(midX - halfW), int(midY - halfH)

                 idx = 0
                 # draw the bounding box and label on the frame
@@ -181,5 +191,6 @@ def main():
                 exit()
     camera.stop()

 if __name__ == "__main__":
     main()
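Each script normalizes the xyxy detections into xywh fractions of the frame before putting them in the DataFrame; a small sketch of that conversion with a made-up box and frame size (it mirrors YOLOv5's xyxy2xywh divided by the gain tensor, restated locally so it runs on its own).

# Sketch of the normalization done in the detection loop above.
def to_normalized_xywh(xyxy, frame_w, frame_h):
    x1, y1, x2, y2 = xyxy
    return [((x1 + x2) / 2) / frame_w,   # current_mid_x
            ((y1 + y2) / 2) / frame_h,   # current_mid_y
            (x2 - x1) / frame_w,         # width
            (y2 - y1) / frame_h]         # height

# Hypothetical 320x320 frame and one pixel-space box.
print(to_normalized_xywh([100.0, 80.0, 180.0, 240.0], 320, 320))
# [0.4375, 0.5, 0.25, 0.5] -> center x/y, width, height as fractions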

Another changed file (name not captured):

@@ -5,14 +5,15 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 from models.common import DetectMultiBackend
 import dxcam
 import cupy as cp

 def main():
     # Window title to go after and the height of the screenshots
     videoGameWindowTitle = "Counter"
@@ -62,8 +63,10 @@ def main():
                "height": screenShotHeight}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + screenShotWidth, top + screenShotHeight
     region = (left, top, right, bottom)
@@ -80,7 +83,8 @@ def main():
     sTime = time.time()

     # Loading Yolo5 Small AI Model
-    model = DetectMultiBackend('yolov5s320Half.engine', device=torch.device('cuda'), dnn=False, data='', fp16=True)
+    model = DetectMultiBackend('yolov5s320Half.engine', device=torch.device(
+        'cuda'), dnn=False, data='', fp16=True)
     stride, names, pt = model.stride, model.names, model.pt

     # Used for colors drawn on bounding boxes
@@ -100,12 +104,14 @@ def main():
         # Converting to numpy for visuals
         im0 = im[0].permute(1, 2, 0) * 255
         im0 = im0.cpu().numpy().astype(np.uint8)
-        im0 = cv2.cvtColor(im0, cv2.COLOR_RGB2BGR)  # Image has to be in BGR for visualization
+        # Image has to be in BGR for visualization
+        im0 = cv2.cvtColor(im0, cv2.COLOR_RGB2BGR)

         # Detecting all the objects
         results = model(im)

-        pred = non_max_suppression(results, confidence, confidence, 0, False, max_det=10)
+        pred = non_max_suppression(
+            results, confidence, confidence, 0, False, max_det=10)

         targets = []
         for i, det in enumerate(pred):
@@ -117,9 +123,11 @@ def main():
                     s += f"{n} {names[int(c)]}, "  # add to string

                 for *xyxy, conf, cls in reversed(det):
-                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                        1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -128,7 +136,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -145,7 +154,8 @@ def main():
             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]
         else:
@@ -159,7 +169,8 @@ def main():
                 halfH = round(targets["height"][i] / 2)
                 midX = targets['current_mid_x'][i]
                 midY = targets['current_mid_y'][i]
-                (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+                (startX, startY, endX, endY) = int(
+                    midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)

                 idx = 0
                 # draw the bounding box and label on the frame
@@ -188,5 +199,6 @@ def main():
                 exit()
     camera.stop()

 if __name__ == "__main__":
     main()
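The tracking step in all of these scripts measures the Euclidean distance from each detection's midpoint to the previously aimed-at midpoint and sorts on it; a short pandas sketch with fabricated rows (note that sort_values returns a new frame, so the sketch assigns the result so the ordering actually sticks, and it sorts ascending so the nearest row comes first; those two details are choices made here, not copied from the diff).

import numpy as np
import pandas as pd

# Fabricated detections: normalized mid-x, mid-y, width, height.
targets = pd.DataFrame([[0.40, 0.50, 0.10, 0.30],
                        [0.70, 0.20, 0.05, 0.15]],
                       columns=['current_mid_x', 'current_mid_y', 'width', 'height'])
last_mid_coord = [0.65, 0.25]  # where the crosshair was last frame (assumed)

targets['last_mid_x'] = last_mid_coord[0]
targets['last_mid_y'] = last_mid_coord[1]
targets['dist'] = np.linalg.norm(
    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]].values, axis=1)
targets = targets.sort_values(by='dist', ascending=True)  # nearest target first
print(targets[['current_mid_x', 'current_mid_y', 'dist']].head(1))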

Another changed file (name not captured):

@@ -5,12 +5,13 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 import dxcam

 def main():
     # Window title to go after and the height of the screenshots
     videoGameWindowTitle = "Counter"
@@ -60,8 +61,10 @@ def main():
                "height": screenShotHeight}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + screenShotWidth, top + screenShotHeight
     region = (left, top, right, bottom)
@@ -78,7 +81,8 @@ def main():
     sTime = time.time()

     # Loading Yolo5 Small AI Model
-    model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, force_reload=True)
+    model = torch.hub.load('ultralytics/yolov5', 'yolov5s',
+                           pretrained=True, force_reload=True)
     stride, names, pt = model.stride, model.names, model.pt
     model.half()
@@ -106,7 +110,8 @@ def main():
         results = model(im, size=screenShotHeight)

         # Suppressing results that dont meet thresholds
-        pred = non_max_suppression(results, confidence, confidence, 0, False, max_det=10)
+        pred = non_max_suppression(
+            results, confidence, confidence, 0, False, max_det=10)

         # Converting output to usable cords
         targets = []
@@ -119,9 +124,11 @@ def main():
                     s += f"{n} {names[int(c)]}, "  # add to string

                 for *xyxy, conf, cls in reversed(det):
-                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                    targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                        1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -130,7 +137,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -147,7 +155,8 @@ def main():
             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]
         else:
@@ -161,7 +170,8 @@ def main():
                 halfH = round(targets["height"][i] / 2)
                 midX = targets['current_mid_x'][i]
                 midY = targets['current_mid_y'][i]
-                (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+                (startX, startY, endX, endY) = int(
+                    midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)

                 idx = 0
                 # draw the bounding box and label on the frame
@@ -190,5 +200,6 @@ def main():
                 exit()
     camera.stop()

 if __name__ == "__main__":
     main()
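All of the capture scripts gate the aim nudge on win32api.GetKeyState(0x14) (the Caps Lock toggle) and send a relative MOUSEEVENTF_MOVE; a minimal Windows-only sketch of that call, with the offsets and multiplier invented for illustration.

import win32api
import win32con

# Windows-only sketch: nudge the cursor by a scaled relative offset while the
# Caps Lock toggle (virtual key 0x14) is on. Offsets and multiplier are made up.
aaMovementAmp = 0.8
mouseMove = [12, -5]  # pixels toward the chosen target (hypothetical)

if win32api.GetKeyState(0x14):  # non-zero low-order bit when Caps Lock is toggled
    win32api.mouse_event(win32con.MOUSEEVENTF_MOVE,
                         int(mouseMove[0] * aaMovementAmp),
                         int(mouseMove[1] * aaMovementAmp), 0, 0)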

Another changed file (name not captured):

@@ -10,6 +10,7 @@ import warnings
 from collections import OrderedDict, namedtuple
 from copy import copy
 from pathlib import Path
+from urllib.parse import urlparse

 import cv2
 import numpy as np
@@ -22,26 +23,51 @@ from torch.cuda import amp
 from utils.dataloaders import exif_transpose, letterbox
 from utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr,
-                           increment_path, make_divisible, non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh,
+                           increment_path, make_divisible, non_max_suppression, scale_boxes, xywh2xyxy, xyxy2xywh,
                            yaml_load)
 from utils.plots import Annotator, colors, save_one_box
 from utils.torch_utils import copy_attr, smart_inference_mode

-def autopad(k, p=None):  # kernel, padding
-    # Pad to 'same'
+def export_formats():
+    # YOLOv5 export formats
+    x = [
+        ['PyTorch', '-', '.pt', True, True],
+        ['TorchScript', 'torchscript', '.torchscript', True, True],
+        ['ONNX', 'onnx', '.onnx', True, True],
+        ['OpenVINO', 'openvino', '_openvino_model', True, False],
+        ['TensorRT', 'engine', '.engine', False, True],
+        ['CoreML', 'coreml', '.mlmodel', True, False],
+        ['TensorFlow SavedModel', 'saved_model', '_saved_model', True, True],
+        ['TensorFlow GraphDef', 'pb', '.pb', True, True],
+        ['TensorFlow Lite', 'tflite', '.tflite', True, False],
+        ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False, False],
+        ['TensorFlow.js', 'tfjs', '_web_model', False, False],
+        ['PaddlePaddle', 'paddle', '_paddle_model', True, True], ]
+    return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU'])
+
+
+def autopad(k, p=None, d=1):  # kernel, padding, dilation
+    # Pad to 'same' shape outputs
+    if d > 1:
+        k = d * (k - 1) + 1 if isinstance(k,
+                                          int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
     if p is None:
         p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
     return p


 class Conv(nn.Module):
-    # Standard convolution
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+    # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
+    default_act = nn.SiLU()  # default activation
+
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
         super().__init__()
-        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
+        self.conv = nn.Conv2d(c1, c2, k, s, autopad(
+            k, p, d), groups=g, dilation=d, bias=False)
         self.bn = nn.BatchNorm2d(c2)
-        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
+        self.act = self.default_act if act is True else act if isinstance(
+            act, nn.Module) else nn.Identity()

     def forward(self, x):
         return self.act(self.bn(self.conv(x)))
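The new autopad accounts for dilation by first computing the effective kernel size; a quick standalone restatement of the arithmetic above so the numbers can be checked without importing the repo.

# Standalone restatement of the autopad logic shown above, for checking values.
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]
    return p

print(autopad(3))        # 1 -> 'same' padding for a plain 3x3 kernel
print(autopad(3, d=2))   # 2 -> effective kernel 5, so padding 2 keeps the shape
print(autopad([3, 5]))   # [1, 2] for per-dimension kernel sizes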
@@ -51,14 +77,16 @@ class Conv(nn.Module):

 class DWConv(Conv):
-    # Depth-wise convolution class
-    def __init__(self, c1, c2, k=1, s=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
-        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
+    # Depth-wise convolution
+    # ch_in, ch_out, kernel, stride, dilation, activation
+    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):
+        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)


 class DWConvTranspose2d(nn.ConvTranspose2d):
-    # Depth-wise transpose convolution class
-    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
+    # Depth-wise transpose convolution
+    # ch_in, ch_out, kernel, stride, padding, padding_out
+    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):
         super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
@@ -87,7 +115,8 @@ class TransformerBlock(nn.Module):
         if c1 != c2:
             self.conv = Conv(c1, c2)
         self.linear = nn.Linear(c2, c2)  # learnable position embedding
-        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
+        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads)
+                                  for _ in range(num_layers)))
         self.c2 = c2

     def forward(self, x):
@@ -100,7 +129,8 @@ class TransformerBlock(nn.Module):

 class Bottleneck(nn.Module):
     # Standard bottleneck
-    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
+    # ch_in, ch_out, shortcut, groups, expansion
+    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
class BottleneckCSP(nn.Module): class BottleneckCSP(nn.Module):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion # ch_in, ch_out, number, shortcut, groups, expansion
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__() super().__init__()
c_ = int(c2 * e) # hidden channels c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1) self.cv1 = Conv(c1, c_, 1, 1)
@ -122,7 +153,8 @@ class BottleneckCSP(nn.Module):
self.cv4 = Conv(2 * c_, c2, 1, 1) self.cv4 = Conv(2 * c_, c2, 1, 1)
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
self.act = nn.SiLU() self.act = nn.SiLU()
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) self.m = nn.Sequential(
*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
def forward(self, x): def forward(self, x):
y1 = self.cv3(self.m(self.cv1(x))) y1 = self.cv3(self.m(self.cv1(x)))
@@ -146,13 +178,15 @@ class CrossConv(nn.Module):

 class C3(nn.Module):
     # CSP Bottleneck with 3 convolutions
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+    # ch_in, ch_out, number, shortcut, groups, expansion
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
         self.cv2 = Conv(c1, c_, 1, 1)
         self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
-        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
+        self.m = nn.Sequential(
+            *(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

     def forward(self, x):
         return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
@@ -163,7 +197,8 @@ class C3x(C3):
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         super().__init__(c1, c2, n, shortcut, g, e)
         c_ = int(c2 * e)
-        self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
+        self.m = nn.Sequential(
+            *(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))


 class C3TR(C3):
@@ -197,12 +232,14 @@ class SPP(nn.Module):
         c_ = c1 // 2  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
         self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
-        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
+        self.m = nn.ModuleList(
+            [nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

     def forward(self, x):
         x = self.cv1(x)
         with warnings.catch_warnings():
-            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
+            # suppress torch 1.9.0 max_pool2d() warning
+            warnings.simplefilter('ignore')
             return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
@@ -218,7 +255,8 @@ class SPPF(nn.Module):
     def forward(self, x):
         x = self.cv1(x)
         with warnings.catch_warnings():
-            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
+            # suppress torch 1.9.0 max_pool2d() warning
+            warnings.simplefilter('ignore')
             y1 = self.m(x)
             y2 = self.m(y1)
             return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
@@ -226,9 +264,10 @@ class SPPF(nn.Module):

 class Focus(nn.Module):
     # Focus wh information into c-space
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+    # ch_in, ch_out, kernel, stride, padding, groups
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
         super().__init__()
-        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
+        self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
         # self.contract = Contract(gain=2)

     def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
@@ -238,11 +277,12 @@ class Focus(nn.Module):

 class GhostConv(nn.Module):
     # Ghost Convolution https://github.com/huawei-noah/ghostnet
-    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
+    # ch_in, ch_out, kernel, stride, groups
+    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
         super().__init__()
         c_ = c2 // 2  # hidden channels
-        self.cv1 = Conv(c1, c_, k, s, None, g, act)
-        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
+        self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
+        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)

     def forward(self, x):
         y = self.cv1(x)
@@ -310,7 +350,7 @@ class DetectMultiBackend(nn.Module):
         #   PyTorch:          weights = *.pt
         #   TorchScript:                *.torchscript
        #   ONNX Runtime:               *.onnx
-        #   ONNX OpenCV DNN:            *.onnx with --dnn
+        #   ONNX OpenCV DNN:            *.onnx --dnn
         #   OpenVINO:                   *.xml
         #   CoreML:                     *.mlmodel
         #   TensorRT:                   *.engine
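DetectMultiBackend picks a backend from the weight file's suffix; a rough sketch of that dispatch for a few of the formats listed above. The helper name and the abbreviated suffix table here are inventions for illustration; the real _model_type() further down derives its suffix list from export_formats().

from pathlib import Path

# Rough sketch (invented helper): map a weights path to a backend label,
# mirroring a handful of the suffixes listed in the comment block above.
def guess_backend(weights):
    suffix = Path(weights).suffix.lower()
    table = {'.pt': 'PyTorch', '.torchscript': 'TorchScript', '.onnx': 'ONNX',
             '.engine': 'TensorRT', '.mlmodel': 'CoreML', '.tflite': 'TensorFlow Lite'}
    return table.get(suffix, 'unknown')

print(guess_backend('yolov5s320Half.engine'))  # TensorRT
print(guess_backend('yolov5s320.onnx'))        # ONNX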
@@ -318,25 +358,35 @@ class DetectMultiBackend(nn.Module):
         #   TensorFlow GraphDef:        *.pb
         #   TensorFlow Lite:            *.tflite
         #   TensorFlow Edge TPU:        *_edgetpu.tflite
-        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import
+        #   PaddlePaddle:               *_paddle_model
+        # scoped to avoid circular import
+        from models.experimental import attempt_download, attempt_load
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
-        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self._model_type(w)  # get backend
-        w = attempt_download(w)  # download if not local
+        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(
+            w)
         fp16 &= pt or jit or onnx or engine  # FP16
+        # BHWC formats (vs torch BCWH)
+        nhwc = coreml or saved_model or pb or tflite or edgetpu
         stride = 32  # default stride
+        cuda = torch.cuda.is_available() and device.type != 'cpu'  # use CUDA
+        if not (pt or triton):
+            w = attempt_download(w)  # download if not local

         if pt:  # PyTorch
-            model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
+            model = attempt_load(weights if isinstance(
+                weights, list) else w, device=device, inplace=True, fuse=fuse)
             stride = max(int(model.stride.max()), 32)  # model stride
-            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+            names = model.module.names if hasattr(
+                model, 'module') else model.names  # get class names
             model.half() if fp16 else model.float()
             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
         elif jit:  # TorchScript
             LOGGER.info(f'Loading {w} for TorchScript inference...')
             extra_files = {'config.txt': ''}  # model metadata
-            model = torch.jit.load(w, _extra_files=extra_files)
+            model = torch.jit.load(
+                w, _extra_files=extra_files, map_location=device)
             model.half() if fp16 else model.float()
             if extra_files['config.txt']:  # load metadata dict
                 d = json.loads(extra_files['config.txt'],
@@ -345,14 +395,15 @@ class DetectMultiBackend(nn.Module):
                 stride, names = int(d['stride']), d['names']
         elif dnn:  # ONNX OpenCV DNN
             LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
-            check_requirements(('opencv-python>=4.5.4',))
+            check_requirements('opencv-python>=4.5.4')
             net = cv2.dnn.readNetFromONNX(w)
         elif onnx:  # ONNX Runtime
             LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
-            cuda = torch.cuda.is_available() and device.type != 'cpu'
-            check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
+            check_requirements(
+                ('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
             import onnxruntime
-            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
+            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else [
+                'CPUExecutionProvider']
             session = onnxruntime.InferenceSession(w, providers=providers)
             output_names = [x.name for x in session.get_outputs()]
             meta = session.get_modelmeta().custom_metadata_map  # metadata
@@ -360,100 +411,149 @@ class DetectMultiBackend(nn.Module):
                 stride, names = int(meta['stride']), eval(meta['names'])
         elif xml:  # OpenVINO
             LOGGER.info(f'Loading {w} for OpenVINO inference...')
-            check_requirements(('openvino',))  # requires openvino-dev: https://pypi.org/project/openvino-dev/
+            # requires openvino-dev: https://pypi.org/project/openvino-dev/
+            check_requirements('openvino')
             from openvino.runtime import Core, Layout, get_batch
             ie = Core()
             if not Path(w).is_file():  # if not *.xml
-                w = next(Path(w).glob('*.xml'))  # get *.xml file from *_openvino_model dir
-            network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
+                # get *.xml file from *_openvino_model dir
+                w = next(Path(w).glob('*.xml'))
+            network = ie.read_model(
+                model=w, weights=Path(w).with_suffix('.bin'))
             if network.get_parameters()[0].get_layout().empty:
                 network.get_parameters()[0].set_layout(Layout("NCHW"))
             batch_dim = get_batch(network)
             if batch_dim.is_static:
                 batch_size = batch_dim.get_length()
-            executable_network = ie.compile_model(network, device_name="CPU")  # device_name="MYRIAD" for Intel NCS2
-            output_layer = next(iter(executable_network.outputs))
-            stride, names = self._load_metadata(Path(w).with_suffix('.yaml'))  # load metadata
+            # device_name="MYRIAD" for Intel NCS2
+            executable_network = ie.compile_model(network, device_name="CPU")
+            stride, names = self._load_metadata(
+                Path(w).with_suffix('.yaml'))  # load metadata
         elif engine:  # TensorRT
             LOGGER.info(f'Loading {w} for TensorRT inference...')
             import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
-            check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
+            # require tensorrt>=7.0.0
+            check_version(trt.__version__, '7.0.0', hard=True)
             if device.type == 'cpu':
                 device = torch.device('cuda:0')
-            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
+            Binding = namedtuple(
+                'Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
             logger = trt.Logger(trt.Logger.INFO)
             with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
                 model = runtime.deserialize_cuda_engine(f.read())
             context = model.create_execution_context()
             bindings = OrderedDict()
+            output_names = []
             fp16 = False  # default updated below
             dynamic = False
-            for index in range(model.num_bindings):
-                name = model.get_binding_name(index)
-                dtype = trt.nptype(model.get_binding_dtype(index))
-                if model.binding_is_input(index):
-                    if -1 in tuple(model.get_binding_shape(index)):  # dynamic
+            for i in range(model.num_bindings):
+                name = model.get_binding_name(i)
+                dtype = trt.nptype(model.get_binding_dtype(i))
+                if model.binding_is_input(i):
+                    if -1 in tuple(model.get_binding_shape(i)):  # dynamic
                         dynamic = True
-                        context.set_binding_shape(index, tuple(model.get_profile_shape(0, index)[2]))
+                        context.set_binding_shape(
+                            i, tuple(model.get_profile_shape(0, i)[2]))
                     if dtype == np.float16:
                         fp16 = True
-                shape = tuple(context.get_binding_shape(index))
+                else:  # output
+                    output_names.append(name)
+                shape = tuple(context.get_binding_shape(i))
                 im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
-                bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
-            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
-            batch_size = bindings['images'].shape[0]  # if dynamic, this is instead max batch size
+                bindings[name] = Binding(
+                    name, dtype, shape, im, int(im.data_ptr()))
+            binding_addrs = OrderedDict((n, d.ptr)
+                                        for n, d in bindings.items())
+            # if dynamic, this is instead max batch size
+            batch_size = bindings['images'].shape[0]
         elif coreml:  # CoreML
             LOGGER.info(f'Loading {w} for CoreML inference...')
             import coremltools as ct
             model = ct.models.MLModel(w)
-        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
-            if saved_model:  # SavedModel
+        elif saved_model:  # TF SavedModel
             LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
             import tensorflow as tf
             keras = False  # assume TF1 saved_model
-            model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
+            model = tf.keras.models.load_model(
+                w) if keras else tf.saved_model.load(w)
         elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
             LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
             import tensorflow as tf

             def wrap_frozen_graph(gd, inputs, outputs):
-                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
+                x = tf.compat.v1.wrap_function(
+                    lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                 ge = x.graph.as_graph_element
                 return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))

-            gd = tf.Graph().as_graph_def()  # graph_def
+            def gd_outputs(gd):
+                name_list, input_list = [], []
+                for node in gd.node:  # tensorflow.core.framework.node_def_pb2.NodeDef
+                    name_list.append(node.name)
+                    input_list.extend(node.input)
+                return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp'))
+
+            gd = tf.Graph().as_graph_def()  # TF GraphDef
             with open(w, 'rb') as f:
                 gd.ParseFromString(f.read())
-            frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs="Identity:0")
+            frozen_func = wrap_frozen_graph(
+                gd, inputs="x:0", outputs=gd_outputs(gd))
         elif tflite or edgetpu:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
             try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                 from tflite_runtime.interpreter import Interpreter, load_delegate
             except ImportError:
                 import tensorflow as tf
                 Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,
-            if edgetpu:  # Edge TPU https://coral.ai/software/#edgetpu-runtime
-                LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
+            if edgetpu:  # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
+                LOGGER.info(
+                    f'Loading {w} for TensorFlow Lite Edge TPU inference...')
                 delegate = {
                     'Linux': 'libedgetpu.so.1',
                     'Darwin': 'libedgetpu.1.dylib',
                     'Windows': 'edgetpu.dll'}[platform.system()]
-                interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
-            else:  # Lite
+                interpreter = Interpreter(model_path=w, experimental_delegates=[
+                    load_delegate(delegate)])
+            else:  # TFLite
                 LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
                 interpreter = Interpreter(model_path=w)  # load TFLite model
             interpreter.allocate_tensors()  # allocate
             input_details = interpreter.get_input_details()  # inputs
             output_details = interpreter.get_output_details()  # outputs
-        elif tfjs:
-            raise NotImplementedError('ERROR: YOLOv5 TF.js inference is not supported')
+        elif tfjs:  # TF.js
+            raise NotImplementedError(
+                'ERROR: YOLOv5 TF.js inference is not supported')
+        elif paddle:  # PaddlePaddle
+            LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
+            check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
+            import paddle.inference as pdi
+            if not Path(w).is_file():  # if not *.pdmodel
+                # get *.xml file from *_openvino_model dir
+                w = next(Path(w).rglob('*.pdmodel'))
+            weights = Path(w).with_suffix('.pdiparams')
+            config = pdi.Config(str(w), str(weights))
+            if cuda:
+                config.enable_use_gpu(
+                    memory_pool_init_size_mb=2048, device_id=0)
+            predictor = pdi.create_predictor(config)
+            input_handle = predictor.get_input_handle(
+                predictor.get_input_names()[0])
+            output_names = predictor.get_output_names()
+        elif triton:  # NVIDIA Triton Inference Server
+            LOGGER.info(f'Using {w} as Triton Inference Server...')
+            check_requirements('tritonclient[all]')
+            from utils.triton import TritonRemoteModel
+            model = TritonRemoteModel(url=w)
+            nhwc = model.runtime.startswith("tensorflow")
         else:
             raise NotImplementedError(f'ERROR: {w} is not a supported format')

         # class names
         if 'names' not in locals():
-            names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)}
+            names = yaml_load(data)['names'] if data else {
+                i: f'class{i}' for i in range(999)}
         if names[0] == 'n01440764' and len(names) == 1000:  # ImageNet
-            names = yaml_load(ROOT / 'data/ImageNet.yaml')['names']  # human-readable names
+            # human-readable names
+            names = yaml_load(ROOT / 'data/ImageNet.yaml')['names']

         self.__dict__.update(locals())  # assign all variables to self
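The TensorRT branch now records every output binding name instead of assuming a single binding called 'output'; a hedged sketch of that enumeration pattern against the TensorRT 8.x Python API (it expects an already-deserialized engine, which is only indicated in a comment here).

import tensorrt as trt  # requires an NVIDIA TensorRT install

# Sketch only: walk a deserialized engine's bindings and split them into the
# inputs and the list of output names, as the reworked __init__ above does.
def split_bindings(engine):
    inputs, outputs = [], []
    for i in range(engine.num_bindings):
        name = engine.get_binding_name(i)
        dtype = trt.nptype(engine.get_binding_dtype(i))
        (inputs if engine.binding_is_input(i) else outputs).append((name, dtype))
    return inputs, outputs

# Usage (assuming 'engine' came from runtime.deserialize_cuda_engine(...)):
# ins, outs = split_bindings(engine)
# print(ins, [name for name, _ in outs])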
@@ -462,9 +562,13 @@ class DetectMultiBackend(nn.Module):
         b, ch, h, w = im.shape  # batch, channel, height, width
         if self.fp16 and im.dtype != torch.float16:
             im = im.half()  # to FP16
+        if self.nhwc:
+            # torch BCHW to numpy BHWC shape(1,320,192,3)
+            im = im.permute(0, 2, 3, 1)

         if self.pt:  # PyTorch
-            y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
+            y = self.model(
+                im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
         elif self.jit:  # TorchScript
             y = self.model(im)
         elif self.dnn:  # ONNX OpenCV DNN
@ -473,52 +577,77 @@ class DetectMultiBackend(nn.Module):
            y = self.net.forward()
        elif self.onnx:  # ONNX Runtime
            im = im.cpu().numpy()  # torch to numpy
            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
        elif self.xml:  # OpenVINO
            im = im.cpu().numpy()  # FP32
            y = list(self.executable_network([im]).values())
        elif self.engine:  # TensorRT
            if self.dynamic and im.shape != self.bindings['images'].shape:
                i = self.model.get_binding_index('images')
                self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
                self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
                for name in self.output_names:
                    i = self.model.get_binding_index(name)
                    self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
            s = self.bindings['images'].shape
            assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
            self.binding_addrs['images'] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = [self.bindings[x].data for x in sorted(self.output_names)]
        elif self.coreml:  # CoreML
            im = im.cpu().numpy()
            im = Image.fromarray((im[0] * 255).astype('uint8'))
            # im = im.resize((192, 320), Image.ANTIALIAS)
            # coordinates are xywh normalized
            y = self.model.predict({'image': im})
            if 'confidence' in y:
                box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
                conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
                y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
            else:
                y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
        elif self.paddle:  # PaddlePaddle
            im = im.cpu().numpy().astype(np.float32)
            self.input_handle.copy_from_cpu(im)
            self.predictor.run()
            y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
        elif self.triton:  # NVIDIA Triton Inference Server
            y = self.model(im)
        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
            im = im.cpu().numpy()
            if self.saved_model:  # SavedModel
                y = self.model(im, training=False) if self.keras else self.model(im)
            elif self.pb:  # GraphDef
                y = self.frozen_func(x=self.tf.constant(im))
            else:  # Lite or Edge TPU
                input = self.input_details[0]
                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
                if int8:
                    scale, zero_point = input['quantization']
                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                self.interpreter.set_tensor(input['index'], im)
                self.interpreter.invoke()
                y = []
                for output in self.output_details:
                    x = self.interpreter.get_tensor(output['index'])
                    if int8:
                        scale, zero_point = output['quantization']
                        x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                    y.append(x)
            y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
            y[0][..., :4] *= [w, h, w, h]  # xywh normalized to pixels

        if isinstance(y, (list, tuple)):
            return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
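For reference, a minimal self-contained sketch of the TFLite int8 round trip used in the Lite/Edge TPU branch above; the scale and zero_point values here are made-up examples, not taken from a real model:

import numpy as np

# Hypothetical quantization parameters, as reported by
# interpreter.get_input_details()[0]['quantization'] for an int8 model.
scale, zero_point = 0.0039, 0

im = np.random.rand(1, 320, 192, 3).astype(np.float32)   # normalized 0-1 input
q = (im / scale + zero_point).astype(np.uint8)            # de-scale: float -> uint8 for the interpreter
deq = (q.astype(np.float32) - zero_point) * scale         # re-scale: uint8 output -> float
print(np.abs(im - deq).max())                             # quantization error, roughly bounded by the scale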
@ -530,23 +659,27 @@ class DetectMultiBackend(nn.Module):
    def warmup(self, imgsz=(1, 3, 640, 640)):
        # Warmup model by running inference once
        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
        if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
            im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
            for _ in range(2 if self.jit else 1):
                self.forward(im)  # warmup

    @staticmethod
    def _model_type(p='path/to/model.pt'):
        # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
        # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
        from utils.downloads import is_url
        sf = list(export_formats().Suffix)  # export suffixes
        if not is_url(p, check=False):
            check_suffix(p, sf)  # checks
        url = urlparse(p)  # if url may be Triton inference server
        types = [s in Path(p).name for s in sf]
        types[8] &= not types[9]  # tflite &= not edgetpu
        triton = not any(types) and all([any(s in url.scheme for s in ["http", "grpc"]), url.netloc])
        return types + [triton]
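A rough illustration of the Triton check above; looks_like_triton is a hypothetical helper written only for this sketch, not a function in the repo:

from urllib.parse import urlparse

def looks_like_triton(source: str) -> bool:
    # An http/grpc URL with a host and no recognised export suffix is treated
    # as a Triton Inference Server endpoint rather than a local weights file.
    url = urlparse(source)
    return any(s in url.scheme for s in ("http", "grpc")) and bool(url.netloc)

print(looks_like_triton("yolov5s.onnx"))              # False - a local model file
print(looks_like_triton("http://localhost:8000/v2"))  # True  - candidate Triton URL
print(looks_like_triton("grpc://192.168.1.10:8001"))  # True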
    @staticmethod
    def _load_metadata(f=Path('path/to/meta.yaml')):
@ -563,7 +696,8 @@ class AutoShape(nn.Module):
    iou = 0.45  # NMS IoU threshold
    agnostic = False  # NMS class-agnostic
    multi_label = False  # NMS multiple labels per box
    # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    classes = None
    max_det = 1000  # maximum number of detections per image
    amp = False  # Automatic Mixed Precision (AMP) inference
@ -571,19 +705,24 @@ class AutoShape(nn.Module):
        super().__init__()
        if verbose:
            LOGGER.info('Adding AutoShape... ')
        copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())  # copy attributes
        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
        self.pt = not self.dmb or model.pt  # PyTorch model
        self.model = model.eval()
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.inplace = False  # Detect.inplace=False for safe multithread inference
            m.export = True  # do not output loss values
    def _apply(self, fn):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
@ -605,40 +744,52 @@ class AutoShape(nn.Module):
        with dt[0]:
            if isinstance(size, int):  # expand
                size = (size, size)
            p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device)  # param
            autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
            if isinstance(ims, torch.Tensor):  # torch
                with amp.autocast(autocast):
                    return self.model(ims.to(p.device).type_as(p), augment=augment)  # inference

            # Pre-process
            n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims])  # number, list of images
            shape0, shape1, files = [], [], []  # image and inference shapes, filenames
            for i, im in enumerate(ims):
                f = f'image{i}'  # filename
                if isinstance(im, (str, Path)):  # filename or uri
                    im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
                    im = np.asarray(exif_transpose(im))
                elif isinstance(im, Image.Image):  # PIL Image
                    im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
                files.append(Path(f).with_suffix('.jpg').name)
                if im.shape[0] < 5:  # image in CHW
                    im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
                im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)  # enforce 3ch input
                s = im.shape[:2]  # HWC
                shape0.append(s)  # image shape
                g = max(size) / max(s)  # gain
                shape1.append([y * g for y in s])
                ims[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
            shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] if self.pt else size  # inf shape
            x = [letterbox(im, shape1, auto=False)[0] for im in ims]  # pad
            x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2)))  # stack and BHWC to BCHW
            x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32

        with amp.autocast(autocast):
            # Inference
            with dt[1]:
                y = self.model(x, augment=augment)  # forward
            # Post-process
            with dt[2]:
@ -650,7 +801,7 @@ class AutoShape(nn.Module):
                                        self.multi_label,
                                        max_det=self.max_det)  # NMS
                for i in range(n):
                    scale_boxes(shape1, y[i][:, :4], shape0[i])

            return Detections(ims, y, files, dt, self.names, x.shape)
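A hedged usage sketch for the forward pass above: AutoShape accepts file paths, PIL images, numpy arrays or a ready torch tensor in one call. `model` is assumed to be an AutoShape-wrapped detector and the file names are placeholders:

import numpy as np
from PIL import Image

im1 = 'data/images/zidane.jpg'                  # filename (str or Path)
im2 = Image.open('data/images/bus.jpg')         # PIL image
im3 = np.zeros((640, 480, 3), dtype=np.uint8)   # HWC numpy array
results = model([im1, im2, im3], size=640)      # one batched forward pass, returns a Detections object
results.print()                                 # per-image detection summary and timing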
@ -660,7 +811,8 @@ class Detections:
    def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
        super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims]  # normalizations
        self.ims = ims  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
@ -672,22 +824,28 @@ class Detections:
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple(x.t / self.n * 1E3 for x in times)  # timestamps (ms)
        self.s = tuple(shape)  # inference BCHW shape
    def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
        s, crops = '', []
        for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
            s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                s = s.rstrip(', ')
                if show or save or render or crop:
                    annotator = Annotator(im, example=str(self.names))
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
                            crops.append({
                                'box': box,
                                'conf': conf,
@ -695,45 +853,48 @@ class Detections:
                                'label': label,
                                'im': save_one_box(box, im, file=file, save=save)})
                        else:  # all others
                            annotator.box_label(box, label if labels else '', color=colors(cls))
                    im = annotator.im
            else:
                s += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if show:
                im.show(self.files[i])  # show
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
            if render:
                self.ims[i] = np.asarray(im)
        if pprint:
            s = s.lstrip('\n')
            return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
        if crop:
            if save:
                LOGGER.info(f'Saved results to {save_dir}\n')
            return crops
    def show(self, labels=True):
        self._run(show=True, labels=labels)  # show results

    def save(self, labels=True, save_dir='runs/detect/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self._run(save=True, labels=labels, save_dir=save_dir)  # save results

    def crop(self, save=True, save_dir='runs/detect/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None
        return self._run(crop=True, save=save, save_dir=save_dir)  # crop results

    def render(self, labels=True):
        self._run(render=True, labels=labels)  # render results
        return self.ims
    def pandas(self):
@ -742,30 +903,51 @@ class Detections:
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        r = range(self.n)  # iterable
        x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
        # for d in x:
        #    for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
        #        setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def print(self):
        LOGGER.info(self.__str__())

    def __len__(self):  # override len(results)
        return self.n

    def __str__(self):  # override print(results)
        return self._run(pprint=True)  # print results

    def __repr__(self):
        return f'YOLOv5 {self.__class__} instance\n' + self.__str__()
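A short sketch of how the Detections helpers above fit together, continuing the hypothetical `results` object from an AutoShape call:

df = results.pandas().xyxy[0]          # first image's boxes as a DataFrame (xmin ... name columns)
people = df[df['name'] == 'person']    # filter rows by class name
for r in results.tolist():             # one single-image Detections per element
    print(len(r), r.files)             # __len__ is the number of images (here 1)
crops = results.crop(save=False)       # list of dicts with 'box', 'conf', 'label', 'im' entries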
class Proto(nn.Module):
# YOLOv5 mask Proto module for segmentation models
def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks
super().__init__()
self.cv1 = Conv(c1, c_, k=3)
self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
self.cv2 = Conv(c_, c_, k=3)
self.cv3 = Conv(c_, c2)
def forward(self, x):
return self.cv3(self.cv2(self.upsample(self.cv1(x))))
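Proto only produces prototype masks; combining them with per-detection mask coefficients happens outside this file, so the sketch below is an assumption about that step, not code from this commit:

import torch

nm, h, w = 32, 160, 160
protos = torch.randn(nm, h, w)          # Proto output for one image, shape (nm, H/4, W/4)
coeffs = torch.randn(5, nm)             # hypothetical per-detection mask coefficients from the Segment head
masks = (coeffs @ protos.view(nm, -1)).sigmoid().view(-1, h, w)  # (num_det, H/4, W/4) soft masks
binary = masks > 0.5                    # threshold to boolean instance masks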
class Classify(nn.Module):
    # YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        c_ = 1280  # efficientnet_b0 size
        self.conv = Conv(c1, c_, k, s, autopad(k, p), g)


@ -0,0 +1,49 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
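The activation: entry above is read by parse_model (further down in this commit) and eval'd into Conv.default_act, so every Conv built afterwards uses it. A rough sketch of that mechanism, with a simplified stand-in for models.common.Conv:

import torch.nn as nn

class Conv(nn.Module):          # simplified stand-in, not the real models.common.Conv
    default_act = nn.SiLU()     # default activation, overridable per-model

act = 'nn.LeakyReLU(0.1)'       # value read from the model YAML
Conv.default_act = eval(act)    # redefine the default for every Conv built afterwards
print(Conv.default_act)         # LeakyReLU(negative_slope=0.1)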


@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]


@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]


@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]


@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.5 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]


@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]


@ -30,7 +30,7 @@ from tensorflow import keras
from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv, from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv,
DWConvTranspose2d, Focus, autopad) DWConvTranspose2d, Focus, autopad)
from models.experimental import MixConv2d, attempt_load from models.experimental import MixConv2d, attempt_load
from models.yolo import Detect from models.yolo import Detect, Segment
from utils.activations import SiLU from utils.activations import SiLU
from utils.general import LOGGER, make_divisible, print_args from utils.general import LOGGER, make_divisible, print_args
@ -299,18 +299,18 @@ class TFDetect(keras.layers.Layer):
x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no]) x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
if not self.training: # inference if not self.training: # inference
y = tf.sigmoid(x[i]) y = x[i]
grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5 grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4 anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
xy = (y[..., 0:2] * 2 + grid) * self.stride[i] # xy xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i] # xy
wh = y[..., 2:4] ** 2 * anchor_grid wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
# Normalize xywh to 0-1 to reduce calibration error # Normalize xywh to 0-1 to reduce calibration error
xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
y = tf.concat([xy, wh, y[..., 4:]], -1) y = tf.concat([xy, wh, tf.sigmoid(y[..., 4:5 + self.nc]), y[..., 5 + self.nc:]], -1)
z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no])) z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1), x) return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),)
@staticmethod @staticmethod
def _make_grid(nx=20, ny=20): def _make_grid(nx=20, ny=20):
@ -320,6 +320,37 @@ class TFDetect(keras.layers.Layer):
return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32) return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
class TFSegment(TFDetect):
# YOLOv5 Segment head for segmentation models
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
super().__init__(nc, anchors, ch, imgsz, w)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv
self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos
self.detect = TFDetect.call
def call(self, x):
p = self.proto(x[0])
p = tf.transpose(p, [0, 3, 1, 2]) # from shape(1,160,160,32) to shape(1,32,160,160)
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p)
class TFProto(keras.layers.Layer):
def __init__(self, c1, c_=256, c2=32, w=None):
super().__init__()
self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
self.upsample = TFUpsample(None, scale_factor=2, mode='nearest')
self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
self.cv3 = TFConv(c_, c2, w=w.cv3)
def call(self, inputs):
return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))
class TFUpsample(keras.layers.Layer): class TFUpsample(keras.layers.Layer):
# TF version of torch.nn.Upsample() # TF version of torch.nn.Upsample()
def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w' def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w'
@ -377,10 +408,12 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
args = [ch[f]] args = [ch[f]]
elif m is Concat: elif m is Concat:
c2 = sum(ch[-1 if x == -1 else x + 1] for x in f) c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
elif m is Detect: elif m in [Detect, Segment]:
args.append([ch[x + 1] for x in f]) args.append([ch[x + 1] for x in f])
if isinstance(args[1], int): # number of anchors if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f) args[1] = [list(range(args[1] * 2))] * len(f)
if m is Segment:
args[3] = make_divisible(args[3] * gw, 8)
args.append(imgsz) args.append(imgsz)
else: else:
c2 = ch[f] c2 = ch[f]
@ -452,9 +485,9 @@ class TFModel:
iou_thres, iou_thres,
conf_thres, conf_thres,
clip_boxes=False) clip_boxes=False)
return nms, x[1] return (nms,)
return x[0] # output only first tensor [1,6300,85] = [xywh, conf, class0, class1, ...] return x # output [1,6300,85] = [xywh, conf, class0, class1, ...]
# x = x[0][0] # [x(1,6300,85), ...] to x(6300,85) # x = x[0] # [x(1,6300,85), ...] to x(6300,85)
# xywh = x[..., :4] # x(6300,4) boxes # xywh = x[..., :4] # x(6300,4) boxes
# conf = x[..., 4:5] # x(6300,1) confidences # conf = x[..., 4:5] # x(6300,1) confidences
# cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes


@ -36,6 +36,7 @@ except ImportError:
class Detect(nn.Module): class Detect(nn.Module):
# YOLOv5 Detect head for detection models
stride = None # strides computed during build stride = None # strides computed during build
dynamic = False # force grid reconstruction dynamic = False # force grid reconstruction
export = False # export mode export = False # export mode
@ -46,8 +47,8 @@ class Detect(nn.Module):
self.no = nc + 5 # number of outputs per anchor self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors self.na = len(anchors[0]) // 2 # number of anchors
        self.grid = [torch.empty(0) for _ in range(self.nl)]  # init grid
        self.anchor_grid = [torch.empty(0) for _ in range(self.nl)]  # init anchor grid
self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2) self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.inplace = inplace # use inplace ops (e.g. slice assignment) self.inplace = inplace # use inplace ops (e.g. slice assignment)
@ -63,16 +64,17 @@ class Detect(nn.Module):
            if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

            if isinstance(self, Segment):  # (boxes + masks)
                xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
            else:  # Detect (boxes only)
                xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                y = torch.cat((xy, wh, conf), 4)
            z.append(y.view(bs, self.na * nx * ny, self.no))

        return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
@ -87,6 +89,23 @@ class Detect(nn.Module):
return grid, anchor_grid return grid, anchor_grid
class Segment(Detect):
# YOLOv5 Segment head for segmentation models
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
super().__init__(nc, anchors, ch, inplace)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.proto = Proto(ch[0], self.npr, self.nm) # protos
self.detect = Detect.forward
def forward(self, x):
p = self.proto(x[0])
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
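For reference, a tiny numeric sketch of the box decoding used in Detect.forward above; the stride, anchor and raw values are arbitrary, and the grid cell already carries the -0.5 offset that _make_grid bakes in:

import torch

stride, anchor = 8.0, torch.tensor([10.0, 13.0])   # P3 stride and one anchor (w, h)
grid_cell = torch.tensor([4.0, 7.0]) - 0.5          # grid offset for cell (4, 7)
raw = torch.tensor([0.2, -0.1, 0.3, 0.4])           # raw tx, ty, tw, th before sigmoid

xy = (raw[:2].sigmoid() * 2 + grid_cell) * stride    # box center in pixels
wh = (raw[2:].sigmoid() * 2) ** 2 * anchor           # box width/height in pixels
print(xy, wh)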
class BaseModel(nn.Module): class BaseModel(nn.Module):
# YOLOv5 base model # YOLOv5 base model
def forward(self, x, profile=False, visualize=False): def forward(self, x, profile=False, visualize=False):
@ -135,7 +154,7 @@ class BaseModel(nn.Module):
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
self = super()._apply(fn) self = super()._apply(fn)
m = self.model[-1] # Detect() m = self.model[-1] # Detect()
if isinstance(m, Detect): if isinstance(m, (Detect, Segment)):
m.stride = fn(m.stride) m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid)) m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list): if isinstance(m.anchor_grid, list):
@ -169,11 +188,12 @@ class DetectionModel(BaseModel):
# Build strides, anchors # Build strides, anchors
m = self.model[-1] # Detect() m = self.model[-1] # Detect()
        if isinstance(m, (Detect, Segment)):
            s = 256  # 2x min stride
            m.inplace = self.inplace
            forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
            m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
            check_anchor_order(m)
m.anchors /= m.stride.view(-1, 1, 1) m.anchors /= m.stride.view(-1, 1, 1)
self.stride = m.stride self.stride = m.stride
self._initialize_biases() # only run once self._initialize_biases() # only run once
@ -235,15 +255,21 @@ class DetectionModel(BaseModel):
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module m = self.model[-1] # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility
class SegmentationModel(DetectionModel):
# YOLOv5 segmentation model
def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
super().__init__(cfg, ch, nc, anchors)
class ClassificationModel(BaseModel): class ClassificationModel(BaseModel):
# YOLOv5 classification model # YOLOv5 classification model
def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index
@ -271,8 +297,12 @@ class ClassificationModel(BaseModel):
def parse_model(d, ch): # model_dict, input_channels(3) def parse_model(d, ch): # model_dict, input_channels(3)
# Parse a YOLOv5 model.yaml dictionary
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
if act:
Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU()
LOGGER.info(f"{colorstr('activation:')} {act}") # print
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5) no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
@ -284,24 +314,28 @@ def parse_model(d, ch): # model_dict, input_channels(3)
args[j] = eval(a) if isinstance(a, str) else a # eval strings args[j] = eval(a) if isinstance(a, str) else a # eval strings
n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, if m in {
BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x): Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}:
c1, c2 = ch[f], args[0] c1, c2 = ch[f], args[0]
if c2 != no: # if not output if c2 != no: # if not output
c2 = make_divisible(c2 * gw, 8) c2 = make_divisible(c2 * gw, 8)
args = [c1, c2, *args[1:]] args = [c1, c2, *args[1:]]
if m in [BottleneckCSP, C3, C3TR, C3Ghost, C3x]: if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
args.insert(2, n) # number of repeats args.insert(2, n) # number of repeats
n = 1 n = 1
elif m is nn.BatchNorm2d: elif m is nn.BatchNorm2d:
args = [ch[f]] args = [ch[f]]
elif m is Concat: elif m is Concat:
c2 = sum(ch[x] for x in f) c2 = sum(ch[x] for x in f)
elif m is Detect: # TODO: channel, gw, gd
elif m in {Detect, Segment}:
args.append([ch[x] for x in f]) args.append([ch[x] for x in f])
if isinstance(args[1], int): # number of anchors if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f) args[1] = [list(range(args[1] * 2))] * len(f)
if m is Segment:
args[3] = make_divisible(args[3] * gw, 8)
elif m is Contract: elif m is Contract:
c2 = ch[f] * args[0] ** 2 c2 = ch[f] * args[0] ** 2
elif m is Expand: elif m is Expand:


@ -4,9 +4,15 @@ utils/initialization
""" """
import contextlib import contextlib
import platform
import threading import threading
def emojis(str=''):
# Return platform-dependent emoji-safe version of string
return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
class TryExcept(contextlib.ContextDecorator): class TryExcept(contextlib.ContextDecorator):
# YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager # YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager
def __init__(self, msg=''): def __init__(self, msg=''):
@ -17,7 +23,7 @@ class TryExcept(contextlib.ContextDecorator):
    def __exit__(self, exc_type, value, traceback):
        if value:
            print(emojis(f'{self.msg}{value}'))
        return True
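TryExcept works both as a decorator and as a context manager; a small usage sketch, assuming the class above is importable:

@TryExcept('plotting failed: ')
def plot_something():
    raise ValueError('bad data')   # swallowed; the message and error are printed instead

plot_something()                   # prints "plotting failed: bad data" and execution continues

with TryExcept('cleanup: '):
    1 / 0                          # ZeroDivisionError is caught and reported, not raised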
@ -38,7 +44,7 @@ def notebook_init(verbose=True):
import os import os
import shutil import shutil
from utils.general import check_font, check_requirements, emojis, is_colab from utils.general import check_font, check_requirements, is_colab
from utils.torch_utils import select_device # imports from utils.torch_utils import select_device # imports
check_requirements(('psutil', 'IPython')) check_requirements(('psutil', 'IPython'))


@ -12,7 +12,7 @@ import torch
import torchvision.transforms as T import torchvision.transforms as T
import torchvision.transforms.functional as TF import torchvision.transforms.functional as TF
from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy
from utils.metrics import bbox_ioa from utils.metrics import bbox_ioa
IMAGENET_MEAN = 0.485, 0.456, 0.406 # RGB mean IMAGENET_MEAN = 0.485, 0.456, 0.406 # RGB mean
@ -21,7 +21,7 @@ IMAGENET_STD = 0.229, 0.224, 0.225 # RGB standard deviation
class Albumentations: class Albumentations:
# YOLOv5 Albumentations class (optional, only used if package is installed) # YOLOv5 Albumentations class (optional, only used if package is installed)
def __init__(self): def __init__(self, size=640):
self.transform = None self.transform = None
prefix = colorstr('albumentations: ') prefix = colorstr('albumentations: ')
try: try:
@ -29,6 +29,7 @@ class Albumentations:
check_version(A.__version__, '1.0.3', hard=True) # version requirement check_version(A.__version__, '1.0.3', hard=True) # version requirement
T = [ T = [
A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
A.Blur(p=0.01), A.Blur(p=0.01),
A.MedianBlur(p=0.01), A.MedianBlur(p=0.01),
A.ToGray(p=0.01), A.ToGray(p=0.01),
@ -281,7 +282,7 @@ def cutout(im, labels, p=0.5):
# return unobscured labels # return unobscured labels
if len(labels) and s > 0.03: if len(labels) and s > 0.03:
box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h)) # intersection over area
labels = labels[ioa < 0.60] # remove >60% obscured labels labels = labels[ioa < 0.60] # remove >60% obscured labels
return labels return labels
@ -303,9 +304,11 @@ def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates
def classify_albumentations(
        augment=True,
        size=224,
        scale=(0.08, 1.0),
        ratio=(0.75, 1.0 / 0.75),  # 0.75, 1.33
        hflip=0.5,
        vflip=0.0,
        jitter=0.4,
@ -319,7 +322,7 @@ def classify_albumentations(augment=True,
from albumentations.pytorch import ToTensorV2 from albumentations.pytorch import ToTensorV2
check_version(A.__version__, '1.0.3', hard=True) # version requirement check_version(A.__version__, '1.0.3', hard=True) # version requirement
if augment: # Resize and crop if augment: # Resize and crop
T = [A.RandomResizedCrop(height=size, width=size, scale=scale)] T = [A.RandomResizedCrop(height=size, width=size, scale=scale, ratio=ratio)]
if auto_aug: if auto_aug:
# TODO: implement AugMix, AutoAug & RandAug in albumentation # TODO: implement AugMix, AutoAug & RandAug in albumentation
LOGGER.info(f'{prefix}auto augmentations are currently not supported') LOGGER.info(f'{prefix}auto augmentations are currently not supported')
@ -338,7 +341,7 @@ def classify_albumentations(augment=True,
return A.Compose(T) return A.Compose(T)
except ImportError: # package not installed, skip except ImportError: # package not installed, skip
        LOGGER.warning(f'{prefix}⚠️ not found, install with `pip install albumentations` (recommended)')
except Exception as e: except Exception as e:
LOGGER.info(f'{prefix}{e}') LOGGER.info(f'{prefix}{e}')


@ -122,7 +122,7 @@ def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen
# Filter # Filter
i = (wh0 < 3.0).any(1).sum() i = (wh0 < 3.0).any(1).sum()
if i: if i:
LOGGER.info(f'{PREFIX}WARNING: Extremely small objects found: {i} of {len(wh0)} labels are < 3 pixels in size') LOGGER.info(f'{PREFIX}WARNING ⚠️ Extremely small objects found: {i} of {len(wh0)} labels are <3 pixels in size')
wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32) # filter > 2 pixels wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32) # filter > 2 pixels
# wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1
@ -134,7 +134,7 @@ def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen
k = kmeans(wh / s, n, iter=30)[0] * s # points k = kmeans(wh / s, n, iter=30)[0] * s # points
assert n == len(k) # kmeans may return fewer points than requested if wh is insufficient or too similar assert n == len(k) # kmeans may return fewer points than requested if wh is insufficient or too similar
except Exception: except Exception:
LOGGER.warning(f'{PREFIX}WARNING: switching strategies from kmeans to random init') LOGGER.warning(f'{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init')
k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init
wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0)) wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0))
k = print_results(k, verbose=False) k = print_results(k, verbose=False)


@ -19,7 +19,7 @@ def check_train_batch_size(model, imgsz=640, amp=True):
def autobatch(model, imgsz=640, fraction=0.8, batch_size=16): def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
# Automatically estimate best batch size to use `fraction` of available CUDA memory # Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory
# Usage: # Usage:
# import torch # import torch
# from utils.autobatch import autobatch # from utils.autobatch import autobatch
@ -33,6 +33,9 @@ def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
if device.type == 'cpu': if device.type == 'cpu':
LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}') LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
return batch_size return batch_size
if torch.backends.cudnn.benchmark:
LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}')
return batch_size
# Inspect CUDA memory # Inspect CUDA memory
gb = 1 << 30 # bytes to GiB (1024 ** 3) gb = 1 << 30 # bytes to GiB (1024 ** 3)
@ -62,8 +65,8 @@ def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
b = batch_sizes[max(i - 1, 0)] # select prior safe point b = batch_sizes[max(i - 1, 0)] # select prior safe point
if b < 1 or b > 1024: # b outside of safe range if b < 1 or b > 1024: # b outside of safe range
b = batch_size b = batch_size
        LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.')
    fraction = (np.polyval(p, b) + r + a) / t  # actual fraction predicted
    LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅')
    return b
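The batch-size choice above boils down to fitting measured memory against probe batch sizes and solving for the batch that hits the target fraction; a stripped-down sketch of that idea with invented numbers (a simple linear fit here, not the exact fit used in the function):

import numpy as np

batch_sizes = [1, 2, 4, 8, 16]
mem_gb = [1.1, 1.5, 2.3, 4.0, 7.6]     # hypothetical measured CUDA memory for each probe batch
t, fraction = 10.0, 0.8                # total GiB on the card and target utilisation

p = np.polyfit(batch_sizes, mem_gb, deg=1)      # memory ~= p[0] * batch + p[1]
b = int((t * fraction - p[1]) / p[0])           # batch size that lands near 80% of memory
print(b, np.polyval(p, b))                      # chosen batch and its predicted memory use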


@ -40,6 +40,7 @@ IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp',
VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes
BAR_FORMAT = '{l_bar}{bar:10}{r_bar}{bar:-10b}' # tqdm bar format BAR_FORMAT = '{l_bar}{bar:10}{r_bar}{bar:-10b}' # tqdm bar format
LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html
RANK = int(os.getenv('RANK', -1))
PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders
# Get orientation exif tag # Get orientation exif tag
@ -116,7 +117,7 @@ def create_dataloader(path,
prefix='', prefix='',
shuffle=False): shuffle=False):
if rect and shuffle: if rect and shuffle:
LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False') LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
shuffle = False shuffle = False
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
dataset = LoadImagesAndLabels( dataset = LoadImagesAndLabels(
@ -139,7 +140,7 @@ def create_dataloader(path,
sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates
generator = torch.Generator() generator = torch.Generator()
generator.manual_seed(0) generator.manual_seed(6148914691236517205 + RANK)
return loader(dataset, return loader(dataset,
batch_size=batch_size, batch_size=batch_size,
shuffle=shuffle and sampler is None, shuffle=shuffle and sampler is None,
@ -185,6 +186,55 @@ class _RepeatSampler:
yield from iter(self.sampler) yield from iter(self.sampler)
class LoadScreenshots:
# YOLOv5 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"`
def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None):
# source = [screen_number left top width height] (pixels)
check_requirements('mss')
import mss
source, *params = source.split()
self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0
if len(params) == 1:
self.screen = int(params[0])
elif len(params) == 4:
left, top, width, height = (int(x) for x in params)
elif len(params) == 5:
self.screen, left, top, width, height = (int(x) for x in params)
self.img_size = img_size
self.stride = stride
self.transforms = transforms
self.auto = auto
self.mode = 'stream'
self.frame = 0
self.sct = mss.mss()
# Parse monitor shape
monitor = self.sct.monitors[self.screen]
self.top = monitor["top"] if top is None else (monitor["top"] + top)
self.left = monitor["left"] if left is None else (monitor["left"] + left)
self.width = width or monitor["width"]
self.height = height or monitor["height"]
self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}
def __iter__(self):
return self
def __next__(self):
# mss screen capture: get raw pixels from the screen as np array
im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR
s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "
if self.transforms:
im = self.transforms(im0) # transforms
else:
im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize
im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
im = np.ascontiguousarray(im) # contiguous
self.frame += 1
return str(self.screen), im, im0, None, s # screen, img, original img, im0s, s
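Hedged usage sketch for the new screenshot source above (requires the mss package and a live display; the region numbers are arbitrary):

# CLI: python detect.py --source "screen 0 100 100 512 256"   # screen 0, left 100, top 100, 512x256 px

# Or iterate over frames directly:
dataset = LoadScreenshots("screen 0 100 100 512 256", img_size=640, stride=32, auto=True)
for source, im, im0, _, s in dataset:
    print(s, im.shape)   # letterboxed CHW frame ready for the model
    break                # the loader streams forever, so stop after one frame here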
class LoadImages: class LoadImages:
# YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4` # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1): def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
@ -232,8 +282,9 @@ class LoadImages:
if self.video_flag[self.count]: if self.video_flag[self.count]:
# Read video # Read video
self.mode = 'video' self.mode = 'video'
ret_val, im0 = self.cap.read() for _ in range(self.vid_stride):
self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.vid_stride * (self.frame + 1)) # read at vid_stride self.cap.grab()
ret_val, im0 = self.cap.retrieve()
while not ret_val: while not ret_val:
self.count += 1 self.count += 1
self.cap.release() self.cap.release()
@ -328,7 +379,7 @@ class LoadStreams:
self.auto = auto and self.rect self.auto = auto and self.rect
self.transforms = transforms # optional self.transforms = transforms # optional
if not self.rect: if not self.rect:
LOGGER.warning('WARNING: Stream shapes differ. For optimal performance supply similarly-shaped streams.') LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.')
def update(self, i, cap, stream): def update(self, i, cap, stream):
# Read stream `i` frames in daemon thread # Read stream `i` frames in daemon thread
@ -341,7 +392,7 @@ class LoadStreams:
if success: if success:
self.imgs[i] = im self.imgs[i] = im
else: else:
LOGGER.warning('WARNING: Video stream unresponsive, please check your IP camera connection.') LOGGER.warning('WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.')
self.imgs[i] = np.zeros_like(self.imgs[i]) self.imgs[i] = np.zeros_like(self.imgs[i])
cap.open(stream) # re-open stream if signal was lost cap.open(stream) # re-open stream if signal was lost
time.sleep(0.0) # wait time time.sleep(0.0) # wait time
@ -403,7 +454,7 @@ class LoadImagesAndLabels(Dataset):
self.mosaic_border = [-img_size // 2, -img_size // 2] self.mosaic_border = [-img_size // 2, -img_size // 2]
self.stride = stride self.stride = stride
self.path = path self.path = path
self.albumentations = Albumentations() if augment else None self.albumentations = Albumentations(size=img_size) if augment else None
try: try:
f = [] # image files f = [] # image files
@ -455,7 +506,7 @@ class LoadImagesAndLabels(Dataset):
self.im_files = list(cache.keys()) # update self.im_files = list(cache.keys()) # update
self.label_files = img2label_paths(cache.keys()) # update self.label_files = img2label_paths(cache.keys()) # update
n = len(shapes) # number of images n = len(shapes) # number of images
bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index bi = np.floor(np.arange(n) / batch_size).astype(int) # batch index
nb = bi[-1] + 1 # number of batches nb = bi[-1] + 1 # number of batches
self.batch = bi # batch index of image self.batch = bi # batch index of image
self.n = n self.n = n
@ -484,6 +535,7 @@ class LoadImagesAndLabels(Dataset):
self.im_files = [self.im_files[i] for i in irect] self.im_files = [self.im_files[i] for i in irect]
self.label_files = [self.label_files[i] for i in irect] self.label_files = [self.label_files[i] for i in irect]
self.labels = [self.labels[i] for i in irect] self.labels = [self.labels[i] for i in irect]
self.segments = [self.segments[i] for i in irect]
self.shapes = s[irect] # wh self.shapes = s[irect] # wh
ar = ar[irect] ar = ar[irect]
@ -497,7 +549,7 @@ class LoadImagesAndLabels(Dataset):
elif mini > 1: elif mini > 1:
shapes[i] = [1, 1 / mini] shapes[i] = [1, 1 / mini]
self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
# Cache images into RAM/disk for faster training (WARNING: large datasets may exceed system resources) # Cache images into RAM/disk for faster training (WARNING: large datasets may exceed system resources)
self.ims = [None] * n self.ims = [None] * n
@ -542,7 +594,7 @@ class LoadImagesAndLabels(Dataset):
if msgs: if msgs:
LOGGER.info('\n'.join(msgs)) LOGGER.info('\n'.join(msgs))
if nf == 0: if nf == 0:
LOGGER.warning(f'{prefix}WARNING: No labels found in {path}. {HELP_URL}') LOGGER.warning(f'{prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}')
x['hash'] = get_hash(self.label_files + self.im_files) x['hash'] = get_hash(self.label_files + self.im_files)
x['results'] = nf, nm, ne, nc, len(self.im_files) x['results'] = nf, nm, ne, nc, len(self.im_files)
x['msgs'] = msgs # warnings x['msgs'] = msgs # warnings
@ -552,7 +604,7 @@ class LoadImagesAndLabels(Dataset):
path.with_suffix('.cache.npy').rename(path) # remove .npy suffix path.with_suffix('.cache.npy').rename(path) # remove .npy suffix
LOGGER.info(f'{prefix}New cache created: {path}') LOGGER.info(f'{prefix}New cache created: {path}')
except Exception as e: except Exception as e:
LOGGER.warning(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}') # not writeable LOGGER.warning(f'{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable: {e}') # not writeable
return x return x
def __len__(self): def __len__(self):
@ -867,7 +919,7 @@ def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.dataloaders impo
b = x[1:] * [w, h, w, h] # box b = x[1:] * [w, h, w, h] # box
# b[2:] = b[2:].max() # rectangle to square # b[2:] = b[2:].max() # rectangle to square
b[2:] = b[2:] * 1.2 + 3 # pad b[2:] = b[2:] * 1.2 + 3 # pad
b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)
b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
b[[1, 3]] = np.clip(b[[1, 3]], 0, h) b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
@ -916,7 +968,7 @@ def verify_image_label(args):
f.seek(-2, 2) f.seek(-2, 2)
if f.read() != b'\xff\xd9': # corrupt JPEG if f.read() != b'\xff\xd9': # corrupt JPEG
ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100) ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100)
msg = f'{prefix}WARNING: {im_file}: corrupt JPEG restored and saved' msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved'
# verify labels # verify labels
if os.path.isfile(lb_file): if os.path.isfile(lb_file):
@ -938,7 +990,7 @@ def verify_image_label(args):
lb = lb[i] # remove duplicates lb = lb[i] # remove duplicates
if segments: if segments:
segments = [segments[x] for x in i] segments = [segments[x] for x in i]
msg = f'{prefix}WARNING: {im_file}: {nl - len(i)} duplicate labels removed' msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed'
else: else:
ne = 1 # label empty ne = 1 # label empty
lb = np.zeros((0, 5), dtype=np.float32) lb = np.zeros((0, 5), dtype=np.float32)
@ -948,7 +1000,7 @@ def verify_image_label(args):
return im_file, lb, shape, segments, nm, nf, ne, nc, msg return im_file, lb, shape, segments, nm, nf, ne, nc, msg
except Exception as e: except Exception as e:
nc = 1 nc = 1
msg = f'{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}' msg = f'{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}'
return [None, None, None, None, nm, nf, ne, nc, msg] return [None, None, None, None, nm, nf, ne, nc, msg]
@ -1011,7 +1063,7 @@ class HUBDatasetStats():
im = im.resize((int(im.width * r), int(im.height * r))) im = im.resize((int(im.width * r), int(im.height * r)))
im.save(f_new, 'JPEG', quality=50, optimize=True) # save im.save(f_new, 'JPEG', quality=50, optimize=True) # save
except Exception as e: # use OpenCV except Exception as e: # use OpenCV
print(f'WARNING: HUB ops PIL failure {f}: {e}') LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}')
im = cv2.imread(f) im = cv2.imread(f)
im_height, im_width = im.shape[:2] im_height, im_width = im.shape[:2]
r = max_dim / max(im_height, im_width) # ratio r = max_dim / max(im_height, im_width) # ratio
@ -1118,7 +1170,7 @@ def create_classification_dataloader(path,
nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])
sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
generator = torch.Generator() generator = torch.Generator()
generator.manual_seed(0) generator.manual_seed(6148914691236517205 + RANK)
return InfiniteDataLoader(dataset, return InfiniteDataLoader(dataset,
batch_size=batch_size, batch_size=batch_size,
shuffle=shuffle and sampler is None, shuffle=shuffle and sampler is None,


@ -3,7 +3,7 @@
# Image is CUDA-optimized for YOLOv5 single/multi-GPU training and inference # Image is CUDA-optimized for YOLOv5 single/multi-GPU training and inference
# Start FROM NVIDIA PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch # Start FROM NVIDIA PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
FROM nvcr.io/nvidia/pytorch:22.07-py3 FROM nvcr.io/nvidia/pytorch:22.08-py3
RUN rm -rf /opt/pytorch # remove 1.2GB dir RUN rm -rf /opt/pytorch # remove 1.2GB dir
# Downloads to user config dir # Downloads to user config dir


@ -16,13 +16,13 @@ import requests
import torch import torch
def is_url(url, check_online=True): def is_url(url, check=True):
# Check if online file exists # Check if string is URL and check if URL exists
try: try:
url = str(url) url = str(url)
result = urllib.parse.urlparse(url) result = urllib.parse.urlparse(url)
assert all([result.scheme, result.netloc, result.path]) # check if is url assert all([result.scheme, result.netloc, result.path]) # check if is url
return (urllib.request.urlopen(url).getcode() == 200) if check_online else True # check if exists online return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online
except (AssertionError, urllib.request.HTTPError): except (AssertionError, urllib.request.HTTPError):
return False return False
@ -87,9 +87,7 @@ def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'):
return file return file
# GitHub assets # GitHub assets
assets = [ assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default
'yolov5n.pt', 'yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov5n6.pt', 'yolov5s6.pt',
'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt']
try: try:
tag, assets = github_assets(repo, release) tag, assets = github_assets(repo, release)
except Exception: except Exception:
@ -107,7 +105,6 @@ def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'):
safe_download( safe_download(
file, file,
url=f'https://github.com/{repo}/releases/download/{tag}/{name}', url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
url2=f'https://storage.googleapis.com/{repo}/{tag}/{name}', # backup url (optional)
min_bytes=1E5, min_bytes=1E5,
error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}') error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')
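The hard-coded asset list in `attempt_download` is replaced by a comprehension; for reference, a quick sketch of what it expands to, now covering the classification and segmentation checkpoints as well:

```python
# Sketch: default GitHub asset names produced by the new comprehension
assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')]
print(len(assets))  # 20
print(assets[:4])   # ['yolov5n.pt', 'yolov5n6.pt', 'yolov5n-cls.pt', 'yolov5n-seg.pt']
```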


@ -17,6 +17,7 @@ import signal
import sys import sys
import time import time
import urllib import urllib
from copy import deepcopy
from datetime import datetime from datetime import datetime
from itertools import repeat from itertools import repeat
from multiprocessing.pool import ThreadPool from multiprocessing.pool import ThreadPool
@ -33,7 +34,7 @@ import torch
import torchvision import torchvision
import yaml import yaml
from utils import TryExcept from utils import TryExcept, emojis
from utils.downloads import gsutil_getsize from utils.downloads import gsutil_getsize
from utils.metrics import box_iou, fitness from utils.metrics import box_iou, fitness
@ -42,8 +43,8 @@ ROOT = FILE.parents[1] # YOLOv5 root directory
RANK = int(os.getenv('RANK', -1)) RANK = int(os.getenv('RANK', -1))
# Settings # Settings
DATASETS_DIR = ROOT.parent / 'datasets' # YOLOv5 datasets directory
NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads
DATASETS_DIR = Path(os.getenv('YOLOv5_DATASETS_DIR', ROOT.parent / 'datasets')) # global datasets directory
AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true' # global auto-install mode AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true' # global auto-install mode
VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode
FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf
@ -222,7 +223,7 @@ def init_seeds(seed=0, deterministic=False):
torch.manual_seed(seed) torch.manual_seed(seed)
torch.cuda.manual_seed(seed) torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed) # for Multi-GPU, exception safe torch.cuda.manual_seed_all(seed) # for Multi-GPU, exception safe
torch.backends.cudnn.benchmark = True # for faster training # torch.backends.cudnn.benchmark = True # AutoBatch problem https://github.com/ultralytics/yolov5/issues/9287
if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213 if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213
torch.use_deterministic_algorithms(True) torch.use_deterministic_algorithms(True)
torch.backends.cudnn.deterministic = True torch.backends.cudnn.deterministic = True
@ -247,11 +248,6 @@ def get_latest_run(search_dir='.'):
return max(last_list, key=os.path.getctime) if last_list else '' return max(last_list, key=os.path.getctime) if last_list else ''
def emojis(str=''):
# Return platform-dependent emoji-safe version of string
return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
def file_age(path=__file__): def file_age(path=__file__):
# Return days since last file update # Return days since last file update
dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta
@ -332,7 +328,7 @@ def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=Fals
# Check version vs. required version # Check version vs. required version
current, minimum = (pkg.parse_version(x) for x in (current, minimum)) current, minimum = (pkg.parse_version(x) for x in (current, minimum))
result = (current == minimum) if pinned else (current >= minimum) # bool result = (current == minimum) if pinned else (current >= minimum) # bool
s = f'WARNING: ⚠️ {name}{minimum} is required by YOLOv5, but {name}{current} is currently installed' # string s = f'WARNING ⚠️ {name}{minimum} is required by YOLOv5, but {name}{current} is currently installed' # string
if hard: if hard:
assert result, emojis(s) # assert min requirements met assert result, emojis(s) # assert min requirements met
if verbose and not result: if verbose and not result:
@ -341,40 +337,38 @@ def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=Fals
@TryExcept() @TryExcept()
def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=()): def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=''):
# Check installed dependencies meet YOLOv5 requirements (pass *.txt file or list of packages) # Check installed dependencies meet YOLOv5 requirements (pass *.txt file or list of packages or single package str)
prefix = colorstr('red', 'bold', 'requirements:') prefix = colorstr('red', 'bold', 'requirements:')
check_python() # check python version check_python() # check python version
if isinstance(requirements, (str, Path)): # requirements.txt file if isinstance(requirements, Path): # requirements.txt file
file = Path(requirements) file = requirements.resolve()
assert file.exists(), f"{prefix} {file.resolve()} not found, check failed." assert file.exists(), f"{prefix} {file} not found, check failed."
with file.open() as f: with file.open() as f:
requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude] requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude]
else: # list or tuple of packages elif isinstance(requirements, str):
requirements = [x for x in requirements if x not in exclude] requirements = [requirements]
n = 0 # number of packages updates s = ''
for i, r in enumerate(requirements): n = 0
for r in requirements:
try: try:
pkg.require(r) pkg.require(r)
except Exception: # DistributionNotFound or VersionConflict if requirements not met except (pkg.VersionConflict, pkg.DistributionNotFound): # exception if requirements not met
s = f"{prefix} {r} not found and is required by YOLOv5" s += f'"{r}" '
if install and AUTOINSTALL: # check environment variable
LOGGER.info(f"{s}, attempting auto-update...")
try:
assert check_online(), f"'pip install {r}' skipped (offline)"
LOGGER.info(check_output(f'pip install "{r}" {cmds[i] if cmds else ""}', shell=True).decode())
n += 1 n += 1
except Exception as e:
LOGGER.warning(f'{prefix} {e}')
else:
LOGGER.info(f'{s}. Please install and rerun your command.')
if n: # if packages updated if s and install and AUTOINSTALL: # check environment variable
source = file.resolve() if 'file' in locals() else requirements LOGGER.info(f"{prefix} YOLOv5 requirement{'s' * (n > 1)} {s}not found, attempting AutoUpdate...")
try:
assert check_online(), "AutoUpdate skipped (offline)"
LOGGER.info(check_output(f'pip install {s} {cmds}', shell=True).decode())
source = file if 'file' in locals() else requirements
s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \ s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \
f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n" f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n"
LOGGER.info(s) LOGGER.info(s)
except Exception as e:
LOGGER.warning(f'{prefix}{e}')
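Note the behavioral change in the refactored `check_requirements`: only a `Path` now triggers parsing of a requirements file, a plain string is treated as a single package, and all missing packages are collected into one `pip install` call. A rough usage sketch under those assumptions (package names are illustrative):

```python
# Rough usage sketch of the refactored checker
from pathlib import Path
from utils.general import check_requirements

check_requirements(Path('requirements.txt'))                 # Path -> parse the file
check_requirements('onnx>=1.12.0')                           # str  -> single package
check_requirements(('coremltools', 'onnx'), install=False)   # iterable -> checked as given
```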
def check_img_size(imgsz, s=32, floor=0): def check_img_size(imgsz, s=32, floor=0):
@ -385,7 +379,7 @@ def check_img_size(imgsz, s=32, floor=0):
imgsz = list(imgsz) # convert to list if tuple imgsz = list(imgsz) # convert to list if tuple
new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz] new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz]
if new_size != imgsz: if new_size != imgsz:
LOGGER.warning(f'WARNING: --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}') LOGGER.warning(f'WARNING ⚠️ --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}')
return new_size return new_size
@ -400,7 +394,7 @@ def check_imshow():
cv2.waitKey(1) cv2.waitKey(1)
return True return True
except Exception as e: except Exception as e:
LOGGER.warning(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}') LOGGER.warning(f'WARNING ⚠️ Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}')
return False return False
@ -470,8 +464,7 @@ def check_dataset(data, autodownload=True):
# Read yaml (optional) # Read yaml (optional)
if isinstance(data, (str, Path)): if isinstance(data, (str, Path)):
with open(data, errors='ignore') as f: data = yaml_load(data) # dictionary
data = yaml.safe_load(f) # dictionary
# Checks # Checks
for k in 'train', 'val', 'names': for k in 'train', 'val', 'names':
@ -486,7 +479,13 @@ def check_dataset(data, autodownload=True):
path = (ROOT / path).resolve() path = (ROOT / path).resolve()
for k in 'train', 'val', 'test': for k in 'train', 'val', 'test':
if data.get(k): # prepend path if data.get(k): # prepend path
data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]] if isinstance(data[k], str):
x = (path / data[k]).resolve()
if not x.exists() and data[k].startswith('../'):
x = (path / data[k][3:]).resolve()
data[k] = str(x)
else:
data[k] = [str((path / x).resolve()) for x in data[k]]
# Parse yaml # Parse yaml
train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download')) train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download'))
@ -497,13 +496,12 @@ def check_dataset(data, autodownload=True):
if not s or not autodownload: if not s or not autodownload:
raise Exception('Dataset not found ❌') raise Exception('Dataset not found ❌')
t = time.time() t = time.time()
root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
if s.startswith('http') and s.endswith('.zip'): # URL if s.startswith('http') and s.endswith('.zip'): # URL
f = Path(s).name # filename f = Path(s).name # filename
LOGGER.info(f'Downloading {s} to {f}...') LOGGER.info(f'Downloading {s} to {f}...')
torch.hub.download_url_to_file(s, f) torch.hub.download_url_to_file(s, f)
Path(root).mkdir(parents=True, exist_ok=True) # create root Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True) # create root
ZipFile(f).extractall(path=root) # unzip ZipFile(f).extractall(path=DATASETS_DIR) # unzip
Path(f).unlink() # remove zip Path(f).unlink() # remove zip
r = None # success r = None # success
elif s.startswith('bash '): # bash script elif s.startswith('bash '): # bash script
@ -512,7 +510,7 @@ def check_dataset(data, autodownload=True):
else: # python script else: # python script
r = exec(s, {'yaml': data}) # return None r = exec(s, {'yaml': data}) # return None
dt = f'({round(time.time() - t, 1)}s)' dt = f'({round(time.time() - t, 1)}s)'
s = f"success ✅ {dt}, saved to {colorstr('bold', root)}" if r in (0, None) else f"failure {dt}" s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f"failure {dt}"
LOGGER.info(f"Dataset download {s}") LOGGER.info(f"Dataset download {s}")
check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True) # download fonts check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True) # download fonts
return data # dictionary return data # dictionary
@ -537,7 +535,7 @@ def check_amp(model):
f = ROOT / 'data' / 'images' / 'bus.jpg' # image to check f = ROOT / 'data' / 'images' / 'bus.jpg' # image to check
im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if check_online() else np.ones((640, 640, 3)) im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if check_online() else np.ones((640, 640, 3))
try: try:
assert amp_allclose(model, im) or amp_allclose(DetectMultiBackend('yolov5n.pt', device), im) assert amp_allclose(deepcopy(model), im) or amp_allclose(DetectMultiBackend('yolov5n.pt', device), im)
LOGGER.info(f'{prefix}checks passed ✅') LOGGER.info(f'{prefix}checks passed ✅')
return True return True
except Exception: except Exception:
@ -569,10 +567,10 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry
def download_one(url, dir): def download_one(url, dir):
# Download 1 file # Download 1 file
success = True success = True
f = dir / Path(url).name # filename if Path(url).is_file():
if Path(url).is_file(): # exists in current path f = Path(url) # filename
Path(url).rename(f) # move to dir else: # does not exist
elif not f.exists(): f = dir / Path(url).name
LOGGER.info(f'Downloading {url} to {f}...') LOGGER.info(f'Downloading {url} to {f}...')
for i in range(retry + 1): for i in range(retry + 1):
if curl: if curl:
@ -586,9 +584,9 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry
if success: if success:
break break
elif i < retry: elif i < retry:
LOGGER.warning(f'Download failure, retrying {i + 1}/{retry} {url}...') LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')
else: else:
LOGGER.warning(f'Failed to download {url}...') LOGGER.warning(f'Failed to download {url}...')
if unzip and success and f.suffix in ('.zip', '.tar', '.gz'): if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
LOGGER.info(f'Unzipping {f}...') LOGGER.info(f'Unzipping {f}...')
@ -727,7 +725,7 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
if clip: if clip:
clip_coords(x, (h - eps, w - eps)) # warning: inplace clip clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center
y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center
@ -771,7 +769,23 @@ def resample_segments(segments, n=1000):
return segments return segments
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
# Rescale boxes (xyxy) from img1_shape to img0_shape
if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
boxes[:, [0, 2]] -= pad[0] # x padding
boxes[:, [1, 3]] -= pad[1] # y padding
boxes[:, :4] /= gain
clip_boxes(boxes, img0_shape)
return boxes
def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None):
# Rescale coords (xyxy) from img1_shape to img0_shape # Rescale coords (xyxy) from img1_shape to img0_shape
if ratio_pad is None: # calculate from img0_shape if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
@ -780,15 +794,15 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
gain = ratio_pad[0][0] gain = ratio_pad[0][0]
pad = ratio_pad[1] pad = ratio_pad[1]
coords[:, [0, 2]] -= pad[0] # x padding segments[:, 0] -= pad[0] # x padding
coords[:, [1, 3]] -= pad[1] # y padding segments[:, 1] -= pad[1] # y padding
coords[:, :4] /= gain segments /= gain
clip_coords(coords, img0_shape) clip_segments(segments, img0_shape)
return coords return segments
def clip_coords(boxes, shape): def clip_boxes(boxes, shape):
# Clip bounding xyxy bounding boxes to image shape (height, width) # Clip boxes (xyxy) to image shape (height, width)
if isinstance(boxes, torch.Tensor): # faster individually if isinstance(boxes, torch.Tensor): # faster individually
boxes[:, 0].clamp_(0, shape[1]) # x1 boxes[:, 0].clamp_(0, shape[1]) # x1
boxes[:, 1].clamp_(0, shape[0]) # y1 boxes[:, 1].clamp_(0, shape[0]) # y1
@ -799,15 +813,28 @@ def clip_coords(boxes, shape):
boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
def non_max_suppression(prediction, def clip_segments(boxes, shape):
# Clip segments (xy1,xy2,...) to image shape (height, width)
if isinstance(boxes, torch.Tensor): # faster individually
boxes[:, 0].clamp_(0, shape[1]) # x
boxes[:, 1].clamp_(0, shape[0]) # y
else: # np.array (faster grouped)
boxes[:, 0] = boxes[:, 0].clip(0, shape[1]) # x
boxes[:, 1] = boxes[:, 1].clip(0, shape[0]) # y
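`scale_coords` becomes `scale_boxes`, with `scale_segments`/`clip_segments` added alongside for the segmentation path. A short sketch of the usual detect-time mapping from the letterboxed input shape back to the original frame, using example shapes:

```python
# Sketch: rescale xyxy boxes from a 384x640 letterboxed input to a 1080x1920 frame
import torch
from utils.general import scale_boxes  # formerly scale_coords

boxes = torch.tensor([[10., 20., 200., 300.]])         # xyxy on the 384x640 model input
boxes = scale_boxes((384, 640), boxes, (1080, 1920))   # remove padding, undo gain, clip
print(boxes)                                           # boxes in original-image pixels
```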
def non_max_suppression(
prediction,
conf_thres=0.25, conf_thres=0.25,
iou_thres=0.45, iou_thres=0.45,
classes=None, classes=None,
agnostic=False, agnostic=False,
multi_label=False, multi_label=False,
labels=(), labels=(),
max_det=300): max_det=300,
"""Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes nm=0, # number of masks
):
"""Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
Returns: Returns:
list of detections, on (n,6) tensor per image [xyxy, conf, cls] list of detections, on (n,6) tensor per image [xyxy, conf, cls]
@ -817,7 +844,7 @@ def non_max_suppression(prediction,
prediction = prediction[0] # select only inference output prediction = prediction[0] # select only inference output
bs = prediction.shape[0] # batch size bs = prediction.shape[0] # batch size
nc = prediction.shape[2] - 5 # number of classes nc = prediction.shape[2] - nm - 5 # number of classes
xc = prediction[..., 4] > conf_thres # candidates xc = prediction[..., 4] > conf_thres # candidates
# Checks # Checks
@ -828,13 +855,14 @@ def non_max_suppression(prediction,
# min_wh = 2 # (pixels) minimum box width and height # min_wh = 2 # (pixels) minimum box width and height
max_wh = 7680 # (pixels) maximum box width and height max_wh = 7680 # (pixels) maximum box width and height
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
time_limit = 0.3 + 0.03 * bs # seconds to quit after time_limit = 0.5 + 0.05 * bs # seconds to quit after
redundant = True # require redundant detections redundant = True # require redundant detections
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
merge = False # use merge-NMS merge = False # use merge-NMS
t = time.time() t = time.time()
output = [torch.zeros((0, 6), device=prediction.device)] * bs mi = 5 + nc # mask start index
output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
for xi, x in enumerate(prediction): # image index, image inference for xi, x in enumerate(prediction): # image index, image inference
# Apply constraints # Apply constraints
# x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
@ -843,7 +871,7 @@ def non_max_suppression(prediction,
# Cat apriori labels if autolabelling # Cat apriori labels if autolabelling
if labels and len(labels[xi]): if labels and len(labels[xi]):
lb = labels[xi] lb = labels[xi]
v = torch.zeros((len(lb), nc + 5), device=x.device) v = torch.zeros((len(lb), nc + nm + 5), device=x.device)
v[:, :4] = lb[:, 1:5] # box v[:, :4] = lb[:, 1:5] # box
v[:, 4] = 1.0 # conf v[:, 4] = 1.0 # conf
v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls
@ -856,16 +884,17 @@ def non_max_suppression(prediction,
# Compute conf # Compute conf
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
# Box (center x, center y, width, height) to (x1, y1, x2, y2) # Box/Mask
box = xywh2xyxy(x[:, :4]) box = xywh2xyxy(x[:, :4]) # center_x, center_y, width, height) to (x1, y1, x2, y2)
mask = x[:, mi:] # zero columns if no masks
# Detections matrix nx6 (xyxy, conf, cls) # Detections matrix nx6 (xyxy, conf, cls)
if multi_label: if multi_label:
i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T i, j = (x[:, 5:mi] > conf_thres).nonzero(as_tuple=False).T
x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) x = torch.cat((box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1)
else: # best class only else: # best class only
conf, j = x[:, 5:].max(1, keepdim=True) conf, j = x[:, 5:mi].max(1, keepdim=True)
x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
# Filter by class # Filter by class
if classes is not None: if classes is not None:
@ -881,6 +910,8 @@ def non_max_suppression(prediction,
continue continue
elif n > max_nms: # excess boxes elif n > max_nms: # excess boxes
x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
else:
x = x[x[:, 4].argsort(descending=True)] # sort by confidence
# Batched NMS # Batched NMS
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
@ -898,7 +929,7 @@ def non_max_suppression(prediction,
output[xi] = x[i] output[xi] = x[i]
if (time.time() - t) > time_limit: if (time.time() - t) > time_limit:
LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded') LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
break # time limit exceeded break # time limit exceeded
return output return output
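The new `nm` argument reserves trailing mask columns so the same NMS routine also serves the segmentation heads; detection callers are unaffected since `nm` defaults to 0. A hedged sketch with dummy tensors (random values, 80 classes) just to show the output shapes:

```python
# Sketch: the updated NMS on dummy detection and segmentation outputs
import torch
from utils.general import non_max_suppression

pred = torch.rand(1, 1000, 85)        # detection head: 4 box + 1 obj + 80 classes
det = non_max_suppression(pred, 0.25, 0.45, max_det=300)
print(det[0].shape)                   # (n, 6): xyxy, conf, cls

pred_seg = torch.rand(1, 1000, 117)   # seg head: 85 + 32 mask coefficients
seg = non_max_suppression(pred_seg, 0.25, 0.45, nm=32)
print(seg[0].shape)                   # (n, 38): xyxy, conf, cls, 32 coefficients
```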
@ -975,7 +1006,7 @@ def apply_classifier(x, model, img, im0):
d[:, :4] = xywh2xyxy(b).long() d[:, :4] = xywh2xyxy(b).long()
# Rescale boxes from img_size to im0 size # Rescale boxes from img_size to im0 size
scale_coords(img.shape[2:], d[:, :4], im0[i].shape) scale_boxes(img.shape[2:], d[:, :4], im0[i].shape)
# Classes # Classes
pred_cls1 = d[:, 5].long() pred_cls1 = d[:, 5].long()


@ -11,13 +11,13 @@ import pkg_resources as pkg
import torch import torch
from torch.utils.tensorboard import SummaryWriter from torch.utils.tensorboard import SummaryWriter
from utils.general import colorstr, cv2 from utils.general import LOGGER, colorstr, cv2
from utils.loggers.clearml.clearml_utils import ClearmlLogger from utils.loggers.clearml.clearml_utils import ClearmlLogger
from utils.loggers.wandb.wandb_utils import WandbLogger from utils.loggers.wandb.wandb_utils import WandbLogger
from utils.plots import plot_images, plot_labels, plot_results from utils.plots import plot_images, plot_labels, plot_results
from utils.torch_utils import de_parallel from utils.torch_utils import de_parallel
LOGGERS = ('csv', 'tb', 'wandb', 'clearml') # *.csv, TensorBoard, Weights & Biases, ClearML LOGGERS = ('csv', 'tb', 'wandb', 'clearml', 'comet') # *.csv, TensorBoard, Weights & Biases, ClearML
RANK = int(os.getenv('RANK', -1)) RANK = int(os.getenv('RANK', -1))
try: try:
@ -41,6 +41,18 @@ try:
except (ImportError, AssertionError): except (ImportError, AssertionError):
clearml = None clearml = None
try:
if RANK not in [0, -1]:
comet_ml = None
else:
import comet_ml
assert hasattr(comet_ml, '__version__') # verify package import not local dir
from utils.loggers.comet import CometLogger
except (ModuleNotFoundError, ImportError, AssertionError):
comet_ml = None
class Loggers(): class Loggers():
# YOLOv5 Loggers class # YOLOv5 Loggers class
@ -80,7 +92,10 @@ class Loggers():
prefix = colorstr('ClearML: ') prefix = colorstr('ClearML: ')
s = f"{prefix}run 'pip install clearml' to automatically track, visualize and remotely train YOLOv5 🚀 in ClearML" s = f"{prefix}run 'pip install clearml' to automatically track, visualize and remotely train YOLOv5 🚀 in ClearML"
self.logger.info(s) self.logger.info(s)
if not comet_ml:
prefix = colorstr('Comet: ')
s = f"{prefix}run 'pip install comet_ml' to automatically track and visualize YOLOv5 🚀 runs in Comet"
self.logger.info(s)
# TensorBoard # TensorBoard
s = self.save_dir s = self.save_dir
if 'tb' in self.include and not self.opt.evolve: if 'tb' in self.include and not self.opt.evolve:
@ -107,6 +122,18 @@ class Loggers():
else: else:
self.clearml = None self.clearml = None
# Comet
if comet_ml and 'comet' in self.include:
if isinstance(self.opt.resume, str) and self.opt.resume.startswith("comet://"):
run_id = self.opt.resume.split("/")[-1]
self.comet_logger = CometLogger(self.opt, self.hyp, run_id=run_id)
else:
self.comet_logger = CometLogger(self.opt, self.hyp)
else:
self.comet_logger = None
@property @property
def remote_dataset(self): def remote_dataset(self):
# Get data_dict if custom dataset artifact link is provided # Get data_dict if custom dataset artifact link is provided
@ -115,12 +142,18 @@ class Loggers():
data_dict = self.clearml.data_dict data_dict = self.clearml.data_dict
if self.wandb: if self.wandb:
data_dict = self.wandb.data_dict data_dict = self.wandb.data_dict
if self.comet_logger:
data_dict = self.comet_logger.data_dict
return data_dict return data_dict
def on_train_start(self): def on_train_start(self):
# Callback runs on train start if self.comet_logger:
pass self.comet_logger.on_train_start()
def on_pretrain_routine_start(self):
if self.comet_logger:
self.comet_logger.on_pretrain_routine_start()
def on_pretrain_routine_end(self, labels, names): def on_pretrain_routine_end(self, labels, names):
# Callback runs on pre-train routine end # Callback runs on pre-train routine end
@ -131,8 +164,11 @@ class Loggers():
self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]}) self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]})
# if self.clearml: # if self.clearml:
# pass # ClearML saves these images automatically using hooks # pass # ClearML saves these images automatically using hooks
if self.comet_logger:
self.comet_logger.on_pretrain_routine_end(paths)
def on_train_batch_end(self, model, ni, imgs, targets, paths): def on_train_batch_end(self, model, ni, imgs, targets, paths, vals):
log_dict = dict(zip(self.keys[0:3], vals))
# Callback runs on train batch end # Callback runs on train batch end
# ni: number integrated batches (since train start) # ni: number integrated batches (since train start)
if self.plots: if self.plots:
@ -148,11 +184,21 @@ class Loggers():
if self.clearml: if self.clearml:
self.clearml.log_debug_samples(files, title='Mosaics') self.clearml.log_debug_samples(files, title='Mosaics')
if self.comet_logger:
self.comet_logger.on_train_batch_end(log_dict, step=ni)
def on_train_epoch_end(self, epoch): def on_train_epoch_end(self, epoch):
# Callback runs on train epoch end # Callback runs on train epoch end
if self.wandb: if self.wandb:
self.wandb.current_epoch = epoch + 1 self.wandb.current_epoch = epoch + 1
if self.comet_logger:
self.comet_logger.on_train_epoch_end(epoch)
def on_val_start(self):
if self.comet_logger:
self.comet_logger.on_val_start()
def on_val_image_end(self, pred, predn, path, names, im): def on_val_image_end(self, pred, predn, path, names, im):
# Callback runs on val image end # Callback runs on val image end
if self.wandb: if self.wandb:
@ -160,7 +206,11 @@ class Loggers():
if self.clearml: if self.clearml:
self.clearml.log_image_with_boxes(path, pred, names, im) self.clearml.log_image_with_boxes(path, pred, names, im)
def on_val_end(self): def on_val_batch_end(self, batch_i, im, targets, paths, shapes, out):
if self.comet_logger:
self.comet_logger.on_val_batch_end(batch_i, im, targets, paths, shapes, out)
def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
# Callback runs on val end # Callback runs on val end
if self.wandb or self.clearml: if self.wandb or self.clearml:
files = sorted(self.save_dir.glob('val*.jpg')) files = sorted(self.save_dir.glob('val*.jpg'))
@ -169,6 +219,9 @@ class Loggers():
if self.clearml: if self.clearml:
self.clearml.log_debug_samples(files, title='Validation') self.clearml.log_debug_samples(files, title='Validation')
if self.comet_logger:
self.comet_logger.on_val_end(nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix)
def on_fit_epoch_end(self, vals, epoch, best_fitness, fi): def on_fit_epoch_end(self, vals, epoch, best_fitness, fi):
# Callback runs at the end of each fit (train+val) epoch # Callback runs at the end of each fit (train+val) epoch
x = dict(zip(self.keys, vals)) x = dict(zip(self.keys, vals))
@ -199,6 +252,9 @@ class Loggers():
self.clearml.current_epoch_logged_images = set() # reset epoch image limit self.clearml.current_epoch_logged_images = set() # reset epoch image limit
self.clearml.current_epoch += 1 self.clearml.current_epoch += 1
if self.comet_logger:
self.comet_logger.on_fit_epoch_end(x, epoch=epoch)
def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
# Callback runs on model save event # Callback runs on model save event
if (epoch + 1) % self.opt.save_period == 0 and not final_epoch and self.opt.save_period != -1: if (epoch + 1) % self.opt.save_period == 0 and not final_epoch and self.opt.save_period != -1:
@ -209,6 +265,9 @@ class Loggers():
model_name='Latest Model', model_name='Latest Model',
auto_delete_file=False) auto_delete_file=False)
if self.comet_logger:
self.comet_logger.on_model_save(last, epoch, final_epoch, best_fitness, fi)
def on_train_end(self, last, best, epoch, results): def on_train_end(self, last, best, epoch, results):
# Callback runs on training end, i.e. saving best model # Callback runs on training end, i.e. saving best model
if self.plots: if self.plots:
@ -237,10 +296,16 @@ class Loggers():
name='Best Model', name='Best Model',
auto_delete_file=False) auto_delete_file=False)
if self.comet_logger:
final_results = dict(zip(self.keys[3:10], results))
self.comet_logger.on_train_end(files, self.save_dir, last, best, epoch, final_results)
def on_params_update(self, params: dict): def on_params_update(self, params: dict):
# Update hyperparams or configs of the experiment # Update hyperparams or configs of the experiment
if self.wandb: if self.wandb:
self.wandb.wandb_run.config.update(params, allow_val_change=True) self.wandb.wandb_run.config.update(params, allow_val_change=True)
if self.comet_logger:
self.comet_logger.on_params_update(params)
class GenericLogger: class GenericLogger:
@ -328,7 +393,7 @@ def log_tensorboard_graph(tb, model, imgsz=(640, 640)):
warnings.simplefilter('ignore') # suppress jit trace warning warnings.simplefilter('ignore') # suppress jit trace warning
tb.add_graph(torch.jit.trace(de_parallel(model), im, strict=False), []) tb.add_graph(torch.jit.trace(de_parallel(model), im, strict=False), [])
except Exception as e: except Exception as e:
print(f'WARNING: TensorBoard graph visualization failure {e}') LOGGER.warning(f'WARNING ⚠️ TensorBoard graph visualization failure {e}')
def web_project_name(project): def web_project_name(project):


@ -11,6 +11,7 @@ from utils.plots import Annotator, colors
try: try:
import clearml import clearml
from clearml import Dataset, Task from clearml import Dataset, Task
assert hasattr(clearml, '__version__') # verify package import not local dir assert hasattr(clearml, '__version__') # verify package import not local dir
except (ImportError, AssertionError): except (ImportError, AssertionError):
clearml = None clearml = None


@ -0,0 +1,256 @@
<img src="https://cdn.comet.ml/img/notebook_logo.png">
# YOLOv5 with Comet
This guide will cover how to use YOLOv5 with [Comet](https://bit.ly/yolov5-readme-comet)
# About Comet
Comet builds tools that help data scientists, engineers, and team leaders accelerate and optimize machine learning and deep learning models.
Track and visualize model metrics in real time, save your hyperparameters, datasets, and model checkpoints, and visualize your model predictions with [Comet Custom Panels](https://bit.ly/yolov5-colab-comet-panels)!
Comet makes sure you never lose track of your work and makes it easy to share results and collaborate across teams of all sizes!
# Getting Started
## Install Comet
```shell
pip install comet_ml
```
## Configure Comet Credentials
There are two ways to configure Comet with YOLOv5.
You can either set your credentials through environment variables
**Environment Variables**
```shell
export COMET_API_KEY=<Your Comet API Key>
export COMET_PROJECT_NAME=<Your Comet Project Name> # This will default to 'yolov5'
```
Or create a `.comet.config` file in your working directory and set your credentials there.
**Comet Configuration File**
```
[comet]
api_key=<Your Comet API Key>
project_name=<Your Comet Project Name> # This will default to 'yolov5'
```
## Run the Training Script
```shell
# Train YOLOv5s on COCO128 for 5 epochs
python train.py --img 640 --batch 16 --epochs 5 --data coco128.yaml --weights yolov5s.pt
```
That's it! Comet will automatically log your hyperparameters, command line arguments, and training and validation metrics. You can visualize and analyze your runs in the Comet UI.
<img width="1920" alt="yolo-ui" src="https://user-images.githubusercontent.com/7529846/187608607-ff89c3d5-1b8b-4743-a974-9275301b0524.png">
# Try out an Example!
Check out an example of a [completed run here](https://www.comet.com/examples/comet-example-yolov5/a0e29e0e9b984e4a822db2a62d0cb357?experiment-tab=chart&showOutliers=true&smoothing=0&transformY=smoothing&xAxis=step&ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration)
Or better yet, try it out yourself in this Colab Notebook
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1RG0WOQyxlDlo5Km8GogJpIEJlg_5lyYO?usp=sharing)
# Log automatically
By default, Comet will log the following items
## Metrics
- Box Loss, Object Loss, Classification Loss for the training and validation data
- mAP_0.5, mAP_0.5:0.95 metrics for the validation data.
- Precision and Recall for the validation data
## Parameters
- Model Hyperparameters
- All parameters passed through the command line options
## Visualizations
- Confusion Matrix of the model predictions on the validation data
- Plots for the PR and F1 curves across all classes
- Correlogram of the Class Labels
# Configure Comet Logging
Comet can be configured to log additional data either through command line flags passed to the training script
or through environment variables.
```shell
export COMET_MODE=online # Set whether to run Comet in 'online' or 'offline' mode. Defaults to online
export COMET_MODEL_NAME=<your model name> #Set the name for the saved model. Defaults to yolov5
export COMET_LOG_CONFUSION_MATRIX=false # Set to disable logging a Comet Confusion Matrix. Defaults to true
export COMET_MAX_IMAGE_UPLOADS=<number of allowed images to upload to Comet> # Controls how many total image predictions to log to Comet. Defaults to 100.
export COMET_LOG_PER_CLASS_METRICS=true # Set to log evaluation metrics for each detected class at the end of training. Defaults to false
export COMET_DEFAULT_CHECKPOINT_FILENAME=<your checkpoint filename> # Set this if you would like to resume training from a different checkpoint. Defaults to 'last.pt'
export COMET_LOG_BATCH_LEVEL_METRICS=true # Set this if you would like to log training metrics at the batch level. Defaults to false.
export COMET_LOG_PREDICTIONS=true # Set this to false to disable logging model predictions
```
## Logging Checkpoints with Comet
Logging Models to Comet is disabled by default. To enable it, pass the `save-period` argument to the training script. This will save the
logged checkpoints to Comet based on the interval value provided by `save-period`
```shell
python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt \
--save-period 1
```
## Logging Model Predictions
By default, model predictions (images, ground truth labels and bounding boxes) will be logged to Comet.
You can control the frequency of logged predictions and the associated images by passing the `bbox_interval` command line argument. Predictions can be visualized using Comet's Object Detection Custom Panel. This frequency corresponds to every Nth batch of data per epoch. In the example below, we are logging every 2nd batch of data for each epoch.
**Note:** The YOLOv5 validation dataloader will default to a batch size of 32, so you will have to set the logging frequency accordingly.
Here is an [example project using the Panel](https://www.comet.com/examples/comet-example-yolov5?shareable=YcwMiJaZSXfcEXpGOHDD12vA1&ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration)
```shell
python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt \
--bbox_interval 2
```
### Controlling the number of Prediction Images logged to Comet
When logging predictions from YOLOv5, Comet will log the images associated with each set of predictions. By default a maximum of 100 validation images are logged. You can increase or decrease this number using the `COMET_MAX_IMAGE_UPLOADS` environment variable.
```shell
env COMET_MAX_IMAGE_UPLOADS=200 python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt \
--bbox_interval 1
```
### Logging Class Level Metrics
Use the `COMET_LOG_PER_CLASS_METRICS` environment variable to log mAP, precision, recall, f1 for each class.
```shell
env COMET_LOG_PER_CLASS_METRICS=true python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt
```
## Uploading a Dataset to Comet Artifacts
If you would like to store your data using [Comet Artifacts](https://www.comet.com/docs/v2/guides/data-management/using-artifacts/#learn-more?ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration), you can do so using the `upload_dataset` flag.
The dataset should be organized in the way described in the [YOLOv5 documentation](https://docs.ultralytics.com/tutorials/train-custom-datasets/#3-organize-directories). The dataset config `yaml` file must follow the same format as that of the `coco128.yaml` file.
```shell
python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt \
--upload_dataset
```
You can find the uploaded dataset in the Artifacts tab in your Comet Workspace
<img width="1073" alt="artifact-1" src="https://user-images.githubusercontent.com/7529846/186929193-162718bf-ec7b-4eb9-8c3b-86b3763ef8ea.png">
You can preview the data directly in the Comet UI.
<img width="1082" alt="artifact-2" src="https://user-images.githubusercontent.com/7529846/186929215-432c36a9-c109-4eb0-944b-84c2786590d6.png">
Artifacts are versioned and also support adding metadata about the dataset. Comet will automatically log the metadata from your dataset `yaml` file
<img width="963" alt="artifact-3" src="https://user-images.githubusercontent.com/7529846/186929256-9d44d6eb-1a19-42de-889a-bcbca3018f2e.png">
### Using a saved Artifact
If you would like to use a dataset from Comet Artifacts, set the `path` variable in your dataset `yaml` file to point to the following Artifact resource URL.
```
# contents of artifact.yaml file
path: "comet://<workspace name>/<artifact name>:<artifact version or alias>"
```
Then pass this file to your training script in the following way
```shell
python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data artifact.yaml \
--weights yolov5s.pt
```
Artifacts also allow you to track the lineage of data as it flows through your Experimentation workflow. Here you can see a graph that shows you all the experiments that have used your uploaded dataset.
<img width="1391" alt="artifact-4" src="https://user-images.githubusercontent.com/7529846/186929264-4c4014fa-fe51-4f3c-a5c5-f6d24649b1b4.png">
## Resuming a Training Run
If your training run is interrupted for any reason, e.g. disrupted internet connection, you can resume the run using the `resume` flag and the Comet Run Path.
The Run Path has the following format `comet://<your workspace name>/<your project name>/<experiment id>`.
This will restore the run to its state before the interruption, which includes restoring the model from a checkpoint, restoring all hyperparameters and training arguments, and downloading Comet dataset Artifacts if they were used in the original run. The resumed run will continue logging to the existing Experiment in the Comet UI.
```shell
python train.py \
--resume "comet://<your run path>"
```
## Hyperparameter Search with the Comet Optimizer
YOLOv5 is also integrated with Comet's Optimizer, making it simple to visualize hyperparameter sweeps in the Comet UI.
### Configuring an Optimizer Sweep
To configure the Comet Optimizer, you will have to create a JSON file with the information about the sweep. An example file has been provided in `utils/loggers/comet/optimizer_config.json`
```shell
python utils/loggers/comet/hpo.py \
--comet_optimizer_config "utils/loggers/comet/optimizer_config.json"
```
The `hpo.py` script accepts the same arguments as `train.py`. If you wish to pass additional arguments to your sweep, simply add them after the script.
```shell
python utils/loggers/comet/hpo.py \
--comet_optimizer_config "utils/loggers/comet/optimizer_config.json" \
--save-period 1 \
--bbox_interval 1
```
### Running a Sweep in Parallel
```shell
comet optimizer -j <set number of workers> utils/loggers/comet/hpo.py \
  "utils/loggers/comet/optimizer_config.json"
```
### Visualizing Results
Comet provides a number of ways to visualize the results of your sweep. Take a look at a [project with a completed sweep here](https://www.comet.com/examples/comet-example-yolov5/view/PrlArHGuuhDTKC1UuBmTtOSXD/panels?ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration)
<img width="1626" alt="hyperparameter-yolo" src="https://user-images.githubusercontent.com/7529846/186914869-7dc1de14-583f-4323-967b-c9a66a29e495.png">


@ -0,0 +1,501 @@
import glob
import json
import logging
import os
import sys
from pathlib import Path
logger = logging.getLogger(__name__)
FILE = Path(__file__).resolve()
ROOT = FILE.parents[3] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
try:
import comet_ml
# Project Configuration
config = comet_ml.config.get_config()
COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5")
except (ModuleNotFoundError, ImportError):
comet_ml = None
COMET_PROJECT_NAME = None
import PIL
import torch
import torchvision.transforms as T
import yaml
from utils.dataloaders import img2label_paths
from utils.general import check_dataset, scale_boxes, xywh2xyxy
from utils.metrics import box_iou
COMET_PREFIX = "comet://"
COMET_MODE = os.getenv("COMET_MODE", "online")
# Model Saving Settings
COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5")
# Dataset Artifact Settings
COMET_UPLOAD_DATASET = os.getenv("COMET_UPLOAD_DATASET", "false").lower() == "true"
# Evaluation Settings
COMET_LOG_CONFUSION_MATRIX = os.getenv("COMET_LOG_CONFUSION_MATRIX", "true").lower() == "true"
COMET_LOG_PREDICTIONS = os.getenv("COMET_LOG_PREDICTIONS", "true").lower() == "true"
COMET_MAX_IMAGE_UPLOADS = int(os.getenv("COMET_MAX_IMAGE_UPLOADS", 100))
# Confusion Matrix Settings
CONF_THRES = float(os.getenv("CONF_THRES", 0.001))
IOU_THRES = float(os.getenv("IOU_THRES", 0.6))
# Batch Logging Settings
COMET_LOG_BATCH_METRICS = os.getenv("COMET_LOG_BATCH_METRICS", "false").lower() == "true"
COMET_BATCH_LOGGING_INTERVAL = os.getenv("COMET_BATCH_LOGGING_INTERVAL", 1)
COMET_PREDICTION_LOGGING_INTERVAL = os.getenv("COMET_PREDICTION_LOGGING_INTERVAL", 1)
COMET_LOG_PER_CLASS_METRICS = os.getenv("COMET_LOG_PER_CLASS_METRICS", "false").lower() == "true"
RANK = int(os.getenv("RANK", -1))
to_pil = T.ToPILImage()
class CometLogger:
"""Log metrics, parameters, source code, models and much more
with Comet
"""
def __init__(self, opt, hyp, run_id=None, job_type="Training", **experiment_kwargs) -> None:
self.job_type = job_type
self.opt = opt
self.hyp = hyp
# Comet Flags
self.comet_mode = COMET_MODE
self.save_model = opt.save_period > -1
self.model_name = COMET_MODEL_NAME
# Batch Logging Settings
self.log_batch_metrics = COMET_LOG_BATCH_METRICS
self.comet_log_batch_interval = COMET_BATCH_LOGGING_INTERVAL
# Dataset Artifact Settings
self.upload_dataset = self.opt.upload_dataset if self.opt.upload_dataset else COMET_UPLOAD_DATASET
self.resume = self.opt.resume
# Default parameters to pass to Experiment objects
self.default_experiment_kwargs = {
"log_code": False,
"log_env_gpu": True,
"log_env_cpu": True,
"project_name": COMET_PROJECT_NAME,}
self.default_experiment_kwargs.update(experiment_kwargs)
self.experiment = self._get_experiment(self.comet_mode, run_id)
self.data_dict = self.check_dataset(self.opt.data)
self.class_names = self.data_dict["names"]
self.num_classes = self.data_dict["nc"]
self.logged_images_count = 0
self.max_images = COMET_MAX_IMAGE_UPLOADS
if run_id is None:
self.experiment.log_other("Created from", "YOLOv5")
if not isinstance(self.experiment, comet_ml.OfflineExperiment):
workspace, project_name, experiment_id = self.experiment.url.split("/")[-3:]
self.experiment.log_other(
"Run Path",
f"{workspace}/{project_name}/{experiment_id}",
)
self.log_parameters(vars(opt))
self.log_parameters(self.opt.hyp)
self.log_asset_data(
self.opt.hyp,
name="hyperparameters.json",
metadata={"type": "hyp-config-file"},
)
self.log_asset(
f"{self.opt.save_dir}/opt.yaml",
metadata={"type": "opt-config-file"},
)
self.comet_log_confusion_matrix = COMET_LOG_CONFUSION_MATRIX
if hasattr(self.opt, "conf_thres"):
self.conf_thres = self.opt.conf_thres
else:
self.conf_thres = CONF_THRES
if hasattr(self.opt, "iou_thres"):
self.iou_thres = self.opt.iou_thres
else:
self.iou_thres = IOU_THRES
self.log_parameters({"val_iou_threshold": self.iou_thres, "val_conf_threshold": self.conf_thres})
self.comet_log_predictions = COMET_LOG_PREDICTIONS
if self.opt.bbox_interval == -1:
self.comet_log_prediction_interval = 1 if self.opt.epochs < 10 else self.opt.epochs // 10
else:
self.comet_log_prediction_interval = self.opt.bbox_interval
if self.comet_log_predictions:
self.metadata_dict = {}
self.logged_image_names = []
self.comet_log_per_class_metrics = COMET_LOG_PER_CLASS_METRICS
self.experiment.log_others({
"comet_mode": COMET_MODE,
"comet_max_image_uploads": COMET_MAX_IMAGE_UPLOADS,
"comet_log_per_class_metrics": COMET_LOG_PER_CLASS_METRICS,
"comet_log_batch_metrics": COMET_LOG_BATCH_METRICS,
"comet_log_confusion_matrix": COMET_LOG_CONFUSION_MATRIX,
"comet_model_name": COMET_MODEL_NAME,})
# Check if running the Experiment with the Comet Optimizer
if hasattr(self.opt, "comet_optimizer_id"):
self.experiment.log_other("optimizer_id", self.opt.comet_optimizer_id)
self.experiment.log_other("optimizer_objective", self.opt.comet_optimizer_objective)
self.experiment.log_other("optimizer_metric", self.opt.comet_optimizer_metric)
self.experiment.log_other("optimizer_parameters", json.dumps(self.hyp))
def _get_experiment(self, mode, experiment_id=None):
if mode == "offline":
if experiment_id is not None:
return comet_ml.ExistingOfflineExperiment(
previous_experiment=experiment_id,
**self.default_experiment_kwargs,
)
return comet_ml.OfflineExperiment(**self.default_experiment_kwargs,)
else:
try:
if experiment_id is not None:
return comet_ml.ExistingExperiment(
previous_experiment=experiment_id,
**self.default_experiment_kwargs,
)
return comet_ml.Experiment(**self.default_experiment_kwargs)
except ValueError:
logger.warning("COMET WARNING: "
"Comet credentials have not been set. "
"Comet will default to offline logging. "
"Please set your credentials to enable online logging.")
return self._get_experiment("offline", experiment_id)
return
def log_metrics(self, log_dict, **kwargs):
self.experiment.log_metrics(log_dict, **kwargs)
def log_parameters(self, log_dict, **kwargs):
self.experiment.log_parameters(log_dict, **kwargs)
def log_asset(self, asset_path, **kwargs):
self.experiment.log_asset(asset_path, **kwargs)
def log_asset_data(self, asset, **kwargs):
self.experiment.log_asset_data(asset, **kwargs)
def log_image(self, img, **kwargs):
self.experiment.log_image(img, **kwargs)
def log_model(self, path, opt, epoch, fitness_score, best_model=False):
if not self.save_model:
return
model_metadata = {
"fitness_score": fitness_score[-1],
"epochs_trained": epoch + 1,
"save_period": opt.save_period,
"total_epochs": opt.epochs,}
model_files = glob.glob(f"{path}/*.pt")
for model_path in model_files:
name = Path(model_path).name
self.experiment.log_model(
self.model_name,
file_or_folder=model_path,
file_name=name,
metadata=model_metadata,
overwrite=True,
)
def check_dataset(self, data_file):
with open(data_file) as f:
data_config = yaml.safe_load(f)
if data_config['path'].startswith(COMET_PREFIX):
path = data_config['path'].replace(COMET_PREFIX, "")
data_dict = self.download_dataset_artifact(path)
return data_dict
self.log_asset(self.opt.data, metadata={"type": "data-config-file"})
return check_dataset(data_file)
def log_predictions(self, image, labelsn, path, shape, predn):
if self.logged_images_count >= self.max_images:
return
detections = predn[predn[:, 4] > self.conf_thres]
iou = box_iou(labelsn[:, 1:], detections[:, :4])
label_idx, detection_idx = torch.where(iou > self.iou_thres)  # rows index labels, columns index detections
if len(label_idx) == 0:
return
filtered_detections = detections[detection_idx]
filtered_labels = labelsn[label_idx]
image_id = path.split("/")[-1].split(".")[0]
image_name = f"{image_id}_curr_epoch_{self.experiment.curr_epoch}"
if image_name not in self.logged_image_names:
native_scale_image = PIL.Image.open(path)
self.log_image(native_scale_image, name=image_name)
self.logged_image_names.append(image_name)
metadata = []
for cls, *xyxy in filtered_labels.tolist():
metadata.append({
"label": f"{self.class_names[int(cls)]}-gt",
"score": 100,
"box": {
"x": xyxy[0],
"y": xyxy[1],
"x2": xyxy[2],
"y2": xyxy[3]},})
for *xyxy, conf, cls in filtered_detections.tolist():
metadata.append({
"label": f"{self.class_names[int(cls)]}",
"score": conf * 100,
"box": {
"x": xyxy[0],
"y": xyxy[1],
"x2": xyxy[2],
"y2": xyxy[3]},})
self.metadata_dict[image_name] = metadata
self.logged_images_count += 1
return
def preprocess_prediction(self, image, labels, shape, pred):
nl, _ = labels.shape[0], pred.shape[0]
# Predictions
if self.opt.single_cls:
pred[:, 5] = 0
predn = pred.clone()
scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1])
labelsn = None
if nl:
tbox = xywh2xyxy(labels[:, 1:5]) # target boxes
scale_boxes(image.shape[1:], tbox, shape[0], shape[1]) # native-space labels
labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels
scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1]) # native-space pred
return predn, labelsn
def add_assets_to_artifact(self, artifact, path, asset_path, split):
img_paths = sorted(glob.glob(f"{asset_path}/*"))
label_paths = img2label_paths(img_paths)
for image_file, label_file in zip(img_paths, label_paths):
image_logical_path, label_logical_path = map(lambda x: os.path.relpath(x, path), [image_file, label_file])
try:
artifact.add(image_file, logical_path=image_logical_path, metadata={"split": split})
artifact.add(label_file, logical_path=label_logical_path, metadata={"split": split})
except ValueError as e:
logger.error('COMET ERROR: Error adding file to Artifact. Skipping file.')
logger.error(f"COMET ERROR: {e}")
continue
return artifact
def upload_dataset_artifact(self):
dataset_name = self.data_dict.get("dataset_name", "yolov5-dataset")
path = str((ROOT / Path(self.data_dict["path"])).resolve())
metadata = self.data_dict.copy()
for key in ["train", "val", "test"]:
split_path = metadata.get(key)
if split_path is not None:
metadata[key] = split_path.replace(path, "")
artifact = comet_ml.Artifact(name=dataset_name, artifact_type="dataset", metadata=metadata)
for key in metadata.keys():
if key in ["train", "val", "test"]:
if isinstance(self.upload_dataset, str) and (key != self.upload_dataset):
continue
asset_path = self.data_dict.get(key)
if asset_path is not None:
artifact = self.add_assets_to_artifact(artifact, path, asset_path, key)
self.experiment.log_artifact(artifact)
return
def download_dataset_artifact(self, artifact_path):
logged_artifact = self.experiment.get_artifact(artifact_path)
artifact_save_dir = str(Path(self.opt.save_dir) / logged_artifact.name)
logged_artifact.download(artifact_save_dir)
metadata = logged_artifact.metadata
data_dict = metadata.copy()
data_dict["path"] = artifact_save_dir
data_dict["names"] = {int(k): v for k, v in metadata.get("names").items()}
data_dict = self.update_data_paths(data_dict)
return data_dict
def update_data_paths(self, data_dict):
path = data_dict.get("path", "")
for split in ["train", "val", "test"]:
if data_dict.get(split):
split_path = data_dict.get(split)
data_dict[split] = (f"{path}/{split_path}" if isinstance(split_path, str) else [
f"{path}/{x}" for x in split_path])
return data_dict
def on_pretrain_routine_end(self, paths):
if self.opt.resume:
return
for path in paths:
self.log_asset(str(path))
if self.upload_dataset:
if not self.resume:
self.upload_dataset_artifact()
return
def on_train_start(self):
self.log_parameters(self.hyp)
def on_train_epoch_start(self):
return
def on_train_epoch_end(self, epoch):
self.experiment.curr_epoch = epoch
return
def on_train_batch_start(self):
return
def on_train_batch_end(self, log_dict, step):
self.experiment.curr_step = step
if self.log_batch_metrics and (step % self.comet_log_batch_interval == 0):
self.log_metrics(log_dict, step=step)
return
def on_train_end(self, files, save_dir, last, best, epoch, results):
if self.comet_log_predictions:
curr_epoch = self.experiment.curr_epoch
self.experiment.log_asset_data(self.metadata_dict, "image-metadata.json", epoch=curr_epoch)
for f in files:
self.log_asset(f, metadata={"epoch": epoch})
self.log_asset(f"{save_dir}/results.csv", metadata={"epoch": epoch})
if not self.opt.evolve:
model_path = str(best if best.exists() else last)
name = Path(model_path).name
if self.save_model:
self.experiment.log_model(
self.model_name,
file_or_folder=model_path,
file_name=name,
overwrite=True,
)
# Check if running Experiment with Comet Optimizer
if hasattr(self.opt, 'comet_optimizer_id'):
metric = results.get(self.opt.comet_optimizer_metric)
self.experiment.log_other('optimizer_metric_value', metric)
self.finish_run()
def on_val_start(self):
return
def on_val_batch_start(self):
return
def on_val_batch_end(self, batch_i, images, targets, paths, shapes, outputs):
if not (self.comet_log_predictions and ((batch_i + 1) % self.comet_log_prediction_interval == 0)):
return
for si, pred in enumerate(outputs):
if len(pred) == 0:
continue
image = images[si]
labels = targets[targets[:, 0] == si, 1:]
shape = shapes[si]
path = paths[si]
predn, labelsn = self.preprocess_prediction(image, labels, shape, pred)
if labelsn is not None:
self.log_predictions(image, labelsn, path, shape, predn)
return
def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
if self.comet_log_per_class_metrics:
if self.num_classes > 1:
for i, c in enumerate(ap_class):
class_name = self.class_names[c]
self.experiment.log_metrics(
{
'mAP@.5': ap50[i],
'mAP@.5:.95': ap[i],
'precision': p[i],
'recall': r[i],
'f1': f1[i],
'true_positives': tp[i],
'false_positives': fp[i],
'support': nt[c]},
prefix=class_name)
if self.comet_log_confusion_matrix:
epoch = self.experiment.curr_epoch
class_names = list(self.class_names.values())
class_names.append("background")
num_classes = len(class_names)
self.experiment.log_confusion_matrix(
matrix=confusion_matrix.matrix,
max_categories=num_classes,
labels=class_names,
epoch=epoch,
column_label='Actual Category',
row_label='Predicted Category',
file_name=f"confusion-matrix-epoch-{epoch}.json",
)
def on_fit_epoch_end(self, result, epoch):
self.log_metrics(result, epoch=epoch)
def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1:
self.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
def on_params_update(self, params):
self.log_parameters(params)
def finish_run(self):
self.experiment.end()
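Editor's note, not part of the commit: every setting of the logger above is read from environment variables at import time, so a run can be reconfigured without touching this file. A minimal hedged sketch (the chosen values are illustrative assumptions, not defaults from this commit):

import os

os.environ["COMET_MODE"] = "offline"                # buffer the experiment locally instead of streaming it
os.environ["COMET_MAX_IMAGE_UPLOADS"] = "50"        # cap the number of logged prediction images
os.environ["COMET_LOG_PER_CLASS_METRICS"] = "true"  # per-class P/R/mAP on validation
# train.py can then be launched as usual; this module picks the values up when it is imported.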

View File

@ -0,0 +1,150 @@
import logging
import os
from urllib.parse import urlparse
try:
import comet_ml
except (ModuleNotFoundError, ImportError):
comet_ml = None
import yaml
logger = logging.getLogger(__name__)
COMET_PREFIX = "comet://"
COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5")
COMET_DEFAULT_CHECKPOINT_FILENAME = os.getenv("COMET_DEFAULT_CHECKPOINT_FILENAME", "last.pt")
def download_model_checkpoint(opt, experiment):
model_dir = f"{opt.project}/{experiment.name}"
os.makedirs(model_dir, exist_ok=True)
model_name = COMET_MODEL_NAME
model_asset_list = experiment.get_model_asset_list(model_name)
if len(model_asset_list) == 0:
logger.error(f"COMET ERROR: No checkpoints found for model name : {model_name}")
return
model_asset_list = sorted(
model_asset_list,
key=lambda x: x["step"],
reverse=True,
)
logged_checkpoint_map = {asset["fileName"]: asset["assetId"] for asset in model_asset_list}
resource_url = urlparse(opt.weights)
checkpoint_filename = resource_url.query
if checkpoint_filename:
asset_id = logged_checkpoint_map.get(checkpoint_filename)
else:
asset_id = logged_checkpoint_map.get(COMET_DEFAULT_CHECKPOINT_FILENAME)
checkpoint_filename = COMET_DEFAULT_CHECKPOINT_FILENAME
if asset_id is None:
logger.error(f"COMET ERROR: Checkpoint {checkpoint_filename} not found in the given Experiment")
return
try:
logger.info(f"COMET INFO: Downloading checkpoint {checkpoint_filename}")
asset_filename = checkpoint_filename
model_binary = experiment.get_asset(asset_id, return_type="binary", stream=False)
model_download_path = f"{model_dir}/{asset_filename}"
with open(model_download_path, "wb") as f:
f.write(model_binary)
opt.weights = model_download_path
except Exception as e:
logger.warning("COMET WARNING: Unable to download checkpoint from Comet")
logger.exception(e)
def set_opt_parameters(opt, experiment):
"""Update the opts Namespace with parameters
from Comet's ExistingExperiment when resuming a run
Args:
opt (argparse.Namespace): Namespace of command line options
experiment (comet_ml.APIExperiment): Comet API Experiment object
"""
asset_list = experiment.get_asset_list()
resume_string = opt.resume
for asset in asset_list:
if asset["fileName"] == "opt.yaml":
asset_id = asset["assetId"]
asset_binary = experiment.get_asset(asset_id, return_type="binary", stream=False)
opt_dict = yaml.safe_load(asset_binary)
for key, value in opt_dict.items():
setattr(opt, key, value)
opt.resume = resume_string
# Save hyperparameters to YAML file
# Necessary to pass checks in training script
save_dir = f"{opt.project}/{experiment.name}"
os.makedirs(save_dir, exist_ok=True)
hyp_yaml_path = f"{save_dir}/hyp.yaml"
with open(hyp_yaml_path, "w") as f:
yaml.dump(opt.hyp, f)
opt.hyp = hyp_yaml_path
def check_comet_weights(opt):
"""Downloads model weights from Comet and updates the
weights path to point to saved weights location
Args:
opt (argparse.Namespace): Command Line arguments passed
to YOLOv5 training script
Returns:
None/bool: Return True if weights are successfully downloaded
else return None
"""
if comet_ml is None:
return
if isinstance(opt.weights, str):
if opt.weights.startswith(COMET_PREFIX):
api = comet_ml.API()
resource = urlparse(opt.weights)
experiment_path = f"{resource.netloc}{resource.path}"
experiment = api.get(experiment_path)
download_model_checkpoint(opt, experiment)
return True
return None
def check_comet_resume(opt):
"""Restores run parameters to its original state based on the model checkpoint
and logged Experiment parameters.
Args:
opt (argparse.Namespace): Command Line arguments passed
to YOLOv5 training script
Returns:
None/bool: Return True if the run is restored successfully
else return None
"""
if comet_ml is None:
return
if isinstance(opt.resume, str):
if opt.resume.startswith(COMET_PREFIX):
api = comet_ml.API()
resource = urlparse(opt.resume)
experiment_path = f"{resource.netloc}{resource.path}"
experiment = api.get(experiment_path)
set_opt_parameters(opt, experiment)
download_model_checkpoint(opt, experiment)
return True
return None
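Editor's sketch (an assumption, not part of the commit) of exercising the comet:// weights handling above directly. The module path utils/loggers/comet/comet_utils.py and the run path are placeholders, and valid Comet API credentials are required:

import argparse

from utils.loggers.comet.comet_utils import check_comet_weights  # assumed module path

opt = argparse.Namespace(
    weights="comet://workspace/project/experiment_id?best.pt",  # placeholder run path
    project="runs/train",                                       # used to build the local download dir
)
if check_comet_weights(opt):
    print(f"checkpoint downloaded to {opt.weights}")  # opt.weights now points at the local file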

118
utils/loggers/comet/hpo.py Normal file
View File

@ -0,0 +1,118 @@
import argparse
import json
import logging
import os
import sys
from pathlib import Path
import comet_ml
logger = logging.getLogger(__name__)
FILE = Path(__file__).resolve()
ROOT = FILE.parents[3] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
from train import train
from utils.callbacks import Callbacks
from utils.general import increment_path
from utils.torch_utils import select_device
# Project Configuration
config = comet_ml.config.get_config()
COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5")
def get_args(known=False):
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
parser.add_argument('--epochs', type=int, default=300, help='total training epochs')
parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
parser.add_argument('--rect', action='store_true', help='rectangular training')
parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
parser.add_argument('--noval', action='store_true', help='only validate final epoch')
parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
parser.add_argument('--noplots', action='store_true', help='save no plot files')
parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
parser.add_argument('--name', default='exp', help='save to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--quad', action='store_true', help='quad dataloader')
parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
parser.add_argument('--seed', type=int, default=0, help='Global training seed')
parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
# Weights & Biases arguments
parser.add_argument('--entity', default=None, help='W&B: Entity')
parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
# Comet Arguments
parser.add_argument("--comet_optimizer_config", type=str, help="Comet: Path to a Comet Optimizer Config File.")
parser.add_argument("--comet_optimizer_id", type=str, help="Comet: ID of the Comet Optimizer sweep.")
parser.add_argument("--comet_optimizer_objective", type=str, help="Comet: Set to 'minimize' or 'maximize'.")
parser.add_argument("--comet_optimizer_metric", type=str, help="Comet: Metric to Optimize.")
parser.add_argument("--comet_optimizer_workers",
type=int,
default=1,
help="Comet: Number of Parallel Workers to use with the Comet Optimizer.")
return parser.parse_known_args()[0] if known else parser.parse_args()
def run(parameters, opt):
hyp_dict = {k: v for k, v in parameters.items() if k not in ["epochs", "batch_size"]}
opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve))
opt.batch_size = parameters.get("batch_size")
opt.epochs = parameters.get("epochs")
device = select_device(opt.device, batch_size=opt.batch_size)
train(hyp_dict, opt, device, callbacks=Callbacks())
if __name__ == "__main__":
opt = get_args(known=True)
opt.weights = str(opt.weights)
opt.cfg = str(opt.cfg)
opt.data = str(opt.data)
opt.project = str(opt.project)
optimizer_id = os.getenv("COMET_OPTIMIZER_ID")
if optimizer_id is None:
with open(opt.comet_optimizer_config) as f:
optimizer_config = json.load(f)
optimizer = comet_ml.Optimizer(optimizer_config)
else:
optimizer = comet_ml.Optimizer(optimizer_id)
opt.comet_optimizer_id = optimizer.id
status = optimizer.status()
opt.comet_optimizer_objective = status["spec"]["objective"]
opt.comet_optimizer_metric = status["spec"]["metric"]
logger.info("COMET INFO: Starting Hyperparameter Sweep")
for parameter in optimizer.get_parameters():
run(parameter["parameters"], opt)
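Editor's sketch of driving the script above: a sweep is created once from a config dict, and its id is what additional workers consume through the COMET_OPTIMIZER_ID environment variable. The config path is an assumption matching the next file in this commit:

import json

import comet_ml

with open("utils/loggers/comet/optimizer_config.json") as f:  # assumed path
    optimizer = comet_ml.Optimizer(json.load(f))
print("sweep id:", optimizer.id)  # export as COMET_OPTIMIZER_ID so extra workers can join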

View File

@ -0,0 +1,209 @@
{
"algorithm": "random",
"parameters": {
"anchor_t": {
"type": "discrete",
"values": [
2,
8
]
},
"batch_size": {
"type": "discrete",
"values": [
16,
32,
64
]
},
"box": {
"type": "discrete",
"values": [
0.02,
0.2
]
},
"cls": {
"type": "discrete",
"values": [
0.2
]
},
"cls_pw": {
"type": "discrete",
"values": [
0.5
]
},
"copy_paste": {
"type": "discrete",
"values": [
1
]
},
"degrees": {
"type": "discrete",
"values": [
0,
45
]
},
"epochs": {
"type": "discrete",
"values": [
5
]
},
"fl_gamma": {
"type": "discrete",
"values": [
0
]
},
"fliplr": {
"type": "discrete",
"values": [
0
]
},
"flipud": {
"type": "discrete",
"values": [
0
]
},
"hsv_h": {
"type": "discrete",
"values": [
0
]
},
"hsv_s": {
"type": "discrete",
"values": [
0
]
},
"hsv_v": {
"type": "discrete",
"values": [
0
]
},
"iou_t": {
"type": "discrete",
"values": [
0.7
]
},
"lr0": {
"type": "discrete",
"values": [
1e-05,
0.1
]
},
"lrf": {
"type": "discrete",
"values": [
0.01,
1
]
},
"mixup": {
"type": "discrete",
"values": [
1
]
},
"momentum": {
"type": "discrete",
"values": [
0.6
]
},
"mosaic": {
"type": "discrete",
"values": [
0
]
},
"obj": {
"type": "discrete",
"values": [
0.2
]
},
"obj_pw": {
"type": "discrete",
"values": [
0.5
]
},
"optimizer": {
"type": "categorical",
"values": [
"SGD",
"Adam",
"AdamW"
]
},
"perspective": {
"type": "discrete",
"values": [
0
]
},
"scale": {
"type": "discrete",
"values": [
0
]
},
"shear": {
"type": "discrete",
"values": [
0
]
},
"translate": {
"type": "discrete",
"values": [
0
]
},
"warmup_bias_lr": {
"type": "discrete",
"values": [
0,
0.2
]
},
"warmup_epochs": {
"type": "discrete",
"values": [
5
]
},
"warmup_momentum": {
"type": "discrete",
"values": [
0,
0.95
]
},
"weight_decay": {
"type": "discrete",
"values": [
0,
0.001
]
}
},
"spec": {
"maxCombo": 0,
"metric": "metrics/mAP_0.5",
"objective": "maximize"
},
"trials": 1
}
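Editor's note: this random-search config enumerates discrete values for each hyperparameter and asks the sweep to maximize metrics/mAP_0.5. A hedged sketch of attaching another worker to an already-created sweep (the id is a placeholder; hpo.py above reads it from COMET_OPTIMIZER_ID):

import os
import subprocess

os.environ["COMET_OPTIMIZER_ID"] = "replace-with-sweep-id"  # placeholder sweep id
# hypothetical invocation; assumes the repository root as the working directory
subprocess.run(["python", "utils/loggers/comet/hpo.py"], check=True)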

View File

@ -28,7 +28,7 @@ def smooth(y, f=0.05):
return np.convolve(yp, np.ones(nf) / nf, mode='valid')  # y-smoothed
-def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16):
+def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16, prefix=""):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
# Arguments
@ -83,10 +83,10 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names
names = [v for k, v in names.items() if k in unique_classes]  # list: only classes that have data
names = dict(enumerate(names))  # to dict
if plot:
-plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names)
-plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1')
-plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision')
-plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall')
+plot_pr_curve(px, py, ap, Path(save_dir) / f'{prefix}PR_curve.png', names)
+plot_mc_curve(px, f1, Path(save_dir) / f'{prefix}F1_curve.png', names, ylabel='F1')
+plot_mc_curve(px, p, Path(save_dir) / f'{prefix}P_curve.png', names, ylabel='Precision')
+plot_mc_curve(px, r, Path(save_dir) / f'{prefix}R_curve.png', names, ylabel='Recall')
i = smooth(f1.mean(0), 0.1).argmax()  # max F1 index
p, r, f1 = p[:, i], r[:, i], f1[:, i]
@ -170,12 +170,12 @@ class ConfusionMatrix:
if n and sum(j) == 1:
self.matrix[detection_classes[m1[j]], gc] += 1  # correct
else:
-self.matrix[self.nc, gc] += 1  # background FP
+self.matrix[self.nc, gc] += 1  # true background
if n:
for i, dc in enumerate(detection_classes):
if not any(m1 == i):
-self.matrix[dc, self.nc] += 1  # background FN
+self.matrix[dc, self.nc] += 1  # predicted background
def matrix(self):
return self.matrix
@ -186,7 +186,7 @@ class ConfusionMatrix:
# fn = self.matrix.sum(0) - tp  # false negatives (missed detections)
return tp[:-1], fp[:-1]  # remove background class
-@TryExcept('WARNING: ConfusionMatrix plot failure: ')
+@TryExcept('WARNING ⚠️ ConfusionMatrix plot failure: ')
def plot(self, normalize=True, save_dir='', names=()):
import seaborn as sn
@ -197,6 +197,7 @@ class ConfusionMatrix:
nc, nn = self.nc, len(names)  # number of classes, names
sn.set(font_scale=1.0 if nc < 50 else 0.8)  # for label size
labels = (0 < nn < 99) and (nn == nc)  # apply names to ticklabels
+ticklabels = (names + ['background']) if labels else "auto"
with warnings.catch_warnings():
warnings.simplefilter('ignore')  # suppress empty matrix RuntimeWarning: All-NaN slice encountered
sn.heatmap(array,
@ -208,8 +209,8 @@ class ConfusionMatrix:
fmt='.2f',
square=True,
vmin=0.0,
-xticklabels=names + ['background FP'] if labels else "auto",
-yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1))
+xticklabels=ticklabels,
+yticklabels=ticklabels).set_facecolor((1, 1, 1))
ax.set_ylabel('True')
ax.set_ylabel('Predicted')
ax.set_title('Confusion Matrix')

View File

@ -20,9 +20,10 @@ import torch
from PIL import Image, ImageDraw, ImageFont
from utils import TryExcept, threaded
-from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_coords, increment_path,
+from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_boxes, increment_path,
is_ascii, xywh2xyxy, xyxy2xywh)
from utils.metrics import fitness
+from utils.segment.general import scale_image
# Settings
RANK = int(os.getenv('RANK', -1))
@ -113,6 +114,52 @@ class Annotator:
thickness=tf,
lineType=cv2.LINE_AA)
def masks(self, masks, colors, im_gpu=None, alpha=0.5):
"""Plot masks at once.
Args:
masks (tensor): predicted masks on cuda, shape: [n, h, w]
colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n]
im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1]
alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque
"""
if self.pil:
# convert to numpy first
self.im = np.asarray(self.im).copy()
if im_gpu is None:
# Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...)
if len(masks) == 0:
return
if isinstance(masks, torch.Tensor):
masks = torch.as_tensor(masks, dtype=torch.uint8)
masks = masks.permute(1, 2, 0).contiguous()
masks = masks.cpu().numpy()
# masks = np.ascontiguousarray(masks.transpose(1, 2, 0))
masks = scale_image(masks.shape[:2], masks, self.im.shape)
masks = np.asarray(masks, dtype=np.float32)
colors = np.asarray(colors, dtype=np.float32) # shape(n,3)
s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together
masks = (masks @ colors).clip(0, 255) # (h,w,n) @ (n,3) = (h,w,3)
self.im[:] = masks * alpha + self.im * (1 - s * alpha)
else:
if len(masks) == 0:
self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0
colors = colors[:, None, None] # shape(n,1,1,3)
masks = masks.unsqueeze(3) # shape(n,h,w,1)
masks_color = masks * (colors * alpha) # shape(n,h,w,3)
inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1)
mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3)
im_gpu = im_gpu.flip(dims=[0]) # flip channel
im_gpu = im_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3)
im_gpu = im_gpu * inv_alph_masks[-1] + mcs
im_mask = (im_gpu * 255).byte().cpu().numpy()
self.im[:] = scale_image(im_gpu.shape, im_mask, self.im.shape)
if self.pil:
# convert im back to PIL and update draw
self.fromarray(self.im)
def rectangle(self, xy, fill=None, outline=None, width=1):
# Add rectangle to image (PIL-only)
self.draw.rectangle(xy, fill, outline, width)
@ -124,6 +171,11 @@ class Annotator:
xy[1] += 1 - h
self.draw.text(xy, text, fill=txt_color, font=self.font)
def fromarray(self, im):
# Update self.im from a numpy array
self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
self.draw = ImageDraw.Draw(self.im)
def result(self):
# Return annotated image as array
return np.asarray(self.im)
@ -152,7 +204,6 @@ def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detec
ax[i].axis('off')
LOGGER.info(f'Saving {f}... ({n}/{channels})')
-plt.title('Features')
plt.savefig(f, dpi=300, bbox_inches='tight')
plt.close()
np.save(str(f.with_suffix('.npy')), x[0].cpu().numpy())  # npy save
@ -180,26 +231,31 @@ def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
return filtfilt(b, a, data)  # forward-backward filter
-def output_to_target(output):
-# Convert model output to target format [batch_id, class_id, x, y, w, h, conf]
+def output_to_target(output, max_det=300):
+# Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting
targets = []
for i, o in enumerate(output):
-targets.extend([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf] for *box, conf, cls in o.cpu().numpy())
-return np.array(targets)
+box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1)
+j = torch.full((conf.shape[0], 1), i)
+targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1))
+return torch.cat(targets, 0).numpy()
@threaded
-def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=1920, max_subplots=16):
+def plot_images(images, targets, paths=None, fname='images.jpg', names=None):
# Plot image grid with labels
if isinstance(images, torch.Tensor):
images = images.cpu().float().numpy()
if isinstance(targets, torch.Tensor):
targets = targets.cpu().numpy()
-if np.max(images[0]) <= 1:
-images *= 255  # de-normalise (optional)
+max_size = 1920  # max image size
+max_subplots = 16  # max image subplots, i.e. 4x4
bs, _, h, w = images.shape  # batch size, _, height, width
bs = min(bs, max_subplots)  # limit plot images
ns = np.ceil(bs ** 0.5)  # number of subplots (square)
+if np.max(images[0]) <= 1:
+images *= 255  # de-normalise (optional)
# Build Image
mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
@ -364,7 +420,7 @@ def plot_labels(labels, names=(), save_dir=Path('')):
ax[0].set_ylabel('instances')
if 0 < len(names) < 30:
ax[0].set_xticks(range(len(names)))
-ax[0].set_xticklabels(names, rotation=90, fontsize=10)
+ax[0].set_xticklabels(list(names.values()), rotation=90, fontsize=10)
else:
ax[0].set_xlabel('classes')
sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
@ -509,7 +565,7 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False,
b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
xyxy = xywh2xyxy(b).long()
-clip_coords(xyxy, im.shape)
+clip_boxes(xyxy, im.shape)
crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
if save:
file.parent.mkdir(parents=True, exist_ok=True)  # make directory
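Editor's sketch (not part of the diff) of the new Annotator.masks() overlay above, run on synthetic data; it assumes the repository root is importable and that utils.plots exposes Annotator and the colors palette as in upstream YOLOv5:

import numpy as np
import torch

from utils.plots import Annotator, colors  # assumed imports

im = np.zeros((640, 640, 3), dtype=np.uint8)            # dummy BGR image
masks = torch.zeros((2, 640, 640))                      # two dummy instance masks
masks[0, 100:200, 100:200] = 1.0
masks[1, 300:400, 300:400] = 1.0
im_gpu = torch.zeros((3, 640, 640))                     # image tensor in [0, 1], CHW
annotator = Annotator(im, line_width=2)
annotator.masks(masks, colors=[colors(i, True) for i in range(2)], im_gpu=im_gpu, alpha=0.5)
out = annotator.result()                                # annotated HxWx3 array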

View File

View File

@ -0,0 +1,104 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Image augmentation functions
"""
import math
import random
import cv2
import numpy as np
from ..augmentations import box_candidates
from ..general import resample_segments, segment2box
def mixup(im, labels, segments, im2, labels2, segments2):
# Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
im = (im * r + im2 * (1 - r)).astype(np.uint8)
labels = np.concatenate((labels, labels2), 0)
segments = np.concatenate((segments, segments2), 0)
return im, labels, segments
def random_perspective(im,
targets=(),
segments=(),
degrees=10,
translate=.1,
scale=.1,
shear=10,
perspective=0.0,
border=(0, 0)):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
height = im.shape[0] + border[0] * 2 # shape(h,w,c)
width = im.shape[1] + border[1] * 2
# Center
C = np.eye(3)
C[0, 2] = -im.shape[1] / 2 # x translation (pixels)
C[1, 2] = -im.shape[0] / 2 # y translation (pixels)
# Perspective
P = np.eye(3)
P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
# Rotation and Scale
R = np.eye(3)
a = random.uniform(-degrees, degrees)
# a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
s = random.uniform(1 - scale, 1 + scale)
# s = 2 ** random.uniform(-scale, scale)
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
# Shear
S = np.eye(3)
S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
# Translation
T = np.eye(3)
T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels)
T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels)
# Combined rotation matrix
M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
if perspective:
im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
else: # affine
im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
# Visualize
# import matplotlib.pyplot as plt
# ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
# ax[0].imshow(im[:, :, ::-1]) # base
# ax[1].imshow(im2[:, :, ::-1]) # warped
# Transform label coordinates
n = len(targets)
new_segments = []
if n:
new = np.zeros((n, 4))
segments = resample_segments(segments) # upsample
for i, segment in enumerate(segments):
xy = np.ones((len(segment), 3))
xy[:, :2] = segment
xy = xy @ M.T # transform
xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine
# clip
new[i] = segment2box(xy, width, height)
new_segments.append(xy)
# filter candidates
i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01)
targets = targets[i]
targets[:, 1:5] = new[i]
new_segments = np.array(new_segments)[i]
return im, targets, new_segments
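Editor's sketch (assumes the repository root on sys.path plus numpy and OpenCV): one synthetic box with a matching square segment pushed through random_perspective() above, to show that the class column, boxes and segments stay aligned:

import numpy as np

from utils.segment.augmentations import random_perspective

im = np.full((640, 640, 3), 114, dtype=np.uint8)                        # grey canvas
targets = np.array([[0, 100, 100, 300, 300]], dtype=np.float32)         # [cls, x1, y1, x2, y2]
segments = [np.array([[100, 100], [300, 100], [300, 300], [100, 300]], dtype=np.float32)]
im_out, targets_out, segments_out = random_perspective(im, targets, segments, degrees=10, scale=0.1)
print(im_out.shape, targets_out.shape, segments_out.shape)              # boxes are rebuilt from the warped segments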

View File

@ -0,0 +1,330 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Dataloaders
"""
import os
import random
import cv2
import numpy as np
import torch
from torch.utils.data import DataLoader, distributed
from ..augmentations import augment_hsv, copy_paste, letterbox
from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker
from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn
from ..torch_utils import torch_distributed_zero_first
from .augmentations import mixup, random_perspective
RANK = int(os.getenv('RANK', -1))
def create_dataloader(path,
imgsz,
batch_size,
stride,
single_cls=False,
hyp=None,
augment=False,
cache=False,
pad=0.0,
rect=False,
rank=-1,
workers=8,
image_weights=False,
quad=False,
prefix='',
shuffle=False,
mask_downsample_ratio=1,
overlap_mask=False):
if rect and shuffle:
LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
shuffle = False
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
dataset = LoadImagesAndLabelsAndMasks(
path,
imgsz,
batch_size,
augment=augment, # augmentation
hyp=hyp, # hyperparameters
rect=rect, # rectangular batches
cache_images=cache,
single_cls=single_cls,
stride=int(stride),
pad=pad,
image_weights=image_weights,
prefix=prefix,
downsample_ratio=mask_downsample_ratio,
overlap=overlap_mask)
batch_size = min(batch_size, len(dataset))
nd = torch.cuda.device_count() # number of CUDA devices
nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers
sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates
generator = torch.Generator()
generator.manual_seed(6148914691236517205 + RANK)
return loader(
dataset,
batch_size=batch_size,
shuffle=shuffle and sampler is None,
num_workers=nw,
sampler=sampler,
pin_memory=True,
collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn,
worker_init_fn=seed_worker,
generator=generator,
), dataset
class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing
def __init__(
self,
path,
img_size=640,
batch_size=16,
augment=False,
hyp=None,
rect=False,
image_weights=False,
cache_images=False,
single_cls=False,
stride=32,
pad=0,
prefix="",
downsample_ratio=1,
overlap=False,
):
super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls,
stride, pad, prefix)
self.downsample_ratio = downsample_ratio
self.overlap = overlap
def __getitem__(self, index):
index = self.indices[index] # linear, shuffled, or image_weights
hyp = self.hyp
mosaic = self.mosaic and random.random() < hyp['mosaic']
masks = []
if mosaic:
# Load mosaic
img, labels, segments = self.load_mosaic(index)
shapes = None
# MixUp augmentation
if random.random() < hyp["mixup"]:
img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1)))
else:
# Load image
img, (h0, w0), (h, w) = self.load_image(index)
# Letterbox
shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
labels = self.labels[index].copy()
# [array, array, ....], array.shape=(num_points, 2), xyxyxyxy
segments = self.segments[index].copy()
if len(segments):
for i_s in range(len(segments)):
segments[i_s] = xyn2xy(
segments[i_s],
ratio[0] * w,
ratio[1] * h,
padw=pad[0],
padh=pad[1],
)
if labels.size: # normalized xywh to pixel xyxy format
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])
if self.augment:
img, labels, segments = random_perspective(img,
labels,
segments=segments,
degrees=hyp["degrees"],
translate=hyp["translate"],
scale=hyp["scale"],
shear=hyp["shear"],
perspective=hyp["perspective"])
nl = len(labels) # number of labels
if nl:
labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3)
if self.overlap:
masks, sorted_idx = polygons2masks_overlap(img.shape[:2],
segments,
downsample_ratio=self.downsample_ratio)
masks = masks[None] # (640, 640) -> (1, 640, 640)
labels = labels[sorted_idx]
else:
masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio)
masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] //
self.downsample_ratio, img.shape[1] //
self.downsample_ratio))
# TODO: albumentations support
if self.augment:
# Albumentations
# some augmentations do not change boxes or masks, so they are applied as-is for now
img, labels = self.albumentations(img, labels)
nl = len(labels) # update after albumentations
# HSV color-space
augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"])
# Flip up-down
if random.random() < hyp["flipud"]:
img = np.flipud(img)
if nl:
labels[:, 2] = 1 - labels[:, 2]
masks = torch.flip(masks, dims=[1])
# Flip left-right
if random.random() < hyp["fliplr"]:
img = np.fliplr(img)
if nl:
labels[:, 1] = 1 - labels[:, 1]
masks = torch.flip(masks, dims=[2])
# Cutouts # labels = cutout(img, labels, p=0.5)
labels_out = torch.zeros((nl, 6))
if nl:
labels_out[:, 1:] = torch.from_numpy(labels)
# Convert
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks)
def load_mosaic(self, index):
# YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
labels4, segments4 = [], []
s = self.img_size
yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y
# 3 additional image indices
indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices
for i, index in enumerate(indices):
# Load image
img, _, (h, w) = self.load_image(index)
# place img in img4
if i == 0: # top left
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
padw = x1a - x1b
padh = y1a - y1b
labels, segments = self.labels[index].copy(), self.segments[index].copy()
if labels.size:
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format
segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
labels4.append(labels)
segments4.extend(segments)
# Concat/clip labels
labels4 = np.concatenate(labels4, 0)
for x in (labels4[:, 1:], *segments4):
np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
# img4, labels4 = replicate(img4, labels4) # replicate
# Augment
img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"])
img4, labels4, segments4 = random_perspective(img4,
labels4,
segments4,
degrees=self.hyp["degrees"],
translate=self.hyp["translate"],
scale=self.hyp["scale"],
shear=self.hyp["shear"],
perspective=self.hyp["perspective"],
border=self.mosaic_border) # border to remove
return img4, labels4, segments4
@staticmethod
def collate_fn(batch):
img, label, path, shapes, masks = zip(*batch) # transposed
batched_masks = torch.cat(masks, 0)
for i, l in enumerate(label):
l[:, 0] = i # add target image index for build_targets()
return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks
def polygon2mask(img_size, polygons, color=1, downsample_ratio=1):
"""
Args:
img_size (tuple): The image size.
polygons (np.ndarray): [N, M], N is the number of polygons,
M is the number of flattened xy coordinates (must be divisible by 2).
"""
mask = np.zeros(img_size, dtype=np.uint8)
polygons = np.asarray(polygons)
polygons = polygons.astype(np.int32)
shape = polygons.shape
polygons = polygons.reshape(shape[0], -1, 2)
cv2.fillPoly(mask, polygons, color=color)
nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio)
# NOTE: filling the polygon first and then resizing keeps the loss calculation
# consistent with the mask-ratio=1 case.
mask = cv2.resize(mask, (nw, nh))
return mask
def polygons2masks(img_size, polygons, color, downsample_ratio=1):
"""
Args:
img_size (tuple): The image size.
polygons (list[np.ndarray]): list of polygons; each polygon is an array of
flattened xy coordinates (length divisible by 2).
"""
masks = []
for si in range(len(polygons)):
mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio)
masks.append(mask)
return np.array(masks)
def polygons2masks_overlap(img_size, segments, downsample_ratio=1):
"""Return a (640, 640) overlap mask."""
masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio),
dtype=np.int32 if len(segments) > 255 else np.uint8)
areas = []
ms = []
for si in range(len(segments)):
mask = polygon2mask(
img_size,
[segments[si].reshape(-1)],
downsample_ratio=downsample_ratio,
color=1,
)
ms.append(mask)
areas.append(mask.sum())
areas = np.asarray(areas)
index = np.argsort(-areas)
ms = np.array(ms)[index]
for i in range(len(segments)):
mask = ms[i] * (i + 1)
masks = masks + mask
masks = np.clip(masks, a_min=0, a_max=i + 1)
return masks, index
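Editor's sketch of the rasterisation helpers above with a single synthetic polygon (assumes numpy, OpenCV and an importable repository root):

import numpy as np

from utils.segment.dataloaders import polygon2mask, polygons2masks_overlap

square = np.array([[100, 100], [300, 100], [300, 300], [100, 300]], dtype=np.float32)
mask = polygon2mask((640, 640), [square.reshape(-1)], color=1, downsample_ratio=4)
print(mask.shape, int(mask.sum()))          # (160, 160), roughly (200 / 4) ** 2 foreground pixels

masks, order = polygons2masks_overlap((640, 640), [square], downsample_ratio=4)
print(masks.shape, order)                   # single overlap mask plus the area sort order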

134
utils/segment/general.py Normal file
View File

@ -0,0 +1,134 @@
import cv2
import numpy as np
import torch
import torch.nn.functional as F
def crop_mask(masks, boxes):
"""
"Crop" predicted masks by zeroing out everything not in the predicted bbox.
Vectorized by Chong (thanks Chong).
Args:
- masks should be a size [n, h, w] tensor of masks
- boxes should be a size [n, 4] tensor of bbox coords in relative point form
"""
n, h, w = masks.shape
x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n)
r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # x coordinates, shape(1,1,w)
c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # y coordinates, shape(1,h,1)
return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
def process_mask_upsample(protos, masks_in, bboxes, shape):
"""
Crop after upsample.
protos: [mask_dim, mask_h, mask_w]
masks_in: [n, mask_dim], n is the number of masks after NMS
bboxes: [n, 4], n is the number of masks after NMS
shape: input image size, (h, w)
return: [n, h, w] binary masks
"""
c, mh, mw = protos.shape # CHW
masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW
masks = crop_mask(masks, bboxes) # CHW
return masks.gt_(0.5)
def process_mask(protos, masks_in, bboxes, shape, upsample=False):
"""
Crop before upsample.
protos: [mask_dim, mask_h, mask_w]
masks_in: [n, mask_dim], n is the number of masks after NMS
bboxes: [n, 4], n is the number of masks after NMS
shape: input image size, (h, w)
return: [n, h, w] binary masks
"""
c, mh, mw = protos.shape # CHW
ih, iw = shape
masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW
downsampled_bboxes = bboxes.clone()
downsampled_bboxes[:, 0] *= mw / iw
downsampled_bboxes[:, 2] *= mw / iw
downsampled_bboxes[:, 3] *= mh / ih
downsampled_bboxes[:, 1] *= mh / ih
masks = crop_mask(masks, downsampled_bboxes) # CHW
if upsample:
masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW
return masks.gt_(0.5)
def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
"""
im1_shape: model input shape, [h, w]
im0_shape: original image shape, [h, w, 3]
masks: [h, w, num]
"""
# Rescale coordinates (xyxy) from im1_shape to im0_shape
if ratio_pad is None: # calculate from im0_shape
gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new
pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding
else:
pad = ratio_pad[1]
top, left = int(pad[1]), int(pad[0]) # y, x
bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])
if len(masks.shape) < 2:
raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
masks = masks[top:bottom, left:right]
# masks = masks.permute(2, 0, 1).contiguous()
# masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0]
# masks = masks.permute(1, 2, 0).contiguous()
masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))
if len(masks.shape) == 2:
masks = masks[:, :, None]
return masks
def mask_iou(mask1, mask2, eps=1e-7):
"""
mask1: [N, n], N is the number of predicted objects
mask2: [M, n], M is the number of ground-truth objects
Note: n = image_w x image_h
return: mask IoU matrix, [N, M]
"""
intersection = torch.matmul(mask1, mask2.t()).clamp(0)
union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection # (area1 + area2) - intersection
return intersection / (union + eps)
def masks_iou(mask1, mask2, eps=1e-7):
"""
mask1: [N, n], N is the number of predicted objects
mask2: [N, n], N is the number of ground-truth objects (paired one-to-one with mask1)
Note: n = image_w x image_h
return: per-pair mask IoU, (N, )
"""
intersection = (mask1 * mask2).sum(1).clamp(0) # (N, )
union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection
return intersection / (union + eps)
def masks2segments(masks, strategy='largest'):
# Convert masks(n,160,160) into segments(n,xy)
segments = []
for x in masks.int().numpy().astype('uint8'):
c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
if strategy == 'concat': # concatenate all segments
c = np.concatenate([x.reshape(-1, 2) for x in c])
elif strategy == 'largest': # select largest segment
c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
segments.append(c.astype('float32'))
return segments
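Editor's sketch of process_mask() above with random prototypes and coefficients; shapes follow the docstrings, the values are meaningless, so the snippet only demonstrates the shape contract:

import torch

from utils.segment.general import process_mask

protos = torch.randn(32, 160, 160)                               # [mask_dim, mask_h, mask_w]
masks_in = torch.randn(5, 32)                                    # mask coefficients for 5 detections kept after NMS
bboxes = torch.tensor([[100., 100., 300., 300.]]).repeat(5, 1)   # xyxy in input-image pixels
masks = process_mask(protos, masks_in, bboxes, shape=(640, 640), upsample=True)
print(masks.shape)                                               # torch.Size([5, 640, 640]) binary masks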

186
utils/segment/loss.py Normal file
View File

@ -0,0 +1,186 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..general import xywh2xyxy
from ..loss import FocalLoss, smooth_BCE
from ..metrics import bbox_iou
from ..torch_utils import de_parallel
from .general import crop_mask
class ComputeLoss:
# Compute losses
def __init__(self, model, autobalance=False, overlap=False):
self.sort_obj_iou = False
self.overlap = overlap
device = next(model.parameters()).device # get model device
h = model.hyp # hyperparameters
self.device = device
# Define criteria
BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device))
BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))
# Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets
# Focal loss
g = h['fl_gamma'] # focal loss gamma
if g > 0:
BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
m = de_parallel(model).model[-1] # Detect() module
self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7
self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index
self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
self.na = m.na # number of anchors
self.nc = m.nc # number of classes
self.nl = m.nl # number of layers
self.nm = m.nm # number of masks
self.anchors = m.anchors
self.device = device
def __call__(self, preds, targets, masks):  # predictions, targets, masks
p, proto = preds
bs, nm, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width
lcls = torch.zeros(1, device=self.device)
lbox = torch.zeros(1, device=self.device)
lobj = torch.zeros(1, device=self.device)
lseg = torch.zeros(1, device=self.device)
tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets) # targets
# Losses
for i, pi in enumerate(p): # layer index, layer predictions
b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj
n = b.shape[0] # number of targets
if n:
pxy, pwh, _, pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1, self.nc, nm), 1) # subset of predictions
# Box regression
pxy = pxy.sigmoid() * 2 - 0.5
pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
pbox = torch.cat((pxy, pwh), 1) # predicted box
iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target)
lbox += (1.0 - iou).mean() # iou loss
# Objectness
iou = iou.detach().clamp(0).type(tobj.dtype)
if self.sort_obj_iou:
j = iou.argsort()
b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j]
if self.gr < 1:
iou = (1.0 - self.gr) + self.gr * iou
tobj[b, a, gj, gi] = iou # iou ratio
# Classification
if self.nc > 1: # cls loss (only if multiple classes)
t = torch.full_like(pcls, self.cn, device=self.device) # targets
t[range(n), tcls[i]] = self.cp
lcls += self.BCEcls(pcls, t) # BCE
# Mask regression
if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample
masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0]
marea = xywhn[i][:, 2:].prod(1) # mask width, height normalized
mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device))
for bi in b.unique():
j = b == bi # matching index
if self.overlap:
mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0)
else:
mask_gti = masks[tidxs[i]][j]
lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j])
obji = self.BCEobj(pi[..., 4], tobj)
lobj += obji * self.balance[i] # obj loss
if self.autobalance:
self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
if self.autobalance:
self.balance = [x / self.balance[self.ssi] for x in self.balance]
lbox *= self.hyp["box"]
lobj *= self.hyp["obj"]
lcls *= self.hyp["cls"]
lseg *= self.hyp["box"] / bs
loss = lbox + lobj + lcls + lseg
return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach()
def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
# Mask loss for one image
pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80)
loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")
return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean()
def build_targets(self, p, targets):
# Build targets for compute_loss(), input targets(image,class,x,y,w,h)
na, nt = self.na, targets.shape[0] # number of anchors, targets
tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], []
gain = torch.ones(8, device=self.device) # normalized to gridspace gain
ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
if self.overlap:
batch = p[0].shape[0]
ti = []
for i in range(batch):
num = (targets[:, 0] == i).sum() # find number of targets of each image
ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1) # (na, num)
ti = torch.cat(ti, 1) # (na, nt)
else:
ti = torch.arange(nt, device=self.device).float().view(1, nt).repeat(na, 1)
targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2) # append anchor indices
g = 0.5 # bias
off = torch.tensor(
[
[0, 0],
[1, 0],
[0, 1],
[-1, 0],
[0, -1], # j,k,l,m
# [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
],
device=self.device).float() * g # offsets
for i in range(self.nl):
anchors, shape = self.anchors[i], p[i].shape
gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain
# Match targets to anchors
t = targets * gain # shape(3,n,7)
if nt:
# Matches
r = t[..., 4:6] / anchors[:, None] # wh ratio
j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t'] # compare
# j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
t = t[j] # filter
# Offsets
gxy = t[:, 2:4] # grid xy
gxi = gain[[2, 3]] - gxy # inverse
j, k = ((gxy % 1 < g) & (gxy > 1)).T
l, m = ((gxi % 1 < g) & (gxi > 1)).T
j = torch.stack((torch.ones_like(j), j, k, l, m))
t = t.repeat((5, 1, 1))[j]
offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
else:
t = targets[0]
offsets = 0
# Define
bc, gxy, gwh, at = t.chunk(4, 1)  # (image, class), grid xy, grid wh, (anchor, target index)
(a, tidx), (b, c) = at.long().T, bc.long().T # anchors, image, class
gij = (gxy - offsets).long()
gi, gj = gij.T # grid indices
# Append
indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid
tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
anch.append(anchors[a]) # anchors
tcls.append(c) # class
tidxs.append(tidx)
xywhn.append(torch.cat((gxy, gwh), 1) / gain[2:6]) # xywh normalized
return tcls, tbox, indices, anch, tidxs, xywhn
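For reference, a small self-contained sketch of the "overlap" mask encoding assumed by `__call__` and `build_targets` above: each image carries a single (H, W) mask whose pixel values are 1-based instance indices, and per-instance binary masks are recovered with the same `torch.where` comparison used in the loss. The tensor values below are invented for illustration.
import torch

mask = torch.tensor([[0, 1, 1],
                     [0, 2, 2],
                     [0, 0, 2]]).float()   # one image, two instances encoded in a single mask
tidx = torch.tensor([1., 2.])              # 1-based target indices for this image (cf. tidxs)
per_instance = torch.where(mask[None] == tidx.view(-1, 1, 1), 1.0, 0.0)
print(per_instance.shape)                  # torch.Size([2, 3, 3]) -> one binary mask per instance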

210
utils/segment/metrics.py Normal file
View File

@ -0,0 +1,210 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Model validation metrics
"""
import numpy as np
from ..metrics import ap_per_class
def fitness(x):
# Model fitness as a weighted combination of metrics
w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9]
return (x[:, :8] * w).sum(1)
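A quick hedged example of the weighting: the row below is in the order [P(B), R(B), mAP@0.5(B), mAP@0.5:0.95(B), P(M), R(M), mAP@0.5(M), mAP@0.5:0.95(M)], and the values are invented.
import numpy as np

x = np.array([[0.80, 0.70, 0.65, 0.45, 0.75, 0.66, 0.60, 0.40]])  # invented metrics row
w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9]
print((x[:, :8] * w).sum(1))  # [0.89] -> mAP@0.5:0.95 dominates for both boxes and masks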
def ap_per_class_box_and_mask(
tp_m,
tp_b,
conf,
pred_cls,
target_cls,
plot=False,
save_dir=".",
names=(),
):
"""
Args:
tp_m: true positives for masks.
tp_b: true positives for boxes.
For the remaining arguments, see `ap_per_class`.
"""
results_boxes = ap_per_class(tp_b,
conf,
pred_cls,
target_cls,
plot=plot,
save_dir=save_dir,
names=names,
prefix="Box")[2:]
results_masks = ap_per_class(tp_m,
conf,
pred_cls,
target_cls,
plot=plot,
save_dir=save_dir,
names=names,
prefix="Mask")[2:]
results = {
"boxes": {
"p": results_boxes[0],
"r": results_boxes[1],
"ap": results_boxes[3],
"f1": results_boxes[2],
"ap_class": results_boxes[4]},
"masks": {
"p": results_masks[0],
"r": results_masks[1],
"ap": results_masks[3],
"f1": results_masks[2],
"ap_class": results_masks[4]}}
return results
class Metric:
def __init__(self) -> None:
self.p = [] # (nc, )
self.r = [] # (nc, )
self.f1 = [] # (nc, )
self.all_ap = [] # (nc, 10)
self.ap_class_index = [] # (nc, )
@property
def ap50(self):
"""AP@0.5 of all classes.
Return:
(nc, ) or [].
"""
return self.all_ap[:, 0] if len(self.all_ap) else []
@property
def ap(self):
"""AP@0.5:0.95
Return:
(nc, ) or [].
"""
return self.all_ap.mean(1) if len(self.all_ap) else []
@property
def mp(self):
"""mean precision of all classes.
Return:
float.
"""
return self.p.mean() if len(self.p) else 0.0
@property
def mr(self):
"""mean recall of all classes.
Return:
float.
"""
return self.r.mean() if len(self.r) else 0.0
@property
def map50(self):
"""Mean AP@0.5 of all classes.
Return:
float.
"""
return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0
@property
def map(self):
"""Mean AP@0.5:0.95 of all classes.
Return:
float.
"""
return self.all_ap.mean() if len(self.all_ap) else 0.0
def mean_results(self):
"""Mean of results, return mp, mr, map50, map"""
return (self.mp, self.mr, self.map50, self.map)
def class_result(self, i):
"""class-aware result, return p[i], r[i], ap50[i], ap[i]"""
return (self.p[i], self.r[i], self.ap50[i], self.ap[i])
def get_maps(self, nc):
maps = np.zeros(nc) + self.map
for i, c in enumerate(self.ap_class_index):
maps[c] = self.ap[i]
return maps
def update(self, results):
"""
Args:
results: tuple(p, r, ap, f1, ap_class)
"""
p, r, all_ap, f1, ap_class_index = results
self.p = p
self.r = r
self.all_ap = all_ap
self.f1 = f1
self.ap_class_index = ap_class_index
class Metrics:
"""Metric for boxes and masks."""
def __init__(self) -> None:
self.metric_box = Metric()
self.metric_mask = Metric()
def update(self, results):
"""
Args:
results: Dict{'boxes': Dict{}, 'masks': Dict{}}
"""
self.metric_box.update(list(results["boxes"].values()))
self.metric_mask.update(list(results["masks"].values()))
def mean_results(self):
return self.metric_box.mean_results() + self.metric_mask.mean_results()
def class_result(self, i):
return self.metric_box.class_result(i) + self.metric_mask.class_result(i)
def get_maps(self, nc):
return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc)
@property
def ap_class_index(self):
# boxes and masks have the same ap_class_index
return self.metric_box.ap_class_index
KEYS = [
"train/box_loss",
"train/seg_loss", # train loss
"train/obj_loss",
"train/cls_loss",
"metrics/precision(B)",
"metrics/recall(B)",
"metrics/mAP_0.5(B)",
"metrics/mAP_0.5:0.95(B)", # metrics
"metrics/precision(M)",
"metrics/recall(M)",
"metrics/mAP_0.5(M)",
"metrics/mAP_0.5:0.95(M)", # metrics
"val/box_loss",
"val/seg_loss", # val loss
"val/obj_loss",
"val/cls_loss",
"x/lr0",
"x/lr1",
"x/lr2",]
BEST_KEYS = [
"best/epoch",
"best/precision(B)",
"best/recall(B)",
"best/mAP_0.5(B)",
"best/mAP_0.5:0.95(B)",
"best/precision(M)",
"best/recall(M)",
"best/mAP_0.5(M)",
"best/mAP_0.5:0.95(M)",]

143
utils/segment/plots.py Normal file
View File

@ -0,0 +1,143 @@
import contextlib
import math
from pathlib import Path
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from .. import threaded
from ..general import xywh2xyxy
from ..plots import Annotator, colors
@threaded
def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None):
# Plot image grid with labels
if isinstance(images, torch.Tensor):
images = images.cpu().float().numpy()
if isinstance(targets, torch.Tensor):
targets = targets.cpu().numpy()
if isinstance(masks, torch.Tensor):
masks = masks.cpu().numpy().astype(int)
max_size = 1920 # max image size
max_subplots = 16 # max image subplots, i.e. 4x4
bs, _, h, w = images.shape # batch size, _, height, width
bs = min(bs, max_subplots) # limit plot images
ns = np.ceil(bs ** 0.5) # number of subplots (square)
if np.max(images[0]) <= 1:
images *= 255 # de-normalise (optional)
# Build Image
mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init
for i, im in enumerate(images):
if i == max_subplots: # if last batch has fewer images than we expect
break
x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin
im = im.transpose(1, 2, 0)
mosaic[y:y + h, x:x + w, :] = im
# Resize (optional)
scale = max_size / ns / max(h, w)
if scale < 1:
h = math.ceil(scale * h)
w = math.ceil(scale * w)
mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))
# Annotate
fs = int((h + w) * ns * 0.01) # font size
annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
for i in range(i + 1):
x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin
annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders
if paths:
annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames
if len(targets) > 0:
idx = targets[:, 0] == i
ti = targets[idx] # image targets
boxes = xywh2xyxy(ti[:, 2:6]).T
classes = ti[:, 1].astype('int')
labels = ti.shape[1] == 6 # labels if no conf column
conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred)
if boxes.shape[1]:
if boxes.max() <= 1.01: # if normalized with tolerance 0.01
boxes[[0, 2]] *= w # scale to pixels
boxes[[1, 3]] *= h
elif scale < 1: # absolute coords need scale if image scales
boxes *= scale
boxes[[0, 2]] += x
boxes[[1, 3]] += y
for j, box in enumerate(boxes.T.tolist()):
cls = classes[j]
color = colors(cls)
cls = names[cls] if names else cls
if labels or conf[j] > 0.25: # 0.25 conf thresh
label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}'
annotator.box_label(box, label, color=color)
# Plot masks
if len(masks):
if masks.max() > 1.0:  # overlap format: pixel values are 1-based instance indices
image_masks = masks[[i]] # (1, 640, 640)
nl = len(ti)
index = np.arange(nl).reshape(nl, 1, 1) + 1
image_masks = np.repeat(image_masks, nl, axis=0)
image_masks = np.where(image_masks == index, 1.0, 0.0)
else:
image_masks = masks[idx]
im = np.asarray(annotator.im).copy()
for j, box in enumerate(boxes.T.tolist()):
if labels or conf[j] > 0.25: # 0.25 conf thresh
color = colors(classes[j])
mh, mw = image_masks[j].shape
if mh != h or mw != w:
mask = image_masks[j].astype(np.uint8)
mask = cv2.resize(mask, (w, h))
mask = mask.astype(bool)
else:
mask = image_masks[j].astype(bool)
with contextlib.suppress(Exception):
im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6
annotator.fromarray(im)
annotator.im.save(fname) # save
def plot_results_with_masks(file="path/to/results.csv", dir="", best=True):
# Plot training results.csv. Usage: from utils.segment.plots import *; plot_results_with_masks('path/to/results.csv')
save_dir = Path(file).parent if file else Path(dir)
fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True)
ax = ax.ravel()
files = list(save_dir.glob("results*.csv"))
assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot."
for f in files:
try:
data = pd.read_csv(f)
index = np.argmax(0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] +
0.1 * data.values[:, 11])
s = [x.strip() for x in data.columns]
x = data.values[:, 0]
for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]):
y = data.values[:, j]
# y[y == 0] = np.nan # don't show zero values
ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2)
if best:
# best
ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3)
ax[i].set_title(s[j] + f"\n{round(y[index], 5)}")
else:
# last
ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3)
ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}")
# if j in [8, 9, 10]: # share train and val loss y axes
# ax[i].get_shared_y_axes().join(ax[i], ax[i - 5])
except Exception as e:
print(f"Warning: Plotting error for {f}: {e}")
ax[1].legend()
fig.savefig(save_dir / "results.png", dpi=200)
plt.close()
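A hedged usage sketch; the run directory below is an assumption, not a path this commit creates.
from utils.segment.plots import plot_results_with_masks

plot_results_with_masks(file="runs/train-seg/exp/results.csv", best=True)  # hypothetical path
# writes results.png next to the CSV, starring the epoch with the best weighted box/mask mAP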

utils/torch_utils.py
View File

@ -47,7 +47,7 @@ def smartCrossEntropyLoss(label_smoothing=0.0):
if check_version(torch.__version__, '1.10.0'):
return nn.CrossEntropyLoss(label_smoothing=label_smoothing)
if label_smoothing > 0:
LOGGER.warning(f'WARNING ⚠️ label smoothing {label_smoothing} requires torch>=1.10.0')
return nn.CrossEntropyLoss()
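A brief sketch of the behavior this hunk touches: with torch>=1.10.0 the smoothing argument is passed through; on older versions the function warns (now with the ⚠️ prefix) and falls back to plain CrossEntropyLoss. The tensors below are arbitrary.
import torch

criterion = smartCrossEntropyLoss(label_smoothing=0.1)            # function from this file
loss = criterion(torch.randn(4, 10), torch.randint(0, 10, (4,)))  # (logits, class indices)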
@ -251,6 +251,7 @@ def fuse_conv_and_bn(conv, bn):
kernel_size=conv.kernel_size,
stride=conv.stride,
padding=conv.padding,
dilation=conv.dilation,
groups=conv.groups,
bias=True).requires_grad_(False).to(conv.weight.device)
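The added dilation=conv.dilation matters because the fused Conv2d must reproduce the original conv's geometry; without it, fusing a dilated conv changes the output. A hedged sanity check (tensor sizes are arbitrary):
import torch
import torch.nn as nn

conv = nn.Conv2d(3, 8, 3, padding=2, dilation=2, bias=False).eval()  # dilated conv, the case this hunk fixes
bn = nn.BatchNorm2d(8).eval()
x = torch.randn(1, 3, 32, 32)
fused = fuse_conv_and_bn(conv, bn)                                   # function from this file
print(torch.allclose(bn(conv(x)), fused(x), atol=1e-5))              # expected: True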

85
utils/triton.py Normal file
View File

@ -0,0 +1,85 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
""" Utils to interact with the Triton Inference Server
"""
import typing
from urllib.parse import urlparse
import torch
class TritonRemoteModel:
""" A wrapper over a model served by the Triton Inference Server. It can
be configured to communicate over GRPC or HTTP. It accepts Torch Tensors
as input and returns them as outputs.
"""
def __init__(self, url: str):
"""
Keyword arguments:
url: Fully qualified address of the Triton server, e.g. grpc://localhost:8000
"""
parsed_url = urlparse(url)
if parsed_url.scheme == "grpc":
from tritonclient.grpc import InferenceServerClient, InferInput
self.client = InferenceServerClient(parsed_url.netloc) # Triton GRPC client
model_repository = self.client.get_model_repository_index()
self.model_name = model_repository.models[0].name
self.metadata = self.client.get_model_metadata(self.model_name, as_json=True)
def create_input_placeholders() -> typing.List[InferInput]:
return [
InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']]
else:
from tritonclient.http import InferenceServerClient, InferInput
self.client = InferenceServerClient(parsed_url.netloc) # Triton HTTP client
model_repository = self.client.get_model_repository_index()
self.model_name = model_repository[0]['name']
self.metadata = self.client.get_model_metadata(self.model_name)
def create_input_placeholders() -> typing.List[InferInput]:
return [
InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']]
self._create_input_placeholders_fn = create_input_placeholders
@property
def runtime(self):
"""Returns the model runtime"""
return self.metadata.get("backend", self.metadata.get("platform"))
def __call__(self, *args, **kwargs) -> typing.Union[torch.Tensor, typing.Tuple[torch.Tensor, ...]]:
""" Invokes the model. Parameters can be provided via args or kwargs.
args, if provided, are assumed to match the order of inputs of the model.
kwargs are matched with the model input names.
"""
inputs = self._create_inputs(*args, **kwargs)
response = self.client.infer(model_name=self.model_name, inputs=inputs)
result = []
for output in self.metadata['outputs']:
tensor = torch.as_tensor(response.as_numpy(output['name']))
result.append(tensor)
return result[0] if len(result) == 1 else result
def _create_inputs(self, *args, **kwargs):
args_len, kwargs_len = len(args), len(kwargs)
if not args_len and not kwargs_len:
raise RuntimeError("No inputs provided.")
if args_len and kwargs_len:
raise RuntimeError("Cannot specify args and kwargs at the same time")
placeholders = self._create_input_placeholders_fn()
if args_len:
if args_len != len(placeholders):
raise RuntimeError(f"Expected {len(placeholders)} inputs, got {args_len}.")
for input, value in zip(placeholders, args):
input.set_data_from_numpy(value.cpu().numpy())
else:
for input in placeholders:
value = kwargs[input.name]
input.set_data_from_numpy(value.cpu().numpy())
return placeholders
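A hedged usage sketch; the server URL, reported runtime and input shape are assumptions about whatever model happens to be deployed.
import torch

model = TritonRemoteModel("http://localhost:8000")    # or a GRPC endpoint such as "grpc://localhost:8001"
print(model.runtime)                                   # e.g. "onnxruntime_onnx" or "tensorrt_plan"
y = model(torch.zeros(1, 3, 640, 640))                 # positional args; kwargs keyed by input name also work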