diff --git a/.gitignore b/.gitignore index 7acff58..f218b77 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ yolov5s.engine yolov5 tensorrt-8.4.1.5-cp39-none-win_amd64.whl tensorrt-8.4.1.5-cp310-none-win_amd64.whl +__pycache__ +utils/__pycache__ +models/__pycache__ \ No newline at end of file diff --git a/main.py b/main.py index 2f17afb..45ed8fb 100644 --- a/main.py +++ b/main.py @@ -5,12 +5,13 @@ import gc import numpy as np import cv2 import time -import win32api, win32con +import win32api +import win32con import pandas as pd -from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, - increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) +from utils.general import (cv2, non_max_suppression, xyxy2xywh) import dxcam + def main(): # Window title of the game, don't need the entire name videoGameWindowTitle = "Counter" @@ -63,8 +64,10 @@ def main(): # sctArea = {"mon": 1, "top": 0, "left": 0, "width": 1920, "height": 1080} # Starting screenshoting engine - left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2) - top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2 + left = aaRightShift + \ + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2) + top = videoGameWindow.top + \ + (videoGameWindow.height - screenShotHeight) // 2 right, bottom = left + screenShotWidth, top + screenShotHeight region = (left, top, right, bottom) @@ -81,7 +84,8 @@ def main(): sTime = time.time() # Loading Yolo5 Small AI Model - model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, force_reload=True) + model = torch.hub.load('ultralytics/yolov5', 'yolov5s', + pretrained=True, force_reload=True) stride, names, pt = model.stride, model.names, model.pt model.half() @@ -96,7 +100,7 @@ def main(): # Getting Frame npImg = np.array(camera.get_latest_frame()) - + # Normalizing Data im = torch.from_numpy(npImg).to('cuda') im = torch.movedim(im, 2, 0) @@ -109,23 +113,25 @@ def main(): results = model(im, size=screenShotHeight) # Suppressing results that dont meet thresholds - pred = non_max_suppression(results, 0.25, 0.25, 0, False, max_det=1000) - + pred = non_max_suppression( + results, 0.25, 0.25, 0, False, max_det=1000) # Converting output to usable cords targets = [] for i, det in enumerate(pred): s = "" - gn = torch.tensor(im.shape)[[0, 0, 0, 0]] + gn = torch.tensor(im.shape)[[0, 0, 0, 0]] if len(det): for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}, " # add to string for *xyxy, conf, cls in reversed(det): - targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()) # normalized xywh + targets.append((xyxy2xywh(torch.tensor(xyxy).view( + 1, 4)) / gn).view(-1).tolist()) # normalized xywh - targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"]) + targets = pd.DataFrame( + targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"]) # If there are people in the center bounding box if len(targets) > 0: @@ -134,7 +140,8 @@ def main(): targets['last_mid_x'] = last_mid_coord[0] targets['last_mid_y'] = last_mid_coord[1] # Take distance between current person mid coordinate and last person mid coordinate - targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1) + targets['dist'] = np.linalg.norm( + targets.iloc[:, [0, 1]].values - targets.iloc[:, 
[4, 5]], axis=1) targets.sort_values(by="dist", ascending=False) # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance) @@ -151,7 +158,8 @@ def main(): # Moving the mouse if win32api.GetKeyState(0x14): - win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0) + win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int( + mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0) last_mid_coord = [xMid, yMid] else: @@ -165,7 +173,8 @@ def main(): halfH = round(targets["height"][i] / 2) midX = targets['current_mid_x'][i] midY = targets['current_mid_y'][i] - (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH) + (startX, startY, endX, endY) = int( + midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH) confidence = .5 @@ -174,10 +183,10 @@ def main(): # draw the bounding box and label on the frame label = "{}: {:.2f}%".format("Human", confidence * 100) cv2.rectangle(npImg, (startX, startY), (endX, endY), - COLORS[idx], 2) + COLORS[idx], 2) y = startY - 15 if startY - 15 > 15 else startY + 15 cv2.putText(npImg, label, (startX, y), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) + cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) # Forced garbage cleanup every second count += 1 @@ -197,5 +206,6 @@ def main(): exit() camera.stop() + if __name__ == "__main__": main() diff --git a/main_onnx_cpu.py b/main_onnx_cpu.py index 4181d10..c01f6ff 100644 --- a/main_onnx_cpu.py +++ b/main_onnx_cpu.py @@ -5,13 +5,14 @@ import gc import numpy as np import cv2 import time -import win32api, win32con +import win32api +import win32con import pandas as pd -from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, - increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) +from utils.general import (cv2, non_max_suppression, xyxy2xywh) import dxcam import torch + def main(): # Window title to go after and the height of the screenshots videoGameWindowTitle = "Counter" @@ -61,8 +62,10 @@ def main(): "height": screenShotHeight} # Starting screenshoting engine - left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2) - top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2 + left = aaRightShift + \ + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2) + top = videoGameWindow.top + \ + (videoGameWindow.height - screenShotHeight) // 2 right, bottom = left + 320, top + 320 region = (left, top, right, bottom) @@ -82,7 +85,8 @@ def main(): so = ort.SessionOptions() so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL - ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=['CUDAExecutionProvider']) + ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=[ + 'CUDAExecutionProvider']) # Used for colors drawn on bounding boxes COLORS = np.random.uniform(0, 255, size=(1500, 3)) @@ -99,21 +103,24 @@ def main(): im = torch.from_numpy(outputs[0]).to('cpu') - pred = non_max_suppression(im, confidence, confidence, 0, False, max_det=10) + pred = non_max_suppression( + im, confidence, confidence, 0, False, max_det=10) targets = [] for i, det in enumerate(pred): s = "" - gn = torch.tensor(npImg.shape)[[0, 0, 0, 0]] + gn = torch.tensor(npImg.shape)[[0, 0, 0, 0]] if len(det): for c in det[:, 
-1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {int(c)}, " # add to string for *xyxy, conf, cls in reversed(det): - targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()) # normalized xywh + targets.append((xyxy2xywh(torch.tensor(xyxy).view( + 1, 4)) / gn).view(-1).tolist()) # normalized xywh - targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"]) + targets = pd.DataFrame( + targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"]) # If there are people in the center bounding box if len(targets) > 0: @@ -122,7 +129,8 @@ def main(): targets['last_mid_x'] = last_mid_coord[0] targets['last_mid_y'] = last_mid_coord[1] # Take distance between current person mid coordinate and last person mid coordinate - targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1) + targets['dist'] = np.linalg.norm( + targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1) targets.sort_values(by="dist", ascending=False) # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance) @@ -139,7 +147,8 @@ def main(): # Moving the mouse if win32api.GetKeyState(0x14): - win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0) + win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int( + mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0) last_mid_coord = [xMid, yMid] else: @@ -153,16 +162,17 @@ def main(): halfH = round(targets["height"][i] / 2) midX = targets['current_mid_x'][i] midY = targets['current_mid_y'][i] - (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH) + (startX, startY, endX, endY) = int(midX + halfW), int(midY + + halfH), int(midX - halfW), int(midY - halfH) idx = 0 # draw the bounding box and label on the frame label = "{}: {:.2f}%".format("Human", confidence * 100) cv2.rectangle(npImg, (startX, startY), (endX, endY), - COLORS[idx], 2) + COLORS[idx], 2) y = startY - 15 if startY - 15 > 15 else startY + 15 cv2.putText(npImg, label, (startX, y), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) + cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) # Forced garbage cleanup every second count += 1 @@ -183,5 +193,6 @@ def main(): camera.stop() + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/main_onnx_gpu.py b/main_onnx_gpu.py index 91d85f4..cbeedd9 100644 --- a/main_onnx_gpu.py +++ b/main_onnx_gpu.py @@ -6,13 +6,14 @@ import gc import numpy as np import cv2 import time -import win32api, win32con +import win32api +import win32con import pandas as pd -from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, - increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) +from utils.general import (cv2, non_max_suppression, xyxy2xywh) import dxcam import torch + def main(): # Window title to go after and the height of the screenshots videoGameWindowTitle = "Counter" @@ -62,8 +63,10 @@ def main(): "height": screenShotHeight} # Starting screenshoting engine - left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2) - top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2 + left = aaRightShift + \ + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2) + top = 
videoGameWindow.top + \ + (videoGameWindow.height - screenShotHeight) // 2 right, bottom = left + 320, top + 320 region = (left, top, right, bottom) @@ -81,7 +84,8 @@ def main(): so = ort.SessionOptions() so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL - ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=['CUDAExecutionProvider']) + ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=[ + 'CUDAExecutionProvider']) # Used for colors drawn on bounding boxes COLORS = np.random.uniform(0, 255, size=(1500, 3)) @@ -98,21 +102,24 @@ def main(): im = torch.from_numpy(outputs[0]).to('cpu') - pred = non_max_suppression(im, confidence, confidence, 0, False, max_det=10) + pred = non_max_suppression( + im, confidence, confidence, 0, False, max_det=10) targets = [] for i, det in enumerate(pred): s = "" - gn = torch.tensor(npImg.shape)[[0, 0, 0, 0]] + gn = torch.tensor(npImg.shape)[[0, 0, 0, 0]] if len(det): for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {int(c)}, " # add to string for *xyxy, conf, cls in reversed(det): - targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()) # normalized xywh + targets.append((xyxy2xywh(torch.tensor(xyxy).view( + 1, 4)) / gn).view(-1).tolist()) # normalized xywh - targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"]) + targets = pd.DataFrame( + targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"]) # If there are people in the center bounding box if len(targets) > 0: @@ -121,7 +128,8 @@ def main(): targets['last_mid_x'] = last_mid_coord[0] targets['last_mid_y'] = last_mid_coord[1] # Take distance between current person mid coordinate and last person mid coordinate - targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1) + targets['dist'] = np.linalg.norm( + targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1) targets.sort_values(by="dist", ascending=False) # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance) @@ -138,7 +146,8 @@ def main(): # Moving the mouse if win32api.GetKeyState(0x14): - win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0) + win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int( + mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0) last_mid_coord = [xMid, yMid] else: @@ -152,16 +161,17 @@ def main(): halfH = round(targets["height"][i] / 2) midX = targets['current_mid_x'][i] midY = targets['current_mid_y'][i] - (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH) + (startX, startY, endX, endY) = int(midX + halfW), int(midY + + halfH), int(midX - halfW), int(midY - halfH) idx = 0 # draw the bounding box and label on the frame label = "{}: {:.2f}%".format("Human", confidence * 100) cv2.rectangle(npImg, (startX, startY), (endX, endY), - COLORS[idx], 2) + COLORS[idx], 2) y = startY - 15 if startY - 15 > 15 else startY + 15 cv2.putText(npImg, label, (startX, y), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) + cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) # Forced garbage cleanup every second count += 1 @@ -181,5 +191,6 @@ def main(): exit() camera.stop() + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/main_tensorrt_gpu.py b/main_tensorrt_gpu.py index 
c9c9d46..a45b8f4 100644 --- a/main_tensorrt_gpu.py +++ b/main_tensorrt_gpu.py @@ -5,14 +5,15 @@ import gc import numpy as np import cv2 import time -import win32api, win32con +import win32api +import win32con import pandas as pd -from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, - increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) +from utils.general import (cv2, non_max_suppression, xyxy2xywh) from models.common import DetectMultiBackend import dxcam import cupy as cp + def main(): # Window title to go after and the height of the screenshots videoGameWindowTitle = "Counter" @@ -62,8 +63,10 @@ def main(): "height": screenShotHeight} # Starting screenshoting engine - left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2) - top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2 + left = aaRightShift + \ + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2) + top = videoGameWindow.top + \ + (videoGameWindow.height - screenShotHeight) // 2 right, bottom = left + screenShotWidth, top + screenShotHeight region = (left, top, right, bottom) @@ -80,9 +83,10 @@ def main(): sTime = time.time() # Loading Yolo5 Small AI Model - model = DetectMultiBackend('yolov5s320Half.engine', device=torch.device('cuda'), dnn=False, data='', fp16=True) + model = DetectMultiBackend('yolov5s320Half.engine', device=torch.device( + 'cuda'), dnn=False, data='', fp16=True) stride, names, pt = model.stride, model.names, model.pt - + # Used for colors drawn on bounding boxes COLORS = np.random.uniform(0, 255, size=(1500, 3)) @@ -96,30 +100,34 @@ def main(): im = cp.moveaxis(npImg, 3, 1) im = torch.from_numpy(cp.asnumpy(im)).to('cuda') - - #Converting to numpy for visuals + + # Converting to numpy for visuals im0 = im[0].permute(1, 2, 0) * 255 im0 = im0.cpu().numpy().astype(np.uint8) - im0 = cv2.cvtColor(im0, cv2.COLOR_RGB2BGR) #Image has to be in BGR for visualization - + # Image has to be in BGR for visualization + im0 = cv2.cvtColor(im0, cv2.COLOR_RGB2BGR) + # Detecting all the objects results = model(im) - pred = non_max_suppression(results, confidence, confidence, 0, False, max_det=10) + pred = non_max_suppression( + results, confidence, confidence, 0, False, max_det=10) targets = [] for i, det in enumerate(pred): s = "" - gn = torch.tensor(im.shape)[[0, 0, 0, 0]] + gn = torch.tensor(im.shape)[[0, 0, 0, 0]] if len(det): for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}, " # add to string for *xyxy, conf, cls in reversed(det): - targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()) # normalized xywh + targets.append((xyxy2xywh(torch.tensor(xyxy).view( + 1, 4)) / gn).view(-1).tolist()) # normalized xywh - targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"]) + targets = pd.DataFrame( + targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"]) # If there are people in the center bounding box if len(targets) > 0: @@ -128,7 +136,8 @@ def main(): targets['last_mid_x'] = last_mid_coord[0] targets['last_mid_y'] = last_mid_coord[1] # Take distance between current person mid coordinate and last person mid coordinate - targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1) + targets['dist'] = np.linalg.norm( + targets.iloc[:, [0, 1]].values - 
targets.iloc[:, [4, 5]], axis=1) targets.sort_values(by="dist", ascending=False) # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance) @@ -145,7 +154,8 @@ def main(): # Moving the mouse if win32api.GetKeyState(0x14): - win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0) + win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int( + mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0) last_mid_coord = [xMid, yMid] else: @@ -159,16 +169,17 @@ def main(): halfH = round(targets["height"][i] / 2) midX = targets['current_mid_x'][i] midY = targets['current_mid_y'][i] - (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH) + (startX, startY, endX, endY) = int( + midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH) idx = 0 # draw the bounding box and label on the frame label = "{}: {:.2f}%".format("Human", confidence * 100) cv2.rectangle(im0, (startX, startY), (endX, endY), - COLORS[idx], 2) + COLORS[idx], 2) y = startY - 15 if startY - 15 > 15 else startY + 15 cv2.putText(im0, label, (startX, y), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) + cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) # Forced garbage cleanup every second count += 1 @@ -188,5 +199,6 @@ def main(): exit() camera.stop() + if __name__ == "__main__": main() diff --git a/main_torch_gpu.py b/main_torch_gpu.py index 54f85b6..aad4830 100644 --- a/main_torch_gpu.py +++ b/main_torch_gpu.py @@ -5,12 +5,13 @@ import gc import numpy as np import cv2 import time -import win32api, win32con +import win32api +import win32con import pandas as pd -from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, - increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) +from utils.general import (cv2, non_max_suppression, xyxy2xywh) import dxcam + def main(): # Window title to go after and the height of the screenshots videoGameWindowTitle = "Counter" @@ -60,8 +61,10 @@ def main(): "height": screenShotHeight} # Starting screenshoting engine - left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2) - top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2 + left = aaRightShift + \ + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2) + top = videoGameWindow.top + \ + (videoGameWindow.height - screenShotHeight) // 2 right, bottom = left + screenShotWidth, top + screenShotHeight region = (left, top, right, bottom) @@ -78,7 +81,8 @@ def main(): sTime = time.time() # Loading Yolo5 Small AI Model - model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, force_reload=True) + model = torch.hub.load('ultralytics/yolov5', 'yolov5s', + pretrained=True, force_reload=True) stride, names, pt = model.stride, model.names, model.pt model.half() @@ -93,7 +97,7 @@ def main(): # Getting Frame npImg = np.array(camera.get_latest_frame()) - + # Normalizing Data im = torch.from_numpy(npImg).to('cuda') im = torch.movedim(im, 2, 0) @@ -106,22 +110,25 @@ def main(): results = model(im, size=screenShotHeight) # Suppressing results that dont meet thresholds - pred = non_max_suppression(results, confidence, confidence, 0, False, max_det=10) + pred = non_max_suppression( + results, confidence, confidence, 0, False, max_det=10) # Converting output to usable cords targets = [] for i, 
det in enumerate(pred): s = "" - gn = torch.tensor(im.shape)[[0, 0, 0, 0]] + gn = torch.tensor(im.shape)[[0, 0, 0, 0]] if len(det): for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}, " # add to string for *xyxy, conf, cls in reversed(det): - targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()) # normalized xywh + targets.append((xyxy2xywh(torch.tensor(xyxy).view( + 1, 4)) / gn).view(-1).tolist()) # normalized xywh - targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"]) + targets = pd.DataFrame( + targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"]) # If there are people in the center bounding box if len(targets) > 0: @@ -130,7 +137,8 @@ def main(): targets['last_mid_x'] = last_mid_coord[0] targets['last_mid_y'] = last_mid_coord[1] # Take distance between current person mid coordinate and last person mid coordinate - targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1) + targets['dist'] = np.linalg.norm( + targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1) targets.sort_values(by="dist", ascending=False) # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance) @@ -147,7 +155,8 @@ def main(): # Moving the mouse if win32api.GetKeyState(0x14): - win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0) + win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int( + mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0) last_mid_coord = [xMid, yMid] else: @@ -161,16 +170,17 @@ def main(): halfH = round(targets["height"][i] / 2) midX = targets['current_mid_x'][i] midY = targets['current_mid_y'][i] - (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH) + (startX, startY, endX, endY) = int( + midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH) idx = 0 # draw the bounding box and label on the frame label = "{}: {:.2f}%".format("Human", confidence * 100) cv2.rectangle(npImg, (startX, startY), (endX, endY), - COLORS[idx], 2) + COLORS[idx], 2) y = startY - 15 if startY - 15 > 15 else startY + 15 cv2.putText(npImg, label, (startX, y), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) + cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) # Forced garbage cleanup every second count += 1 @@ -190,5 +200,6 @@ def main(): exit() camera.stop() + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/models/__pycache__/__init__.cpython-39.pyc b/models/__pycache__/__init__.cpython-39.pyc index f65bc00..230c9e6 100644 Binary files a/models/__pycache__/__init__.cpython-39.pyc and b/models/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/__pycache__/common.cpython-310.pyc b/models/__pycache__/common.cpython-310.pyc index 6f5ff24..95d2011 100644 Binary files a/models/__pycache__/common.cpython-310.pyc and b/models/__pycache__/common.cpython-310.pyc differ diff --git a/models/__pycache__/common.cpython-39.pyc b/models/__pycache__/common.cpython-39.pyc index 1c3aa15..3cf3602 100644 Binary files a/models/__pycache__/common.cpython-39.pyc and b/models/__pycache__/common.cpython-39.pyc differ diff --git a/models/__pycache__/experimental.cpython-310.pyc b/models/__pycache__/experimental.cpython-310.pyc index 43d91f4..72783d9 100644 Binary files 
a/models/__pycache__/experimental.cpython-310.pyc and b/models/__pycache__/experimental.cpython-310.pyc differ diff --git a/models/__pycache__/yolo.cpython-310.pyc b/models/__pycache__/yolo.cpython-310.pyc index 0be9fe1..1a94ae2 100644 Binary files a/models/__pycache__/yolo.cpython-310.pyc and b/models/__pycache__/yolo.cpython-310.pyc differ diff --git a/models/common.py b/models/common.py index 9ac7e0f..833ece2 100644 --- a/models/common.py +++ b/models/common.py @@ -10,6 +10,7 @@ import warnings from collections import OrderedDict, namedtuple from copy import copy from pathlib import Path +from urllib.parse import urlparse import cv2 import numpy as np @@ -22,26 +23,51 @@ from torch.cuda import amp from utils.dataloaders import exif_transpose, letterbox from utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr, - increment_path, make_divisible, non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh, + increment_path, make_divisible, non_max_suppression, scale_boxes, xywh2xyxy, xyxy2xywh, yaml_load) from utils.plots import Annotator, colors, save_one_box from utils.torch_utils import copy_attr, smart_inference_mode -def autopad(k, p=None): # kernel, padding - # Pad to 'same' +def export_formats(): + # YOLOv5 export formats + x = [ + ['PyTorch', '-', '.pt', True, True], + ['TorchScript', 'torchscript', '.torchscript', True, True], + ['ONNX', 'onnx', '.onnx', True, True], + ['OpenVINO', 'openvino', '_openvino_model', True, False], + ['TensorRT', 'engine', '.engine', False, True], + ['CoreML', 'coreml', '.mlmodel', True, False], + ['TensorFlow SavedModel', 'saved_model', '_saved_model', True, True], + ['TensorFlow GraphDef', 'pb', '.pb', True, True], + ['TensorFlow Lite', 'tflite', '.tflite', True, False], + ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False, False], + ['TensorFlow.js', 'tfjs', '_web_model', False, False], + ['PaddlePaddle', 'paddle', '_paddle_model', True, True], ] + return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU']) + + +def autopad(k, p=None, d=1): # kernel, padding, dilation + # Pad to 'same' shape outputs + if d > 1: + k = d * (k - 1) + 1 if isinstance(k, + int) else [d * (x - 1) + 1 for x in k] # actual kernel-size if p is None: p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad return p class Conv(nn.Module): - # Standard convolution - def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation) + default_act = nn.SiLU() # default activation + + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True): super().__init__() - self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) + self.conv = nn.Conv2d(c1, c2, k, s, autopad( + k, p, d), groups=g, dilation=d, bias=False) self.bn = nn.BatchNorm2d(c2) - self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) + self.act = self.default_act if act is True else act if isinstance( + act, nn.Module) else nn.Identity() def forward(self, x): return self.act(self.bn(self.conv(x))) @@ -51,14 +77,16 @@ class Conv(nn.Module): class DWConv(Conv): - # Depth-wise convolution class - def __init__(self, c1, c2, k=1, s=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups - super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act) + # Depth-wise convolution + # ch_in, ch_out, kernel, stride, dilation, 
activation + def __init__(self, c1, c2, k=1, s=1, d=1, act=True): + super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act) class DWConvTranspose2d(nn.ConvTranspose2d): - # Depth-wise transpose convolution class - def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out + # Depth-wise transpose convolution + # ch_in, ch_out, kernel, stride, padding, padding_out + def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2)) @@ -87,7 +115,8 @@ class TransformerBlock(nn.Module): if c1 != c2: self.conv = Conv(c1, c2) self.linear = nn.Linear(c2, c2) # learnable position embedding - self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers))) + self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) + for _ in range(num_layers))) self.c2 = c2 def forward(self, x): @@ -100,7 +129,8 @@ class TransformerBlock(nn.Module): class Bottleneck(nn.Module): # Standard bottleneck - def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + # ch_in, ch_out, shortcut, groups, expansion + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) @@ -113,7 +143,8 @@ class Bottleneck(nn.Module): class BottleneckCSP(nn.Module): # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks - def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + # ch_in, ch_out, number, shortcut, groups, expansion + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) @@ -122,7 +153,8 @@ class BottleneckCSP(nn.Module): self.cv4 = Conv(2 * c_, c2, 1, 1) self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) self.act = nn.SiLU() - self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) + self.m = nn.Sequential( + *(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) def forward(self, x): y1 = self.cv3(self.m(self.cv1(x))) @@ -146,13 +178,15 @@ class CrossConv(nn.Module): class C3(nn.Module): # CSP Bottleneck with 3 convolutions - def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + # ch_in, ch_out, number, shortcut, groups, expansion + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = Conv(c1, c_, 1, 1) self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2) - self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) + self.m = nn.Sequential( + *(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) def forward(self, x): return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1)) @@ -163,7 +197,8 @@ class C3x(C3): def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): super().__init__(c1, c2, n, shortcut, g, e) c_ = int(c2 * e) - self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n))) + self.m = nn.Sequential( + *(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n))) class C3TR(C3): @@ -197,12 +232,14 @@ class SPP(nn.Module): c_ = c1 // 2 # hidden channels self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) - self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) + self.m = 
nn.ModuleList( + [nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) def forward(self, x): x = self.cv1(x) with warnings.catch_warnings(): - warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning + # suppress torch 1.9.0 max_pool2d() warning + warnings.simplefilter('ignore') return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) @@ -218,7 +255,8 @@ class SPPF(nn.Module): def forward(self, x): x = self.cv1(x) with warnings.catch_warnings(): - warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning + # suppress torch 1.9.0 max_pool2d() warning + warnings.simplefilter('ignore') y1 = self.m(x) y2 = self.m(y1) return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) @@ -226,9 +264,10 @@ class SPPF(nn.Module): class Focus(nn.Module): # Focus wh information into c-space - def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + # ch_in, ch_out, kernel, stride, padding, groups + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): super().__init__() - self.conv = Conv(c1 * 4, c2, k, s, p, g, act) + self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act) # self.contract = Contract(gain=2) def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) @@ -238,11 +277,12 @@ class Focus(nn.Module): class GhostConv(nn.Module): # Ghost Convolution https://github.com/huawei-noah/ghostnet - def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups + # ch_in, ch_out, kernel, stride, groups + def __init__(self, c1, c2, k=1, s=1, g=1, act=True): super().__init__() c_ = c2 // 2 # hidden channels - self.cv1 = Conv(c1, c_, k, s, None, g, act) - self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) + self.cv1 = Conv(c1, c_, k, s, None, g, act=act) + self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act) def forward(self, x): y = self.cv1(x) @@ -310,7 +350,7 @@ class DetectMultiBackend(nn.Module): # PyTorch: weights = *.pt # TorchScript: *.torchscript # ONNX Runtime: *.onnx - # ONNX OpenCV DNN: *.onnx with --dnn + # ONNX OpenCV DNN: *.onnx --dnn # OpenVINO: *.xml # CoreML: *.mlmodel # TensorRT: *.engine @@ -318,25 +358,35 @@ class DetectMultiBackend(nn.Module): # TensorFlow GraphDef: *.pb # TensorFlow Lite: *.tflite # TensorFlow Edge TPU: *_edgetpu.tflite - from models.experimental import attempt_download, attempt_load # scoped to avoid circular import + # PaddlePaddle: *_paddle_model + # scoped to avoid circular import + from models.experimental import attempt_download, attempt_load super().__init__() w = str(weights[0] if isinstance(weights, list) else weights) - pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self._model_type(w) # get backend - w = attempt_download(w) # download if not local + pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type( + w) fp16 &= pt or jit or onnx or engine # FP16 + # BHWC formats (vs torch BCWH) + nhwc = coreml or saved_model or pb or tflite or edgetpu stride = 32 # default stride + cuda = torch.cuda.is_available() and device.type != 'cpu' # use CUDA + if not (pt or triton): + w = attempt_download(w) # download if not local if pt: # PyTorch - model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse) + model = attempt_load(weights if isinstance( + weights, list) else w, device=device, inplace=True, fuse=fuse) stride = max(int(model.stride.max()), 32) # model stride - names = model.module.names if hasattr(model, 'module') 
else model.names # get class names + names = model.module.names if hasattr( + model, 'module') else model.names # get class names model.half() if fp16 else model.float() self.model = model # explicitly assign for to(), cpu(), cuda(), half() elif jit: # TorchScript LOGGER.info(f'Loading {w} for TorchScript inference...') extra_files = {'config.txt': ''} # model metadata - model = torch.jit.load(w, _extra_files=extra_files) + model = torch.jit.load( + w, _extra_files=extra_files, map_location=device) model.half() if fp16 else model.float() if extra_files['config.txt']: # load metadata dict d = json.loads(extra_files['config.txt'], @@ -345,14 +395,15 @@ class DetectMultiBackend(nn.Module): stride, names = int(d['stride']), d['names'] elif dnn: # ONNX OpenCV DNN LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') - check_requirements(('opencv-python>=4.5.4',)) + check_requirements('opencv-python>=4.5.4') net = cv2.dnn.readNetFromONNX(w) elif onnx: # ONNX Runtime LOGGER.info(f'Loading {w} for ONNX Runtime inference...') - cuda = torch.cuda.is_available() and device.type != 'cpu' - check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime')) + check_requirements( + ('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime')) import onnxruntime - providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider'] + providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else [ + 'CPUExecutionProvider'] session = onnxruntime.InferenceSession(w, providers=providers) output_names = [x.name for x in session.get_outputs()] meta = session.get_modelmeta().custom_metadata_map # metadata @@ -360,100 +411,149 @@ class DetectMultiBackend(nn.Module): stride, names = int(meta['stride']), eval(meta['names']) elif xml: # OpenVINO LOGGER.info(f'Loading {w} for OpenVINO inference...') - check_requirements(('openvino',)) # requires openvino-dev: https://pypi.org/project/openvino-dev/ + # requires openvino-dev: https://pypi.org/project/openvino-dev/ + check_requirements('openvino') from openvino.runtime import Core, Layout, get_batch ie = Core() if not Path(w).is_file(): # if not *.xml - w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir - network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin')) + # get *.xml file from *_openvino_model dir + w = next(Path(w).glob('*.xml')) + network = ie.read_model( + model=w, weights=Path(w).with_suffix('.bin')) if network.get_parameters()[0].get_layout().empty: network.get_parameters()[0].set_layout(Layout("NCHW")) batch_dim = get_batch(network) if batch_dim.is_static: batch_size = batch_dim.get_length() - executable_network = ie.compile_model(network, device_name="CPU") # device_name="MYRIAD" for Intel NCS2 - output_layer = next(iter(executable_network.outputs)) - stride, names = self._load_metadata(Path(w).with_suffix('.yaml')) # load metadata + # device_name="MYRIAD" for Intel NCS2 + executable_network = ie.compile_model(network, device_name="CPU") + stride, names = self._load_metadata( + Path(w).with_suffix('.yaml')) # load metadata elif engine: # TensorRT LOGGER.info(f'Loading {w} for TensorRT inference...') import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download - check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0 + # require tensorrt>=7.0.0 + check_version(trt.__version__, '7.0.0', hard=True) if device.type == 'cpu': device = torch.device('cuda:0') - Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr')) + 
Binding = namedtuple( + 'Binding', ('name', 'dtype', 'shape', 'data', 'ptr')) logger = trt.Logger(trt.Logger.INFO) with open(w, 'rb') as f, trt.Runtime(logger) as runtime: model = runtime.deserialize_cuda_engine(f.read()) context = model.create_execution_context() bindings = OrderedDict() + output_names = [] fp16 = False # default updated below dynamic = False - for index in range(model.num_bindings): - name = model.get_binding_name(index) - dtype = trt.nptype(model.get_binding_dtype(index)) - if model.binding_is_input(index): - if -1 in tuple(model.get_binding_shape(index)): # dynamic + for i in range(model.num_bindings): + name = model.get_binding_name(i) + dtype = trt.nptype(model.get_binding_dtype(i)) + if model.binding_is_input(i): + if -1 in tuple(model.get_binding_shape(i)): # dynamic dynamic = True - context.set_binding_shape(index, tuple(model.get_profile_shape(0, index)[2])) + context.set_binding_shape( + i, tuple(model.get_profile_shape(0, i)[2])) if dtype == np.float16: fp16 = True - shape = tuple(context.get_binding_shape(index)) + else: # output + output_names.append(name) + shape = tuple(context.get_binding_shape(i)) im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device) - bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr())) - binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items()) - batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size + bindings[name] = Binding( + name, dtype, shape, im, int(im.data_ptr())) + binding_addrs = OrderedDict((n, d.ptr) + for n, d in bindings.items()) + # if dynamic, this is instead max batch size + batch_size = bindings['images'].shape[0] elif coreml: # CoreML LOGGER.info(f'Loading {w} for CoreML inference...') import coremltools as ct model = ct.models.MLModel(w) - else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU) - if saved_model: # SavedModel - LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...') - import tensorflow as tf - keras = False # assume TF1 saved_model - model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w) - elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt - LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...') - import tensorflow as tf + elif saved_model: # TF SavedModel + LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...') + import tensorflow as tf + keras = False # assume TF1 saved_model + model = tf.keras.models.load_model( + w) if keras else tf.saved_model.load(w) + elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt + LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...') + import tensorflow as tf - def wrap_frozen_graph(gd, inputs, outputs): - x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped - ge = x.graph.as_graph_element - return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs)) + def wrap_frozen_graph(gd, inputs, outputs): + x = tf.compat.v1.wrap_function( + lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped + ge = x.graph.as_graph_element + return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs)) - gd = tf.Graph().as_graph_def() # graph_def - with open(w, 'rb') as f: - gd.ParseFromString(f.read()) - frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs="Identity:0") - elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python - try: # 
https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu - from tflite_runtime.interpreter import Interpreter, load_delegate - except ImportError: - import tensorflow as tf - Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate, - if edgetpu: # Edge TPU https://coral.ai/software/#edgetpu-runtime - LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...') - delegate = { - 'Linux': 'libedgetpu.so.1', - 'Darwin': 'libedgetpu.1.dylib', - 'Windows': 'edgetpu.dll'}[platform.system()] - interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)]) - else: # Lite - LOGGER.info(f'Loading {w} for TensorFlow Lite inference...') - interpreter = Interpreter(model_path=w) # load TFLite model - interpreter.allocate_tensors() # allocate - input_details = interpreter.get_input_details() # inputs - output_details = interpreter.get_output_details() # outputs - elif tfjs: - raise NotImplementedError('ERROR: YOLOv5 TF.js inference is not supported') - else: - raise NotImplementedError(f'ERROR: {w} is not a supported format') + def gd_outputs(gd): + name_list, input_list = [], [] + for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef + name_list.append(node.name) + input_list.extend(node.input) + return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp')) + + gd = tf.Graph().as_graph_def() # TF GraphDef + with open(w, 'rb') as f: + gd.ParseFromString(f.read()) + frozen_func = wrap_frozen_graph( + gd, inputs="x:0", outputs=gd_outputs(gd)) + elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python + try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu + from tflite_runtime.interpreter import Interpreter, load_delegate + except ImportError: + import tensorflow as tf + Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate, + if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime + LOGGER.info( + f'Loading {w} for TensorFlow Lite Edge TPU inference...') + delegate = { + 'Linux': 'libedgetpu.so.1', + 'Darwin': 'libedgetpu.1.dylib', + 'Windows': 'edgetpu.dll'}[platform.system()] + interpreter = Interpreter(model_path=w, experimental_delegates=[ + load_delegate(delegate)]) + else: # TFLite + LOGGER.info(f'Loading {w} for TensorFlow Lite inference...') + interpreter = Interpreter(model_path=w) # load TFLite model + interpreter.allocate_tensors() # allocate + input_details = interpreter.get_input_details() # inputs + output_details = interpreter.get_output_details() # outputs + elif tfjs: # TF.js + raise NotImplementedError( + 'ERROR: YOLOv5 TF.js inference is not supported') + elif paddle: # PaddlePaddle + LOGGER.info(f'Loading {w} for PaddlePaddle inference...') + check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle') + import paddle.inference as pdi + if not Path(w).is_file(): # if not *.pdmodel + # get *.xml file from *_openvino_model dir + w = next(Path(w).rglob('*.pdmodel')) + weights = Path(w).with_suffix('.pdiparams') + config = pdi.Config(str(w), str(weights)) + if cuda: + config.enable_use_gpu( + memory_pool_init_size_mb=2048, device_id=0) + predictor = pdi.create_predictor(config) + input_handle = predictor.get_input_handle( + predictor.get_input_names()[0]) + output_names = predictor.get_output_names() + elif triton: # NVIDIA Triton Inference Server + LOGGER.info(f'Using {w} as Triton Inference 
Server...') + check_requirements('tritonclient[all]') + from utils.triton import TritonRemoteModel + model = TritonRemoteModel(url=w) + nhwc = model.runtime.startswith("tensorflow") + else: + raise NotImplementedError(f'ERROR: {w} is not a supported format') # class names if 'names' not in locals(): - names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)} + names = yaml_load(data)['names'] if data else { + i: f'class{i}' for i in range(999)} if names[0] == 'n01440764' and len(names) == 1000: # ImageNet - names = yaml_load(ROOT / 'data/ImageNet.yaml')['names'] # human-readable names + # human-readable names + names = yaml_load(ROOT / 'data/ImageNet.yaml')['names'] self.__dict__.update(locals()) # assign all variables to self @@ -462,9 +562,13 @@ class DetectMultiBackend(nn.Module): b, ch, h, w = im.shape # batch, channel, height, width if self.fp16 and im.dtype != torch.float16: im = im.half() # to FP16 + if self.nhwc: + # torch BCHW to numpy BHWC shape(1,320,192,3) + im = im.permute(0, 2, 3, 1) if self.pt: # PyTorch - y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im) + y = self.model( + im, augment=augment, visualize=visualize) if augment or visualize else self.model(im) elif self.jit: # TorchScript y = self.model(im) elif self.dnn: # ONNX OpenCV DNN @@ -473,52 +577,77 @@ class DetectMultiBackend(nn.Module): y = self.net.forward() elif self.onnx: # ONNX Runtime im = im.cpu().numpy() # torch to numpy - y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im}) + y = self.session.run(self.output_names, { + self.session.get_inputs()[0].name: im}) elif self.xml: # OpenVINO im = im.cpu().numpy() # FP32 - y = self.executable_network([im])[self.output_layer] + y = list(self.executable_network([im]).values()) elif self.engine: # TensorRT if self.dynamic and im.shape != self.bindings['images'].shape: - i_in, i_out = (self.model.get_binding_index(x) for x in ('images', 'output')) - self.context.set_binding_shape(i_in, im.shape) # reshape if dynamic - self.bindings['images'] = self.bindings['images']._replace(shape=im.shape) - self.bindings['output'].data.resize_(tuple(self.context.get_binding_shape(i_out))) + i = self.model.get_binding_index('images') + self.context.set_binding_shape( + i, im.shape) # reshape if dynamic + self.bindings['images'] = self.bindings['images']._replace( + shape=im.shape) + for name in self.output_names: + i = self.model.get_binding_index(name) + self.bindings[name].data.resize_( + tuple(self.context.get_binding_shape(i))) s = self.bindings['images'].shape assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}" self.binding_addrs['images'] = int(im.data_ptr()) self.context.execute_v2(list(self.binding_addrs.values())) - y = self.bindings['output'].data + y = [self.bindings[x].data for x in sorted(self.output_names)] elif self.coreml: # CoreML - im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3) + im = im.cpu().numpy() im = Image.fromarray((im[0] * 255).astype('uint8')) # im = im.resize((192, 320), Image.ANTIALIAS) - y = self.model.predict({'image': im}) # coordinates are xywh normalized + # coordinates are xywh normalized + y = self.model.predict({'image': im}) if 'confidence' in y: - box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels - conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float) - y = np.concatenate((box, conf.reshape(-1, 1), 
cls.reshape(-1, 1)), 1) + box = xywh2xyxy(y['coordinates'] * + [[w, h, w, h]]) # xyxy pixels + conf, cls = y['confidence'].max( + 1), y['confidence'].argmax(1).astype(np.float) + y = np.concatenate( + (box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1) else: - k = 'var_' + str(sorted(int(k.replace('var_', '')) for k in y)[-1]) # output key - y = y[k] # output + # reversed for segmentation models (pred, proto) + y = list(reversed(y.values())) + elif self.paddle: # PaddlePaddle + im = im.cpu().numpy().astype(np.float32) + self.input_handle.copy_from_cpu(im) + self.predictor.run() + y = [self.predictor.get_output_handle( + x).copy_to_cpu() for x in self.output_names] + elif self.triton: # NVIDIA Triton Inference Server + y = self.model(im) else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU) - im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3) + im = im.cpu().numpy() if self.saved_model: # SavedModel - y = (self.model(im, training=False) if self.keras else self.model(im)).numpy() + y = self.model( + im, training=False) if self.keras else self.model(im) elif self.pb: # GraphDef - y = self.frozen_func(x=self.tf.constant(im)).numpy() + y = self.frozen_func(x=self.tf.constant(im)) else: # Lite or Edge TPU - input, output = self.input_details[0], self.output_details[0] - int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model + input = self.input_details[0] + # is TFLite quantized uint8 model + int8 = input['dtype'] == np.uint8 if int8: scale, zero_point = input['quantization'] im = (im / scale + zero_point).astype(np.uint8) # de-scale self.interpreter.set_tensor(input['index'], im) self.interpreter.invoke() - y = self.interpreter.get_tensor(output['index']) - if int8: - scale, zero_point = output['quantization'] - y = (y.astype(np.float32) - zero_point) * scale # re-scale - y[..., :4] *= [w, h, w, h] # xywh normalized to pixels + y = [] + for output in self.output_details: + x = self.interpreter.get_tensor(output['index']) + if int8: + scale, zero_point = output['quantization'] + x = (x.astype(np.float32) - zero_point) * \ + scale # re-scale + y.append(x) + y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y] + y[0][..., :4] *= [w, h, w, h] # xywh normalized to pixels if isinstance(y, (list, tuple)): return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y] @@ -530,23 +659,27 @@ class DetectMultiBackend(nn.Module): def warmup(self, imgsz=(1, 3, 640, 640)): # Warmup model by running inference once - warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb - if any(warmup_types) and self.device.type != 'cpu': - im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input + warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton + if any(warmup_types) and (self.device.type != 'cpu' or self.triton): + im = torch.empty( + *imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input for _ in range(2 if self.jit else 1): # self.forward(im) # warmup @staticmethod def _model_type(p='path/to/model.pt'): # Return model type from model path, i.e. 
path='path/to/model.onnx' -> type=onnx - from yolov5.export import export_formats - suffixes = list(export_formats().Suffix) + ['.xml'] # export suffixes - check_suffix(p, suffixes) # checks - p = Path(p).name # eliminate trailing separators - pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, xml2 = (s in p for s in suffixes) - xml |= xml2 # *_openvino_model or *.xml - tflite &= not edgetpu # *.tflite - return pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs + # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle] + from utils.downloads import is_url + sf = list(export_formats().Suffix) # export suffixes + if not is_url(p, check=False): + check_suffix(p, sf) # checks + url = urlparse(p) # if url may be Triton inference server + types = [s in Path(p).name for s in sf] + types[8] &= not types[9] # tflite &= not edgetpu + triton = not any(types) and all( + [any(s in url.scheme for s in ["http", "grpc"]), url.netloc]) + return types + [triton] @staticmethod def _load_metadata(f=Path('path/to/meta.yaml')): @@ -563,7 +696,8 @@ class AutoShape(nn.Module): iou = 0.45 # NMS IoU threshold agnostic = False # NMS class-agnostic multi_label = False # NMS multiple labels per box - classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs + # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs + classes = None max_det = 1000 # maximum number of detections per image amp = False # Automatic Mixed Precision (AMP) inference @@ -571,19 +705,24 @@ class AutoShape(nn.Module): super().__init__() if verbose: LOGGER.info('Adding AutoShape... ') - copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes - self.dmb = isinstance(model, DetectMultiBackend) # DetectMultiBackend() instance + copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', + 'stride', 'abc'), exclude=()) # copy attributes + # DetectMultiBackend() instance + self.dmb = isinstance(model, DetectMultiBackend) self.pt = not self.dmb or model.pt # PyTorch model self.model = model.eval() if self.pt: - m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect() + # Detect() + m = self.model.model.model[-1] if self.dmb else self.model.model[-1] m.inplace = False # Detect.inplace=False for safe multithread inference + m.export = True # do not output loss values def _apply(self, fn): # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers self = super()._apply(fn) if self.pt: - m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect() + # Detect() + m = self.model.model.model[-1] if self.dmb else self.model.model[-1] m.stride = fn(m.stride) m.grid = list(map(fn, m.grid)) if isinstance(m.anchor_grid, list): @@ -605,40 +744,52 @@ class AutoShape(nn.Module): with dt[0]: if isinstance(size, int): # expand size = (size, size) - p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device) # param - autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference + p = next(self.model.parameters()) if self.pt else torch.empty( + 1, device=self.model.device) # param + # Automatic Mixed Precision (AMP) inference + autocast = self.amp and (p.device.type != 'cpu') if isinstance(ims, torch.Tensor): # torch with amp.autocast(autocast): - return self.model(ims.to(p.device).type_as(p), augment, profile) # 
inference + # inference + return self.model(ims.to(p.device).type_as(p), augment=augment) # Pre-process - n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims]) # number, list of images + n, ims = (len(ims), list(ims)) if isinstance( + ims, (list, tuple)) else (1, [ims]) # number, list of images shape0, shape1, files = [], [], [] # image and inference shapes, filenames for i, im in enumerate(ims): f = f'image{i}' # filename if isinstance(im, (str, Path)): # filename or uri - im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im + im, f = Image.open(requests.get(im, stream=True).raw if str( + im).startswith('http') else im), im im = np.asarray(exif_transpose(im)) elif isinstance(im, Image.Image): # PIL Image - im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f + im, f = np.asarray(exif_transpose(im)), getattr( + im, 'filename', f) or f files.append(Path(f).with_suffix('.jpg').name) if im.shape[0] < 5: # image in CHW - im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) - im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) # enforce 3ch input + # reverse dataloader .transpose(2, 0, 1) + im = im.transpose((1, 2, 0)) + im = im[..., :3] if im.ndim == 3 else cv2.cvtColor( + im, cv2.COLOR_GRAY2BGR) # enforce 3ch input s = im.shape[:2] # HWC shape0.append(s) # image shape g = max(size) / max(s) # gain shape1.append([y * g for y in s]) - ims[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update - shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] if self.pt else size # inf shape + ims[i] = im if im.data.contiguous else np.ascontiguousarray( + im) # update + shape1 = [make_divisible(x, self.stride) for x in np.array( + shape1).max(0)] if self.pt else size # inf shape x = [letterbox(im, shape1, auto=False)[0] for im in ims] # pad - x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW - x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32 + x = np.ascontiguousarray(np.array(x).transpose( + (0, 3, 1, 2))) # stack and BHWC to BCHW + x = torch.from_numpy(x).to(p.device).type_as( + p) / 255 # uint8 to fp16/32 with amp.autocast(autocast): # Inference with dt[1]: - y = self.model(x, augment, profile) # forward + y = self.model(x, augment=augment) # forward # Post-process with dt[2]: @@ -650,7 +801,7 @@ class AutoShape(nn.Module): self.multi_label, max_det=self.max_det) # NMS for i in range(n): - scale_coords(shape1, y[i][:, :4], shape0[i]) + scale_boxes(shape1, y[i][:, :4], shape0[i]) return Detections(ims, y, files, dt, self.names, x.shape) @@ -660,7 +811,8 @@ class Detections: def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None): super().__init__() d = pred[0].device # device - gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims] # normalizations + gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) + for im in ims] # normalizations self.ims = ims # list of images as numpy arrays self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) self.names = names # class names @@ -672,22 +824,28 @@ class Detections: self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized self.n = len(self.pred) # number of images (batch size) self.t = tuple(x.t / self.n * 1E3 for x in times) # timestamps (ms) - self.s = shape # inference BCHW shape + self.s = tuple(shape) # inference BCHW shape - def 
display(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')): - crops = [] + def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')): + s, crops = '', [] for i, (im, pred) in enumerate(zip(self.ims, self.pred)): - s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string + # string + s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' if pred.shape[0]: for c in pred[:, -1].unique(): n = (pred[:, -1] == c).sum() # detections per class - s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string + # add to string + s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " + s = s.rstrip(', ') if show or save or render or crop: annotator = Annotator(im, example=str(self.names)) - for *box, conf, cls in reversed(pred): # xyxy, confidence, class + # xyxy, confidence, class + for *box, conf, cls in reversed(pred): label = f'{self.names[int(cls)]} {conf:.2f}' if crop: - file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None + file = save_dir / 'crops' / \ + self.names[int(cls)] / \ + self.files[i] if save else None crops.append({ 'box': box, 'conf': conf, @@ -695,45 +853,48 @@ class Detections: 'label': label, 'im': save_one_box(box, im, file=file, save=save)}) else: # all others - annotator.box_label(box, label if labels else '', color=colors(cls)) + annotator.box_label( + box, label if labels else '', color=colors(cls)) im = annotator.im else: s += '(no detections)' - im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np - if pprint: - print(s.rstrip(', ')) + im = Image.fromarray(im.astype(np.uint8)) if isinstance( + im, np.ndarray) else im # from np if show: im.show(self.files[i]) # show if save: f = self.files[i] im.save(save_dir / f) # save if i == self.n - 1: - LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}") + LOGGER.info( + f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}") if render: self.ims[i] = np.asarray(im) + if pprint: + s = s.lstrip('\n') + return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t if crop: if save: LOGGER.info(f'Saved results to {save_dir}\n') return crops - def print(self): - self.display(pprint=True) # print results - print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t) - def show(self, labels=True): - self.display(show=True, labels=labels) # show results + self._run(show=True, labels=labels) # show results def save(self, labels=True, save_dir='runs/detect/exp'): - save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir - self.display(save=True, labels=labels, save_dir=save_dir) # save results + save_dir = increment_path( + save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir + self._run(save=True, labels=labels, save_dir=save_dir) # save results def crop(self, save=True, save_dir='runs/detect/exp'): - save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None - return self.display(crop=True, save=save, save_dir=save_dir) # crop results + save_dir = increment_path( + save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None + # crop results + return self._run(crop=True, save=save, save_dir=save_dir) def render(self, labels=True): - 
self.display(render=True, labels=labels) # render results + self._run(render=True, labels=labels) # render results return self.ims def pandas(self): @@ -742,30 +903,51 @@ class Detections: ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]): - a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update + a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] + for x in x.tolist()] for x in getattr(self, k)] # update setattr(new, k, [pd.DataFrame(x, columns=c) for x in a]) return new def tolist(self): # return a list of Detections objects, i.e. 'for result in results.tolist():' r = range(self.n) # iterable - x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r] + x = [Detections([self.ims[i]], [self.pred[i]], [ + self.files[i]], self.times, self.names, self.s) for i in r] # for d in x: # for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']: # setattr(d, k, getattr(d, k)[0]) # pop out of list return x - def __len__(self): - return self.n # override len(results) + def print(self): + LOGGER.info(self.__str__()) - def __str__(self): - self.print() # override print(results) - return '' + def __len__(self): # override len(results) + return self.n + + def __str__(self): # override print(results) + return self._run(pprint=True) # print results + + def __repr__(self): + return f'YOLOv5 {self.__class__} instance\n' + self.__str__() + + +class Proto(nn.Module): + # YOLOv5 mask Proto module for segmentation models + def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks + super().__init__() + self.cv1 = Conv(c1, c_, k=3) + self.upsample = nn.Upsample(scale_factor=2, mode='nearest') + self.cv2 = Conv(c_, c_, k=3) + self.cv3 = Conv(c_, c2) + + def forward(self, x): + return self.cv3(self.cv2(self.upsample(self.cv1(x)))) class Classify(nn.Module): - # Classification head, i.e. x(b,c1,20,20) to x(b,c2) - def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups + # YOLOv5 classification head, i.e. 
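
A short sketch of the Detections convenience API touched above: print() now logs through LOGGER, and __str__/__repr__ reuse the internal _run(pprint=True) path, so the summary string can be captured as well as printed. The hub call is assumed, not part of the patch.

import torch

model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
results = model('https://ultralytics.com/images/bus.jpg')
print(results)                      # per-image summary plus the speed line
df = results.pandas().xyxy[0]       # columns: xmin, ymin, xmax, ymax, confidence, class, name
crops = results.crop(save=False)    # list of dicts: 'box', 'conf', 'cls', 'label', 'im'
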
x(b,c1,20,20) to x(b,c2) + # ch_in, ch_out, kernel, stride, padding, groups + def __init__(self, c1, c2, k=1, s=1, p=None, g=1): super().__init__() c_ = 1280 # efficientnet_b0 size self.conv = Conv(c1, c_, k, s, autopad(k, p), g) diff --git a/models/hub/yolov5s-LeakyReLU.yaml b/models/hub/yolov5s-LeakyReLU.yaml new file mode 100644 index 0000000..3a179bf --- /dev/null +++ b/models/hub/yolov5s-LeakyReLU.yaml @@ -0,0 +1,49 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model +depth_multiple: 0.33 # model depth multiple +width_multiple: 0.50 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] diff --git a/models/segment/yolov5l-seg.yaml b/models/segment/yolov5l-seg.yaml new file mode 100644 index 0000000..4782de1 --- /dev/null +++ b/models/segment/yolov5l-seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] diff --git a/models/segment/yolov5m-seg.yaml b/models/segment/yolov5m-seg.yaml new 
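
The new yolov5s-LeakyReLU.yaml relies on the 'activation' key handled by parse_model() later in this patch; below is a rough sketch of that mechanism, assuming the vendored models.common.Conv exposes a shared default_act attribute as the parse_model hunk implies.

import torch.nn as nn
from models.common import Conv  # assumed vendored module

act = 'nn.LeakyReLU(0.1)'        # value of the 'activation:' YAML key
Conv.default_act = eval(act)     # what parse_model() does when the key is present
print(Conv(3, 16, 3, 2).act)     # LeakyReLU(negative_slope=0.1) used by every Conv built afterwards
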
file mode 100644 index 0000000..f73d199 --- /dev/null +++ b/models/segment/yolov5m-seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 0.67 # model depth multiple +width_multiple: 0.75 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] \ No newline at end of file diff --git a/models/segment/yolov5n-seg.yaml b/models/segment/yolov5n-seg.yaml new file mode 100644 index 0000000..c28225a --- /dev/null +++ b/models/segment/yolov5n-seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 0.33 # model depth multiple +width_multiple: 0.25 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] diff --git a/models/segment/yolov5s-seg.yaml b/models/segment/yolov5s-seg.yaml new file mode 100644 index 0000000..7cbdb36 --- /dev/null +++ b/models/segment/yolov5s-seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 0.33 # model depth multiple +width_multiple: 0.5 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - 
[116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] \ No newline at end of file diff --git a/models/segment/yolov5x-seg.yaml b/models/segment/yolov5x-seg.yaml new file mode 100644 index 0000000..5d0c452 --- /dev/null +++ b/models/segment/yolov5x-seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 1.33 # model depth multiple +width_multiple: 1.25 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] diff --git a/models/tf.py b/models/tf.py index ecb0d4d..1446d88 100644 --- a/models/tf.py +++ b/models/tf.py @@ -30,7 +30,7 @@ from tensorflow import keras from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv, DWConvTranspose2d, Focus, autopad) from models.experimental import MixConv2d, attempt_load -from models.yolo import Detect +from models.yolo import Detect, Segment from utils.activations import SiLU from utils.general import LOGGER, make_divisible, print_args @@ -299,18 +299,18 @@ class TFDetect(keras.layers.Layer): x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no]) if not self.training: # inference - y = tf.sigmoid(x[i]) + y = x[i] grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5 anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4 - xy 
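
Hedged sketch of loading one of the new segmentation configs through the SegmentationModel class added in models/yolo.py later in this patch; the path is relative to the repo root and the shapes are approximate values for a 640x640 input.

import torch
from models.yolo import SegmentationModel

model = SegmentationModel(cfg='models/segment/yolov5s-seg.yaml', ch=3, nc=80).eval()
with torch.no_grad():
    pred, proto = model(torch.zeros(1, 3, 640, 640))[:2]
print(pred.shape, proto.shape)  # roughly (1, 25200, 117) and (1, 32, 160, 160); 117 = 5 + 80 classes + 32 mask coefficients
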
= (y[..., 0:2] * 2 + grid) * self.stride[i] # xy - wh = y[..., 2:4] ** 2 * anchor_grid + xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i] # xy + wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid # Normalize xywh to 0-1 to reduce calibration error xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) - y = tf.concat([xy, wh, y[..., 4:]], -1) + y = tf.concat([xy, wh, tf.sigmoid(y[..., 4:5 + self.nc]), y[..., 5 + self.nc:]], -1) z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no])) - return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1), x) + return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),) @staticmethod def _make_grid(nx=20, ny=20): @@ -320,6 +320,37 @@ class TFDetect(keras.layers.Layer): return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32) +class TFSegment(TFDetect): + # YOLOv5 Segment head for segmentation models + def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None): + super().__init__(nc, anchors, ch, imgsz, w) + self.nm = nm # number of masks + self.npr = npr # number of protos + self.no = 5 + nc + self.nm # number of outputs per anchor + self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv + self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos + self.detect = TFDetect.call + + def call(self, x): + p = self.proto(x[0]) + p = tf.transpose(p, [0, 3, 1, 2]) # from shape(1,160,160,32) to shape(1,32,160,160) + x = self.detect(self, x) + return (x, p) if self.training else (x[0], p) + + +class TFProto(keras.layers.Layer): + + def __init__(self, c1, c_=256, c2=32, w=None): + super().__init__() + self.cv1 = TFConv(c1, c_, k=3, w=w.cv1) + self.upsample = TFUpsample(None, scale_factor=2, mode='nearest') + self.cv2 = TFConv(c_, c_, k=3, w=w.cv2) + self.cv3 = TFConv(c_, c2, w=w.cv3) + + def call(self, inputs): + return self.cv3(self.cv2(self.upsample(self.cv1(inputs)))) + + class TFUpsample(keras.layers.Layer): # TF version of torch.nn.Upsample() def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w' @@ -377,10 +408,12 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) args = [ch[f]] elif m is Concat: c2 = sum(ch[-1 if x == -1 else x + 1] for x in f) - elif m is Detect: + elif m in [Detect, Segment]: args.append([ch[x + 1] for x in f]) if isinstance(args[1], int): # number of anchors args[1] = [list(range(args[1] * 2))] * len(f) + if m is Segment: + args[3] = make_divisible(args[3] * gw, 8) args.append(imgsz) else: c2 = ch[f] @@ -452,9 +485,9 @@ class TFModel: iou_thres, conf_thres, clip_boxes=False) - return nms, x[1] - return x[0] # output only first tensor [1,6300,85] = [xywh, conf, class0, class1, ...] - # x = x[0][0] # [x(1,6300,85), ...] to x(6300,85) + return (nms,) + return x # output [1,6300,85] = [xywh, conf, class0, class1, ...] + # x = x[0] # [x(1,6300,85), ...] 
to x(6300,85)
         #     xywh = x[..., :4]  # x(6300,4) boxes
         #     conf = x[..., 4:5]  # x(6300,1) confidences
         #     cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1))  # x(6300,1)  classes
diff --git a/models/yolo.py b/models/yolo.py
index fa05fcf..ed21c06 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -36,6 +36,7 @@ except ImportError:
 
 
 class Detect(nn.Module):
+    # YOLOv5 Detect head for detection models
     stride = None  # strides computed during build
     dynamic = False  # force grid reconstruction
     export = False  # export mode
@@ -46,8 +47,8 @@ class Detect(nn.Module):
         self.no = nc + 5  # number of outputs per anchor
         self.nl = len(anchors)  # number of detection layers
         self.na = len(anchors[0]) // 2  # number of anchors
-        self.grid = [torch.empty(1)] * self.nl  # init grid
-        self.anchor_grid = [torch.empty(1)] * self.nl  # init anchor grid
+        self.grid = [torch.empty(0) for _ in range(self.nl)]  # init grid
+        self.anchor_grid = [torch.empty(0) for _ in range(self.nl)]  # init anchor grid
         self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2))  # shape(nl,na,2)
         self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
         self.inplace = inplace  # use inplace ops (e.g. slice assignment)
@@ -63,16 +64,17 @@ class Detect(nn.Module):
             if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                 self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
 
-            y = x[i].sigmoid()
-            if self.inplace:
-                y[..., 0:2] = (y[..., 0:2] * 2 + self.grid[i]) * self.stride[i]  # xy
-                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
-            else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
-                xy, wh, conf = y.split((2, 2, self.nc + 1), 4)  # y.tensor_split((2, 4, 5), 4)  # torch 1.8.0
+            if isinstance(self, Segment):  # (boxes + masks)
+                xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
+                xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
+                wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
+                y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
+            else:  # Detect (boxes only)
+                xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                 xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                 wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                 y = torch.cat((xy, wh, conf), 4)
-            z.append(y.view(bs, -1, self.no))
+            z.append(y.view(bs, self.na * nx * ny, self.no))
 
         return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
 
@@ -87,6 +89,23 @@ class Detect(nn.Module):
         return grid, anchor_grid
 
 
+class Segment(Detect):
+    # YOLOv5 Segment head for segmentation models
+    def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
+        super().__init__(nc, anchors, ch, inplace)
+        self.nm = nm  # number of masks
+        self.npr = npr  # number of protos
+        self.no = 5 + nc + self.nm  # number of outputs per anchor
+        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
+        self.proto = Proto(ch[0], self.npr, self.nm)  # protos
+        self.detect = Detect.forward
+
+    def forward(self, x):
+        p = self.proto(x[0])
+        x = self.detect(self, x)
+        return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
+
+
 class BaseModel(nn.Module):
     # YOLOv5 base model
     def forward(self, x, profile=False, visualize=False):
@@ -135,7 +154,7 @@ class BaseModel(nn.Module):
         # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
         self = super()._apply(fn)
         m = self.model[-1]  # Detect()
-        if isinstance(m, Detect):
+        if
isinstance(m, (Detect, Segment)): m.stride = fn(m.stride) m.grid = list(map(fn, m.grid)) if isinstance(m.anchor_grid, list): @@ -169,11 +188,12 @@ class DetectionModel(BaseModel): # Build strides, anchors m = self.model[-1] # Detect() - if isinstance(m, Detect): + if isinstance(m, (Detect, Segment)): s = 256 # 2x min stride m.inplace = self.inplace - m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.empty(1, ch, s, s))]) # forward - check_anchor_order(m) # must be in pixel-space (not grid-space) + forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x) + m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward + check_anchor_order(m) m.anchors /= m.stride.view(-1, 1, 1) self.stride = m.stride self._initialize_biases() # only run once @@ -235,15 +255,21 @@ class DetectionModel(BaseModel): # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. m = self.model[-1] # Detect() module for mi, s in zip(m.m, m.stride): # from - b = mi.bias.view(m.na, -1).detach() # conv.bias(255) to (3,85) - b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # cls + b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) + b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility +class SegmentationModel(DetectionModel): + # YOLOv5 segmentation model + def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None): + super().__init__(cfg, ch, nc, anchors) + + class ClassificationModel(BaseModel): # YOLOv5 classification model def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index @@ -271,8 +297,12 @@ class ClassificationModel(BaseModel): def parse_model(d, ch): # model_dict, input_channels(3) + # Parse a YOLOv5 model.yaml dictionary LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}") - anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] + anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation') + if act: + Conv.default_act = eval(act) # redefine default activation, i.e. 
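
A small numeric sketch of the box decode used by the refactored Detect.forward above, xy = (sigmoid(t) * 2 + grid) * stride and wh = (sigmoid(t) * 2) ** 2 * anchor; the values below are illustrative only.

import torch

t = torch.zeros(4)                         # raw network outputs for one anchor
grid = torch.tensor([9.5, 19.5])           # cell index minus the 0.5 offset applied in _make_grid()
stride, anchor = 8.0, torch.tensor([16., 30.])
xy = (t[:2].sigmoid() * 2 + grid) * stride     # tensor([ 84., 164.]) centre in pixels
wh = (t[2:].sigmoid() * 2) ** 2 * anchor       # tensor([16., 30.]) width/height in pixels
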
Conv.default_act = nn.SiLU() + LOGGER.info(f"{colorstr('activation:')} {act}") # print na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors no = na * (nc + 5) # number of outputs = anchors * (classes + 5) @@ -284,24 +314,28 @@ def parse_model(d, ch): # model_dict, input_channels(3) args[j] = eval(a) if isinstance(a, str) else a # eval strings n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain - if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, - BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x): + if m in { + Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, + BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}: c1, c2 = ch[f], args[0] if c2 != no: # if not output c2 = make_divisible(c2 * gw, 8) args = [c1, c2, *args[1:]] - if m in [BottleneckCSP, C3, C3TR, C3Ghost, C3x]: + if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}: args.insert(2, n) # number of repeats n = 1 elif m is nn.BatchNorm2d: args = [ch[f]] elif m is Concat: c2 = sum(ch[x] for x in f) - elif m is Detect: + # TODO: channel, gw, gd + elif m in {Detect, Segment}: args.append([ch[x] for x in f]) if isinstance(args[1], int): # number of anchors args[1] = [list(range(args[1] * 2))] * len(f) + if m is Segment: + args[3] = make_divisible(args[3] * gw, 8) elif m is Contract: c2 = ch[f] * args[0] ** 2 elif m is Expand: diff --git a/utils/__init__.py b/utils/__init__.py index c534e39..24746d2 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -4,9 +4,15 @@ utils/initialization """ import contextlib +import platform import threading +def emojis(str=''): + # Return platform-dependent emoji-safe version of string + return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str + + class TryExcept(contextlib.ContextDecorator): # YOLOv5 TryExcept class. 
Usage: @TryExcept() decorator or 'with TryExcept():' context manager def __init__(self, msg=''): @@ -17,7 +23,7 @@ class TryExcept(contextlib.ContextDecorator): def __exit__(self, exc_type, value, traceback): if value: - print(f'{self.msg}{value}') + print(emojis(f'{self.msg}{value}')) return True @@ -38,7 +44,7 @@ def notebook_init(verbose=True): import os import shutil - from utils.general import check_font, check_requirements, emojis, is_colab + from utils.general import check_font, check_requirements, is_colab from utils.torch_utils import select_device # imports check_requirements(('psutil', 'IPython')) diff --git a/utils/__pycache__/__init__.cpython-310.pyc b/utils/__pycache__/__init__.cpython-310.pyc index 01c6a2c..ba6c3d5 100644 Binary files a/utils/__pycache__/__init__.cpython-310.pyc and b/utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/utils/__pycache__/__init__.cpython-39.pyc b/utils/__pycache__/__init__.cpython-39.pyc index c224a73..ec5d71f 100644 Binary files a/utils/__pycache__/__init__.cpython-39.pyc and b/utils/__pycache__/__init__.cpython-39.pyc differ diff --git a/utils/__pycache__/augmentations.cpython-310.pyc b/utils/__pycache__/augmentations.cpython-310.pyc index 446a465..eb6e497 100644 Binary files a/utils/__pycache__/augmentations.cpython-310.pyc and b/utils/__pycache__/augmentations.cpython-310.pyc differ diff --git a/utils/__pycache__/augmentations.cpython-39.pyc b/utils/__pycache__/augmentations.cpython-39.pyc index 84002b6..8ebec04 100644 Binary files a/utils/__pycache__/augmentations.cpython-39.pyc and b/utils/__pycache__/augmentations.cpython-39.pyc differ diff --git a/utils/__pycache__/autoanchor.cpython-310.pyc b/utils/__pycache__/autoanchor.cpython-310.pyc index d30b81c..d606b9e 100644 Binary files a/utils/__pycache__/autoanchor.cpython-310.pyc and b/utils/__pycache__/autoanchor.cpython-310.pyc differ diff --git a/utils/__pycache__/autoanchor.cpython-39.pyc b/utils/__pycache__/autoanchor.cpython-39.pyc index 3cd03c7..4c1a947 100644 Binary files a/utils/__pycache__/autoanchor.cpython-39.pyc and b/utils/__pycache__/autoanchor.cpython-39.pyc differ diff --git a/utils/__pycache__/dataloaders.cpython-310.pyc b/utils/__pycache__/dataloaders.cpython-310.pyc index da3ec19..9285b99 100644 Binary files a/utils/__pycache__/dataloaders.cpython-310.pyc and b/utils/__pycache__/dataloaders.cpython-310.pyc differ diff --git a/utils/__pycache__/dataloaders.cpython-39.pyc b/utils/__pycache__/dataloaders.cpython-39.pyc index af0308a..bb28764 100644 Binary files a/utils/__pycache__/dataloaders.cpython-39.pyc and b/utils/__pycache__/dataloaders.cpython-39.pyc differ diff --git a/utils/__pycache__/downloads.cpython-310.pyc b/utils/__pycache__/downloads.cpython-310.pyc index 786f4d9..ce5ff93 100644 Binary files a/utils/__pycache__/downloads.cpython-310.pyc and b/utils/__pycache__/downloads.cpython-310.pyc differ diff --git a/utils/__pycache__/downloads.cpython-39.pyc b/utils/__pycache__/downloads.cpython-39.pyc index 6f887e4..699f0ad 100644 Binary files a/utils/__pycache__/downloads.cpython-39.pyc and b/utils/__pycache__/downloads.cpython-39.pyc differ diff --git a/utils/__pycache__/general.cpython-310.pyc b/utils/__pycache__/general.cpython-310.pyc index 5d43584..010e088 100644 Binary files a/utils/__pycache__/general.cpython-310.pyc and b/utils/__pycache__/general.cpython-310.pyc differ diff --git a/utils/__pycache__/general.cpython-39.pyc b/utils/__pycache__/general.cpython-39.pyc index 6371f70..f096d60 100644 Binary files 
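
Quick usage sketch for the utilities consolidated above: emojis() now lives in utils/__init__.py and TryExcept prints emoji-safe messages instead of raising. The decorator form is shown; the message text is illustrative.

from utils import TryExcept, emojis

@TryExcept('anchor check failed: ')
def risky():
    raise ValueError('bad anchors ⚠️')

risky()                   # exception suppressed; prints "anchor check failed: bad anchors ..." (emoji stripped on Windows)
print(emojis('done ✅'))  # unchanged on Linux/macOS, ASCII-only on Windows consoles
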
a/utils/__pycache__/general.cpython-39.pyc and b/utils/__pycache__/general.cpython-39.pyc differ diff --git a/utils/__pycache__/metrics.cpython-310.pyc b/utils/__pycache__/metrics.cpython-310.pyc index cb615cf..c24832e 100644 Binary files a/utils/__pycache__/metrics.cpython-310.pyc and b/utils/__pycache__/metrics.cpython-310.pyc differ diff --git a/utils/__pycache__/metrics.cpython-39.pyc b/utils/__pycache__/metrics.cpython-39.pyc index 68ae29c..9d80778 100644 Binary files a/utils/__pycache__/metrics.cpython-39.pyc and b/utils/__pycache__/metrics.cpython-39.pyc differ diff --git a/utils/__pycache__/plots.cpython-310.pyc b/utils/__pycache__/plots.cpython-310.pyc index 0e47a8e..b7b7670 100644 Binary files a/utils/__pycache__/plots.cpython-310.pyc and b/utils/__pycache__/plots.cpython-310.pyc differ diff --git a/utils/__pycache__/plots.cpython-39.pyc b/utils/__pycache__/plots.cpython-39.pyc index 4316f9d..77cf6c1 100644 Binary files a/utils/__pycache__/plots.cpython-39.pyc and b/utils/__pycache__/plots.cpython-39.pyc differ diff --git a/utils/__pycache__/torch_utils.cpython-310.pyc b/utils/__pycache__/torch_utils.cpython-310.pyc index d110637..c9e1ca1 100644 Binary files a/utils/__pycache__/torch_utils.cpython-310.pyc and b/utils/__pycache__/torch_utils.cpython-310.pyc differ diff --git a/utils/__pycache__/torch_utils.cpython-39.pyc b/utils/__pycache__/torch_utils.cpython-39.pyc index 6a688b2..3239699 100644 Binary files a/utils/__pycache__/torch_utils.cpython-39.pyc and b/utils/__pycache__/torch_utils.cpython-39.pyc differ diff --git a/utils/augmentations.py b/utils/augmentations.py index a558735..7c8e0bc 100644 --- a/utils/augmentations.py +++ b/utils/augmentations.py @@ -12,7 +12,7 @@ import torch import torchvision.transforms as T import torchvision.transforms.functional as TF -from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box +from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy from utils.metrics import bbox_ioa IMAGENET_MEAN = 0.485, 0.456, 0.406 # RGB mean @@ -21,7 +21,7 @@ IMAGENET_STD = 0.229, 0.224, 0.225 # RGB standard deviation class Albumentations: # YOLOv5 Albumentations class (optional, only used if package is installed) - def __init__(self): + def __init__(self, size=640): self.transform = None prefix = colorstr('albumentations: ') try: @@ -29,6 +29,7 @@ class Albumentations: check_version(A.__version__, '1.0.3', hard=True) # version requirement T = [ + A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0), A.Blur(p=0.01), A.MedianBlur(p=0.01), A.ToGray(p=0.01), @@ -281,7 +282,7 @@ def cutout(im, labels, p=0.5): # return unobscured labels if len(labels) and s > 0.03: box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) - ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area + ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h)) # intersection over area labels = labels[ioa < 0.60] # remove >60% obscured labels return labels @@ -303,15 +304,17 @@ def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates -def classify_albumentations(augment=True, - size=224, - scale=(0.08, 1.0), - hflip=0.5, - vflip=0.0, - jitter=0.4, - mean=IMAGENET_MEAN, - std=IMAGENET_STD, - auto_aug=False): +def classify_albumentations( + augment=True, + size=224, + scale=(0.08, 1.0), + ratio=(0.75, 1.0 / 0.75), # 0.75, 1.33 + hflip=0.5, + 
vflip=0.0, + jitter=0.4, + mean=IMAGENET_MEAN, + std=IMAGENET_STD, + auto_aug=False): # YOLOv5 classification Albumentations (optional, only used if package is installed) prefix = colorstr('albumentations: ') try: @@ -319,7 +322,7 @@ def classify_albumentations(augment=True, from albumentations.pytorch import ToTensorV2 check_version(A.__version__, '1.0.3', hard=True) # version requirement if augment: # Resize and crop - T = [A.RandomResizedCrop(height=size, width=size, scale=scale)] + T = [A.RandomResizedCrop(height=size, width=size, scale=scale, ratio=ratio)] if auto_aug: # TODO: implement AugMix, AutoAug & RandAug in albumentation LOGGER.info(f'{prefix}auto augmentations are currently not supported') @@ -338,7 +341,7 @@ def classify_albumentations(augment=True, return A.Compose(T) except ImportError: # package not installed, skip - pass + LOGGER.warning(f'{prefix}⚠️ not found, install with `pip install albumentations` (recommended)') except Exception as e: LOGGER.info(f'{prefix}{e}') diff --git a/utils/autoanchor.py b/utils/autoanchor.py index 0b49ab3..7e7e998 100644 --- a/utils/autoanchor.py +++ b/utils/autoanchor.py @@ -122,7 +122,7 @@ def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen # Filter i = (wh0 < 3.0).any(1).sum() if i: - LOGGER.info(f'{PREFIX}WARNING: Extremely small objects found: {i} of {len(wh0)} labels are < 3 pixels in size') + LOGGER.info(f'{PREFIX}WARNING ⚠️ Extremely small objects found: {i} of {len(wh0)} labels are <3 pixels in size') wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32) # filter > 2 pixels # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 @@ -134,7 +134,7 @@ def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen k = kmeans(wh / s, n, iter=30)[0] * s # points assert n == len(k) # kmeans may return fewer points than requested if wh is insufficient or too similar except Exception: - LOGGER.warning(f'{PREFIX}WARNING: switching strategies from kmeans to random init') + LOGGER.warning(f'{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init') k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0)) k = print_results(k, verbose=False) diff --git a/utils/autobatch.py b/utils/autobatch.py index 641b055..bdeb91c 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -19,7 +19,7 @@ def check_train_batch_size(model, imgsz=640, amp=True): def autobatch(model, imgsz=640, fraction=0.8, batch_size=16): - # Automatically estimate best batch size to use `fraction` of available CUDA memory + # Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory # Usage: # import torch # from utils.autobatch import autobatch @@ -33,6 +33,9 @@ def autobatch(model, imgsz=640, fraction=0.8, batch_size=16): if device.type == 'cpu': LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}') return batch_size + if torch.backends.cudnn.benchmark: + LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}') + return batch_size # Inspect CUDA memory gb = 1 << 30 # bytes to GiB (1024 ** 3) @@ -62,8 +65,8 @@ def autobatch(model, imgsz=640, fraction=0.8, batch_size=16): b = batch_sizes[max(i - 1, 0)] # select prior safe point if b < 1 or b > 1024: # b outside of safe range b = batch_size - LOGGER.warning(f'{prefix}WARNING: ⚠️ CUDA anomaly detected, recommend restart environment and retry command.') + 
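
Hedged sketch of the two augmentation entry points changed above; both depend on the optional albumentations package and degrade gracefully (a warning and a no-op transform) when it is missing.

from utils.augmentations import Albumentations, classify_albumentations

aug = Albumentations(size=640)   # detection pipeline now receives the train image size (RandomResizedCrop added with p=0.0)
tfm = classify_albumentations(augment=True, size=224, scale=(0.08, 1.0), ratio=(0.75, 1.33))
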
LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.') - fraction = np.polyval(p, b) / t # actual fraction predicted + fraction = (np.polyval(p, b) + r + a) / t # actual fraction predicted LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅') return b diff --git a/utils/dataloaders.py b/utils/dataloaders.py index c1ad1f1..6cd1da6 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -40,6 +40,7 @@ IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp', VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes BAR_FORMAT = '{l_bar}{bar:10}{r_bar}{bar:-10b}' # tqdm bar format LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders # Get orientation exif tag @@ -116,7 +117,7 @@ def create_dataloader(path, prefix='', shuffle=False): if rect and shuffle: - LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False') + LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False') shuffle = False with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP dataset = LoadImagesAndLabels( @@ -139,7 +140,7 @@ def create_dataloader(path, sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates generator = torch.Generator() - generator.manual_seed(0) + generator.manual_seed(6148914691236517205 + RANK) return loader(dataset, batch_size=batch_size, shuffle=shuffle and sampler is None, @@ -185,6 +186,55 @@ class _RepeatSampler: yield from iter(self.sampler) +class LoadScreenshots: + # YOLOv5 screenshot dataloader, i.e. 
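
Usage sketch for AutoBatch with the new cudnn.benchmark guard; the hub call and device handling are illustrative, not part of the patch.

import torch
from utils.autobatch import autobatch

model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
batch_size = autobatch(model.to(device), imgsz=640, fraction=0.8)  # returns the default 16 if CUDA is absent or cudnn.benchmark is True
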
`python detect.py --source "screen 0 100 100 512 256"` + def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None): + # source = [screen_number left top width height] (pixels) + check_requirements('mss') + import mss + + source, *params = source.split() + self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0 + if len(params) == 1: + self.screen = int(params[0]) + elif len(params) == 4: + left, top, width, height = (int(x) for x in params) + elif len(params) == 5: + self.screen, left, top, width, height = (int(x) for x in params) + self.img_size = img_size + self.stride = stride + self.transforms = transforms + self.auto = auto + self.mode = 'stream' + self.frame = 0 + self.sct = mss.mss() + + # Parse monitor shape + monitor = self.sct.monitors[self.screen] + self.top = monitor["top"] if top is None else (monitor["top"] + top) + self.left = monitor["left"] if left is None else (monitor["left"] + left) + self.width = width or monitor["width"] + self.height = height or monitor["height"] + self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height} + + def __iter__(self): + return self + + def __next__(self): + # mss screen capture: get raw pixels from the screen as np array + im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR + s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: " + + if self.transforms: + im = self.transforms(im0) # transforms + else: + im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize + im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + im = np.ascontiguousarray(im) # contiguous + self.frame += 1 + return str(self.screen), im, im0, None, s # screen, img, original img, im0s, s + + class LoadImages: # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4` def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1): @@ -232,8 +282,9 @@ class LoadImages: if self.video_flag[self.count]: # Read video self.mode = 'video' - ret_val, im0 = self.cap.read() - self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.vid_stride * (self.frame + 1)) # read at vid_stride + for _ in range(self.vid_stride): + self.cap.grab() + ret_val, im0 = self.cap.retrieve() while not ret_val: self.count += 1 self.cap.release() @@ -328,7 +379,7 @@ class LoadStreams: self.auto = auto and self.rect self.transforms = transforms # optional if not self.rect: - LOGGER.warning('WARNING: Stream shapes differ. For optimal performance supply similarly-shaped streams.') + LOGGER.warning('WARNING ⚠️ Stream shapes differ. 
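
Usage sketch for the new screenshot dataloader; the source string is the screen number followed by optional left/top/width/height in pixels, and the class itself checks for the required mss package.

from utils.dataloaders import LoadScreenshots

dataset = LoadScreenshots('screen 0 100 100 512 256', img_size=640, stride=32, auto=True)
for path, im, im0, _, s in dataset:   # endless stream of padded CHW RGB frames
    print(s, im.shape)
    break
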
For optimal performance supply similarly-shaped streams.') def update(self, i, cap, stream): # Read stream `i` frames in daemon thread @@ -341,7 +392,7 @@ class LoadStreams: if success: self.imgs[i] = im else: - LOGGER.warning('WARNING: Video stream unresponsive, please check your IP camera connection.') + LOGGER.warning('WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.') self.imgs[i] = np.zeros_like(self.imgs[i]) cap.open(stream) # re-open stream if signal was lost time.sleep(0.0) # wait time @@ -403,7 +454,7 @@ class LoadImagesAndLabels(Dataset): self.mosaic_border = [-img_size // 2, -img_size // 2] self.stride = stride self.path = path - self.albumentations = Albumentations() if augment else None + self.albumentations = Albumentations(size=img_size) if augment else None try: f = [] # image files @@ -455,7 +506,7 @@ class LoadImagesAndLabels(Dataset): self.im_files = list(cache.keys()) # update self.label_files = img2label_paths(cache.keys()) # update n = len(shapes) # number of images - bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index + bi = np.floor(np.arange(n) / batch_size).astype(int) # batch index nb = bi[-1] + 1 # number of batches self.batch = bi # batch index of image self.n = n @@ -484,6 +535,7 @@ class LoadImagesAndLabels(Dataset): self.im_files = [self.im_files[i] for i in irect] self.label_files = [self.label_files[i] for i in irect] self.labels = [self.labels[i] for i in irect] + self.segments = [self.segments[i] for i in irect] self.shapes = s[irect] # wh ar = ar[irect] @@ -497,7 +549,7 @@ class LoadImagesAndLabels(Dataset): elif mini > 1: shapes[i] = [1, 1 / mini] - self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride + self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride # Cache images into RAM/disk for faster training (WARNING: large datasets may exceed system resources) self.ims = [None] * n @@ -542,7 +594,7 @@ class LoadImagesAndLabels(Dataset): if msgs: LOGGER.info('\n'.join(msgs)) if nf == 0: - LOGGER.warning(f'{prefix}WARNING: No labels found in {path}. {HELP_URL}') + LOGGER.warning(f'{prefix}WARNING ⚠️ No labels found in {path}. 
{HELP_URL}') x['hash'] = get_hash(self.label_files + self.im_files) x['results'] = nf, nm, ne, nc, len(self.im_files) x['msgs'] = msgs # warnings @@ -552,7 +604,7 @@ class LoadImagesAndLabels(Dataset): path.with_suffix('.cache.npy').rename(path) # remove .npy suffix LOGGER.info(f'{prefix}New cache created: {path}') except Exception as e: - LOGGER.warning(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}') # not writeable + LOGGER.warning(f'{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable: {e}') # not writeable return x def __len__(self): @@ -867,7 +919,7 @@ def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.dataloaders impo b = x[1:] * [w, h, w, h] # box # b[2:] = b[2:].max() # rectangle to square b[2:] = b[2:] * 1.2 + 3 # pad - b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) + b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int) b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image b[[1, 3]] = np.clip(b[[1, 3]], 0, h) @@ -916,7 +968,7 @@ def verify_image_label(args): f.seek(-2, 2) if f.read() != b'\xff\xd9': # corrupt JPEG ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100) - msg = f'{prefix}WARNING: {im_file}: corrupt JPEG restored and saved' + msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved' # verify labels if os.path.isfile(lb_file): @@ -938,7 +990,7 @@ def verify_image_label(args): lb = lb[i] # remove duplicates if segments: segments = [segments[x] for x in i] - msg = f'{prefix}WARNING: {im_file}: {nl - len(i)} duplicate labels removed' + msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed' else: ne = 1 # label empty lb = np.zeros((0, 5), dtype=np.float32) @@ -948,7 +1000,7 @@ def verify_image_label(args): return im_file, lb, shape, segments, nm, nf, ne, nc, msg except Exception as e: nc = 1 - msg = f'{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}' + msg = f'{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}' return [None, None, None, None, nm, nf, ne, nc, msg] @@ -1011,7 +1063,7 @@ class HUBDatasetStats(): im = im.resize((int(im.width * r), int(im.height * r))) im.save(f_new, 'JPEG', quality=50, optimize=True) # save except Exception as e: # use OpenCV - print(f'WARNING: HUB ops PIL failure {f}: {e}') + LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}') im = cv2.imread(f) im_height, im_width = im.shape[:2] r = max_dim / max(im_height, im_width) # ratio @@ -1118,7 +1170,7 @@ def create_classification_dataloader(path, nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) generator = torch.Generator() - generator.manual_seed(0) + generator.manual_seed(6148914691236517205 + RANK) return InfiniteDataLoader(dataset, batch_size=batch_size, shuffle=shuffle and sampler is None, diff --git a/utils/docker/Dockerfile b/utils/docker/Dockerfile index 4b9367c..764ee27 100644 --- a/utils/docker/Dockerfile +++ b/utils/docker/Dockerfile @@ -3,7 +3,7 @@ # Image is CUDA-optimized for YOLOv5 single/multi-GPU training and inference # Start FROM NVIDIA PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch -FROM nvcr.io/nvidia/pytorch:22.07-py3 +FROM nvcr.io/nvidia/pytorch:22.08-py3 RUN rm -rf /opt/pytorch # remove 1.2GB dir # Downloads to user config dir diff --git a/utils/downloads.py b/utils/downloads.py index dd2698f..73b8334 100644 --- a/utils/downloads.py +++ 
b/utils/downloads.py @@ -16,13 +16,13 @@ import requests import torch -def is_url(url, check_online=True): - # Check if online file exists +def is_url(url, check=True): + # Check if string is URL and check if URL exists try: url = str(url) result = urllib.parse.urlparse(url) assert all([result.scheme, result.netloc, result.path]) # check if is url - return (urllib.request.urlopen(url).getcode() == 200) if check_online else True # check if exists online + return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online except (AssertionError, urllib.request.HTTPError): return False @@ -87,9 +87,7 @@ def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'): return file # GitHub assets - assets = [ - 'yolov5n.pt', 'yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov5n6.pt', 'yolov5s6.pt', - 'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt'] + assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default try: tag, assets = github_assets(repo, release) except Exception: @@ -107,7 +105,6 @@ def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'): safe_download( file, url=f'https://github.com/{repo}/releases/download/{tag}/{name}', - url2=f'https://storage.googleapis.com/{repo}/{tag}/{name}', # backup url (optional) min_bytes=1E5, error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}') diff --git a/utils/general.py b/utils/general.py index cae63fd..de7871c 100644 --- a/utils/general.py +++ b/utils/general.py @@ -17,6 +17,7 @@ import signal import sys import time import urllib +from copy import deepcopy from datetime import datetime from itertools import repeat from multiprocessing.pool import ThreadPool @@ -33,7 +34,7 @@ import torch import torchvision import yaml -from utils import TryExcept +from utils import TryExcept, emojis from utils.downloads import gsutil_getsize from utils.metrics import box_iou, fitness @@ -42,8 +43,8 @@ ROOT = FILE.parents[1] # YOLOv5 root directory RANK = int(os.getenv('RANK', -1)) # Settings -DATASETS_DIR = ROOT.parent / 'datasets' # YOLOv5 datasets directory NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads +DATASETS_DIR = Path(os.getenv('YOLOv5_DATASETS_DIR', ROOT.parent / 'datasets')) # global datasets directory AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true' # global auto-install mode VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf @@ -222,7 +223,7 @@ def init_seeds(seed=0, deterministic=False): torch.manual_seed(seed) torch.cuda.manual_seed(seed) torch.cuda.manual_seed_all(seed) # for Multi-GPU, exception safe - torch.backends.cudnn.benchmark = True # for faster training + # torch.backends.cudnn.benchmark = True # AutoBatch problem https://github.com/ultralytics/yolov5/issues/9287 if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213 torch.use_deterministic_algorithms(True) torch.backends.cudnn.deterministic = True @@ -247,11 +248,6 @@ def get_latest_run(search_dir='.'): return max(last_list, key=os.path.getctime) if last_list else '' -def emojis(str=''): - # Return platform-dependent emoji-safe version of string - return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str - - def file_age(path=__file__): # Return days since last file update dt = 
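
Hedged sketch of the updated download helpers: is_url() gained a check flag that skips the network round-trip, and the default GitHub asset list is now generated for every model size plus the new -cls/-seg suffixes. attempt_download needs network access if the file is missing locally.

from utils.downloads import is_url, attempt_download

print(is_url('https://ultralytics.com/images/zidane.jpg', check=False))  # True, no HTTP request made
weights = attempt_download('yolov5s-seg.pt')  # resolved against the generated asset list
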
(datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta @@ -332,7 +328,7 @@ def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=Fals # Check version vs. required version current, minimum = (pkg.parse_version(x) for x in (current, minimum)) result = (current == minimum) if pinned else (current >= minimum) # bool - s = f'WARNING: ⚠️ {name}{minimum} is required by YOLOv5, but {name}{current} is currently installed' # string + s = f'WARNING ⚠️ {name}{minimum} is required by YOLOv5, but {name}{current} is currently installed' # string if hard: assert result, emojis(s) # assert min requirements met if verbose and not result: @@ -341,40 +337,38 @@ def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=Fals @TryExcept() -def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=()): - # Check installed dependencies meet YOLOv5 requirements (pass *.txt file or list of packages) +def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=''): + # Check installed dependencies meet YOLOv5 requirements (pass *.txt file or list of packages or single package str) prefix = colorstr('red', 'bold', 'requirements:') check_python() # check python version - if isinstance(requirements, (str, Path)): # requirements.txt file - file = Path(requirements) - assert file.exists(), f"{prefix} {file.resolve()} not found, check failed." + if isinstance(requirements, Path): # requirements.txt file + file = requirements.resolve() + assert file.exists(), f"{prefix} {file} not found, check failed." with file.open() as f: requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude] - else: # list or tuple of packages - requirements = [x for x in requirements if x not in exclude] + elif isinstance(requirements, str): + requirements = [requirements] - n = 0 # number of packages updates - for i, r in enumerate(requirements): + s = '' + n = 0 + for r in requirements: try: pkg.require(r) - except Exception: # DistributionNotFound or VersionConflict if requirements not met - s = f"{prefix} {r} not found and is required by YOLOv5" - if install and AUTOINSTALL: # check environment variable - LOGGER.info(f"{s}, attempting auto-update...") - try: - assert check_online(), f"'pip install {r}' skipped (offline)" - LOGGER.info(check_output(f'pip install "{r}" {cmds[i] if cmds else ""}', shell=True).decode()) - n += 1 - except Exception as e: - LOGGER.warning(f'{prefix} {e}') - else: - LOGGER.info(f'{s}. 
Please install and rerun your command.') + except (pkg.VersionConflict, pkg.DistributionNotFound): # exception if requirements not met + s += f'"{r}" ' + n += 1 - if n: # if packages updated - source = file.resolve() if 'file' in locals() else requirements - s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \ - f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n" - LOGGER.info(s) + if s and install and AUTOINSTALL: # check environment variable + LOGGER.info(f"{prefix} YOLOv5 requirement{'s' * (n > 1)} {s}not found, attempting AutoUpdate...") + try: + assert check_online(), "AutoUpdate skipped (offline)" + LOGGER.info(check_output(f'pip install {s} {cmds}', shell=True).decode()) + source = file if 'file' in locals() else requirements + s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \ + f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n" + LOGGER.info(s) + except Exception as e: + LOGGER.warning(f'{prefix} ❌ {e}') def check_img_size(imgsz, s=32, floor=0): @@ -385,7 +379,7 @@ def check_img_size(imgsz, s=32, floor=0): imgsz = list(imgsz) # convert to list if tuple new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz] if new_size != imgsz: - LOGGER.warning(f'WARNING: --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}') + LOGGER.warning(f'WARNING ⚠️ --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}') return new_size @@ -400,7 +394,7 @@ def check_imshow(): cv2.waitKey(1) return True except Exception as e: - LOGGER.warning(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}') + LOGGER.warning(f'WARNING ⚠️ Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}') return False @@ -470,8 +464,7 @@ def check_dataset(data, autodownload=True): # Read yaml (optional) if isinstance(data, (str, Path)): - with open(data, errors='ignore') as f: - data = yaml.safe_load(f) # dictionary + data = yaml_load(data) # dictionary # Checks for k in 'train', 'val', 'names': @@ -486,7 +479,13 @@ def check_dataset(data, autodownload=True): path = (ROOT / path).resolve() for k in 'train', 'val', 'test': if data.get(k): # prepend path - data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]] + if isinstance(data[k], str): + x = (path / data[k]).resolve() + if not x.exists() and data[k].startswith('../'): + x = (path / data[k][3:]).resolve() + data[k] = str(x) + else: + data[k] = [str((path / x).resolve()) for x in data[k]] # Parse yaml train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download')) @@ -497,13 +496,12 @@ def check_dataset(data, autodownload=True): if not s or not autodownload: raise Exception('Dataset not found ❌') t = time.time() - root = path.parent if 'path' in data else '..' # unzip directory i.e. 
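
Sketch of the rewritten requirements check: a bare package string is now accepted, missing packages are collected and installed in a single pip call when AUTOINSTALL is enabled, and install=False skips the AutoUpdate entirely. The package names below are illustrative.

from utils.general import check_requirements

check_requirements('onnx>=1.12.0')                        # single requirement string
check_requirements(['dxcam', 'pywin32'], install=False)   # list form, no AutoUpdate attempted
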
'../' if s.startswith('http') and s.endswith('.zip'): # URL f = Path(s).name # filename LOGGER.info(f'Downloading {s} to {f}...') torch.hub.download_url_to_file(s, f) - Path(root).mkdir(parents=True, exist_ok=True) # create root - ZipFile(f).extractall(path=root) # unzip + Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True) # create root + ZipFile(f).extractall(path=DATASETS_DIR) # unzip Path(f).unlink() # remove zip r = None # success elif s.startswith('bash '): # bash script @@ -512,7 +510,7 @@ def check_dataset(data, autodownload=True): else: # python script r = exec(s, {'yaml': data}) # return None dt = f'({round(time.time() - t, 1)}s)' - s = f"success ✅ {dt}, saved to {colorstr('bold', root)}" if r in (0, None) else f"failure {dt} ❌" + s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f"failure {dt} ❌" LOGGER.info(f"Dataset download {s}") check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True) # download fonts return data # dictionary @@ -537,7 +535,7 @@ def check_amp(model): f = ROOT / 'data' / 'images' / 'bus.jpg' # image to check im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if check_online() else np.ones((640, 640, 3)) try: - assert amp_allclose(model, im) or amp_allclose(DetectMultiBackend('yolov5n.pt', device), im) + assert amp_allclose(deepcopy(model), im) or amp_allclose(DetectMultiBackend('yolov5n.pt', device), im) LOGGER.info(f'{prefix}checks passed ✅') return True except Exception: @@ -569,10 +567,10 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry def download_one(url, dir): # Download 1 file success = True - f = dir / Path(url).name # filename - if Path(url).is_file(): # exists in current path - Path(url).rename(f) # move to dir - elif not f.exists(): + if Path(url).is_file(): + f = Path(url) # filename + else: # does not exist + f = dir / Path(url).name LOGGER.info(f'Downloading {url} to {f}...') for i in range(retry + 1): if curl: @@ -586,9 +584,9 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry if success: break elif i < retry: - LOGGER.warning(f'Download failure, retrying {i + 1}/{retry} {url}...') + LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...') else: - LOGGER.warning(f'Failed to download {url}...') + LOGGER.warning(f'❌ Failed to download {url}...') if unzip and success and f.suffix in ('.zip', '.tar', '.gz'): LOGGER.info(f'Unzipping {f}...') @@ -727,7 +725,7 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right if clip: - clip_coords(x, (h - eps, w - eps)) # warning: inplace clip + clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center @@ -771,7 +769,23 @@ def resample_segments(segments, n=1000): return segments -def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): +def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None): + # Rescale boxes (xyxy) from img1_shape to img0_shape + if ratio_pad is None: # calculate from img0_shape + gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + else: + gain 
= ratio_pad[0][0] + pad = ratio_pad[1] + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + clip_boxes(boxes, img0_shape) + return boxes + + +def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None): # Rescale coords (xyxy) from img1_shape to img0_shape if ratio_pad is None: # calculate from img0_shape gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new @@ -780,15 +794,15 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): gain = ratio_pad[0][0] pad = ratio_pad[1] - coords[:, [0, 2]] -= pad[0] # x padding - coords[:, [1, 3]] -= pad[1] # y padding - coords[:, :4] /= gain - clip_coords(coords, img0_shape) - return coords + segments[:, 0] -= pad[0] # x padding + segments[:, 1] -= pad[1] # y padding + segments /= gain + clip_segments(segments, img0_shape) + return segments -def clip_coords(boxes, shape): - # Clip bounding xyxy bounding boxes to image shape (height, width) +def clip_boxes(boxes, shape): + # Clip boxes (xyxy) to image shape (height, width) if isinstance(boxes, torch.Tensor): # faster individually boxes[:, 0].clamp_(0, shape[1]) # x1 boxes[:, 1].clamp_(0, shape[0]) # y1 @@ -799,15 +813,28 @@ def clip_coords(boxes, shape): boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 -def non_max_suppression(prediction, - conf_thres=0.25, - iou_thres=0.45, - classes=None, - agnostic=False, - multi_label=False, - labels=(), - max_det=300): - """Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes +def clip_segments(boxes, shape): + # Clip segments (xy1,xy2,...) to image shape (height, width) + if isinstance(boxes, torch.Tensor): # faster individually + boxes[:, 0].clamp_(0, shape[1]) # x + boxes[:, 1].clamp_(0, shape[0]) # y + else: # np.array (faster grouped) + boxes[:, 0] = boxes[:, 0].clip(0, shape[1]) # x + boxes[:, 1] = boxes[:, 1].clip(0, shape[0]) # y + + +def non_max_suppression( + prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, + labels=(), + max_det=300, + nm=0, # number of masks +): + """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections Returns: list of detections, on (n,6) tensor per image [xyxy, conf, cls] @@ -817,7 +844,7 @@ def non_max_suppression(prediction, prediction = prediction[0] # select only inference output bs = prediction.shape[0] # batch size - nc = prediction.shape[2] - 5 # number of classes + nc = prediction.shape[2] - nm - 5 # number of classes xc = prediction[..., 4] > conf_thres # candidates # Checks @@ -828,13 +855,14 @@ def non_max_suppression(prediction, # min_wh = 2 # (pixels) minimum box width and height max_wh = 7680 # (pixels) maximum box width and height max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() - time_limit = 0.3 + 0.03 * bs # seconds to quit after + time_limit = 0.5 + 0.05 * bs # seconds to quit after redundant = True # require redundant detections multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) merge = False # use merge-NMS t = time.time() - output = [torch.zeros((0, 6), device=prediction.device)] * bs + mi = 5 + nc # mask start index + output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs for xi, x in enumerate(prediction): # image index, image inference # Apply constraints # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height @@ -843,7 +871,7 @@ def non_max_suppression(prediction, # Cat apriori labels if 
autolabelling if labels and len(labels[xi]): lb = labels[xi] - v = torch.zeros((len(lb), nc + 5), device=x.device) + v = torch.zeros((len(lb), nc + nm + 5), device=x.device) v[:, :4] = lb[:, 1:5] # box v[:, 4] = 1.0 # conf v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls @@ -856,16 +884,17 @@ def non_max_suppression(prediction, # Compute conf x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf - # Box (center x, center y, width, height) to (x1, y1, x2, y2) - box = xywh2xyxy(x[:, :4]) + # Box/Mask + box = xywh2xyxy(x[:, :4]) # center_x, center_y, width, height) to (x1, y1, x2, y2) + mask = x[:, mi:] # zero columns if no masks # Detections matrix nx6 (xyxy, conf, cls) if multi_label: - i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T - x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) + i, j = (x[:, 5:mi] > conf_thres).nonzero(as_tuple=False).T + x = torch.cat((box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1) else: # best class only - conf, j = x[:, 5:].max(1, keepdim=True) - x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] + conf, j = x[:, 5:mi].max(1, keepdim=True) + x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres] # Filter by class if classes is not None: @@ -881,6 +910,8 @@ def non_max_suppression(prediction, continue elif n > max_nms: # excess boxes x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence + else: + x = x[x[:, 4].argsort(descending=True)] # sort by confidence # Batched NMS c = x[:, 5:6] * (0 if agnostic else max_wh) # classes @@ -898,7 +929,7 @@ def non_max_suppression(prediction, output[xi] = x[i] if (time.time() - t) > time_limit: - LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded') + LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded') break # time limit exceeded return output @@ -975,7 +1006,7 @@ def apply_classifier(x, model, img, im0): d[:, :4] = xywh2xyxy(b).long() # Rescale boxes from img_size to im0 size - scale_coords(img.shape[2:], d[:, :4], im0[i].shape) + scale_boxes(img.shape[2:], d[:, :4], im0[i].shape) # Classes pred_cls1 = d[:, 5].long() diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 3aee358..941d09e 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -11,13 +11,13 @@ import pkg_resources as pkg import torch from torch.utils.tensorboard import SummaryWriter -from utils.general import colorstr, cv2 +from utils.general import LOGGER, colorstr, cv2 from utils.loggers.clearml.clearml_utils import ClearmlLogger from utils.loggers.wandb.wandb_utils import WandbLogger from utils.plots import plot_images, plot_labels, plot_results from utils.torch_utils import de_parallel -LOGGERS = ('csv', 'tb', 'wandb', 'clearml') # *.csv, TensorBoard, Weights & Biases, ClearML +LOGGERS = ('csv', 'tb', 'wandb', 'clearml', 'comet') # *.csv, TensorBoard, Weights & Biases, ClearML RANK = int(os.getenv('RANK', -1)) try: @@ -41,6 +41,18 @@ try: except (ImportError, AssertionError): clearml = None +try: + if RANK not in [0, -1]: + comet_ml = None + else: + import comet_ml + + assert hasattr(comet_ml, '__version__') # verify package import not local dir + from utils.loggers.comet import CometLogger + +except (ModuleNotFoundError, ImportError, AssertionError): + comet_ml = None + class Loggers(): # YOLOv5 Loggers class @@ -80,7 +92,10 @@ class Loggers(): prefix = colorstr('ClearML: ') s = f"{prefix}run 'pip install clearml' to automatically track, visualize and remotely train YOLOv5 🚀 in ClearML" 
self.logger.info(s) - + if not comet_ml: + prefix = colorstr('Comet: ') + s = f"{prefix}run 'pip install comet_ml' to automatically track and visualize YOLOv5 🚀 runs in Comet" + self.logger.info(s) # TensorBoard s = self.save_dir if 'tb' in self.include and not self.opt.evolve: @@ -107,6 +122,18 @@ class Loggers(): else: self.clearml = None + # Comet + if comet_ml and 'comet' in self.include: + if isinstance(self.opt.resume, str) and self.opt.resume.startswith("comet://"): + run_id = self.opt.resume.split("/")[-1] + self.comet_logger = CometLogger(self.opt, self.hyp, run_id=run_id) + + else: + self.comet_logger = CometLogger(self.opt, self.hyp) + + else: + self.comet_logger = None + @property def remote_dataset(self): # Get data_dict if custom dataset artifact link is provided @@ -115,12 +142,18 @@ class Loggers(): data_dict = self.clearml.data_dict if self.wandb: data_dict = self.wandb.data_dict + if self.comet_logger: + data_dict = self.comet_logger.data_dict return data_dict def on_train_start(self): - # Callback runs on train start - pass + if self.comet_logger: + self.comet_logger.on_train_start() + + def on_pretrain_routine_start(self): + if self.comet_logger: + self.comet_logger.on_pretrain_routine_start() def on_pretrain_routine_end(self, labels, names): # Callback runs on pre-train routine end @@ -131,8 +164,11 @@ class Loggers(): self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]}) # if self.clearml: # pass # ClearML saves these images automatically using hooks + if self.comet_logger: + self.comet_logger.on_pretrain_routine_end(paths) - def on_train_batch_end(self, model, ni, imgs, targets, paths): + def on_train_batch_end(self, model, ni, imgs, targets, paths, vals): + log_dict = dict(zip(self.keys[0:3], vals)) # Callback runs on train batch end # ni: number integrated batches (since train start) if self.plots: @@ -148,11 +184,21 @@ class Loggers(): if self.clearml: self.clearml.log_debug_samples(files, title='Mosaics') + if self.comet_logger: + self.comet_logger.on_train_batch_end(log_dict, step=ni) + def on_train_epoch_end(self, epoch): # Callback runs on train epoch end if self.wandb: self.wandb.current_epoch = epoch + 1 + if self.comet_logger: + self.comet_logger.on_train_epoch_end(epoch) + + def on_val_start(self): + if self.comet_logger: + self.comet_logger.on_val_start() + def on_val_image_end(self, pred, predn, path, names, im): # Callback runs on val image end if self.wandb: @@ -160,7 +206,11 @@ class Loggers(): if self.clearml: self.clearml.log_image_with_boxes(path, pred, names, im) - def on_val_end(self): + def on_val_batch_end(self, batch_i, im, targets, paths, shapes, out): + if self.comet_logger: + self.comet_logger.on_val_batch_end(batch_i, im, targets, paths, shapes, out) + + def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix): # Callback runs on val end if self.wandb or self.clearml: files = sorted(self.save_dir.glob('val*.jpg')) @@ -169,6 +219,9 @@ class Loggers(): if self.clearml: self.clearml.log_debug_samples(files, title='Validation') + if self.comet_logger: + self.comet_logger.on_val_end(nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix) + def on_fit_epoch_end(self, vals, epoch, best_fitness, fi): # Callback runs at the end of each fit (train+val) epoch x = dict(zip(self.keys, vals)) @@ -199,6 +252,9 @@ class Loggers(): self.clearml.current_epoch_logged_images = set() # reset epoch image limit self.clearml.current_epoch += 1 + if self.comet_logger: + self.comet_logger.on_fit_epoch_end(x, 
epoch=epoch) + def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): # Callback runs on model save event if (epoch + 1) % self.opt.save_period == 0 and not final_epoch and self.opt.save_period != -1: @@ -209,6 +265,9 @@ class Loggers(): model_name='Latest Model', auto_delete_file=False) + if self.comet_logger: + self.comet_logger.on_model_save(last, epoch, final_epoch, best_fitness, fi) + def on_train_end(self, last, best, epoch, results): # Callback runs on training end, i.e. saving best model if self.plots: @@ -237,10 +296,16 @@ class Loggers(): name='Best Model', auto_delete_file=False) + if self.comet_logger: + final_results = dict(zip(self.keys[3:10], results)) + self.comet_logger.on_train_end(files, self.save_dir, last, best, epoch, final_results) + def on_params_update(self, params: dict): # Update hyperparams or configs of the experiment if self.wandb: self.wandb.wandb_run.config.update(params, allow_val_change=True) + if self.comet_logger: + self.comet_logger.on_params_update(params) class GenericLogger: @@ -328,7 +393,7 @@ def log_tensorboard_graph(tb, model, imgsz=(640, 640)): warnings.simplefilter('ignore') # suppress jit trace warning tb.add_graph(torch.jit.trace(de_parallel(model), im, strict=False), []) except Exception as e: - print(f'WARNING: TensorBoard graph visualization failure {e}') + LOGGER.warning(f'WARNING ⚠️ TensorBoard graph visualization failure {e}') def web_project_name(project): diff --git a/utils/loggers/clearml/clearml_utils.py b/utils/loggers/clearml/clearml_utils.py index 1e13690..eb1c12c 100644 --- a/utils/loggers/clearml/clearml_utils.py +++ b/utils/loggers/clearml/clearml_utils.py @@ -11,6 +11,7 @@ from utils.plots import Annotator, colors try: import clearml from clearml import Dataset, Task + assert hasattr(clearml, '__version__') # verify package import not local dir except (ImportError, AssertionError): clearml = None diff --git a/utils/loggers/comet/README.md b/utils/loggers/comet/README.md new file mode 100644 index 0000000..3a51cb9 --- /dev/null +++ b/utils/loggers/comet/README.md @@ -0,0 +1,256 @@ + + +# YOLOv5 with Comet + +This guide will cover how to use YOLOv5 with [Comet](https://bit.ly/yolov5-readme-comet). + +# About Comet + +Comet builds tools that help data scientists, engineers, and team leaders accelerate and optimize machine learning and deep learning models. + +Track and visualize model metrics in real time, save your hyperparameters, datasets, and model checkpoints, and visualize your model predictions with [Comet Custom Panels](https://bit.ly/yolov5-colab-comet-panels)! +Comet makes sure you never lose track of your work and makes it easy to share results and collaborate across teams of all sizes! + +# Getting Started + +## Install Comet + +```shell +pip install comet_ml +``` + +## Configure Comet Credentials + +There are two ways to configure Comet with YOLOv5. + +You can either set your credentials through environment variables. + +**Environment Variables** + +```shell +export COMET_API_KEY= +export COMET_PROJECT_NAME= # This will default to 'yolov5' +``` + +Or create a `.comet.config` file in your working directory and set your credentials there. + +**Comet Configuration File** + +``` +[comet] +api_key= +project_name= # This will default to 'yolov5' +``` + +## Run the Training Script + +```shell +# Train YOLOv5s on COCO128 for 5 epochs +python train.py --img 640 --batch 16 --epochs 5 --data coco128.yaml --weights yolov5s.pt +``` + +That's it!
Comet will automatically log your hyperparameters, command line arguments, training and validation metrics. You can visualize and analyze your runs in the Comet UI. + +yolo-ui + +# Try out an Example! +Check out an example of a [completed run here](https://www.comet.com/examples/comet-example-yolov5/a0e29e0e9b984e4a822db2a62d0cb357?experiment-tab=chart&showOutliers=true&smoothing=0&transformY=smoothing&xAxis=step&ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration) + +Or better yet, try it out yourself in this Colab Notebook + +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1RG0WOQyxlDlo5Km8GogJpIEJlg_5lyYO?usp=sharing) + +# Log automatically + +By default, Comet will log the following items: + +## Metrics +- Box Loss, Object Loss, Classification Loss for the training and validation data +- mAP_0.5, mAP_0.5:0.95 metrics for the validation data. +- Precision and Recall for the validation data + +## Parameters + +- Model Hyperparameters +- All parameters passed through the command line options + +## Visualizations + +- Confusion Matrix of the model predictions on the validation data +- Plots for the PR and F1 curves across all classes +- Correlogram of the Class Labels + +# Configure Comet Logging + +Comet can be configured to log additional data either through command line flags passed to the training script +or through environment variables. + +```shell +export COMET_MODE=online # Set whether to run Comet in 'online' or 'offline' mode. Defaults to online +export COMET_MODEL_NAME= # Set the name for the saved model. Defaults to yolov5 +export COMET_LOG_CONFUSION_MATRIX=false # Set to disable logging a Comet Confusion Matrix. Defaults to true +export COMET_MAX_IMAGE_UPLOADS= # Controls how many total image predictions to log to Comet. Defaults to 100. +export COMET_LOG_PER_CLASS_METRICS=true # Set to log evaluation metrics for each detected class at the end of training. Defaults to false +export COMET_DEFAULT_CHECKPOINT_FILENAME= # Set this if you would like to resume training from a different checkpoint. Defaults to 'last.pt' +export COMET_LOG_BATCH_LEVEL_METRICS=true # Set this if you would like to log training metrics at the batch level. Defaults to false. +export COMET_LOG_PREDICTIONS=true # Set this to false to disable logging model predictions +``` + +## Logging Checkpoints with Comet + +Logging Models to Comet is disabled by default. To enable it, pass the `save-period` argument to the training script. This will save the +logged checkpoints to Comet based on the interval value provided by `save-period`. + +```shell +python train.py \ +--img 640 \ +--batch 16 \ +--epochs 5 \ +--data coco128.yaml \ +--weights yolov5s.pt \ +--save-period 1 +``` + +## Logging Model Predictions + +By default, model predictions (images, ground truth labels and bounding boxes) will be logged to Comet. + +You can control the frequency of logged predictions and the associated images by passing the `bbox_interval` command line argument. Predictions can be visualized using Comet's Object Detection Custom Panel. This frequency corresponds to every Nth batch of data per epoch. In the example below, we are logging every 2nd batch of data for each epoch. + +**Note:** The YOLOv5 validation dataloader will default to a batch size of 32, so you will have to set the logging frequency accordingly.
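For intuition, the sketch below shows roughly how that per-epoch batch interval behaves; it mirrors the `(batch_i + 1) % interval == 0` check added in `utils/loggers/comet/__init__.py` later in this diff, and the helper name and assert are ours for illustration only.

```python
def should_log_predictions(batch_i: int, bbox_interval: int, epochs: int) -> bool:
    # bbox_interval == -1 falls back to roughly 10 prediction-logging points per run
    interval = (1 if epochs < 10 else epochs // 10) if bbox_interval == -1 else bbox_interval
    return (batch_i + 1) % interval == 0

# With --bbox_interval 2, every 2nd validation batch (indices 1, 3, 5, ...) is logged
assert [b for b in range(8) if should_log_predictions(b, 2, epochs=5)] == [1, 3, 5, 7]
```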
+ Here is an [example project using the Panel](https://www.comet.com/examples/comet-example-yolov5?shareable=YcwMiJaZSXfcEXpGOHDD12vA1&ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration) + + +```shell +python train.py \ +--img 640 \ +--batch 16 \ +--epochs 5 \ +--data coco128.yaml \ +--weights yolov5s.pt \ +--bbox_interval 2 +``` + +### Controlling the number of Prediction Images logged to Comet + +When logging predictions from YOLOv5, Comet will log the images associated with each set of predictions. By default a maximum of 100 validation images are logged. You can increase or decrease this number using the `COMET_MAX_IMAGE_UPLOADS` environment variable. + +```shell +env COMET_MAX_IMAGE_UPLOADS=200 python train.py \ +--img 640 \ +--batch 16 \ +--epochs 5 \ +--data coco128.yaml \ +--weights yolov5s.pt \ +--bbox_interval 1 +``` + +### Logging Class Level Metrics + +Use the `COMET_LOG_PER_CLASS_METRICS` environment variable to log mAP, precision, recall, f1 for each class. + +```shell +env COMET_LOG_PER_CLASS_METRICS=true python train.py \ +--img 640 \ +--batch 16 \ +--epochs 5 \ +--data coco128.yaml \ +--weights yolov5s.pt +``` + +## Uploading a Dataset to Comet Artifacts + +If you would like to store your data using [Comet Artifacts](https://www.comet.com/docs/v2/guides/data-management/using-artifacts/#learn-more?ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration), you can do so using the `upload_dataset` flag. + +The dataset must be organized in the way described in the [YOLOv5 documentation](https://docs.ultralytics.com/tutorials/train-custom-datasets/#3-organize-directories). The dataset config `yaml` file must follow the same format as that of the `coco128.yaml` file. + +```shell +python train.py \ +--img 640 \ +--batch 16 \ +--epochs 5 \ +--data coco128.yaml \ +--weights yolov5s.pt \ +--upload_dataset +``` + +You can find the uploaded dataset in the Artifacts tab in your Comet Workspace. +artifact-1 + +You can preview the data directly in the Comet UI. +artifact-2 + +Artifacts are versioned and also support adding metadata about the dataset. Comet will automatically log the metadata from your dataset `yaml` file. +artifact-3 + +### Using a saved Artifact + +If you would like to use a dataset from Comet Artifacts, set the `path` variable in your dataset `yaml` file to point to the following Artifact resource URL. + +``` +# contents of artifact.yaml file +path: "comet:///:" +``` +Then pass this file to your training script in the following way: + +```shell +python train.py \ +--img 640 \ +--batch 16 \ +--epochs 5 \ +--data artifact.yaml \ +--weights yolov5s.pt +``` + +Artifacts also allow you to track the lineage of data as it flows through your experimentation workflow. Here you can see a graph that shows you all the experiments that have used your uploaded dataset. +artifact-4 + +## Resuming a Training Run + +If your training run is interrupted for any reason, e.g. a disrupted internet connection, you can resume the run using the `resume` flag and the Comet Run Path. + +The Run Path has the following format `comet:////`. + +This will restore the run to its state before the interruption, which includes restoring the model from a checkpoint, restoring all hyperparameters and training arguments, and downloading Comet dataset Artifacts if they were used in the original run.
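As a rough illustration of how such a Run Path is taken apart, the snippet below mirrors the `urlparse` handling added in `utils/loggers/comet/comet_utils.py` further down in this diff; the workspace, project, and experiment values are hypothetical placeholders, not a real experiment.

```python
from urllib.parse import urlparse

resume = "comet://my-workspace/my-project/abc123"  # hypothetical Run Path
resource = urlparse(resume)  # scheme='comet', netloc='my-workspace', path='/my-project/abc123'
experiment_path = f"{resource.netloc}{resource.path}"
print(experiment_path)  # -> my-workspace/my-project/abc123
```

The integration looks up the experiment by this workspace/project/experiment string and then restores checkpoints and run options from it.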
The resumed run will continue logging to the existing Experiment in the Comet UI. + +```shell +python train.py \ +--resume "comet://" +``` + +## Hyperparameter Search with the Comet Optimizer + +YOLOv5 is also integrated with Comet's Optimizer, making it simple to visualize hyperparameter sweeps in the Comet UI. + +### Configuring an Optimizer Sweep + +To configure the Comet Optimizer, you will have to create a JSON file with the information about the sweep. An example file has been provided in `utils/loggers/comet/optimizer_config.json`. + +```shell +python utils/loggers/comet/hpo.py \ + --comet_optimizer_config "utils/loggers/comet/optimizer_config.json" +``` + +The `hpo.py` script accepts the same arguments as `train.py`. If you wish to pass additional arguments to your sweep, simply add them after +the script. + +```shell +python utils/loggers/comet/hpo.py \ + --comet_optimizer_config "utils/loggers/comet/optimizer_config.json" \ + --save-period 1 \ + --bbox_interval 1 +``` + +### Running a Sweep in Parallel + +```shell +comet optimizer -j utils/loggers/comet/hpo.py \ + "utils/loggers/comet/optimizer_config.json" +``` + +### Visualizing Results + +Comet provides a number of ways to visualize the results of your sweep. Take a look at a [project with a completed sweep here](https://www.comet.com/examples/comet-example-yolov5/view/PrlArHGuuhDTKC1UuBmTtOSXD/panels?ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration) + +hyperparameter-yolo diff --git a/utils/loggers/comet/__init__.py b/utils/loggers/comet/__init__.py new file mode 100644 index 0000000..ba5cecc --- /dev/null +++ b/utils/loggers/comet/__init__.py @@ -0,0 +1,501 @@ +import glob +import json +import logging +import os +import sys +from pathlib import Path + +logger = logging.getLogger(__name__) + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[3] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH + +try: + import comet_ml + + # Project Configuration + config = comet_ml.config.get_config() + COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5") +except (ModuleNotFoundError, ImportError): + comet_ml = None + COMET_PROJECT_NAME = None + +import PIL +import torch +import torchvision.transforms as T +import yaml + +from utils.dataloaders import img2label_paths +from utils.general import check_dataset, scale_boxes, xywh2xyxy +from utils.metrics import box_iou + +COMET_PREFIX = "comet://" + +COMET_MODE = os.getenv("COMET_MODE", "online") + +# Model Saving Settings +COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5") + +# Dataset Artifact Settings +COMET_UPLOAD_DATASET = os.getenv("COMET_UPLOAD_DATASET", "false").lower() == "true" + +# Evaluation Settings +COMET_LOG_CONFUSION_MATRIX = os.getenv("COMET_LOG_CONFUSION_MATRIX", "true").lower() == "true" +COMET_LOG_PREDICTIONS = os.getenv("COMET_LOG_PREDICTIONS", "true").lower() == "true" +COMET_MAX_IMAGE_UPLOADS = int(os.getenv("COMET_MAX_IMAGE_UPLOADS", 100)) + +# Confusion Matrix Settings +CONF_THRES = float(os.getenv("CONF_THRES", 0.001)) +IOU_THRES = float(os.getenv("IOU_THRES", 0.6)) + +# Batch Logging Settings +COMET_LOG_BATCH_METRICS = os.getenv("COMET_LOG_BATCH_METRICS", "false").lower() == "true" +COMET_BATCH_LOGGING_INTERVAL = os.getenv("COMET_BATCH_LOGGING_INTERVAL", 1) +COMET_PREDICTION_LOGGING_INTERVAL = os.getenv("COMET_PREDICTION_LOGGING_INTERVAL", 1) +COMET_LOG_PER_CLASS_METRICS = os.getenv("COMET_LOG_PER_CLASS_METRICS",
"false").lower() == "true" + +RANK = int(os.getenv("RANK", -1)) + +to_pil = T.ToPILImage() + + +class CometLogger: + """Log metrics, parameters, source code, models and much more + with Comet + """ + + def __init__(self, opt, hyp, run_id=None, job_type="Training", **experiment_kwargs) -> None: + self.job_type = job_type + self.opt = opt + self.hyp = hyp + + # Comet Flags + self.comet_mode = COMET_MODE + + self.save_model = opt.save_period > -1 + self.model_name = COMET_MODEL_NAME + + # Batch Logging Settings + self.log_batch_metrics = COMET_LOG_BATCH_METRICS + self.comet_log_batch_interval = COMET_BATCH_LOGGING_INTERVAL + + # Dataset Artifact Settings + self.upload_dataset = self.opt.upload_dataset if self.opt.upload_dataset else COMET_UPLOAD_DATASET + self.resume = self.opt.resume + + # Default parameters to pass to Experiment objects + self.default_experiment_kwargs = { + "log_code": False, + "log_env_gpu": True, + "log_env_cpu": True, + "project_name": COMET_PROJECT_NAME,} + self.default_experiment_kwargs.update(experiment_kwargs) + self.experiment = self._get_experiment(self.comet_mode, run_id) + + self.data_dict = self.check_dataset(self.opt.data) + self.class_names = self.data_dict["names"] + self.num_classes = self.data_dict["nc"] + + self.logged_images_count = 0 + self.max_images = COMET_MAX_IMAGE_UPLOADS + + if run_id is None: + self.experiment.log_other("Created from", "YOLOv5") + if not isinstance(self.experiment, comet_ml.OfflineExperiment): + workspace, project_name, experiment_id = self.experiment.url.split("/")[-3:] + self.experiment.log_other( + "Run Path", + f"{workspace}/{project_name}/{experiment_id}", + ) + self.log_parameters(vars(opt)) + self.log_parameters(self.opt.hyp) + self.log_asset_data( + self.opt.hyp, + name="hyperparameters.json", + metadata={"type": "hyp-config-file"}, + ) + self.log_asset( + f"{self.opt.save_dir}/opt.yaml", + metadata={"type": "opt-config-file"}, + ) + + self.comet_log_confusion_matrix = COMET_LOG_CONFUSION_MATRIX + + if hasattr(self.opt, "conf_thres"): + self.conf_thres = self.opt.conf_thres + else: + self.conf_thres = CONF_THRES + if hasattr(self.opt, "iou_thres"): + self.iou_thres = self.opt.iou_thres + else: + self.iou_thres = IOU_THRES + + self.log_parameters({"val_iou_threshold": self.iou_thres, "val_conf_threshold": self.conf_thres}) + + self.comet_log_predictions = COMET_LOG_PREDICTIONS + if self.opt.bbox_interval == -1: + self.comet_log_prediction_interval = 1 if self.opt.epochs < 10 else self.opt.epochs // 10 + else: + self.comet_log_prediction_interval = self.opt.bbox_interval + + if self.comet_log_predictions: + self.metadata_dict = {} + self.logged_image_names = [] + + self.comet_log_per_class_metrics = COMET_LOG_PER_CLASS_METRICS + + self.experiment.log_others({ + "comet_mode": COMET_MODE, + "comet_max_image_uploads": COMET_MAX_IMAGE_UPLOADS, + "comet_log_per_class_metrics": COMET_LOG_PER_CLASS_METRICS, + "comet_log_batch_metrics": COMET_LOG_BATCH_METRICS, + "comet_log_confusion_matrix": COMET_LOG_CONFUSION_MATRIX, + "comet_model_name": COMET_MODEL_NAME,}) + + # Check if running the Experiment with the Comet Optimizer + if hasattr(self.opt, "comet_optimizer_id"): + self.experiment.log_other("optimizer_id", self.opt.comet_optimizer_id) + self.experiment.log_other("optimizer_objective", self.opt.comet_optimizer_objective) + self.experiment.log_other("optimizer_metric", self.opt.comet_optimizer_metric) + self.experiment.log_other("optimizer_parameters", json.dumps(self.hyp)) + + def _get_experiment(self, mode, 
experiment_id=None): + if mode == "offline": + if experiment_id is not None: + return comet_ml.ExistingOfflineExperiment( + previous_experiment=experiment_id, + **self.default_experiment_kwargs, + ) + + return comet_ml.OfflineExperiment(**self.default_experiment_kwargs,) + + else: + try: + if experiment_id is not None: + return comet_ml.ExistingExperiment( + previous_experiment=experiment_id, + **self.default_experiment_kwargs, + ) + + return comet_ml.Experiment(**self.default_experiment_kwargs) + + except ValueError: + logger.warning("COMET WARNING: " + "Comet credentials have not been set. " + "Comet will default to offline logging. " + "Please set your credentials to enable online logging.") + return self._get_experiment("offline", experiment_id) + + return + + def log_metrics(self, log_dict, **kwargs): + self.experiment.log_metrics(log_dict, **kwargs) + + def log_parameters(self, log_dict, **kwargs): + self.experiment.log_parameters(log_dict, **kwargs) + + def log_asset(self, asset_path, **kwargs): + self.experiment.log_asset(asset_path, **kwargs) + + def log_asset_data(self, asset, **kwargs): + self.experiment.log_asset_data(asset, **kwargs) + + def log_image(self, img, **kwargs): + self.experiment.log_image(img, **kwargs) + + def log_model(self, path, opt, epoch, fitness_score, best_model=False): + if not self.save_model: + return + + model_metadata = { + "fitness_score": fitness_score[-1], + "epochs_trained": epoch + 1, + "save_period": opt.save_period, + "total_epochs": opt.epochs,} + + model_files = glob.glob(f"{path}/*.pt") + for model_path in model_files: + name = Path(model_path).name + + self.experiment.log_model( + self.model_name, + file_or_folder=model_path, + file_name=name, + metadata=model_metadata, + overwrite=True, + ) + + def check_dataset(self, data_file): + with open(data_file) as f: + data_config = yaml.safe_load(f) + + if data_config['path'].startswith(COMET_PREFIX): + path = data_config['path'].replace(COMET_PREFIX, "") + data_dict = self.download_dataset_artifact(path) + + return data_dict + + self.log_asset(self.opt.data, metadata={"type": "data-config-file"}) + + return check_dataset(data_file) + + def log_predictions(self, image, labelsn, path, shape, predn): + if self.logged_images_count >= self.max_images: + return + detections = predn[predn[:, 4] > self.conf_thres] + iou = box_iou(labelsn[:, 1:], detections[:, :4]) + mask, _ = torch.where(iou > self.iou_thres) + if len(mask) == 0: + return + + filtered_detections = detections[mask] + filtered_labels = labelsn[mask] + + image_id = path.split("/")[-1].split(".")[0] + image_name = f"{image_id}_curr_epoch_{self.experiment.curr_epoch}" + if image_name not in self.logged_image_names: + native_scale_image = PIL.Image.open(path) + self.log_image(native_scale_image, name=image_name) + self.logged_image_names.append(image_name) + + metadata = [] + for cls, *xyxy in filtered_labels.tolist(): + metadata.append({ + "label": f"{self.class_names[int(cls)]}-gt", + "score": 100, + "box": { + "x": xyxy[0], + "y": xyxy[1], + "x2": xyxy[2], + "y2": xyxy[3]},}) + for *xyxy, conf, cls in filtered_detections.tolist(): + metadata.append({ + "label": f"{self.class_names[int(cls)]}", + "score": conf * 100, + "box": { + "x": xyxy[0], + "y": xyxy[1], + "x2": xyxy[2], + "y2": xyxy[3]},}) + + self.metadata_dict[image_name] = metadata + self.logged_images_count += 1 + + return + + def preprocess_prediction(self, image, labels, shape, pred): + nl, _ = labels.shape[0], pred.shape[0] + + # Predictions + if self.opt.single_cls: + pred[:, 5] 
= 0 + + predn = pred.clone() + scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1]) + + labelsn = None + if nl: + tbox = xywh2xyxy(labels[:, 1:5]) # target boxes + scale_boxes(image.shape[1:], tbox, shape[0], shape[1]) # native-space labels + labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels + scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1]) # native-space pred + + return predn, labelsn + + def add_assets_to_artifact(self, artifact, path, asset_path, split): + img_paths = sorted(glob.glob(f"{asset_path}/*")) + label_paths = img2label_paths(img_paths) + + for image_file, label_file in zip(img_paths, label_paths): + image_logical_path, label_logical_path = map(lambda x: os.path.relpath(x, path), [image_file, label_file]) + + try: + artifact.add(image_file, logical_path=image_logical_path, metadata={"split": split}) + artifact.add(label_file, logical_path=label_logical_path, metadata={"split": split}) + except ValueError as e: + logger.error('COMET ERROR: Error adding file to Artifact. Skipping file.') + logger.error(f"COMET ERROR: {e}") + continue + + return artifact + + def upload_dataset_artifact(self): + dataset_name = self.data_dict.get("dataset_name", "yolov5-dataset") + path = str((ROOT / Path(self.data_dict["path"])).resolve()) + + metadata = self.data_dict.copy() + for key in ["train", "val", "test"]: + split_path = metadata.get(key) + if split_path is not None: + metadata[key] = split_path.replace(path, "") + + artifact = comet_ml.Artifact(name=dataset_name, artifact_type="dataset", metadata=metadata) + for key in metadata.keys(): + if key in ["train", "val", "test"]: + if isinstance(self.upload_dataset, str) and (key != self.upload_dataset): + continue + + asset_path = self.data_dict.get(key) + if asset_path is not None: + artifact = self.add_assets_to_artifact(artifact, path, asset_path, key) + + self.experiment.log_artifact(artifact) + + return + + def download_dataset_artifact(self, artifact_path): + logged_artifact = self.experiment.get_artifact(artifact_path) + artifact_save_dir = str(Path(self.opt.save_dir) / logged_artifact.name) + logged_artifact.download(artifact_save_dir) + + metadata = logged_artifact.metadata + data_dict = metadata.copy() + data_dict["path"] = artifact_save_dir + data_dict["names"] = {int(k): v for k, v in metadata.get("names").items()} + + data_dict = self.update_data_paths(data_dict) + return data_dict + + def update_data_paths(self, data_dict): + path = data_dict.get("path", "") + + for split in ["train", "val", "test"]: + if data_dict.get(split): + split_path = data_dict.get(split) + data_dict[split] = (f"{path}/{split_path}" if isinstance(split, str) else [ + f"{path}/{x}" for x in split_path]) + + return data_dict + + def on_pretrain_routine_end(self, paths): + if self.opt.resume: + return + + for path in paths: + self.log_asset(str(path)) + + if self.upload_dataset: + if not self.resume: + self.upload_dataset_artifact() + + return + + def on_train_start(self): + self.log_parameters(self.hyp) + + def on_train_epoch_start(self): + return + + def on_train_epoch_end(self, epoch): + self.experiment.curr_epoch = epoch + + return + + def on_train_batch_start(self): + return + + def on_train_batch_end(self, log_dict, step): + self.experiment.curr_step = step + if self.log_batch_metrics and (step % self.comet_log_batch_interval == 0): + self.log_metrics(log_dict, step=step) + + return + + def on_train_end(self, files, save_dir, last, best, epoch, results): + if self.comet_log_predictions: + curr_epoch = 
self.experiment.curr_epoch + self.experiment.log_asset_data(self.metadata_dict, "image-metadata.json", epoch=curr_epoch) + + for f in files: + self.log_asset(f, metadata={"epoch": epoch}) + self.log_asset(f"{save_dir}/results.csv", metadata={"epoch": epoch}) + + if not self.opt.evolve: + model_path = str(best if best.exists() else last) + name = Path(model_path).name + if self.save_model: + self.experiment.log_model( + self.model_name, + file_or_folder=model_path, + file_name=name, + overwrite=True, + ) + + # Check if running Experiment with Comet Optimizer + if hasattr(self.opt, 'comet_optimizer_id'): + metric = results.get(self.opt.comet_optimizer_metric) + self.experiment.log_other('optimizer_metric_value', metric) + + self.finish_run() + + def on_val_start(self): + return + + def on_val_batch_start(self): + return + + def on_val_batch_end(self, batch_i, images, targets, paths, shapes, outputs): + if not (self.comet_log_predictions and ((batch_i + 1) % self.comet_log_prediction_interval == 0)): + return + + for si, pred in enumerate(outputs): + if len(pred) == 0: + continue + + image = images[si] + labels = targets[targets[:, 0] == si, 1:] + shape = shapes[si] + path = paths[si] + predn, labelsn = self.preprocess_prediction(image, labels, shape, pred) + if labelsn is not None: + self.log_predictions(image, labelsn, path, shape, predn) + + return + + def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix): + if self.comet_log_per_class_metrics: + if self.num_classes > 1: + for i, c in enumerate(ap_class): + class_name = self.class_names[c] + self.experiment.log_metrics( + { + 'mAP@.5': ap50[i], + 'mAP@.5:.95': ap[i], + 'precision': p[i], + 'recall': r[i], + 'f1': f1[i], + 'true_positives': tp[i], + 'false_positives': fp[i], + 'support': nt[c]}, + prefix=class_name) + + if self.comet_log_confusion_matrix: + epoch = self.experiment.curr_epoch + class_names = list(self.class_names.values()) + class_names.append("background") + num_classes = len(class_names) + + self.experiment.log_confusion_matrix( + matrix=confusion_matrix.matrix, + max_categories=num_classes, + labels=class_names, + epoch=epoch, + column_label='Actual Category', + row_label='Predicted Category', + file_name=f"confusion-matrix-epoch-{epoch}.json", + ) + + def on_fit_epoch_end(self, result, epoch): + self.log_metrics(result, epoch=epoch) + + def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): + if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1: + self.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi) + + def on_params_update(self, params): + self.log_parameters(params) + + def finish_run(self): + self.experiment.end() diff --git a/utils/loggers/comet/comet_utils.py b/utils/loggers/comet/comet_utils.py new file mode 100644 index 0000000..3cbd451 --- /dev/null +++ b/utils/loggers/comet/comet_utils.py @@ -0,0 +1,150 @@ +import logging +import os +from urllib.parse import urlparse + +try: + import comet_ml +except (ModuleNotFoundError, ImportError): + comet_ml = None + +import yaml + +logger = logging.getLogger(__name__) + +COMET_PREFIX = "comet://" +COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5") +COMET_DEFAULT_CHECKPOINT_FILENAME = os.getenv("COMET_DEFAULT_CHECKPOINT_FILENAME", "last.pt") + + +def download_model_checkpoint(opt, experiment): + model_dir = f"{opt.project}/{experiment.name}" + os.makedirs(model_dir, exist_ok=True) + + model_name = COMET_MODEL_NAME + model_asset_list = 
experiment.get_model_asset_list(model_name) + + if len(model_asset_list) == 0: + logger.error(f"COMET ERROR: No checkpoints found for model name : {model_name}") + return + + model_asset_list = sorted( + model_asset_list, + key=lambda x: x["step"], + reverse=True, + ) + logged_checkpoint_map = {asset["fileName"]: asset["assetId"] for asset in model_asset_list} + + resource_url = urlparse(opt.weights) + checkpoint_filename = resource_url.query + + if checkpoint_filename: + asset_id = logged_checkpoint_map.get(checkpoint_filename) + else: + asset_id = logged_checkpoint_map.get(COMET_DEFAULT_CHECKPOINT_FILENAME) + checkpoint_filename = COMET_DEFAULT_CHECKPOINT_FILENAME + + if asset_id is None: + logger.error(f"COMET ERROR: Checkpoint {checkpoint_filename} not found in the given Experiment") + return + + try: + logger.info(f"COMET INFO: Downloading checkpoint {checkpoint_filename}") + asset_filename = checkpoint_filename + + model_binary = experiment.get_asset(asset_id, return_type="binary", stream=False) + model_download_path = f"{model_dir}/{asset_filename}" + with open(model_download_path, "wb") as f: + f.write(model_binary) + + opt.weights = model_download_path + + except Exception as e: + logger.warning("COMET WARNING: Unable to download checkpoint from Comet") + logger.exception(e) + + +def set_opt_parameters(opt, experiment): + """Update the opts Namespace with parameters + from Comet's ExistingExperiment when resuming a run + + Args: + opt (argparse.Namespace): Namespace of command line options + experiment (comet_ml.APIExperiment): Comet API Experiment object + """ + asset_list = experiment.get_asset_list() + resume_string = opt.resume + + for asset in asset_list: + if asset["fileName"] == "opt.yaml": + asset_id = asset["assetId"] + asset_binary = experiment.get_asset(asset_id, return_type="binary", stream=False) + opt_dict = yaml.safe_load(asset_binary) + for key, value in opt_dict.items(): + setattr(opt, key, value) + opt.resume = resume_string + + # Save hyperparameters to YAML file + # Necessary to pass checks in training script + save_dir = f"{opt.project}/{experiment.name}" + os.makedirs(save_dir, exist_ok=True) + + hyp_yaml_path = f"{save_dir}/hyp.yaml" + with open(hyp_yaml_path, "w") as f: + yaml.dump(opt.hyp, f) + opt.hyp = hyp_yaml_path + + +def check_comet_weights(opt): + """Downloads model weights from Comet and updates the + weights path to point to saved weights location + + Args: + opt (argparse.Namespace): Command Line arguments passed + to YOLOv5 training script + + Returns: + None/bool: Return True if weights are successfully downloaded + else return None + """ + if comet_ml is None: + return + + if isinstance(opt.weights, str): + if opt.weights.startswith(COMET_PREFIX): + api = comet_ml.API() + resource = urlparse(opt.weights) + experiment_path = f"{resource.netloc}{resource.path}" + experiment = api.get(experiment_path) + download_model_checkpoint(opt, experiment) + return True + + return None + + +def check_comet_resume(opt): + """Restores run parameters to its original state based on the model checkpoint + and logged Experiment parameters. 
+ + Args: + opt (argparse.Namespace): Command Line arguments passed + to YOLOv5 training script + + Returns: + None/bool: Return True if the run is restored successfully + else return None + """ + if comet_ml is None: + return + + if isinstance(opt.resume, str): + if opt.resume.startswith(COMET_PREFIX): + api = comet_ml.API() + resource = urlparse(opt.resume) + experiment_path = f"{resource.netloc}{resource.path}" + experiment = api.get(experiment_path) + set_opt_parameters(opt, experiment) + download_model_checkpoint(opt, experiment) + + return True + + return None diff --git a/utils/loggers/comet/hpo.py b/utils/loggers/comet/hpo.py new file mode 100644 index 0000000..7dd5c92 --- /dev/null +++ b/utils/loggers/comet/hpo.py @@ -0,0 +1,118 @@ +import argparse +import json +import logging +import os +import sys +from pathlib import Path + +import comet_ml + +logger = logging.getLogger(__name__) + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[3] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH + +from train import train +from utils.callbacks import Callbacks +from utils.general import increment_path +from utils.torch_utils import select_device + +# Project Configuration +config = comet_ml.config.get_config() +COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5") + + +def get_args(known=False): + parser = argparse.ArgumentParser() + parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path') + parser.add_argument('--cfg', type=str, default='', help='model.yaml path') + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') + parser.add_argument('--epochs', type=int, default=300, help='total training epochs') + parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') + parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)') + parser.add_argument('--rect', action='store_true', help='rectangular training') + parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') + parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') + parser.add_argument('--noval', action='store_true', help='only validate final epoch') + parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor') + parser.add_argument('--noplots', action='store_true', help='save no plot files') + parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations') + parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') + parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"') + parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') + parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') + parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') + parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') + parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') + parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') + parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') + parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name') + parser.add_argument('--name', default='exp', help='save to project/name') + parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--quad', action='store_true', help='quad dataloader') + parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler') + parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') + parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)') + parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2') + parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)') + parser.add_argument('--seed', type=int, default=0, help='Global training seed') + parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify') + + # Weights & Biases arguments + parser.add_argument('--entity', default=None, help='W&B: Entity') + parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option') + parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval') + parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use') + + # Comet Arguments + parser.add_argument("--comet_optimizer_config", type=str, help="Comet: Path to a Comet Optimizer Config File.") + parser.add_argument("--comet_optimizer_id", type=str, help="Comet: ID of the Comet Optimizer sweep.") + parser.add_argument("--comet_optimizer_objective", type=str, help="Comet: Set to 'minimize' or 'maximize'.") + parser.add_argument("--comet_optimizer_metric", type=str, help="Comet: Metric to Optimize.") + parser.add_argument("--comet_optimizer_workers", + type=int, + default=1, + help="Comet: Number of Parallel Workers to use with the Comet Optimizer.") + + return parser.parse_known_args()[0] if known else parser.parse_args() + + +def run(parameters, opt): + hyp_dict = {k: v for k, v in parameters.items() if k not in ["epochs", "batch_size"]} + + opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)) + opt.batch_size = parameters.get("batch_size") + opt.epochs = parameters.get("epochs") + + device = select_device(opt.device, batch_size=opt.batch_size) + train(hyp_dict, opt, device, callbacks=Callbacks()) + + +if __name__ == "__main__": + opt = get_args(known=True) + + opt.weights = str(opt.weights) + opt.cfg = str(opt.cfg) + opt.data = str(opt.data) + opt.project = str(opt.project) + + optimizer_id = os.getenv("COMET_OPTIMIZER_ID") + if optimizer_id is None: + with open(opt.comet_optimizer_config) as f: + optimizer_config = json.load(f) + optimizer = 
comet_ml.Optimizer(optimizer_config) + else: + optimizer = comet_ml.Optimizer(optimizer_id) + + opt.comet_optimizer_id = optimizer.id + status = optimizer.status() + + opt.comet_optimizer_objective = status["spec"]["objective"] + opt.comet_optimizer_metric = status["spec"]["metric"] + + logger.info("COMET INFO: Starting Hyperparameter Sweep") + for parameter in optimizer.get_parameters(): + run(parameter["parameters"], opt) diff --git a/utils/loggers/comet/optimizer_config.json b/utils/loggers/comet/optimizer_config.json new file mode 100644 index 0000000..83dddda --- /dev/null +++ b/utils/loggers/comet/optimizer_config.json @@ -0,0 +1,209 @@ +{ + "algorithm": "random", + "parameters": { + "anchor_t": { + "type": "discrete", + "values": [ + 2, + 8 + ] + }, + "batch_size": { + "type": "discrete", + "values": [ + 16, + 32, + 64 + ] + }, + "box": { + "type": "discrete", + "values": [ + 0.02, + 0.2 + ] + }, + "cls": { + "type": "discrete", + "values": [ + 0.2 + ] + }, + "cls_pw": { + "type": "discrete", + "values": [ + 0.5 + ] + }, + "copy_paste": { + "type": "discrete", + "values": [ + 1 + ] + }, + "degrees": { + "type": "discrete", + "values": [ + 0, + 45 + ] + }, + "epochs": { + "type": "discrete", + "values": [ + 5 + ] + }, + "fl_gamma": { + "type": "discrete", + "values": [ + 0 + ] + }, + "fliplr": { + "type": "discrete", + "values": [ + 0 + ] + }, + "flipud": { + "type": "discrete", + "values": [ + 0 + ] + }, + "hsv_h": { + "type": "discrete", + "values": [ + 0 + ] + }, + "hsv_s": { + "type": "discrete", + "values": [ + 0 + ] + }, + "hsv_v": { + "type": "discrete", + "values": [ + 0 + ] + }, + "iou_t": { + "type": "discrete", + "values": [ + 0.7 + ] + }, + "lr0": { + "type": "discrete", + "values": [ + 1e-05, + 0.1 + ] + }, + "lrf": { + "type": "discrete", + "values": [ + 0.01, + 1 + ] + }, + "mixup": { + "type": "discrete", + "values": [ + 1 + ] + }, + "momentum": { + "type": "discrete", + "values": [ + 0.6 + ] + }, + "mosaic": { + "type": "discrete", + "values": [ + 0 + ] + }, + "obj": { + "type": "discrete", + "values": [ + 0.2 + ] + }, + "obj_pw": { + "type": "discrete", + "values": [ + 0.5 + ] + }, + "optimizer": { + "type": "categorical", + "values": [ + "SGD", + "Adam", + "AdamW" + ] + }, + "perspective": { + "type": "discrete", + "values": [ + 0 + ] + }, + "scale": { + "type": "discrete", + "values": [ + 0 + ] + }, + "shear": { + "type": "discrete", + "values": [ + 0 + ] + }, + "translate": { + "type": "discrete", + "values": [ + 0 + ] + }, + "warmup_bias_lr": { + "type": "discrete", + "values": [ + 0, + 0.2 + ] + }, + "warmup_epochs": { + "type": "discrete", + "values": [ + 5 + ] + }, + "warmup_momentum": { + "type": "discrete", + "values": [ + 0, + 0.95 + ] + }, + "weight_decay": { + "type": "discrete", + "values": [ + 0, + 0.001 + ] + } + }, + "spec": { + "maxCombo": 0, + "metric": "metrics/mAP_0.5", + "objective": "maximize" + }, + "trials": 1 +} diff --git a/utils/metrics.py b/utils/metrics.py index ee7d339..ed611d7 100644 --- a/utils/metrics.py +++ b/utils/metrics.py @@ -28,7 +28,7 @@ def smooth(y, f=0.05): return np.convolve(yp, np.ones(nf) / nf, mode='valid') # y-smoothed -def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16): +def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16, prefix=""): """ Compute the average precision, given the recall and precision curves. Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 
# Arguments @@ -83,10 +83,10 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data names = dict(enumerate(names)) # to dict if plot: - plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names) - plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1') - plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision') - plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall') + plot_pr_curve(px, py, ap, Path(save_dir) / f'{prefix}PR_curve.png', names) + plot_mc_curve(px, f1, Path(save_dir) / f'{prefix}F1_curve.png', names, ylabel='F1') + plot_mc_curve(px, p, Path(save_dir) / f'{prefix}P_curve.png', names, ylabel='Precision') + plot_mc_curve(px, r, Path(save_dir) / f'{prefix}R_curve.png', names, ylabel='Recall') i = smooth(f1.mean(0), 0.1).argmax() # max F1 index p, r, f1 = p[:, i], r[:, i], f1[:, i] @@ -170,12 +170,12 @@ class ConfusionMatrix: if n and sum(j) == 1: self.matrix[detection_classes[m1[j]], gc] += 1 # correct else: - self.matrix[self.nc, gc] += 1 # background FP + self.matrix[self.nc, gc] += 1 # true background if n: for i, dc in enumerate(detection_classes): if not any(m1 == i): - self.matrix[dc, self.nc] += 1 # background FN + self.matrix[dc, self.nc] += 1 # predicted background def matrix(self): return self.matrix @@ -186,7 +186,7 @@ class ConfusionMatrix: # fn = self.matrix.sum(0) - tp # false negatives (missed detections) return tp[:-1], fp[:-1] # remove background class - @TryExcept('WARNING: ConfusionMatrix plot failure: ') + @TryExcept('WARNING ⚠️ ConfusionMatrix plot failure: ') def plot(self, normalize=True, save_dir='', names=()): import seaborn as sn @@ -197,6 +197,7 @@ class ConfusionMatrix: nc, nn = self.nc, len(names) # number of classes, names sn.set(font_scale=1.0 if nc < 50 else 0.8) # for label size labels = (0 < nn < 99) and (nn == nc) # apply names to ticklabels + ticklabels = (names + ['background']) if labels else "auto" with warnings.catch_warnings(): warnings.simplefilter('ignore') # suppress empty matrix RuntimeWarning: All-NaN slice encountered sn.heatmap(array, @@ -208,8 +209,8 @@ class ConfusionMatrix: fmt='.2f', square=True, vmin=0.0, - xticklabels=names + ['background FP'] if labels else "auto", - yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1)) + xticklabels=ticklabels, + yticklabels=ticklabels).set_facecolor((1, 1, 1)) ax.set_ylabel('True') ax.set_ylabel('Predicted') ax.set_title('Confusion Matrix') diff --git a/utils/plots.py b/utils/plots.py index 0f322b6..36df271 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -20,9 +20,10 @@ import torch from PIL import Image, ImageDraw, ImageFont from utils import TryExcept, threaded -from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_coords, increment_path, +from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_boxes, increment_path, is_ascii, xywh2xyxy, xyxy2xywh) from utils.metrics import fitness +from utils.segment.general import scale_image # Settings RANK = int(os.getenv('RANK', -1)) @@ -113,6 +114,52 @@ class Annotator: thickness=tf, lineType=cv2.LINE_AA) + def masks(self, masks, colors, im_gpu=None, alpha=0.5): + """Plot masks at once. 
+ Args: + masks (tensor): predicted masks on cuda, shape: [n, h, w] + colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] + im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] + alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque + """ + if self.pil: + # convert to numpy first + self.im = np.asarray(self.im).copy() + if im_gpu is None: + # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) + if len(masks) == 0: + return + if isinstance(masks, torch.Tensor): + masks = torch.as_tensor(masks, dtype=torch.uint8) + masks = masks.permute(1, 2, 0).contiguous() + masks = masks.cpu().numpy() + # masks = np.ascontiguousarray(masks.transpose(1, 2, 0)) + masks = scale_image(masks.shape[:2], masks, self.im.shape) + masks = np.asarray(masks, dtype=np.float32) + colors = np.asarray(colors, dtype=np.float32) # shape(n,3) + s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together + masks = (masks @ colors).clip(0, 255) # (h,w,n) @ (n,3) = (h,w,3) + self.im[:] = masks * alpha + self.im * (1 - s * alpha) + else: + if len(masks) == 0: + self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255 + colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0 + colors = colors[:, None, None] # shape(n,1,1,3) + masks = masks.unsqueeze(3) # shape(n,h,w,1) + masks_color = masks * (colors * alpha) # shape(n,h,w,3) + + inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1) + mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3) + + im_gpu = im_gpu.flip(dims=[0]) # flip channel + im_gpu = im_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3) + im_gpu = im_gpu * inv_alph_masks[-1] + mcs + im_mask = (im_gpu * 255).byte().cpu().numpy() + self.im[:] = scale_image(im_gpu.shape, im_mask, self.im.shape) + if self.pil: + # convert im back to PIL and update draw + self.fromarray(self.im) + def rectangle(self, xy, fill=None, outline=None, width=1): # Add rectangle to image (PIL-only) self.draw.rectangle(xy, fill, outline, width) @@ -124,6 +171,11 @@ class Annotator: xy[1] += 1 - h self.draw.text(xy, text, fill=txt_color, font=self.font) + def fromarray(self, im): + # Update self.im from a numpy array + self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) + self.draw = ImageDraw.Draw(self.im) + def result(self): # Return annotated image as array return np.asarray(self.im) @@ -152,7 +204,6 @@ def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detec ax[i].axis('off') LOGGER.info(f'Saving {f}... 
({n}/{channels})') - plt.title('Features') plt.savefig(f, dpi=300, bbox_inches='tight') plt.close() np.save(str(f.with_suffix('.npy')), x[0].cpu().numpy()) # npy save @@ -180,26 +231,31 @@ def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): return filtfilt(b, a, data) # forward-backward filter -def output_to_target(output): - # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] +def output_to_target(output, max_det=300): + # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting targets = [] for i, o in enumerate(output): - targets.extend([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf] for *box, conf, cls in o.cpu().numpy()) - return np.array(targets) + box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1) + j = torch.full((conf.shape[0], 1), i) + targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1)) + return torch.cat(targets, 0).numpy() @threaded -def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=1920, max_subplots=16): +def plot_images(images, targets, paths=None, fname='images.jpg', names=None): # Plot image grid with labels if isinstance(images, torch.Tensor): images = images.cpu().float().numpy() if isinstance(targets, torch.Tensor): targets = targets.cpu().numpy() - if np.max(images[0]) <= 1: - images *= 255 # de-normalise (optional) + + max_size = 1920 # max image size + max_subplots = 16 # max image subplots, i.e. 4x4 bs, _, h, w = images.shape # batch size, _, height, width bs = min(bs, max_subplots) # limit plot images ns = np.ceil(bs ** 0.5) # number of subplots (square) + if np.max(images[0]) <= 1: + images *= 255 # de-normalise (optional) # Build Image mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init @@ -364,7 +420,7 @@ def plot_labels(labels, names=(), save_dir=Path('')): ax[0].set_ylabel('instances') if 0 < len(names) < 30: ax[0].set_xticks(range(len(names))) - ax[0].set_xticklabels(names, rotation=90, fontsize=10) + ax[0].set_xticklabels(list(names.values()), rotation=90, fontsize=10) else: ax[0].set_xlabel('classes') sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9) @@ -509,7 +565,7 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # attempt rectangle to square b[:, 2:] = b[:, 2:] * gain + pad # box wh * gain + pad xyxy = xywh2xyxy(b).long() - clip_coords(xyxy, im.shape) + clip_boxes(xyxy, im.shape) crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)] if save: file.parent.mkdir(parents=True, exist_ok=True) # make directory diff --git a/utils/segment/__init__.py b/utils/segment/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/segment/augmentations.py b/utils/segment/augmentations.py new file mode 100644 index 0000000..169adde --- /dev/null +++ b/utils/segment/augmentations.py @@ -0,0 +1,104 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Image augmentation functions +""" + +import math +import random + +import cv2 +import numpy as np + +from ..augmentations import box_candidates +from ..general import resample_segments, segment2box + + +def mixup(im, labels, segments, im2, labels2, segments2): + # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf + r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 + im = (im * r + im2 * (1 - r)).astype(np.uint8) + labels = np.concatenate((labels, labels2), 0) + segments = np.concatenate((segments, 
segments2), 0) + return im, labels, segments + + +def random_perspective(im, + targets=(), + segments=(), + degrees=10, + translate=.1, + scale=.1, + shear=10, + perspective=0.0, + border=(0, 0)): + # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) + # targets = [cls, xyxy] + + height = im.shape[0] + border[0] * 2 # shape(h,w,c) + width = im.shape[1] + border[1] * 2 + + # Center + C = np.eye(3) + C[0, 2] = -im.shape[1] / 2 # x translation (pixels) + C[1, 2] = -im.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3) + P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) + P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) + + # Rotation and Scale + R = np.eye(3) + a = random.uniform(-degrees, degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(1 - scale, 1 + scale) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3) + S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3) + T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) + T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if perspective: + im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) + else: # affine + im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) + + # Visualize + # import matplotlib.pyplot as plt + # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() + # ax[0].imshow(im[:, :, ::-1]) # base + # ax[1].imshow(im2[:, :, ::-1]) # warped + + # Transform label coordinates + n = len(targets) + new_segments = [] + if n: + new = np.zeros((n, 4)) + segments = resample_segments(segments) # upsample + for i, segment in enumerate(segments): + xy = np.ones((len(segment), 3)) + xy[:, :2] = segment + xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine + + # clip + new[i] = segment2box(xy, width, height) + new_segments.append(xy) + + # filter candidates + i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01) + targets = targets[i] + targets[:, 1:5] = new[i] + new_segments = np.array(new_segments)[i] + + return im, targets, new_segments diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py new file mode 100644 index 0000000..a63d6ec --- /dev/null +++ b/utils/segment/dataloaders.py @@ -0,0 +1,330 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Dataloaders +""" + +import os +import random + +import cv2 +import numpy as np +import torch +from torch.utils.data import DataLoader, distributed + +from ..augmentations import augment_hsv, copy_paste, letterbox +from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker +from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn +from ..torch_utils import torch_distributed_zero_first +from .augmentations import mixup, random_perspective + +RANK = int(os.getenv('RANK', -1)) + + +def create_dataloader(path, + imgsz, 
+ batch_size, + stride, + single_cls=False, + hyp=None, + augment=False, + cache=False, + pad=0.0, + rect=False, + rank=-1, + workers=8, + image_weights=False, + quad=False, + prefix='', + shuffle=False, + mask_downsample_ratio=1, + overlap_mask=False): + if rect and shuffle: + LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False') + shuffle = False + with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP + dataset = LoadImagesAndLabelsAndMasks( + path, + imgsz, + batch_size, + augment=augment, # augmentation + hyp=hyp, # hyperparameters + rect=rect, # rectangular batches + cache_images=cache, + single_cls=single_cls, + stride=int(stride), + pad=pad, + image_weights=image_weights, + prefix=prefix, + downsample_ratio=mask_downsample_ratio, + overlap=overlap_mask) + + batch_size = min(batch_size, len(dataset)) + nd = torch.cuda.device_count() # number of CUDA devices + nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers + sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) + loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates + generator = torch.Generator() + generator.manual_seed(6148914691236517205 + RANK) + return loader( + dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=True, + collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn, + worker_init_fn=seed_worker, + generator=generator, + ), dataset + + +class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing + + def __init__( + self, + path, + img_size=640, + batch_size=16, + augment=False, + hyp=None, + rect=False, + image_weights=False, + cache_images=False, + single_cls=False, + stride=32, + pad=0, + prefix="", + downsample_ratio=1, + overlap=False, + ): + super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, + stride, pad, prefix) + self.downsample_ratio = downsample_ratio + self.overlap = overlap + + def __getitem__(self, index): + index = self.indices[index] # linear, shuffled, or image_weights + + hyp = self.hyp + mosaic = self.mosaic and random.random() < hyp['mosaic'] + masks = [] + if mosaic: + # Load mosaic + img, labels, segments = self.load_mosaic(index) + shapes = None + + # MixUp augmentation + if random.random() < hyp["mixup"]: + img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1))) + + else: + # Load image + img, (h0, w0), (h, w) = self.load_image(index) + + # Letterbox + shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape + img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + + labels = self.labels[index].copy() + # [array, array, ....], array.shape=(num_points, 2), xyxyxyxy + segments = self.segments[index].copy() + if len(segments): + for i_s in range(len(segments)): + segments[i_s] = xyn2xy( + segments[i_s], + ratio[0] * w, + ratio[1] * h, + padw=pad[0], + padh=pad[1], + ) + if labels.size: # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) + + if self.augment: + img, labels, segments = random_perspective(img, + labels, + 
segments=segments, + degrees=hyp["degrees"], + translate=hyp["translate"], + scale=hyp["scale"], + shear=hyp["shear"], + perspective=hyp["perspective"]) + + nl = len(labels) # number of labels + if nl: + labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) + if self.overlap: + masks, sorted_idx = polygons2masks_overlap(img.shape[:2], + segments, + downsample_ratio=self.downsample_ratio) + masks = masks[None] # (640, 640) -> (1, 640, 640) + labels = labels[sorted_idx] + else: + masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) + + masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] // + self.downsample_ratio, img.shape[1] // + self.downsample_ratio)) + # TODO: albumentations support + if self.augment: + # Albumentations + # there are some augmentation that won't change boxes and masks, + # so just be it for now. + img, labels = self.albumentations(img, labels) + nl = len(labels) # update after albumentations + + # HSV color-space + augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) + + # Flip up-down + if random.random() < hyp["flipud"]: + img = np.flipud(img) + if nl: + labels[:, 2] = 1 - labels[:, 2] + masks = torch.flip(masks, dims=[1]) + + # Flip left-right + if random.random() < hyp["fliplr"]: + img = np.fliplr(img) + if nl: + labels[:, 1] = 1 - labels[:, 1] + masks = torch.flip(masks, dims=[2]) + + # Cutouts # labels = cutout(img, labels, p=0.5) + + labels_out = torch.zeros((nl, 6)) + if nl: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Convert + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) + + return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks) + + def load_mosaic(self, index): + # YOLOv5 4-mosaic loader. 
Loads 1 image + 3 random images into a 4-image mosaic + labels4, segments4 = [], [] + s = self.img_size + yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y + + # 3 additional image indices + indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = self.load_image(index) + + # place img in img4 + if i == 0: # top left + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc + x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + padw = x1a - x1b + padh = y1a - y1b + + labels, segments = self.labels[index].copy(), self.segments[index].copy() + + if labels.size: + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format + segments = [xyn2xy(x, w, h, padw, padh) for x in segments] + labels4.append(labels) + segments4.extend(segments) + + # Concat/clip labels + labels4 = np.concatenate(labels4, 0) + for x in (labels4[:, 1:], *segments4): + np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() + # img4, labels4 = replicate(img4, labels4) # replicate + + # Augment + img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) + img4, labels4, segments4 = random_perspective(img4, + labels4, + segments4, + degrees=self.hyp["degrees"], + translate=self.hyp["translate"], + scale=self.hyp["scale"], + shear=self.hyp["shear"], + perspective=self.hyp["perspective"], + border=self.mosaic_border) # border to remove + return img4, labels4, segments4 + + @staticmethod + def collate_fn(batch): + img, label, path, shapes, masks = zip(*batch) # transposed + batched_masks = torch.cat(masks, 0) + for i, l in enumerate(label): + l[:, 0] = i # add target image index for build_targets() + return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks + + +def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): + """ + Args: + img_size (tuple): The image size. + polygons (np.ndarray): [N, M], N is the number of polygons, + M is the number of points(Be divided by 2). + """ + mask = np.zeros(img_size, dtype=np.uint8) + polygons = np.asarray(polygons) + polygons = polygons.astype(np.int32) + shape = polygons.shape + polygons = polygons.reshape(shape[0], -1, 2) + cv2.fillPoly(mask, polygons, color=color) + nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) + # NOTE: fillPoly firstly then resize is trying the keep the same way + # of loss calculation when mask-ratio=1. + mask = cv2.resize(mask, (nw, nh)) + return mask + + +def polygons2masks(img_size, polygons, color, downsample_ratio=1): + """ + Args: + img_size (tuple): The image size. 
+ polygons (list[np.ndarray]): each polygon is [N, M], + N is the number of polygons, + M is the number of points(Be divided by 2). + """ + masks = [] + for si in range(len(polygons)): + mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio) + masks.append(mask) + return np.array(masks) + + +def polygons2masks_overlap(img_size, segments, downsample_ratio=1): + """Return a (640, 640) overlap mask.""" + masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), + dtype=np.int32 if len(segments) > 255 else np.uint8) + areas = [] + ms = [] + for si in range(len(segments)): + mask = polygon2mask( + img_size, + [segments[si].reshape(-1)], + downsample_ratio=downsample_ratio, + color=1, + ) + ms.append(mask) + areas.append(mask.sum()) + areas = np.asarray(areas) + index = np.argsort(-areas) + ms = np.array(ms)[index] + for i in range(len(segments)): + mask = ms[i] * (i + 1) + masks = masks + mask + masks = np.clip(masks, a_min=0, a_max=i + 1) + return masks, index diff --git a/utils/segment/general.py b/utils/segment/general.py new file mode 100644 index 0000000..655123b --- /dev/null +++ b/utils/segment/general.py @@ -0,0 +1,134 @@ +import cv2 +import numpy as np +import torch +import torch.nn.functional as F + + +def crop_mask(masks, boxes): + """ + "Crop" predicted masks by zeroing out everything not in the predicted bbox. + Vectorized by Chong (thanks Chong). + + Args: + - masks should be a size [h, w, n] tensor of masks + - boxes should be a size [n, 4] tensor of bbox coords in relative point form + """ + + n, h, w = masks.shape + x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) + r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) + c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) + + return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) + + +def process_mask_upsample(protos, masks_in, bboxes, shape): + """ + Crop after upsample. + proto_out: [mask_dim, mask_h, mask_w] + out_masks: [n, mask_dim], n is number of masks after nms + bboxes: [n, 4], n is number of masks after nms + shape:input_image_size, (h, w) + + return: h, w, n + """ + + c, mh, mw = protos.shape # CHW + masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) + masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW + masks = crop_mask(masks, bboxes) # CHW + return masks.gt_(0.5) + + +def process_mask(protos, masks_in, bboxes, shape, upsample=False): + """ + Crop before upsample. 
+ proto_out: [mask_dim, mask_h, mask_w] + out_masks: [n, mask_dim], n is number of masks after nms + bboxes: [n, 4], n is number of masks after nms + shape:input_image_size, (h, w) + + return: h, w, n + """ + + c, mh, mw = protos.shape # CHW + ih, iw = shape + masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW + + downsampled_bboxes = bboxes.clone() + downsampled_bboxes[:, 0] *= mw / iw + downsampled_bboxes[:, 2] *= mw / iw + downsampled_bboxes[:, 3] *= mh / ih + downsampled_bboxes[:, 1] *= mh / ih + + masks = crop_mask(masks, downsampled_bboxes) # CHW + if upsample: + masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW + return masks.gt_(0.5) + + +def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): + """ + img1_shape: model input shape, [h, w] + img0_shape: origin pic shape, [h, w, 3] + masks: [h, w, num] + """ + # Rescale coordinates (xyxy) from im1_shape to im0_shape + if ratio_pad is None: # calculate from im0_shape + gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new + pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding + else: + pad = ratio_pad[1] + top, left = int(pad[1]), int(pad[0]) # y, x + bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0]) + + if len(masks.shape) < 2: + raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') + masks = masks[top:bottom, left:right] + # masks = masks.permute(2, 0, 1).contiguous() + # masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0] + # masks = masks.permute(1, 2, 0).contiguous() + masks = cv2.resize(masks, (im0_shape[1], im0_shape[0])) + + if len(masks.shape) == 2: + masks = masks[:, :, None] + return masks + + +def mask_iou(mask1, mask2, eps=1e-7): + """ + mask1: [N, n] m1 means number of predicted objects + mask2: [M, n] m2 means number of gt objects + Note: n means image_w x image_h + + return: masks iou, [N, M] + """ + intersection = torch.matmul(mask1, mask2.t()).clamp(0) + union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection # (area1 + area2) - intersection + return intersection / (union + eps) + + +def masks_iou(mask1, mask2, eps=1e-7): + """ + mask1: [N, n] m1 means number of predicted objects + mask2: [N, n] m2 means number of gt objects + Note: n means image_w x image_h + + return: masks iou, (N, ) + """ + intersection = (mask1 * mask2).sum(1).clamp(0) # (N, ) + union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection + return intersection / (union + eps) + + +def masks2segments(masks, strategy='largest'): + # Convert masks(n,160,160) into segments(n,xy) + segments = [] + for x in masks.int().numpy().astype('uint8'): + c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] + if strategy == 'concat': # concatenate all segments + c = np.concatenate([x.reshape(-1, 2) for x in c]) + elif strategy == 'largest': # select largest segment + c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2) + segments.append(c.astype('float32')) + return segments diff --git a/utils/segment/loss.py b/utils/segment/loss.py new file mode 100644 index 0000000..b45b2c2 --- /dev/null +++ b/utils/segment/loss.py @@ -0,0 +1,186 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..general import xywh2xyxy +from ..loss import FocalLoss, smooth_BCE +from ..metrics import bbox_iou +from ..torch_utils import 
de_parallel +from .general import crop_mask + + +class ComputeLoss: + # Compute losses + def __init__(self, model, autobalance=False, overlap=False): + self.sort_obj_iou = False + self.overlap = overlap + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + self.device = device + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) + BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets + + # Focal loss + g = h['fl_gamma'] # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) + + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index + self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance + self.na = m.na # number of anchors + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.nm = m.nm # number of masks + self.anchors = m.anchors + self.device = device + + def __call__(self, preds, targets, masks): # predictions, targets, model + p, proto = preds + bs, nm, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width + lcls = torch.zeros(1, device=self.device) + lbox = torch.zeros(1, device=self.device) + lobj = torch.zeros(1, device=self.device) + lseg = torch.zeros(1, device=self.device) + tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets) # targets + + # Losses + for i, pi in enumerate(p): # layer index, layer predictions + b, a, gj, gi = indices[i] # image, anchor, gridy, gridx + tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj + + n = b.shape[0] # number of targets + if n: + pxy, pwh, _, pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1, self.nc, nm), 1) # subset of predictions + + # Box regression + pxy = pxy.sigmoid() * 2 - 0.5 + pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i] + pbox = torch.cat((pxy, pwh), 1) # predicted box + iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target) + lbox += (1.0 - iou).mean() # iou loss + + # Objectness + iou = iou.detach().clamp(0).type(tobj.dtype) + if self.sort_obj_iou: + j = iou.argsort() + b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j] + if self.gr < 1: + iou = (1.0 - self.gr) + self.gr * iou + tobj[b, a, gj, gi] = iou # iou ratio + + # Classification + if self.nc > 1: # cls loss (only if multiple classes) + t = torch.full_like(pcls, self.cn, device=self.device) # targets + t[range(n), tcls[i]] = self.cp + lcls += self.BCEcls(pcls, t) # BCE + + # Mask regression + if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample + masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0] + marea = xywhn[i][:, 2:].prod(1) # mask width, height normalized + mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)) + for bi in b.unique(): + j = b == bi # matching index + if self.overlap: + mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0) + else: + mask_gti = masks[tidxs[i]][j] + lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j]) + + obji = self.BCEobj(pi[..., 4], tobj) + lobj += 
obji * self.balance[i] # obj loss + if self.autobalance: + self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() + + if self.autobalance: + self.balance = [x / self.balance[self.ssi] for x in self.balance] + lbox *= self.hyp["box"] + lobj *= self.hyp["obj"] + lcls *= self.hyp["cls"] + lseg *= self.hyp["box"] / bs + + loss = lbox + lobj + lcls + lseg + return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach() + + def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): + # Mask loss for one image + pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) + loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") + return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean() + + def build_targets(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + na, nt = self.na, targets.shape[0] # number of anchors, targets + tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], [] + gain = torch.ones(8, device=self.device) # normalized to gridspace gain + ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) + if self.overlap: + batch = p[0].shape[0] + ti = [] + for i in range(batch): + num = (targets[:, 0] == i).sum() # find number of targets of each image + ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1) # (na, num) + ti = torch.cat(ti, 1) # (na, nt) + else: + ti = torch.arange(nt, device=self.device).float().view(1, nt).repeat(na, 1) + targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2) # append anchor indices + + g = 0.5 # bias + off = torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=self.device).float() * g # offsets + + for i in range(self.nl): + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain + + # Match targets to anchors + t = targets * gain # shape(3,n,7) + if nt: + # Matches + r = t[..., 4:6] / anchors[:, None] # wh ratio + j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t'] # compare + # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) + t = t[j] # filter + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + j, k = ((gxy % 1 < g) & (gxy > 1)).T + l, m = ((gxi % 1 < g) & (gxi > 1)).T + j = torch.stack((torch.ones_like(j), j, k, l, m)) + t = t.repeat((5, 1, 1))[j] + offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] + else: + t = targets[0] + offsets = 0 + + # Define + bc, gxy, gwh, at = t.chunk(4, 1) # (image, class), grid xy, grid wh, anchors + (a, tidx), (b, c) = at.long().T, bc.long().T # anchors, image, class + gij = (gxy - offsets).long() + gi, gj = gij.T # grid indices + + # Append + indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid + tbox.append(torch.cat((gxy - gij, gwh), 1)) # box + anch.append(anchors[a]) # anchors + tcls.append(c) # class + tidxs.append(tidx) + xywhn.append(torch.cat((gxy, gwh), 1) / gain[2:6]) # xywh normalized + + return tcls, tbox, indices, anch, tidxs, xywhn diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py new file mode 100644 index 0000000..b09ce23 --- /dev/null +++ b/utils/segment/metrics.py @@ -0,0 +1,210 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Model validation metrics 
+""" + +import numpy as np + +from ..metrics import ap_per_class + + +def fitness(x): + # Model fitness as a weighted combination of metrics + w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] + return (x[:, :8] * w).sum(1) + + +def ap_per_class_box_and_mask( + tp_m, + tp_b, + conf, + pred_cls, + target_cls, + plot=False, + save_dir=".", + names=(), +): + """ + Args: + tp_b: tp of boxes. + tp_m: tp of masks. + other arguments see `func: ap_per_class`. + """ + results_boxes = ap_per_class(tp_b, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Box")[2:] + results_masks = ap_per_class(tp_m, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Mask")[2:] + + results = { + "boxes": { + "p": results_boxes[0], + "r": results_boxes[1], + "ap": results_boxes[3], + "f1": results_boxes[2], + "ap_class": results_boxes[4]}, + "masks": { + "p": results_masks[0], + "r": results_masks[1], + "ap": results_masks[3], + "f1": results_masks[2], + "ap_class": results_masks[4]}} + return results + + +class Metric: + + def __init__(self) -> None: + self.p = [] # (nc, ) + self.r = [] # (nc, ) + self.f1 = [] # (nc, ) + self.all_ap = [] # (nc, 10) + self.ap_class_index = [] # (nc, ) + + @property + def ap50(self): + """AP@0.5 of all classes. + Return: + (nc, ) or []. + """ + return self.all_ap[:, 0] if len(self.all_ap) else [] + + @property + def ap(self): + """AP@0.5:0.95 + Return: + (nc, ) or []. + """ + return self.all_ap.mean(1) if len(self.all_ap) else [] + + @property + def mp(self): + """mean precision of all classes. + Return: + float. + """ + return self.p.mean() if len(self.p) else 0.0 + + @property + def mr(self): + """mean recall of all classes. + Return: + float. + """ + return self.r.mean() if len(self.r) else 0.0 + + @property + def map50(self): + """Mean AP@0.5 of all classes. + Return: + float. + """ + return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 + + @property + def map(self): + """Mean AP@0.5:0.95 of all classes. + Return: + float. 
+ """ + return self.all_ap.mean() if len(self.all_ap) else 0.0 + + def mean_results(self): + """Mean of results, return mp, mr, map50, map""" + return (self.mp, self.mr, self.map50, self.map) + + def class_result(self, i): + """class-aware result, return p[i], r[i], ap50[i], ap[i]""" + return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) + + def get_maps(self, nc): + maps = np.zeros(nc) + self.map + for i, c in enumerate(self.ap_class_index): + maps[c] = self.ap[i] + return maps + + def update(self, results): + """ + Args: + results: tuple(p, r, ap, f1, ap_class) + """ + p, r, all_ap, f1, ap_class_index = results + self.p = p + self.r = r + self.all_ap = all_ap + self.f1 = f1 + self.ap_class_index = ap_class_index + + +class Metrics: + """Metric for boxes and masks.""" + + def __init__(self) -> None: + self.metric_box = Metric() + self.metric_mask = Metric() + + def update(self, results): + """ + Args: + results: Dict{'boxes': Dict{}, 'masks': Dict{}} + """ + self.metric_box.update(list(results["boxes"].values())) + self.metric_mask.update(list(results["masks"].values())) + + def mean_results(self): + return self.metric_box.mean_results() + self.metric_mask.mean_results() + + def class_result(self, i): + return self.metric_box.class_result(i) + self.metric_mask.class_result(i) + + def get_maps(self, nc): + return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) + + @property + def ap_class_index(self): + # boxes and masks have the same ap_class_index + return self.metric_box.ap_class_index + + +KEYS = [ + "train/box_loss", + "train/seg_loss", # train loss + "train/obj_loss", + "train/cls_loss", + "metrics/precision(B)", + "metrics/recall(B)", + "metrics/mAP_0.5(B)", + "metrics/mAP_0.5:0.95(B)", # metrics + "metrics/precision(M)", + "metrics/recall(M)", + "metrics/mAP_0.5(M)", + "metrics/mAP_0.5:0.95(M)", # metrics + "val/box_loss", + "val/seg_loss", # val loss + "val/obj_loss", + "val/cls_loss", + "x/lr0", + "x/lr1", + "x/lr2",] + +BEST_KEYS = [ + "best/epoch", + "best/precision(B)", + "best/recall(B)", + "best/mAP_0.5(B)", + "best/mAP_0.5:0.95(B)", + "best/precision(M)", + "best/recall(M)", + "best/mAP_0.5(M)", + "best/mAP_0.5:0.95(M)",] diff --git a/utils/segment/plots.py b/utils/segment/plots.py new file mode 100644 index 0000000..9b90900 --- /dev/null +++ b/utils/segment/plots.py @@ -0,0 +1,143 @@ +import contextlib +import math +from pathlib import Path + +import cv2 +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import torch + +from .. import threaded +from ..general import xywh2xyxy +from ..plots import Annotator, colors + + +@threaded +def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None): + # Plot image grid with labels + if isinstance(images, torch.Tensor): + images = images.cpu().float().numpy() + if isinstance(targets, torch.Tensor): + targets = targets.cpu().numpy() + if isinstance(masks, torch.Tensor): + masks = masks.cpu().numpy().astype(int) + + max_size = 1920 # max image size + max_subplots = 16 # max image subplots, i.e. 
4x4 + bs, _, h, w = images.shape # batch size, _, height, width + bs = min(bs, max_subplots) # limit plot images + ns = np.ceil(bs ** 0.5) # number of subplots (square) + if np.max(images[0]) <= 1: + images *= 255 # de-normalise (optional) + + # Build Image + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + for i, im in enumerate(images): + if i == max_subplots: # if last batch has fewer images than we expect + break + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + im = im.transpose(1, 2, 0) + mosaic[y:y + h, x:x + w, :] = im + + # Resize (optional) + scale = max_size / ns / max(h, w) + if scale < 1: + h = math.ceil(scale * h) + w = math.ceil(scale * w) + mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) + + # Annotate + fs = int((h + w) * ns * 0.01) # font size + annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names) + for i in range(i + 1): + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders + if paths: + annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames + if len(targets) > 0: + idx = targets[:, 0] == i + ti = targets[idx] # image targets + + boxes = xywh2xyxy(ti[:, 2:6]).T + classes = ti[:, 1].astype('int') + labels = ti.shape[1] == 6 # labels if no conf column + conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) + + if boxes.shape[1]: + if boxes.max() <= 1.01: # if normalized with tolerance 0.01 + boxes[[0, 2]] *= w # scale to pixels + boxes[[1, 3]] *= h + elif scale < 1: # absolute coords need scale if image scales + boxes *= scale + boxes[[0, 2]] += x + boxes[[1, 3]] += y + for j, box in enumerate(boxes.T.tolist()): + cls = classes[j] + color = colors(cls) + cls = names[cls] if names else cls + if labels or conf[j] > 0.25: # 0.25 conf thresh + label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}' + annotator.box_label(box, label, color=color) + + # Plot masks + if len(masks): + if masks.max() > 1.0: # mean that masks are overlap + image_masks = masks[[i]] # (1, 640, 640) + nl = len(ti) + index = np.arange(nl).reshape(nl, 1, 1) + 1 + image_masks = np.repeat(image_masks, nl, axis=0) + image_masks = np.where(image_masks == index, 1.0, 0.0) + else: + image_masks = masks[idx] + + im = np.asarray(annotator.im).copy() + for j, box in enumerate(boxes.T.tolist()): + if labels or conf[j] > 0.25: # 0.25 conf thresh + color = colors(classes[j]) + mh, mw = image_masks[j].shape + if mh != h or mw != w: + mask = image_masks[j].astype(np.uint8) + mask = cv2.resize(mask, (w, h)) + mask = mask.astype(bool) + else: + mask = image_masks[j].astype(bool) + with contextlib.suppress(Exception): + im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 + annotator.fromarray(im) + annotator.im.save(fname) # save + + +def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): + # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) + fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) + ax = ax.ravel() + files = list(save_dir.glob("results*.csv")) + assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." 
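+    # Note: the column indices below assume results.csv follows the layout logged with KEYS in
+    # utils/segment/metrics.py, i.e. column 0 = epoch, columns 1-4 = train box/seg/obj/cls loss,
+    # 5-8 = P/R/mAP_0.5/mAP_0.5:0.95 for boxes, 9-12 = the same four metrics for masks,
+    # 13-16 = val losses and 17-19 = lr0/lr1/lr2. The argmax below then picks the epoch that
+    # maximises 0.1 * mAP_0.5 + 0.9 * mAP_0.5:0.95 summed over boxes and masks, mirroring fitness() above.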
+ for f in files: + try: + data = pd.read_csv(f) + index = np.argmax(0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + + 0.1 * data.values[:, 11]) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): + y = data.values[:, j] + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) + if best: + # best + ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3) + ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") + else: + # last + ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) + ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") + # if j in [8, 9, 10]: # share train and val loss y axes + # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print(f"Warning: Plotting error for {f}: {e}") + ax[1].legend() + fig.savefig(save_dir / "results.png", dpi=200) + plt.close() diff --git a/utils/torch_utils.py b/utils/torch_utils.py index abf0bbc..9f257d0 100644 --- a/utils/torch_utils.py +++ b/utils/torch_utils.py @@ -47,7 +47,7 @@ def smartCrossEntropyLoss(label_smoothing=0.0): if check_version(torch.__version__, '1.10.0'): return nn.CrossEntropyLoss(label_smoothing=label_smoothing) if label_smoothing > 0: - LOGGER.warning(f'WARNING: label smoothing {label_smoothing} requires torch>=1.10.0') + LOGGER.warning(f'WARNING ⚠️ label smoothing {label_smoothing} requires torch>=1.10.0') return nn.CrossEntropyLoss() @@ -251,6 +251,7 @@ def fuse_conv_and_bn(conv, bn): kernel_size=conv.kernel_size, stride=conv.stride, padding=conv.padding, + dilation=conv.dilation, groups=conv.groups, bias=True).requires_grad_(False).to(conv.weight.device) diff --git a/utils/triton.py b/utils/triton.py new file mode 100644 index 0000000..a94ef0a --- /dev/null +++ b/utils/triton.py @@ -0,0 +1,85 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" Utils to interact with the Triton Inference Server +""" + +import typing +from urllib.parse import urlparse + +import torch + + +class TritonRemoteModel: + """ A wrapper over a model served by the Triton Inference Server. It can + be configured to communicate over GRPC or HTTP. It accepts Torch Tensors + as input and returns them as outputs. + """ + + def __init__(self, url: str): + """ + Keyword arguments: + url: Fully qualified address of the Triton server - for e.g. 
grpc://localhost:8000 + """ + + parsed_url = urlparse(url) + if parsed_url.scheme == "grpc": + from tritonclient.grpc import InferenceServerClient, InferInput + + self.client = InferenceServerClient(parsed_url.netloc) # Triton GRPC client + model_repository = self.client.get_model_repository_index() + self.model_name = model_repository.models[0].name + self.metadata = self.client.get_model_metadata(self.model_name, as_json=True) + + def create_input_placeholders() -> typing.List[InferInput]: + return [ + InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']] + + else: + from tritonclient.http import InferenceServerClient, InferInput + + self.client = InferenceServerClient(parsed_url.netloc) # Triton HTTP client + model_repository = self.client.get_model_repository_index() + self.model_name = model_repository[0]['name'] + self.metadata = self.client.get_model_metadata(self.model_name) + + def create_input_placeholders() -> typing.List[InferInput]: + return [ + InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']] + + self._create_input_placeholders_fn = create_input_placeholders + + @property + def runtime(self): + """Returns the model runtime""" + return self.metadata.get("backend", self.metadata.get("platform")) + + def __call__(self, *args, **kwargs) -> typing.Union[torch.Tensor, typing.Tuple[torch.Tensor, ...]]: + """ Invokes the model. Parameters can be provided via args or kwargs. + args, if provided, are assumed to match the order of inputs of the model. + kwargs are matched with the model input names. + """ + inputs = self._create_inputs(*args, **kwargs) + response = self.client.infer(model_name=self.model_name, inputs=inputs) + result = [] + for output in self.metadata['outputs']: + tensor = torch.as_tensor(response.as_numpy(output['name'])) + result.append(tensor) + return result[0] if len(result) == 1 else result + + def _create_inputs(self, *args, **kwargs): + args_len, kwargs_len = len(args), len(kwargs) + if not args_len and not kwargs_len: + raise RuntimeError("No inputs provided.") + if args_len and kwargs_len: + raise RuntimeError("Cannot specify args and kwargs at the same time") + + placeholders = self._create_input_placeholders_fn() + if args_len: + if args_len != len(placeholders): + raise RuntimeError(f"Expected {len(placeholders)} inputs, got {args_len}.") + for input, value in zip(placeholders, args): + input.set_data_from_numpy(value.cpu().numpy()) + else: + for input in placeholders: + value = kwargs[input.name] + input.set_data_from_numpy(value.cpu().numpy()) + return placeholders
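For reference, a minimal usage sketch of the TritonRemoteModel wrapper introduced above. The server address, input shape and dtype here are illustrative assumptions only; the actual input names, shapes and datatypes are taken from the served model's Triton metadata, and a Triton Inference Server must already be running and serving a model:

    # Hypothetical example: query a model served by Triton (assumed address and input shape)
    import torch

    from utils.triton import TritonRemoteModel

    model = TritonRemoteModel("grpc://localhost:8000")  # or an http:// address
    print(model.runtime)                                # backend/platform reported by the model metadata

    im = torch.zeros(1, 3, 640, 640)                    # dummy FP32 input; must match the served model's input spec
    out = model(im)                                     # positional tensors are matched to the declared inputs in order
    print(out.shape if isinstance(out, torch.Tensor) else [o.shape for o in out])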