Mirror of https://github.com/RootKit-Org/AI-Aimbot.git
Synced 2025-06-21 02:41:01 +08:00

Commit 65cb7a2111: Fixing bugs on yolo update
Parent commit: 115a454637
.gitignore (vendored): 3 changes
@@ -8,3 +8,6 @@ yolov5s.engine
 yolov5
 tensorrt-8.4.1.5-cp39-none-win_amd64.whl
 tensorrt-8.4.1.5-cp310-none-win_amd64.whl
+__pycache__
+utils/__pycache__
+models/__pycache__
main.py: 36 changes
@@ -5,12 +5,13 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 import dxcam


 def main():
     # Window title of the game, don't need the entire name
     videoGameWindowTitle = "Counter"
@@ -63,8 +64,10 @@ def main():
     # sctArea = {"mon": 1, "top": 0, "left": 0, "width": 1920, "height": 1080}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + screenShotWidth, top + screenShotHeight

     region = (left, top, right, bottom)
@@ -81,7 +84,8 @@ def main():
     sTime = time.time()

     # Loading Yolo5 Small AI Model
-    model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, force_reload=True)
+    model = torch.hub.load('ultralytics/yolov5', 'yolov5s',
+                           pretrained=True, force_reload=True)
     stride, names, pt = model.stride, model.names, model.pt

     model.half()
@@ -109,8 +113,8 @@ def main():
         results = model(im, size=screenShotHeight)

         # Suppressing results that dont meet thresholds
-        pred = non_max_suppression(results, 0.25, 0.25, 0, False, max_det=1000)
+        pred = non_max_suppression(
+            results, 0.25, 0.25, 0, False, max_det=1000)

         # Converting output to usable cords
         targets = []
@@ -123,9 +127,11 @@ def main():
                 s += f"{n} {names[int(c)]}, "  # add to string

             for *xyxy, conf, cls in reversed(det):
-                targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                    1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -134,7 +140,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -151,7 +158,8 @@ def main():

             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]

         else:
@@ -165,7 +173,8 @@ def main():
             halfH = round(targets["height"][i] / 2)
             midX = targets['current_mid_x'][i]
             midY = targets['current_mid_y'][i]
-            (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+            (startX, startY, endX, endY) = int(
+                midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)

             confidence = .5

@@ -197,5 +206,6 @@ def main():
         exit()
     camera.stop()

+
 if __name__ == "__main__":
     main()
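Note: the changes in main.py are line-wrap cleanups plus one substantive edit: the utils.general import now pulls only the three names the script actually uses (cv2, non_max_suppression, xyxy2xywh). The targets loop feeds xyxy corner boxes through xyxy2xywh and divides by a gain tensor gn to produce the normalized center/size rows of the DataFrame. A minimal standalone sketch of that conversion (hypothetical helper name, not part of the repo; stock YOLOv5 builds gn from the image shape):

def xyxy_to_norm_xywh(box, img_w, img_h):
    # (x1, y1, x2, y2) in pixels -> (x_center, y_center, width, height) in [0, 1]
    x1, y1, x2, y2 = box
    return [((x1 + x2) / 2) / img_w,
            ((y1 + y2) / 2) / img_h,
            (x2 - x1) / img_w,
            (y2 - y1) / img_h]

print(xyxy_to_norm_xywh((100, 40, 300, 240), 320, 320))  # [0.625, 0.4375, 0.625, 0.625]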
@@ -5,13 +5,14 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 import dxcam
 import torch


 def main():
     # Window title to go after and the height of the screenshots
     videoGameWindowTitle = "Counter"
@@ -61,8 +62,10 @@ def main():
                "height": screenShotHeight}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + 320, top + 320

     region = (left, top, right, bottom)
@@ -82,7 +85,8 @@ def main():

     so = ort.SessionOptions()
     so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-    ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=['CUDAExecutionProvider'])
+    ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=[
+                                    'CUDAExecutionProvider'])

     # Used for colors drawn on bounding boxes
     COLORS = np.random.uniform(0, 255, size=(1500, 3))
@@ -99,7 +103,8 @@ def main():

         im = torch.from_numpy(outputs[0]).to('cpu')

-        pred = non_max_suppression(im, confidence, confidence, 0, False, max_det=10)
+        pred = non_max_suppression(
+            im, confidence, confidence, 0, False, max_det=10)

         targets = []
         for i, det in enumerate(pred):
@@ -111,9 +116,11 @@ def main():
                 s += f"{n} {int(c)}, "  # add to string

             for *xyxy, conf, cls in reversed(det):
-                targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                    1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -122,7 +129,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -139,7 +147,8 @@ def main():

             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]

         else:
@@ -153,7 +162,8 @@ def main():
             halfH = round(targets["height"][i] / 2)
             midX = targets['current_mid_x'][i]
             midY = targets['current_mid_y'][i]
-            (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+            (startX, startY, endX, endY) = int(midX + halfW), int(midY +
+                                               halfH), int(midX - halfW), int(midY - halfH)

             idx = 0
             # draw the bounding box and label on the frame
@@ -183,5 +193,6 @@ def main():

     camera.stop()

+
 if __name__ == "__main__":
     main()
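Note: the hunks above belong to one of the ONNX entry scripts; the per-file header was lost when this page was mirrored (only .gitignore, main.py, and models/common.py kept theirs). The one functional line here, merely rewrapped by the commit, is the onnxruntime session setup. A sketch of that setup in isolation, assuming an onnxruntime-gpu install and a 320x320 yolov5s320.onnx export (the input dtype must match the export; fp16 exports take np.float16):

import numpy as np
import onnxruntime as ort

so = ort.SessionOptions()
so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
# Listing CPUExecutionProvider as a fallback avoids a hard failure on non-CUDA builds
sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so,
                            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])

dummy = np.zeros((1, 3, 320, 320), dtype=np.float32)  # NCHW batch, values in [0, 1]
outputs = sess.run(None, {sess.get_inputs()[0].name: dummy})
print(outputs[0].shape)  # raw predictions, filtered by non_max_suppression downstream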
@@ -6,13 +6,14 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 import dxcam
 import torch


 def main():
     # Window title to go after and the height of the screenshots
     videoGameWindowTitle = "Counter"
@@ -62,8 +63,10 @@ def main():
                "height": screenShotHeight}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + 320, top + 320

     region = (left, top, right, bottom)
@@ -81,7 +84,8 @@ def main():

     so = ort.SessionOptions()
     so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-    ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=['CUDAExecutionProvider'])
+    ort_sess = ort.InferenceSession('yolov5s320.onnx', sess_options=so, providers=[
+                                    'CUDAExecutionProvider'])

     # Used for colors drawn on bounding boxes
     COLORS = np.random.uniform(0, 255, size=(1500, 3))
@@ -98,7 +102,8 @@ def main():

         im = torch.from_numpy(outputs[0]).to('cpu')

-        pred = non_max_suppression(im, confidence, confidence, 0, False, max_det=10)
+        pred = non_max_suppression(
+            im, confidence, confidence, 0, False, max_det=10)

         targets = []
         for i, det in enumerate(pred):
@@ -110,9 +115,11 @@ def main():
                 s += f"{n} {int(c)}, "  # add to string

             for *xyxy, conf, cls in reversed(det):
-                targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                    1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -121,7 +128,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -138,7 +146,8 @@ def main():

             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]

         else:
@@ -152,7 +161,8 @@ def main():
             halfH = round(targets["height"][i] / 2)
             midX = targets['current_mid_x'][i]
             midY = targets['current_mid_y'][i]
-            (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+            (startX, startY, endX, endY) = int(midX + halfW), int(midY +
+                                               halfH), int(midX - halfW), int(midY - halfH)

             idx = 0
             # draw the bounding box and label on the frame
@@ -181,5 +191,6 @@ def main():
         exit()
     camera.stop()

+
 if __name__ == "__main__":
     main()
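Note: this second, near-identical ONNX variant rewraps the same non_max_suppression call. Read positionally against the stock YOLOv5 signature, the arguments (im, confidence, confidence, 0, False, max_det=10) pin the confidence and IoU thresholds to one shared value, keep only class 0 (person, in COCO ordering), and run class-aware NMS capped at ten boxes. A keyword-form sketch of the equivalent call, assuming the stock utils.general implementation:

from utils.general import non_max_suppression

def person_nms(prediction, confidence):
    # Same semantics as the positional call in the scripts above
    return non_max_suppression(prediction,
                               conf_thres=confidence,  # drop boxes scored below this
                               iou_thres=confidence,   # overlap threshold for suppression
                               classes=0,              # keep COCO class 0 ("person") only
                               agnostic=False,         # suppress per class, not across classes
                               max_det=10)             # at most 10 detections per image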
@@ -5,14 +5,15 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 from models.common import DetectMultiBackend
 import dxcam
 import cupy as cp


 def main():
     # Window title to go after and the height of the screenshots
     videoGameWindowTitle = "Counter"
@@ -62,8 +63,10 @@ def main():
                "height": screenShotHeight}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + screenShotWidth, top + screenShotHeight

     region = (left, top, right, bottom)
@@ -80,7 +83,8 @@ def main():
     sTime = time.time()

     # Loading Yolo5 Small AI Model
-    model = DetectMultiBackend('yolov5s320Half.engine', device=torch.device('cuda'), dnn=False, data='', fp16=True)
+    model = DetectMultiBackend('yolov5s320Half.engine', device=torch.device(
+        'cuda'), dnn=False, data='', fp16=True)
     stride, names, pt = model.stride, model.names, model.pt

     # Used for colors drawn on bounding boxes
@@ -97,15 +101,17 @@ def main():
         im = cp.moveaxis(npImg, 3, 1)
         im = torch.from_numpy(cp.asnumpy(im)).to('cuda')

-        #Converting to numpy for visuals
+        # Converting to numpy for visuals
         im0 = im[0].permute(1, 2, 0) * 255
         im0 = im0.cpu().numpy().astype(np.uint8)
-        im0 = cv2.cvtColor(im0, cv2.COLOR_RGB2BGR) #Image has to be in BGR for visualization
+        # Image has to be in BGR for visualization
+        im0 = cv2.cvtColor(im0, cv2.COLOR_RGB2BGR)

         # Detecting all the objects
         results = model(im)

-        pred = non_max_suppression(results, confidence, confidence, 0, False, max_det=10)
+        pred = non_max_suppression(
+            results, confidence, confidence, 0, False, max_det=10)

         targets = []
         for i, det in enumerate(pred):
@@ -117,9 +123,11 @@ def main():
                 s += f"{n} {names[int(c)]}, "  # add to string

             for *xyxy, conf, cls in reversed(det):
-                targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                    1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -128,7 +136,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -145,7 +154,8 @@ def main():

             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]

         else:
@@ -159,7 +169,8 @@ def main():
             halfH = round(targets["height"][i] / 2)
             midX = targets['current_mid_x'][i]
             midY = targets['current_mid_y'][i]
-            (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+            (startX, startY, endX, endY) = int(
+                midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)

             idx = 0
             # draw the bounding box and label on the frame
@@ -188,5 +199,6 @@ def main():
         exit()
     camera.stop()

+
 if __name__ == "__main__":
     main()
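Note: these hunks are from the TensorRT variant, which loads a serialized engine through YOLOv5's DetectMultiBackend wrapper and stages frames via CuPy; the diff only rewraps the calls. A sketch of the engine path in isolation, assuming a yolov5s320Half.engine built on the local GPU (TensorRT engines are not portable across machines or TensorRT versions) and half-precision 320x320 input:

import torch
from models.common import DetectMultiBackend

device = torch.device('cuda')
model = DetectMultiBackend('yolov5s320Half.engine', device=device,
                           dnn=False, data='', fp16=True)
# Dummy NCHW frame; the real frames come from the dxcam capture region
im = torch.zeros(1, 3, 320, 320, dtype=torch.float16, device=device)
pred = model(im)  # raw output, filtered by non_max_suppression afterwards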
@@ -5,12 +5,13 @@ import gc
 import numpy as np
 import cv2
 import time
-import win32api, win32con
+import win32api
+import win32con
 import pandas as pd
-from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
-                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.general import (cv2, non_max_suppression, xyxy2xywh)
 import dxcam


 def main():
     # Window title to go after and the height of the screenshots
     videoGameWindowTitle = "Counter"
@@ -60,8 +61,10 @@ def main():
                "height": screenShotHeight}

     # Starting screenshoting engine
-    left = aaRightShift + ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
-    top = videoGameWindow.top + (videoGameWindow.height - screenShotHeight) // 2
+    left = aaRightShift + \
+        ((videoGameWindow.left + videoGameWindow.right) // 2) - (screenShotWidth // 2)
+    top = videoGameWindow.top + \
+        (videoGameWindow.height - screenShotHeight) // 2
     right, bottom = left + screenShotWidth, top + screenShotHeight

     region = (left, top, right, bottom)
@@ -78,7 +81,8 @@ def main():
     sTime = time.time()

     # Loading Yolo5 Small AI Model
-    model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, force_reload=True)
+    model = torch.hub.load('ultralytics/yolov5', 'yolov5s',
+                           pretrained=True, force_reload=True)
     stride, names, pt = model.stride, model.names, model.pt

     model.half()
@@ -106,7 +110,8 @@ def main():
         results = model(im, size=screenShotHeight)

         # Suppressing results that dont meet thresholds
-        pred = non_max_suppression(results, confidence, confidence, 0, False, max_det=10)
+        pred = non_max_suppression(
+            results, confidence, confidence, 0, False, max_det=10)

         # Converting output to usable cords
         targets = []
@@ -119,9 +124,11 @@ def main():
                 s += f"{n} {names[int(c)]}, "  # add to string

             for *xyxy, conf, cls in reversed(det):
-                targets.append((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist())  # normalized xywh
+                targets.append((xyxy2xywh(torch.tensor(xyxy).view(
+                    1, 4)) / gn).view(-1).tolist())  # normalized xywh

-        targets = pd.DataFrame(targets, columns = ['current_mid_x', 'current_mid_y', 'width', "height"])
+        targets = pd.DataFrame(
+            targets, columns=['current_mid_x', 'current_mid_y', 'width', "height"])

         # If there are people in the center bounding box
         if len(targets) > 0:
@@ -130,7 +137,8 @@ def main():
                 targets['last_mid_x'] = last_mid_coord[0]
                 targets['last_mid_y'] = last_mid_coord[1]
                 # Take distance between current person mid coordinate and last person mid coordinate
-                targets['dist'] = np.linalg.norm(targets.iloc[:, [0,1]].values - targets.iloc[:, [4,5]], axis=1)
+                targets['dist'] = np.linalg.norm(
+                    targets.iloc[:, [0, 1]].values - targets.iloc[:, [4, 5]], axis=1)
                 targets.sort_values(by="dist", ascending=False)

             # Take the first person that shows up in the dataframe (Recall that we sort based on Euclidean distance)
@@ -147,7 +155,8 @@ def main():

             # Moving the mouse
             if win32api.GetKeyState(0x14):
-                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
+                win32api.mouse_event(win32con.MOUSEEVENTF_MOVE, int(
+                    mouseMove[0] * aaMovementAmp), int(mouseMove[1] * aaMovementAmp), 0, 0)
             last_mid_coord = [xMid, yMid]

         else:
@@ -161,7 +170,8 @@ def main():
             halfH = round(targets["height"][i] / 2)
             midX = targets['current_mid_x'][i]
             midY = targets['current_mid_y'][i]
-            (startX, startY, endX, endY) = int(midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)
+            (startX, startY, endX, endY) = int(
+                midX + halfW), int(midY + halfH), int(midX - halfW), int(midY - halfH)

             idx = 0
             # draw the bounding box and label on the frame
@@ -190,5 +200,6 @@ def main():
         exit()
     camera.stop()

+
 if __name__ == "__main__":
     main()
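Note: this last variant mirrors main.py (torch.hub weights) with the tighter max_det=10 suppression. All five scripts share the same aiming gate, which the commit only rewraps: win32api.GetKeyState(0x14) reads the Caps Lock state (0x14 is VK_CAPITAL; the returned short is nonzero while the toggle bit or pressed bit is set), and mouse_event with MOUSEEVENTF_MOVE issues a relative cursor move. A minimal sketch of just that mechanism (Windows only, pywin32 assumed):

import win32api
import win32con

def nudge_if_capslock_on(dx, dy, amp=1.0):
    # GetKeyState(0x14) is truthy while Caps Lock is toggled on (or the key is held)
    if win32api.GetKeyState(0x14):
        # MOUSEEVENTF_MOVE without MOUSEEVENTF_ABSOLUTE moves relative to the current position
        win32api.mouse_event(win32con.MOUSEEVENTF_MOVE,
                             int(dx * amp), int(dy * amp), 0, 0)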
5 binary files changed (not shown).
models/common.py: 492 changes
@@ -10,6 +10,7 @@ import warnings
 from collections import OrderedDict, namedtuple
 from copy import copy
 from pathlib import Path
+from urllib.parse import urlparse

 import cv2
 import numpy as np
@@ -22,26 +23,51 @@ from torch.cuda import amp

 from utils.dataloaders import exif_transpose, letterbox
 from utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr,
-                           increment_path, make_divisible, non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh,
+                           increment_path, make_divisible, non_max_suppression, scale_boxes, xywh2xyxy, xyxy2xywh,
                            yaml_load)
 from utils.plots import Annotator, colors, save_one_box
 from utils.torch_utils import copy_attr, smart_inference_mode


-def autopad(k, p=None):  # kernel, padding
-    # Pad to 'same'
+def export_formats():
+    # YOLOv5 export formats
+    x = [
+        ['PyTorch', '-', '.pt', True, True],
+        ['TorchScript', 'torchscript', '.torchscript', True, True],
+        ['ONNX', 'onnx', '.onnx', True, True],
+        ['OpenVINO', 'openvino', '_openvino_model', True, False],
+        ['TensorRT', 'engine', '.engine', False, True],
+        ['CoreML', 'coreml', '.mlmodel', True, False],
+        ['TensorFlow SavedModel', 'saved_model', '_saved_model', True, True],
+        ['TensorFlow GraphDef', 'pb', '.pb', True, True],
+        ['TensorFlow Lite', 'tflite', '.tflite', True, False],
+        ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False, False],
+        ['TensorFlow.js', 'tfjs', '_web_model', False, False],
+        ['PaddlePaddle', 'paddle', '_paddle_model', True, True], ]
+    return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU'])
+
+
+def autopad(k, p=None, d=1):  # kernel, padding, dilation
+    # Pad to 'same' shape outputs
+    if d > 1:
+        k = d * (k - 1) + 1 if isinstance(k,
+                                          int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
     if p is None:
         p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
     return p


 class Conv(nn.Module):
-    # Standard convolution
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+    # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
+    default_act = nn.SiLU()  # default activation
+
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
         super().__init__()
-        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
+        self.conv = nn.Conv2d(c1, c2, k, s, autopad(
+            k, p, d), groups=g, dilation=d, bias=False)
         self.bn = nn.BatchNorm2d(c2)
-        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
+        self.act = self.default_act if act is True else act if isinstance(
+            act, nn.Module) else nn.Identity()

     def forward(self, x):
         return self.act(self.bn(self.conv(x)))
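Note: the models/common.py changes track upstream YOLOv5: scale_coords becomes scale_boxes in the import, export_formats() moves in with its format table, and autopad() gains a dilation argument. A dilated kernel covers d*(k-1)+1 input pixels, so padding by half of that effective size preserves the 'same' output shape at stride 1. A standalone check of the arithmetic (int kernel sizes only, for brevity):

import torch
import torch.nn as nn

def autopad(k, p=None, d=1):  # kernel, padding, dilation
    if d > 1:
        k = d * (k - 1) + 1  # effective kernel size after dilation
    if p is None:
        p = k // 2  # half the effective size -> 'same' padding at stride 1
    return p

x = torch.randn(1, 8, 32, 32)
conv = nn.Conv2d(8, 8, kernel_size=3, stride=1,
                 padding=autopad(3, None, 2), dilation=2)
print(conv(x).shape)  # torch.Size([1, 8, 32, 32]): spatial size preserved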
@@ -51,14 +77,16 @@ class Conv(nn.Module):


 class DWConv(Conv):
-    # Depth-wise convolution class
-    def __init__(self, c1, c2, k=1, s=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
-        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
+    # Depth-wise convolution
+    # ch_in, ch_out, kernel, stride, dilation, activation
+    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):
+        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)


 class DWConvTranspose2d(nn.ConvTranspose2d):
-    # Depth-wise transpose convolution class
-    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
+    # Depth-wise transpose convolution
+    # ch_in, ch_out, kernel, stride, padding, padding_out
+    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):
         super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
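Note: Conv's activation now comes from the class attribute default_act = nn.SiLU() instead of being constructed inside __init__. One consequence worth knowing: the default can be swapped globally before building a model. An illustrative sketch of the pattern, stripped of the conv/bn layers (not code from this commit):

import torch.nn as nn

class ConvLike(nn.Module):
    default_act = nn.SiLU()  # class-level default shared by all instances

    def __init__(self, act=True):
        super().__init__()
        # True -> class default; an nn.Module -> use it as given; anything else -> identity
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

ConvLike.default_act = nn.ReLU()  # changes the default for every ConvLike built afterwards
print(ConvLike().act)  # ReLU()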
@@ -87,7 +115,8 @@ class TransformerBlock(nn.Module):
         if c1 != c2:
             self.conv = Conv(c1, c2)
         self.linear = nn.Linear(c2, c2)  # learnable position embedding
-        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
+        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads)
+                                for _ in range(num_layers)))
         self.c2 = c2

     def forward(self, x):
@@ -100,7 +129,8 @@ class TransformerBlock(nn.Module):

 class Bottleneck(nn.Module):
     # Standard bottleneck
-    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
+    # ch_in, ch_out, shortcut, groups, expansion
+    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
@@ -113,7 +143,8 @@ class Bottleneck(nn.Module):

 class BottleneckCSP(nn.Module):
     # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+    # ch_in, ch_out, number, shortcut, groups, expansion
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
@@ -122,7 +153,8 @@ class BottleneckCSP(nn.Module):
         self.cv4 = Conv(2 * c_, c2, 1, 1)
         self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
         self.act = nn.SiLU()
-        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
+        self.m = nn.Sequential(
+            *(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

     def forward(self, x):
         y1 = self.cv3(self.m(self.cv1(x)))
@@ -146,13 +178,15 @@ class CrossConv(nn.Module):

 class C3(nn.Module):
     # CSP Bottleneck with 3 convolutions
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+    # ch_in, ch_out, number, shortcut, groups, expansion
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
         self.cv2 = Conv(c1, c_, 1, 1)
         self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
-        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
+        self.m = nn.Sequential(
+            *(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

     def forward(self, x):
         return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
@@ -163,7 +197,8 @@ class C3x(C3):
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         super().__init__(c1, c2, n, shortcut, g, e)
         c_ = int(c2 * e)
-        self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
+        self.m = nn.Sequential(
+            *(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))


 class C3TR(C3):
@@ -197,12 +232,14 @@ class SPP(nn.Module):
         c_ = c1 // 2  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
         self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
-        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
+        self.m = nn.ModuleList(
+            [nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

     def forward(self, x):
         x = self.cv1(x)
         with warnings.catch_warnings():
-            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
+            # suppress torch 1.9.0 max_pool2d() warning
+            warnings.simplefilter('ignore')
             return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))


@@ -218,7 +255,8 @@ class SPPF(nn.Module):
     def forward(self, x):
         x = self.cv1(x)
         with warnings.catch_warnings():
-            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
+            # suppress torch 1.9.0 max_pool2d() warning
+            warnings.simplefilter('ignore')
             y1 = self.m(x)
             y2 = self.m(y1)
             return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
@@ -226,9 +264,10 @@ class SPPF(nn.Module):

 class Focus(nn.Module):
     # Focus wh information into c-space
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+    # ch_in, ch_out, kernel, stride, padding, groups
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
         super().__init__()
-        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
+        self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
         # self.contract = Contract(gain=2)

     def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
@@ -238,11 +277,12 @@ class Focus(nn.Module):

 class GhostConv(nn.Module):
     # Ghost Convolution https://github.com/huawei-noah/ghostnet
-    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
+    # ch_in, ch_out, kernel, stride, groups
+    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
         super().__init__()
         c_ = c2 // 2  # hidden channels
-        self.cv1 = Conv(c1, c_, k, s, None, g, act)
-        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
+        self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
+        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)

     def forward(self, x):
         y = self.cv1(x)
@@ -310,7 +350,7 @@ class DetectMultiBackend(nn.Module):
         #   PyTorch:              weights = *.pt
         #   TorchScript:                    *.torchscript
         #   ONNX Runtime:                   *.onnx
-        #   ONNX OpenCV DNN:                *.onnx with --dnn
+        #   ONNX OpenCV DNN:                *.onnx --dnn
         #   OpenVINO:                       *.xml
         #   CoreML:                         *.mlmodel
         #   TensorRT:                       *.engine
@@ -318,25 +358,35 @@ class DetectMultiBackend(nn.Module):
         #   TensorFlow GraphDef:            *.pb
         #   TensorFlow Lite:                *.tflite
         #   TensorFlow Edge TPU:            *_edgetpu.tflite
-        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import
+        #   PaddlePaddle:                   *_paddle_model
+        # scoped to avoid circular import
+        from models.experimental import attempt_download, attempt_load

         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
-        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self._model_type(w)  # get backend
-        w = attempt_download(w)  # download if not local
+        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(
+            w)
         fp16 &= pt or jit or onnx or engine  # FP16
+        # BHWC formats (vs torch BCWH)
+        nhwc = coreml or saved_model or pb or tflite or edgetpu
         stride = 32  # default stride
+        cuda = torch.cuda.is_available() and device.type != 'cpu'  # use CUDA
+        if not (pt or triton):
+            w = attempt_download(w)  # download if not local

         if pt:  # PyTorch
-            model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
+            model = attempt_load(weights if isinstance(
+                weights, list) else w, device=device, inplace=True, fuse=fuse)
             stride = max(int(model.stride.max()), 32)  # model stride
-            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+            names = model.module.names if hasattr(
+                model, 'module') else model.names  # get class names
             model.half() if fp16 else model.float()
             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
         elif jit:  # TorchScript
             LOGGER.info(f'Loading {w} for TorchScript inference...')
             extra_files = {'config.txt': ''}  # model metadata
-            model = torch.jit.load(w, _extra_files=extra_files)
+            model = torch.jit.load(
+                w, _extra_files=extra_files, map_location=device)
             model.half() if fp16 else model.float()
             if extra_files['config.txt']:  # load metadata dict
                 d = json.loads(extra_files['config.txt'],
@@ -345,14 +395,15 @@ class DetectMultiBackend(nn.Module):
                 stride, names = int(d['stride']), d['names']
         elif dnn:  # ONNX OpenCV DNN
             LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
-            check_requirements(('opencv-python>=4.5.4',))
+            check_requirements('opencv-python>=4.5.4')
             net = cv2.dnn.readNetFromONNX(w)
         elif onnx:  # ONNX Runtime
             LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
-            cuda = torch.cuda.is_available() and device.type != 'cpu'
-            check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
+            check_requirements(
+                ('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
             import onnxruntime
-            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
+            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else [
+                'CPUExecutionProvider']
             session = onnxruntime.InferenceSession(w, providers=providers)
             output_names = [x.name for x in session.get_outputs()]
             meta = session.get_modelmeta().custom_metadata_map  # metadata
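Note: the backend-flag tuple returned by _model_type() grows from 11 to 13 entries (paddle and triton), and weights are now auto-downloaded only when the source is neither local PyTorch weights nor a Triton URL. The flags are derived from the Suffix column of the export_formats() table added above; a simplified sketch in that spirit (the real method also checks URLs and masks the tflite/edgetpu overlap differently):

from pathlib import Path

SUFFIXES = ['.pt', '.torchscript', '.onnx', '_openvino_model', '.engine', '.mlmodel',
            '_saved_model', '.pb', '.tflite', '_edgetpu.tflite', '_web_model', '_paddle_model']

def backend_flags(weights_path):
    # One boolean per export format, matched on how the filename ends
    name = Path(weights_path).name
    flags = [name.endswith(s) for s in SUFFIXES]
    flags[8] &= not flags[9]  # a *_edgetpu.tflite file is Edge TPU, not plain TFLite
    return flags

print(backend_flags('yolov5s320Half.engine'))  # True only in the TensorRT slot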
@@ -360,100 +411,149 @@ class DetectMultiBackend(nn.Module):
                 stride, names = int(meta['stride']), eval(meta['names'])
         elif xml:  # OpenVINO
             LOGGER.info(f'Loading {w} for OpenVINO inference...')
-            check_requirements(('openvino',))  # requires openvino-dev: https://pypi.org/project/openvino-dev/
+            # requires openvino-dev: https://pypi.org/project/openvino-dev/
+            check_requirements('openvino')
             from openvino.runtime import Core, Layout, get_batch
             ie = Core()
             if not Path(w).is_file():  # if not *.xml
-                w = next(Path(w).glob('*.xml'))  # get *.xml file from *_openvino_model dir
-            network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
+                # get *.xml file from *_openvino_model dir
+                w = next(Path(w).glob('*.xml'))
+            network = ie.read_model(
+                model=w, weights=Path(w).with_suffix('.bin'))
             if network.get_parameters()[0].get_layout().empty:
                 network.get_parameters()[0].set_layout(Layout("NCHW"))
             batch_dim = get_batch(network)
             if batch_dim.is_static:
                 batch_size = batch_dim.get_length()
-            executable_network = ie.compile_model(network, device_name="CPU")  # device_name="MYRIAD" for Intel NCS2
-            output_layer = next(iter(executable_network.outputs))
-            stride, names = self._load_metadata(Path(w).with_suffix('.yaml'))  # load metadata
+            # device_name="MYRIAD" for Intel NCS2
+            executable_network = ie.compile_model(network, device_name="CPU")
+            stride, names = self._load_metadata(
+                Path(w).with_suffix('.yaml'))  # load metadata
         elif engine:  # TensorRT
             LOGGER.info(f'Loading {w} for TensorRT inference...')
             import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
-            check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
+            # require tensorrt>=7.0.0
+            check_version(trt.__version__, '7.0.0', hard=True)
             if device.type == 'cpu':
                 device = torch.device('cuda:0')
-            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
+            Binding = namedtuple(
+                'Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
             logger = trt.Logger(trt.Logger.INFO)
             with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
                 model = runtime.deserialize_cuda_engine(f.read())
             context = model.create_execution_context()
             bindings = OrderedDict()
+            output_names = []
             fp16 = False  # default updated below
             dynamic = False
-            for index in range(model.num_bindings):
-                name = model.get_binding_name(index)
-                dtype = trt.nptype(model.get_binding_dtype(index))
-                if model.binding_is_input(index):
-                    if -1 in tuple(model.get_binding_shape(index)):  # dynamic
+            for i in range(model.num_bindings):
+                name = model.get_binding_name(i)
+                dtype = trt.nptype(model.get_binding_dtype(i))
+                if model.binding_is_input(i):
+                    if -1 in tuple(model.get_binding_shape(i)):  # dynamic
                         dynamic = True
-                        context.set_binding_shape(index, tuple(model.get_profile_shape(0, index)[2]))
+                        context.set_binding_shape(
+                            i, tuple(model.get_profile_shape(0, i)[2]))
                     if dtype == np.float16:
                         fp16 = True
-                shape = tuple(context.get_binding_shape(index))
+                else:  # output
+                    output_names.append(name)
+                shape = tuple(context.get_binding_shape(i))
                 im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
-                bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
-            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
-            batch_size = bindings['images'].shape[0]  # if dynamic, this is instead max batch size
+                bindings[name] = Binding(
+                    name, dtype, shape, im, int(im.data_ptr()))
+            binding_addrs = OrderedDict((n, d.ptr)
+                                        for n, d in bindings.items())
+            # if dynamic, this is instead max batch size
+            batch_size = bindings['images'].shape[0]
         elif coreml:  # CoreML
             LOGGER.info(f'Loading {w} for CoreML inference...')
             import coremltools as ct
             model = ct.models.MLModel(w)
-        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
-            if saved_model:  # SavedModel
+        elif saved_model:  # TF SavedModel
             LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
             import tensorflow as tf
             keras = False  # assume TF1 saved_model
-            model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
+            model = tf.keras.models.load_model(
+                w) if keras else tf.saved_model.load(w)
         elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
             LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
             import tensorflow as tf

             def wrap_frozen_graph(gd, inputs, outputs):
-                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
+                x = tf.compat.v1.wrap_function(
+                    lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                 ge = x.graph.as_graph_element
                 return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))

-            gd = tf.Graph().as_graph_def()  # graph_def
+            def gd_outputs(gd):
+                name_list, input_list = [], []
+                for node in gd.node:  # tensorflow.core.framework.node_def_pb2.NodeDef
+                    name_list.append(node.name)
+                    input_list.extend(node.input)
+                return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp'))

+            gd = tf.Graph().as_graph_def()  # TF GraphDef
             with open(w, 'rb') as f:
                 gd.ParseFromString(f.read())
-            frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs="Identity:0")
+            frozen_func = wrap_frozen_graph(
+                gd, inputs="x:0", outputs=gd_outputs(gd))
         elif tflite or edgetpu:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
             try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                 from tflite_runtime.interpreter import Interpreter, load_delegate
             except ImportError:
                 import tensorflow as tf
                 Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,
-            if edgetpu:  # Edge TPU https://coral.ai/software/#edgetpu-runtime
-                LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
+            if edgetpu:  # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
+                LOGGER.info(
+                    f'Loading {w} for TensorFlow Lite Edge TPU inference...')
                 delegate = {
                     'Linux': 'libedgetpu.so.1',
                     'Darwin': 'libedgetpu.1.dylib',
                     'Windows': 'edgetpu.dll'}[platform.system()]
-                interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
-            else:  # Lite
+                interpreter = Interpreter(model_path=w, experimental_delegates=[
+                    load_delegate(delegate)])
+            else:  # TFLite
                 LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
                 interpreter = Interpreter(model_path=w)  # load TFLite model
             interpreter.allocate_tensors()  # allocate
             input_details = interpreter.get_input_details()  # inputs
             output_details = interpreter.get_output_details()  # outputs
-        elif tfjs:
-            raise NotImplementedError('ERROR: YOLOv5 TF.js inference is not supported')
+        elif tfjs:  # TF.js
+            raise NotImplementedError(
|
||||||
|
'ERROR: YOLOv5 TF.js inference is not supported')
|
||||||
|
elif paddle: # PaddlePaddle
|
||||||
|
LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
|
||||||
|
check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
|
||||||
|
import paddle.inference as pdi
|
||||||
|
if not Path(w).is_file(): # if not *.pdmodel
|
||||||
|
# get *.xml file from *_openvino_model dir
|
||||||
|
w = next(Path(w).rglob('*.pdmodel'))
|
||||||
|
weights = Path(w).with_suffix('.pdiparams')
|
||||||
|
config = pdi.Config(str(w), str(weights))
|
||||||
|
if cuda:
|
||||||
|
config.enable_use_gpu(
|
||||||
|
memory_pool_init_size_mb=2048, device_id=0)
|
||||||
|
predictor = pdi.create_predictor(config)
|
||||||
|
input_handle = predictor.get_input_handle(
|
||||||
|
predictor.get_input_names()[0])
|
||||||
|
output_names = predictor.get_output_names()
|
||||||
|
elif triton: # NVIDIA Triton Inference Server
|
||||||
|
LOGGER.info(f'Using {w} as Triton Inference Server...')
|
||||||
|
check_requirements('tritonclient[all]')
|
||||||
|
from utils.triton import TritonRemoteModel
|
||||||
|
model = TritonRemoteModel(url=w)
|
||||||
|
nhwc = model.runtime.startswith("tensorflow")
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(f'ERROR: {w} is not a supported format')
|
raise NotImplementedError(f'ERROR: {w} is not a supported format')
|
||||||
|
|
||||||
# class names
|
# class names
|
||||||
if 'names' not in locals():
|
if 'names' not in locals():
|
||||||
names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)}
|
names = yaml_load(data)['names'] if data else {
|
||||||
|
i: f'class{i}' for i in range(999)}
|
||||||
if names[0] == 'n01440764' and len(names) == 1000: # ImageNet
|
if names[0] == 'n01440764' and len(names) == 1000: # ImageNet
|
||||||
names = yaml_load(ROOT / 'data/ImageNet.yaml')['names'] # human-readable names
|
# human-readable names
|
||||||
|
names = yaml_load(ROOT / 'data/ImageNet.yaml')['names']
|
||||||
|
|
||||||
self.__dict__.update(locals()) # assign all variables to self
|
self.__dict__.update(locals()) # assign all variables to self
|
||||||
|
|
||||||
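The loader now records every non-input binding in output_names instead of assuming a single binding named 'output', which is what lets multi-output engines (a segmentation model emits boxes plus a prototype tensor) run through the same TensorRT path. A minimal sketch of that bookkeeping in pure Python, with hypothetical binding metadata standing in for a real engine:

import numpy as np
from collections import OrderedDict, namedtuple

Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))

# Stand-ins for engine bindings: (name, dtype, shape, is_input) -- illustrative values only
fake_bindings = [('images', np.float16, (1, 3, 640, 640), True),
                 ('output0', np.float16, (1, 25200, 117), False),
                 ('proto', np.float16, (1, 32, 160, 160), False)]

bindings, output_names = OrderedDict(), []
for name, dtype, shape, is_input in fake_bindings:
    if not is_input:
        output_names.append(name)  # collected by name instead of assuming one 'output'
    data = np.empty(shape, dtype=dtype)  # host buffer standing in for a device tensor
    bindings[name] = Binding(name, dtype, shape, data, data.ctypes.data)

binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
print(output_names)  # ['output0', 'proto']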
@@ -462,9 +562,13 @@ class DetectMultiBackend(nn.Module):
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.fp16 and im.dtype != torch.float16:
            im = im.half()  # to FP16
+       if self.nhwc:
+           im = im.permute(0, 2, 3, 1)  # torch BCHW to numpy BHWC shape(1,320,192,3)

        if self.pt:  # PyTorch
            y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
        elif self.jit:  # TorchScript
            y = self.model(im)
        elif self.dnn:  # ONNX OpenCV DNN
@@ -473,52 +577,77 @@ class DetectMultiBackend(nn.Module):
            y = self.net.forward()
        elif self.onnx:  # ONNX Runtime
            im = im.cpu().numpy()  # torch to numpy
            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
        elif self.xml:  # OpenVINO
            im = im.cpu().numpy()  # FP32
-           y = self.executable_network([im])[self.output_layer]
+           y = list(self.executable_network([im]).values())
        elif self.engine:  # TensorRT
            if self.dynamic and im.shape != self.bindings['images'].shape:
-               i_in, i_out = (self.model.get_binding_index(x) for x in ('images', 'output'))
-               self.context.set_binding_shape(i_in, im.shape)  # reshape if dynamic
-               self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
-               self.bindings['output'].data.resize_(tuple(self.context.get_binding_shape(i_out)))
+               i = self.model.get_binding_index('images')
+               self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
+               self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
+               for name in self.output_names:
+                   i = self.model.get_binding_index(name)
+                   self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
            s = self.bindings['images'].shape
            assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
            self.binding_addrs['images'] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
-           y = self.bindings['output'].data
+           y = [self.bindings[x].data for x in sorted(self.output_names)]
        elif self.coreml:  # CoreML
-           im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
+           im = im.cpu().numpy()
            im = Image.fromarray((im[0] * 255).astype('uint8'))
            # im = im.resize((192, 320), Image.ANTIALIAS)
            y = self.model.predict({'image': im})  # coordinates are xywh normalized
            if 'confidence' in y:
                box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
                conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
                y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
            else:
-               k = 'var_' + str(sorted(int(k.replace('var_', '')) for k in y)[-1])  # output key
-               y = y[k]  # output
+               y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
+       elif self.paddle:  # PaddlePaddle
+           im = im.cpu().numpy().astype(np.float32)
+           self.input_handle.copy_from_cpu(im)
+           self.predictor.run()
+           y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
+       elif self.triton:  # NVIDIA Triton Inference Server
+           y = self.model(im)
        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
-           im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
+           im = im.cpu().numpy()
            if self.saved_model:  # SavedModel
-               y = (self.model(im, training=False) if self.keras else self.model(im)).numpy()
+               y = self.model(im, training=False) if self.keras else self.model(im)
            elif self.pb:  # GraphDef
-               y = self.frozen_func(x=self.tf.constant(im)).numpy()
+               y = self.frozen_func(x=self.tf.constant(im))
            else:  # Lite or Edge TPU
-               input, output = self.input_details[0], self.output_details[0]
+               input = self.input_details[0]
                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
                if int8:
                    scale, zero_point = input['quantization']
                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                self.interpreter.set_tensor(input['index'], im)
                self.interpreter.invoke()
-               y = self.interpreter.get_tensor(output['index'])
-               if int8:
-                   scale, zero_point = output['quantization']
-                   y = (y.astype(np.float32) - zero_point) * scale  # re-scale
-           y[..., :4] *= [w, h, w, h]  # xywh normalized to pixels
+               y = []
+               for output in self.output_details:
+                   x = self.interpreter.get_tensor(output['index'])
+                   if int8:
+                       scale, zero_point = output['quantization']
+                       x = (x.astype(np.float32) - zero_point) * scale  # re-scale
+                   y.append(x)
+           y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
+           y[0][..., :4] *= [w, h, w, h]  # xywh normalized to pixels

        if isinstance(y, (list, tuple)):
            return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
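In the Lite/Edge TPU branch, inputs are de-scaled into uint8 with the input tensor's (scale, zero_point) pair, and each output is re-scaled with its own pair, now in a loop over output_details so multi-output models dequantize correctly. A small numpy sketch of the round-trip, with illustrative quantization parameters rather than values read from a real model:

import numpy as np

scale, zero_point = 1 / 255, 128  # hypothetical per-tensor quantization parameters
x_float = np.array([0.25, 0.4], dtype=np.float32)
x_uint8 = (x_float / scale + zero_point).astype(np.uint8)   # de-scale on the way in
x_back = (x_uint8.astype(np.float32) - zero_point) * scale  # re-scale on the way out
print(x_back)  # ~[0.247, 0.4], equal up to quantization error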
@@ -530,23 +659,27 @@ class DetectMultiBackend(nn.Module):

    def warmup(self, imgsz=(1, 3, 640, 640)):
        # Warmup model by running inference once
-       warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb
-       if any(warmup_types) and self.device.type != 'cpu':
+       warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
+       if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
            im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
            for _ in range(2 if self.jit else 1):  #
                self.forward(im)  # warmup

    @staticmethod
    def _model_type(p='path/to/model.pt'):
        # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
-       from yolov5.export import export_formats
-       suffixes = list(export_formats().Suffix) + ['.xml']  # export suffixes
-       check_suffix(p, suffixes)  # checks
-       p = Path(p).name  # eliminate trailing separators
-       pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, xml2 = (s in p for s in suffixes)
-       xml |= xml2  # *_openvino_model or *.xml
-       tflite &= not edgetpu  # *.tflite
-       return pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs
+       # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
+       from utils.downloads import is_url
+       sf = list(export_formats().Suffix)  # export suffixes
+       if not is_url(p, check=False):
+           check_suffix(p, sf)  # checks
+       url = urlparse(p)  # if url may be Triton inference server
+       types = [s in Path(p).name for s in sf]
+       types[8] &= not types[9]  # tflite &= not edgetpu
+       triton = not any(types) and all([any(s in url.scheme for s in ["http", "grpc"]), url.netloc])
+       return types + [triton]

    @staticmethod
    def _load_metadata(f=Path('path/to/meta.yaml')):
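The rewritten _model_type returns one boolean per export format plus a trailing triton flag: a path that matches no export suffix but parses as an http/grpc URL with a network location is treated as a Triton server address. A simplified, self-contained sketch (the suffix table below is a hypothetical stand-in for the real export_formats() output, in the order of the comment above):

from pathlib import Path
from urllib.parse import urlparse

sf = ['.pt', '.torchscript', '.onnx', '_openvino_model', '.engine', '.mlmodel',
      '_saved_model', '.pb', '.tflite', '_edgetpu.tflite', '_web_model', '_paddle_model']

def model_type(p):
    url = urlparse(p)
    types = [s in Path(p).name for s in sf]
    types[8] &= not types[9]  # tflite &= not edgetpu
    triton = not any(types) and all([any(s in url.scheme for s in ('http', 'grpc')), url.netloc])
    return types + [triton]

print(model_type('yolov5s.onnx')[2])             # True -> ONNX
print(model_type('http://host:8000/model')[-1])  # True -> Triton URL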
@@ -563,7 +696,8 @@ class AutoShape(nn.Module):
    iou = 0.45  # NMS IoU threshold
    agnostic = False  # NMS class-agnostic
    multi_label = False  # NMS multiple labels per box
    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    max_det = 1000  # maximum number of detections per image
    amp = False  # Automatic Mixed Precision (AMP) inference

@@ -571,19 +705,24 @@ class AutoShape(nn.Module):
        super().__init__()
        if verbose:
            LOGGER.info('Adding AutoShape... ')
        copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())  # copy attributes
        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
        self.pt = not self.dmb or model.pt  # PyTorch model
        self.model = model.eval()
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.inplace = False  # Detect.inplace=False for safe multithread inference
+           m.export = True  # do not output loss values

    def _apply(self, fn):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
@@ -605,40 +744,52 @@ class AutoShape(nn.Module):
        with dt[0]:
            if isinstance(size, int):  # expand
                size = (size, size)
            p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device)  # param
            autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
            if isinstance(ims, torch.Tensor):  # torch
                with amp.autocast(autocast):
-                   return self.model(ims.to(p.device).type_as(p), augment, profile)  # inference
+                   return self.model(ims.to(p.device).type_as(p), augment=augment)  # inference

            # Pre-process
            n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims])  # number, list of images
            shape0, shape1, files = [], [], []  # image and inference shapes, filenames
            for i, im in enumerate(ims):
                f = f'image{i}'  # filename
                if isinstance(im, (str, Path)):  # filename or uri
                    im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
                    im = np.asarray(exif_transpose(im))
                elif isinstance(im, Image.Image):  # PIL Image
                    im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
                files.append(Path(f).with_suffix('.jpg').name)
                if im.shape[0] < 5:  # image in CHW
                    im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
                im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)  # enforce 3ch input
                s = im.shape[:2]  # HWC
                shape0.append(s)  # image shape
                g = max(size) / max(s)  # gain
                shape1.append([y * g for y in s])
                ims[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
            shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] if self.pt else size  # inf shape
            x = [letterbox(im, shape1, auto=False)[0] for im in ims]  # pad
            x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2)))  # stack and BHWC to BCHW
            x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32

        with amp.autocast(autocast):
            # Inference
            with dt[1]:
-               y = self.model(x, augment, profile)  # forward
+               y = self.model(x, augment=augment)  # forward

            # Post-process
            with dt[2]:
@@ -650,7 +801,7 @@ class AutoShape(nn.Module):
                                        self.multi_label,
                                        max_det=self.max_det)  # NMS
                for i in range(n):
-                   scale_coords(shape1, y[i][:, :4], shape0[i])
+                   scale_boxes(shape1, y[i][:, :4], shape0[i])

            return Detections(ims, y, files, dt, self.names, x.shape)

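AutoShape accepts filenames, URIs, PIL images, numpy arrays, or torch tensors, and handles letterboxing, batching, and NMS itself, so the hub workflow stays a few lines. A usage sketch (it downloads weights and the test image, so network access is required):

import torch

model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # AutoShape-wrapped detector
results = model('https://ultralytics.com/images/zidane.jpg', size=640)
results.print()                   # per-image summary plus the speed line
boxes = results.pandas().xyxy[0]  # detections as a pandas DataFrame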
@@ -660,7 +811,8 @@ class Detections:
    def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
        super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims]  # normalizations
        self.ims = ims  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
@@ -672,22 +824,28 @@ class Detections:
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple(x.t / self.n * 1E3 for x in times)  # timestamps (ms)
-       self.s = shape  # inference BCHW shape
+       self.s = tuple(shape)  # inference BCHW shape

-   def display(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
-       crops = []
+   def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
+       s, crops = '', []
        for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
-           s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
+           s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
+               s = s.rstrip(', ')
                if show or save or render or crop:
                    annotator = Annotator(im, example=str(self.names))
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
                            crops.append({
                                'box': box,
                                'conf': conf,
@@ -695,45 +853,48 @@ class Detections:
                                'label': label,
                                'im': save_one_box(box, im, file=file, save=save)})
                        else:  # all others
                            annotator.box_label(box, label if labels else '', color=colors(cls))
                    im = annotator.im
            else:
                s += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
-           if pprint:
-               print(s.rstrip(', '))
            if show:
                im.show(self.files[i])  # show
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
            if render:
                self.ims[i] = np.asarray(im)
+       if pprint:
+           s = s.lstrip('\n')
+           return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
        if crop:
            if save:
                LOGGER.info(f'Saved results to {save_dir}\n')
            return crops

-   def print(self):
-       self.display(pprint=True)  # print results
-       print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)
-
    def show(self, labels=True):
-       self.display(show=True, labels=labels)  # show results
+       self._run(show=True, labels=labels)  # show results

    def save(self, labels=True, save_dir='runs/detect/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
-       self.display(save=True, labels=labels, save_dir=save_dir)  # save results
+       self._run(save=True, labels=labels, save_dir=save_dir)  # save results

    def crop(self, save=True, save_dir='runs/detect/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None
-       return self.display(crop=True, save=save, save_dir=save_dir)  # crop results
+       return self._run(crop=True, save=save, save_dir=save_dir)  # crop results

    def render(self, labels=True):
-       self.display(render=True, labels=labels)  # render results
+       self._run(render=True, labels=labels)  # render results
        return self.ims

    def pandas(self):
@@ -742,30 +903,51 @@ class Detections:
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        r = range(self.n)  # iterable
        x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
        # for d in x:
        #    for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
        #        setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

-   def __len__(self):
-       return self.n  # override len(results)
+   def print(self):
+       LOGGER.info(self.__str__())

-   def __str__(self):
-       self.print()  # override print(results)
-       return ''
+   def __len__(self):  # override len(results)
+       return self.n
+
+   def __str__(self):  # override print(results)
+       return self._run(pprint=True)  # print results
+
+   def __repr__(self):
+       return f'YOLOv5 {self.__class__} instance\n' + self.__str__()


+class Proto(nn.Module):
+   # YOLOv5 mask Proto module for segmentation models
+   def __init__(self, c1, c_=256, c2=32):  # ch_in, number of protos, number of masks
+       super().__init__()
+       self.cv1 = Conv(c1, c_, k=3)
+       self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
+       self.cv2 = Conv(c_, c_, k=3)
+       self.cv3 = Conv(c_, c2)
+
+   def forward(self, x):
+       return self.cv3(self.cv2(self.upsample(self.cv1(x))))
+
+
 class Classify(nn.Module):
-   # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
+   # YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        c_ = 1280  # efficientnet_b0 size
        self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
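The new Proto module turns the highest-resolution feature map into mask prototypes: conv, 2x nearest upsample, conv, then a projection down to the prototype count. A shape-flow sketch using plain Conv2d in place of the repo's Conv block (so it runs standalone; padding is chosen to preserve spatial size):

import torch
import torch.nn as nn

class ProtoSketch(nn.Module):
    # same layer sequence as Proto, with vanilla Conv2d standing in for Conv
    def __init__(self, c1, c_=256, c2=32):
        super().__init__()
        self.cv1 = nn.Conv2d(c1, c_, 3, padding=1)
        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.cv2 = nn.Conv2d(c_, c_, 3, padding=1)
        self.cv3 = nn.Conv2d(c_, c2, 1)

    def forward(self, x):
        return self.cv3(self.cv2(self.upsample(self.cv1(x))))

p = ProtoSketch(128)(torch.zeros(1, 128, 80, 80))
print(p.shape)  # torch.Size([1, 32, 160, 160]) -- 32 prototypes at 2x input resolution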
|
49
models/hub/yolov5s-LeakyReLU.yaml
Normal file
49
models/hub/yolov5s-LeakyReLU.yaml
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
|
||||||
|
|
||||||
|
# Parameters
|
||||||
|
nc: 80 # number of classes
|
||||||
|
activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model
|
||||||
|
depth_multiple: 0.33 # model depth multiple
|
||||||
|
width_multiple: 0.50 # layer channel multiple
|
||||||
|
anchors:
|
||||||
|
- [10,13, 16,30, 33,23] # P3/8
|
||||||
|
- [30,61, 62,45, 59,119] # P4/16
|
||||||
|
- [116,90, 156,198, 373,326] # P5/32
|
||||||
|
|
||||||
|
# YOLOv5 v6.0 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, number, module, args]
|
||||||
|
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
|
||||||
|
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
|
||||||
|
[-1, 3, C3, [128]],
|
||||||
|
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
|
||||||
|
[-1, 6, C3, [256]],
|
||||||
|
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
|
||||||
|
[-1, 9, C3, [512]],
|
||||||
|
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
|
||||||
|
[-1, 3, C3, [1024]],
|
||||||
|
[-1, 1, SPPF, [1024, 5]], # 9
|
||||||
|
]
|
||||||
|
|
||||||
|
# YOLOv5 v6.0 head
|
||||||
|
head:
|
||||||
|
[[-1, 1, Conv, [512, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 6], 1, Concat, [1]], # cat backbone P4
|
||||||
|
[-1, 3, C3, [512, False]], # 13
|
||||||
|
|
||||||
|
[-1, 1, Conv, [256, 1, 1]],
|
||||||
|
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
|
||||||
|
[[-1, 4], 1, Concat, [1]], # cat backbone P3
|
||||||
|
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
|
||||||
|
|
||||||
|
[-1, 1, Conv, [256, 3, 2]],
|
||||||
|
[[-1, 14], 1, Concat, [1]], # cat head P4
|
||||||
|
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
|
||||||
|
|
||||||
|
[-1, 1, Conv, [512, 3, 2]],
|
||||||
|
[[-1, 10], 1, Concat, [1]], # cat head P5
|
||||||
|
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
|
||||||
|
|
||||||
|
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
|
||||||
|
]
|
48  models/segment/yolov5l-seg.yaml  Normal file
@@ -0,0 +1,48 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 1.0  # model depth multiple
+width_multiple: 1.0  # layer channel multiple
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
+  ]
+
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 13
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+
+   [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]],  # Detect(P3, P4, P5)
+  ]
48  models/segment/yolov5m-seg.yaml  Normal file
@@ -0,0 +1,48 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 0.67  # model depth multiple
+width_multiple: 0.75  # layer channel multiple
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
+  ]
+
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 13
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+
+   [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]],  # Detect(P3, P4, P5)
+  ]
48  models/segment/yolov5n-seg.yaml  Normal file
@@ -0,0 +1,48 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 0.33  # model depth multiple
+width_multiple: 0.25  # layer channel multiple
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
+  ]
+
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 13
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+
+   [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]],  # Detect(P3, P4, P5)
+  ]
48  models/segment/yolov5s-seg.yaml  Normal file
@@ -0,0 +1,48 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 0.33  # model depth multiple
+width_multiple: 0.5  # layer channel multiple
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
+  ]
+
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 13
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+
+   [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]],  # Detect(P3, P4, P5)
+  ]
48  models/segment/yolov5x-seg.yaml  Normal file
@@ -0,0 +1,48 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
+# Parameters
+nc: 80  # number of classes
+depth_multiple: 1.33  # model depth multiple
+width_multiple: 1.25  # layer channel multiple
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
+  ]
+
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, C3, [512, False]],  # 13
+
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+
+   [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]],  # Detect(P3, P4, P5)
+  ]
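Each *-seg.yaml above is identical to its detection counterpart except for the depth/width multiples and the final layer, where Segment takes two extra arguments: 32 mask coefficients and 256 proto channels. The per-anchor output width that implies, as a quick check:

nc, nm, na = 80, 32, 3  # classes, mask coefficients, anchors per layer (from the yamls above)
no = 5 + nc + nm        # outputs per anchor: box(4) + obj(1) + cls(80) + mask(32)
print(no, na * no)      # 117 per anchor -> 351 output channels per detection conv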
53  models/tf.py
@@ -30,7 +30,7 @@ from tensorflow import keras
 from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv,
                            DWConvTranspose2d, Focus, autopad)
 from models.experimental import MixConv2d, attempt_load
-from models.yolo import Detect
+from models.yolo import Detect, Segment
 from utils.activations import SiLU
 from utils.general import LOGGER, make_divisible, print_args

@@ -299,18 +299,18 @@ class TFDetect(keras.layers.Layer):
            x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])

            if not self.training:  # inference
-               y = tf.sigmoid(x[i])
+               y = x[i]
                grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
                anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
-               xy = (y[..., 0:2] * 2 + grid) * self.stride[i]  # xy
-               wh = y[..., 2:4] ** 2 * anchor_grid
+               xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i]  # xy
+               wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
                # Normalize xywh to 0-1 to reduce calibration error
                xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
-               y = tf.concat([xy, wh, y[..., 4:]], -1)
+               y = tf.concat([xy, wh, tf.sigmoid(y[..., 4:5 + self.nc]), y[..., 5 + self.nc:]], -1)
                z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))

-       return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1), x)
+       return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),)

    @staticmethod
    def _make_grid(nx=20, ny=20):
@@ -320,6 +320,37 @@ class TFDetect(keras.layers.Layer):
        return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)


+class TFSegment(TFDetect):
+   # YOLOv5 Segment head for segmentation models
+   def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
+       super().__init__(nc, anchors, ch, imgsz, w)
+       self.nm = nm  # number of masks
+       self.npr = npr  # number of protos
+       self.no = 5 + nc + self.nm  # number of outputs per anchor
+       self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]  # output conv
+       self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto)  # protos
+       self.detect = TFDetect.call
+
+   def call(self, x):
+       p = self.proto(x[0])
+       p = tf.transpose(p, [0, 3, 1, 2])  # from shape(1,160,160,32) to shape(1,32,160,160)
+       x = self.detect(self, x)
+       return (x, p) if self.training else (x[0], p)
+
+
+class TFProto(keras.layers.Layer):
+
+   def __init__(self, c1, c_=256, c2=32, w=None):
+       super().__init__()
+       self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
+       self.upsample = TFUpsample(None, scale_factor=2, mode='nearest')
+       self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
+       self.cv3 = TFConv(c_, c2, w=w.cv3)
+
+   def call(self, inputs):
+       return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))
+
+
 class TFUpsample(keras.layers.Layer):
    # TF version of torch.nn.Upsample()
    def __init__(self, size, scale_factor, mode, w=None):  # warning: all arguments needed including 'w'
@@ -377,10 +408,12 @@ def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
-       elif m is Detect:
+       elif m in [Detect, Segment]:
            args.append([ch[x + 1] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
+           if m is Segment:
+               args[3] = make_divisible(args[3] * gw, 8)
            args.append(imgsz)
        else:
            c2 = ch[f]
@@ -452,9 +485,9 @@ class TFModel:
                                                        iou_thres,
                                                        conf_thres,
                                                        clip_boxes=False)
-           return nms, x[1]
+           return (nms,)
-       return x[0]  # output only first tensor [1,6300,85] = [xywh, conf, class0, class1, ...]
+       return x  # output [1,6300,85] = [xywh, conf, class0, class1, ...]
-       # x = x[0][0]  # [x(1,6300,85), ...] to x(6300,85)
+       # x = x[0]  # [x(1,6300,85), ...] to x(6300,85)
        # xywh = x[..., :4]  # x(6300,4) boxes
        # conf = x[..., 4:5]  # x(6300,1) confidences
        # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1))  # x(6300,1) classes
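The TFDetect change defers the sigmoid so it can be applied per channel group: box and objectness/class channels are squashed, while any trailing channels, i.e. the mask coefficients a TFSegment head appends, pass through raw, since they are linear weights over the prototypes rather than probabilities. A numpy sketch of that layout (random data, nc=80, nm=32):

import numpy as np

nc, nm = 80, 32
y = np.random.randn(1, 25200, 5 + nc + nm).astype(np.float32)

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

out = np.concatenate([y[..., :4],                 # xy/wh: decoded with grid/anchors upstream
                      sigmoid(y[..., 4:5 + nc]),  # obj + class scores -> probabilities
                      y[..., 5 + nc:]], -1)       # mask coefficients stay raw
print(out.shape)  # (1, 25200, 117)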
models/yolo.py
@@ -36,6 +36,7 @@ except ImportError:


 class Detect(nn.Module):
+   # YOLOv5 Detect head for detection models
    stride = None  # strides computed during build
    dynamic = False  # force grid reconstruction
    export = False  # export mode
@@ -46,8 +47,8 @@ class Detect(nn.Module):
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
-       self.grid = [torch.empty(1)] * self.nl  # init grid
-       self.anchor_grid = [torch.empty(1)] * self.nl  # init anchor grid
+       self.grid = [torch.empty(0) for _ in range(self.nl)]  # init grid
+       self.anchor_grid = [torch.empty(0) for _ in range(self.nl)]  # init anchor grid
        self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2))  # shape(nl,na,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.inplace = inplace  # use inplace ops (e.g. slice assignment)
@@ -63,16 +64,17 @@ class Detect(nn.Module):
                if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

-               y = x[i].sigmoid()
-               if self.inplace:
-                   y[..., 0:2] = (y[..., 0:2] * 2 + self.grid[i]) * self.stride[i]  # xy
-                   y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
-               else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
-                   xy, wh, conf = y.split((2, 2, self.nc + 1), 4)  # y.tensor_split((2, 4, 5), 4)  # torch 1.8.0
+               if isinstance(self, Segment):  # (boxes + masks)
+                   xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
+                   xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
+                   wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
+                   y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
+               else:  # Detect (boxes only)
+                   xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                    xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf), 4)
-               z.append(y.view(bs, -1, self.no))
+               z.append(y.view(bs, self.na * nx * ny, self.no))

        return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)

@@ -87,6 +89,23 @@ class Detect(nn.Module):
        return grid, anchor_grid


+class Segment(Detect):
+   # YOLOv5 Segment head for segmentation models
+   def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
+       super().__init__(nc, anchors, ch, inplace)
+       self.nm = nm  # number of masks
+       self.npr = npr  # number of protos
+       self.no = 5 + nc + self.nm  # number of outputs per anchor
+       self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
+       self.proto = Proto(ch[0], self.npr, self.nm)  # protos
+       self.detect = Detect.forward
+
+   def forward(self, x):
+       p = self.proto(x[0])
+       x = self.detect(self, x)
+       return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
+
+
 class BaseModel(nn.Module):
    # YOLOv5 base model
    def forward(self, x, profile=False, visualize=False):
@@ -135,7 +154,7 @@ class BaseModel(nn.Module):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
        m = self.model[-1]  # Detect()
-       if isinstance(m, Detect):
+       if isinstance(m, (Detect, Segment)):
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
@@ -169,11 +188,12 @@ class DetectionModel(BaseModel):

        # Build strides, anchors
        m = self.model[-1]  # Detect()
-       if isinstance(m, Detect):
+       if isinstance(m, (Detect, Segment)):
            s = 256  # 2x min stride
            m.inplace = self.inplace
-           m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.empty(1, ch, s, s))])  # forward
-           check_anchor_order(m)  # must be in pixel-space (not grid-space)
+           forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
+           m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
+           check_anchor_order(m)
            m.anchors /= m.stride.view(-1, 1, 1)
            self.stride = m.stride
            self._initialize_biases()  # only run once
@@ -235,15 +255,21 @@ class DetectionModel(BaseModel):
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
        m = self.model[-1]  # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
-           b = mi.bias.view(m.na, -1).detach()  # conv.bias(255) to (3,85)
-           b[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
-           b[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # cls
+           b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
+           b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
+           b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)


 Model = DetectionModel  # retain YOLOv5 'Model' class for backwards compatibility


+class SegmentationModel(DetectionModel):
+   # YOLOv5 segmentation model
+   def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
+       super().__init__(cfg, ch, nc, anchors)
+
+
 class ClassificationModel(BaseModel):
    # YOLOv5 classification model
    def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):  # yaml, model, number of classes, cutoff index
@@ -271,8 +297,12 @@ class ClassificationModel(BaseModel):


 def parse_model(d, ch):  # model_dict, input_channels(3)
+   # Parse a YOLOv5 model.yaml dictionary
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
|
anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
|
||||||
|
if act:
|
||||||
|
Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU()
|
||||||
|
LOGGER.info(f"{colorstr('activation:')} {act}") # print
|
||||||
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
|
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
|
||||||
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
|
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
|
||||||
|
|
||||||
@ -284,24 +314,28 @@ def parse_model(d, ch): # model_dict, input_channels(3)
|
|||||||
args[j] = eval(a) if isinstance(a, str) else a # eval strings
|
args[j] = eval(a) if isinstance(a, str) else a # eval strings
|
||||||
|
|
||||||
n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
|
n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
|
||||||
if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
|
if m in {
|
||||||
BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x):
|
Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
|
||||||
|
BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}:
|
||||||
c1, c2 = ch[f], args[0]
|
c1, c2 = ch[f], args[0]
|
||||||
if c2 != no: # if not output
|
if c2 != no: # if not output
|
||||||
c2 = make_divisible(c2 * gw, 8)
|
c2 = make_divisible(c2 * gw, 8)
|
||||||
|
|
||||||
args = [c1, c2, *args[1:]]
|
args = [c1, c2, *args[1:]]
|
||||||
if m in [BottleneckCSP, C3, C3TR, C3Ghost, C3x]:
|
if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
|
||||||
args.insert(2, n) # number of repeats
|
args.insert(2, n) # number of repeats
|
||||||
n = 1
|
n = 1
|
||||||
elif m is nn.BatchNorm2d:
|
elif m is nn.BatchNorm2d:
|
||||||
args = [ch[f]]
|
args = [ch[f]]
|
||||||
elif m is Concat:
|
elif m is Concat:
|
||||||
c2 = sum(ch[x] for x in f)
|
c2 = sum(ch[x] for x in f)
|
||||||
elif m is Detect:
|
# TODO: channel, gw, gd
|
||||||
|
elif m in {Detect, Segment}:
|
||||||
args.append([ch[x] for x in f])
|
args.append([ch[x] for x in f])
|
||||||
if isinstance(args[1], int): # number of anchors
|
if isinstance(args[1], int): # number of anchors
|
||||||
args[1] = [list(range(args[1] * 2))] * len(f)
|
args[1] = [list(range(args[1] * 2))] * len(f)
|
||||||
|
if m is Segment:
|
||||||
|
args[3] = make_divisible(args[3] * gw, 8)
|
||||||
elif m is Contract:
|
elif m is Contract:
|
||||||
c2 = ch[f] * args[0] ** 2
|
c2 = ch[f] * args[0] ** 2
|
||||||
elif m is Expand:
|
elif m is Expand:
|
||||||
|
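The decode in the new Detect/Segment forward is plain elementwise math. A standalone sketch of it for one scale follows; all tensor names, sizes, the grid offset and the anchor values here are illustrative assumptions, not the module's internals, and it assumes a recent torch with meshgrid indexing support:

    import torch

    bs, na, ny, nx, no = 1, 3, 20, 20, 85                      # batch, anchors, grid h/w, outputs per anchor
    raw = torch.randn(bs, na, ny, nx, no)                      # raw head output for one stride-8 scale
    yv, xv = torch.meshgrid(torch.arange(ny), torch.arange(nx), indexing='ij')
    grid = torch.stack((xv, yv), 2).view(1, 1, ny, nx, 2).float() - 0.5   # cell offsets, (x, y) order
    anchor_grid = torch.tensor([[10., 13.], [16., 30.], [33., 23.]]).view(1, na, 1, 1, 2)
    stride = 8.0

    xy, wh, conf = raw.sigmoid().split((2, 2, no - 4), 4)      # same split as the Detect branch above
    xy = (xy * 2 + grid) * stride                              # box centers in input-image pixels
    wh = (wh * 2) ** 2 * anchor_grid                           # widths/heights in pixels
    y = torch.cat((xy, wh, conf), 4).view(bs, na * ny * nx, no)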
@@ -4,9 +4,15 @@ utils/initialization
 """

 import contextlib
+import platform
 import threading


+def emojis(str=''):
+    # Return platform-dependent emoji-safe version of string
+    return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
+
+
 class TryExcept(contextlib.ContextDecorator):
     # YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager
     def __init__(self, msg=''):
@@ -17,7 +23,7 @@ class TryExcept(contextlib.ContextDecorator):

     def __exit__(self, exc_type, value, traceback):
         if value:
-            print(f'{self.msg}{value}')
+            print(emojis(f'{self.msg}{value}'))
         return True


@@ -38,7 +44,7 @@ def notebook_init(verbose=True):
     import os
     import shutil

-    from utils.general import check_font, check_requirements, emojis, is_colab
+    from utils.general import check_font, check_requirements, is_colab
     from utils.torch_utils import select_device  # imports

     check_requirements(('psutil', 'IPython'))
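Since TryExcept is a ContextDecorator, the usage hinted in its comment works both ways. A quick sketch, assuming yolov5's utils package is importable:

    from utils import TryExcept

    @TryExcept('prefix: ')
    def may_fail():
        raise ValueError('boom')

    may_fail()                   # exception is swallowed; prints 'prefix: boom' (emoji-stripped on Windows)

    with TryExcept('prefix: '):  # same behaviour as a context manager
        raise ValueError('boom')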
Binary file not shown.
@@ -12,7 +12,7 @@ import torch
 import torchvision.transforms as T
 import torchvision.transforms.functional as TF

-from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box
+from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy
 from utils.metrics import bbox_ioa

 IMAGENET_MEAN = 0.485, 0.456, 0.406  # RGB mean
@@ -21,7 +21,7 @@ IMAGENET_STD = 0.229, 0.224, 0.225  # RGB standard deviation

 class Albumentations:
     # YOLOv5 Albumentations class (optional, only used if package is installed)
-    def __init__(self):
+    def __init__(self, size=640):
         self.transform = None
         prefix = colorstr('albumentations: ')
         try:
@@ -29,6 +29,7 @@ class Albumentations:
             check_version(A.__version__, '1.0.3', hard=True)  # version requirement

             T = [
+                A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
                 A.Blur(p=0.01),
                 A.MedianBlur(p=0.01),
                 A.ToGray(p=0.01),
@@ -281,7 +282,7 @@ def cutout(im, labels, p=0.5):
             # return unobscured labels
             if len(labels) and s > 0.03:
                 box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
-                ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
+                ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h))  # intersection over area
                 labels = labels[ioa < 0.60]  # remove >60% obscured labels

     return labels
@@ -303,9 +304,11 @@ def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
     return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates


-def classify_albumentations(augment=True,
+def classify_albumentations(
+        augment=True,
         size=224,
         scale=(0.08, 1.0),
+        ratio=(0.75, 1.0 / 0.75),  # 0.75, 1.33
         hflip=0.5,
         vflip=0.0,
         jitter=0.4,
@@ -319,7 +322,7 @@ def classify_albumentations(augment=True,
         from albumentations.pytorch import ToTensorV2
         check_version(A.__version__, '1.0.3', hard=True)  # version requirement
         if augment:  # Resize and crop
-            T = [A.RandomResizedCrop(height=size, width=size, scale=scale)]
+            T = [A.RandomResizedCrop(height=size, width=size, scale=scale, ratio=ratio)]
         if auto_aug:
             # TODO: implement AugMix, AutoAug & RandAug in albumentation
             LOGGER.info(f'{prefix}auto augmentations are currently not supported')
@@ -338,7 +341,7 @@ def classify_albumentations(augment=True,
         return A.Compose(T)

     except ImportError:  # package not installed, skip
-        pass
+        LOGGER.warning(f'{prefix}⚠️ not found, install with `pip install albumentations` (recommended)')
     except Exception as e:
         LOGGER.info(f'{prefix}{e}')
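The wrapper now seeds the detection pipeline with a p=0.0 RandomResizedCrop sized to the training resolution. Building the same transform list directly looks roughly like this; it requires pip install albumentations, and the bbox_params line is an assumption about how labels are wired through, since that part is not shown in this hunk:

    import albumentations as A

    size = 640
    T = [
        A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
        A.Blur(p=0.01),
        A.MedianBlur(p=0.01),
        A.ToGray(p=0.01),
    ]
    transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))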
@@ -122,7 +122,7 @@ def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen
     # Filter
     i = (wh0 < 3.0).any(1).sum()
     if i:
-        LOGGER.info(f'{PREFIX}WARNING: Extremely small objects found: {i} of {len(wh0)} labels are < 3 pixels in size')
+        LOGGER.info(f'{PREFIX}WARNING ⚠️ Extremely small objects found: {i} of {len(wh0)} labels are <3 pixels in size')
     wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32)  # filter > 2 pixels
     # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1)  # multiply by random scale 0-1

@@ -134,7 +134,7 @@ def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen
         k = kmeans(wh / s, n, iter=30)[0] * s  # points
         assert n == len(k)  # kmeans may return fewer points than requested if wh is insufficient or too similar
     except Exception:
-        LOGGER.warning(f'{PREFIX}WARNING: switching strategies from kmeans to random init')
+        LOGGER.warning(f'{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init')
         k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size  # random init
     wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0))
     k = print_results(k, verbose=False)
@@ -19,7 +19,7 @@ def check_train_batch_size(model, imgsz=640, amp=True):


 def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
-    # Automatically estimate best batch size to use `fraction` of available CUDA memory
+    # Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory
     # Usage:
     #     import torch
     #     from utils.autobatch import autobatch
@@ -33,6 +33,9 @@ def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
     if device.type == 'cpu':
         LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
         return batch_size
+    if torch.backends.cudnn.benchmark:
+        LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}')
+        return batch_size

     # Inspect CUDA memory
     gb = 1 << 30  # bytes to GiB (1024 ** 3)
@@ -62,8 +65,8 @@ def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
             b = batch_sizes[max(i - 1, 0)]  # select prior safe point
     if b < 1 or b > 1024:  # b outside of safe range
         b = batch_size
-        LOGGER.warning(f'{prefix}WARNING: ⚠️ CUDA anomaly detected, recommend restart environment and retry command.')
+        LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.')

-    fraction = np.polyval(p, b) / t  # actual fraction predicted
+    fraction = (np.polyval(p, b) + r + a) / t  # actual fraction predicted
     LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅')
     return b
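The changed return line folds reserved (r) and allocated (a) memory back into the predicted fraction. A toy version of the fit-and-solve step, with invented numbers standing in for profiled values:

    import numpy as np

    batch_sizes = [1, 2, 4, 8, 16]
    mem = [0.5, 0.9, 1.7, 3.3, 6.5]             # profiled GiB per batch size (made-up values)
    t, r, a = 8.0, 0.3, 0.2                     # total / reserved / allocated GiB
    free, target = t - (r + a), 0.8

    p = np.polyfit(batch_sizes, mem, deg=1)     # first-degree fit: mem ~= p[0] * b + p[1]
    b = int((free * target - p[1]) / p[0])      # batch size that should hit the target fraction
    fraction = (np.polyval(p, b) + r + a) / t   # actual fraction predicted, as in the new code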
@@ -40,6 +40,7 @@ IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp',
 VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv'  # include video suffixes
 BAR_FORMAT = '{l_bar}{bar:10}{r_bar}{bar:-10b}'  # tqdm bar format
 LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
+RANK = int(os.getenv('RANK', -1))
 PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true'  # global pin_memory for dataloaders

 # Get orientation exif tag
@@ -116,7 +117,7 @@ def create_dataloader(path,
                       prefix='',
                       shuffle=False):
     if rect and shuffle:
-        LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False')
+        LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
         shuffle = False
     with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
         dataset = LoadImagesAndLabels(
@@ -139,7 +140,7 @@ def create_dataloader(path,
     sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
     loader = DataLoader if image_weights else InfiniteDataLoader  # only DataLoader allows for attribute updates
     generator = torch.Generator()
-    generator.manual_seed(0)
+    generator.manual_seed(6148914691236517205 + RANK)
     return loader(dataset,
                   batch_size=batch_size,
                   shuffle=shuffle and sampler is None,
@@ -185,6 +186,55 @@ class _RepeatSampler:
             yield from iter(self.sampler)


+class LoadScreenshots:
+    # YOLOv5 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"`
+    def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None):
+        # source = [screen_number left top width height] (pixels)
+        check_requirements('mss')
+        import mss
+
+        source, *params = source.split()
+        self.screen, left, top, width, height = 0, None, None, None, None  # default to full screen 0
+        if len(params) == 1:
+            self.screen = int(params[0])
+        elif len(params) == 4:
+            left, top, width, height = (int(x) for x in params)
+        elif len(params) == 5:
+            self.screen, left, top, width, height = (int(x) for x in params)
+        self.img_size = img_size
+        self.stride = stride
+        self.transforms = transforms
+        self.auto = auto
+        self.mode = 'stream'
+        self.frame = 0
+        self.sct = mss.mss()
+
+        # Parse monitor shape
+        monitor = self.sct.monitors[self.screen]
+        self.top = monitor["top"] if top is None else (monitor["top"] + top)
+        self.left = monitor["left"] if left is None else (monitor["left"] + left)
+        self.width = width or monitor["width"]
+        self.height = height or monitor["height"]
+        self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        # mss screen capture: get raw pixels from the screen as np array
+        im0 = np.array(self.sct.grab(self.monitor))[:, :, :3]  # [:, :, :3] BGRA to BGR
+        s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "
+
+        if self.transforms:
+            im = self.transforms(im0)  # transforms
+        else:
+            im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0]  # padded resize
+            im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+            im = np.ascontiguousarray(im)  # contiguous
+        self.frame += 1
+        return str(self.screen), im, im0, None, s  # screen, img, original img, im0s, s
+
+
 class LoadImages:
     # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
     def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
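A hedged usage sketch of the new screenshot source, assuming a yolov5 checkout on sys.path and pip install mss; the loader is an endless stream, so iteration has to be broken manually:

    from utils.dataloaders import LoadScreenshots

    # 'screen 0 100 100 512 256' -> screen 0, left 100, top 100, width 512, height 256
    dataset = LoadScreenshots('screen 0 100 100 512 256', img_size=640, stride=32, auto=True)
    for screen, im, im0, _, s in dataset:   # im: letterboxed CHW RGB, im0: raw BGR grab
        print(s, im.shape, im0.shape)
        break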
@@ -232,8 +282,9 @@ class LoadImages:
         if self.video_flag[self.count]:
             # Read video
             self.mode = 'video'
-            ret_val, im0 = self.cap.read()
-            self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.vid_stride * (self.frame + 1))  # read at vid_stride
+            for _ in range(self.vid_stride):
+                self.cap.grab()
+            ret_val, im0 = self.cap.retrieve()
             while not ret_val:
                 self.count += 1
                 self.cap.release()
@@ -328,7 +379,7 @@ class LoadStreams:
         self.auto = auto and self.rect
         self.transforms = transforms  # optional
         if not self.rect:
-            LOGGER.warning('WARNING: Stream shapes differ. For optimal performance supply similarly-shaped streams.')
+            LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.')

     def update(self, i, cap, stream):
         # Read stream `i` frames in daemon thread
@@ -341,7 +392,7 @@ class LoadStreams:
                 if success:
                     self.imgs[i] = im
                 else:
-                    LOGGER.warning('WARNING: Video stream unresponsive, please check your IP camera connection.')
+                    LOGGER.warning('WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.')
                     self.imgs[i] = np.zeros_like(self.imgs[i])
                     cap.open(stream)  # re-open stream if signal was lost
             time.sleep(0.0)  # wait time
@@ -403,7 +454,7 @@ class LoadImagesAndLabels(Dataset):
         self.mosaic_border = [-img_size // 2, -img_size // 2]
         self.stride = stride
         self.path = path
-        self.albumentations = Albumentations() if augment else None
+        self.albumentations = Albumentations(size=img_size) if augment else None

         try:
             f = []  # image files
@@ -455,7 +506,7 @@ class LoadImagesAndLabels(Dataset):
         self.im_files = list(cache.keys())  # update
         self.label_files = img2label_paths(cache.keys())  # update
         n = len(shapes)  # number of images
-        bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
+        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
         nb = bi[-1] + 1  # number of batches
         self.batch = bi  # batch index of image
         self.n = n
@@ -484,6 +535,7 @@ class LoadImagesAndLabels(Dataset):
             self.im_files = [self.im_files[i] for i in irect]
             self.label_files = [self.label_files[i] for i in irect]
             self.labels = [self.labels[i] for i in irect]
+            self.segments = [self.segments[i] for i in irect]
             self.shapes = s[irect]  # wh
             ar = ar[irect]

@@ -497,7 +549,7 @@ class LoadImagesAndLabels(Dataset):
                 elif mini > 1:
                     shapes[i] = [1, 1 / mini]

-            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
+            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride

         # Cache images into RAM/disk for faster training (WARNING: large datasets may exceed system resources)
         self.ims = [None] * n
@@ -542,7 +594,7 @@ class LoadImagesAndLabels(Dataset):
         if msgs:
             LOGGER.info('\n'.join(msgs))
         if nf == 0:
-            LOGGER.warning(f'{prefix}WARNING: No labels found in {path}. {HELP_URL}')
+            LOGGER.warning(f'{prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}')
         x['hash'] = get_hash(self.label_files + self.im_files)
         x['results'] = nf, nm, ne, nc, len(self.im_files)
         x['msgs'] = msgs  # warnings
@@ -552,7 +604,7 @@ class LoadImagesAndLabels(Dataset):
             path.with_suffix('.cache.npy').rename(path)  # remove .npy suffix
             LOGGER.info(f'{prefix}New cache created: {path}')
         except Exception as e:
-            LOGGER.warning(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}')  # not writeable
+            LOGGER.warning(f'{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable: {e}')  # not writeable
         return x

     def __len__(self):
@@ -867,7 +919,7 @@ def extract_boxes(path=DATASETS_DIR / 'coco128'):  # from utils.dataloaders impo
                     b = x[1:] * [w, h, w, h]  # box
                     # b[2:] = b[2:].max()  # rectangle to square
                     b[2:] = b[2:] * 1.2 + 3  # pad
-                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
+                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)

                     b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                     b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
@@ -916,7 +968,7 @@ def verify_image_label(args):
                 f.seek(-2, 2)
                 if f.read() != b'\xff\xd9':  # corrupt JPEG
                     ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100)
-                    msg = f'{prefix}WARNING: {im_file}: corrupt JPEG restored and saved'
+                    msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved'

         # verify labels
         if os.path.isfile(lb_file):
@@ -938,7 +990,7 @@ def verify_image_label(args):
                     lb = lb[i]  # remove duplicates
                     if segments:
                         segments = [segments[x] for x in i]
-                    msg = f'{prefix}WARNING: {im_file}: {nl - len(i)} duplicate labels removed'
+                    msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed'
             else:
                 ne = 1  # label empty
                 lb = np.zeros((0, 5), dtype=np.float32)
@@ -948,7 +1000,7 @@ def verify_image_label(args):
         return im_file, lb, shape, segments, nm, nf, ne, nc, msg
     except Exception as e:
         nc = 1
-        msg = f'{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}'
+        msg = f'{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}'
         return [None, None, None, None, nm, nf, ne, nc, msg]


@@ -1011,7 +1063,7 @@ class HUBDatasetStats():
                 im = im.resize((int(im.width * r), int(im.height * r)))
             im.save(f_new, 'JPEG', quality=50, optimize=True)  # save
         except Exception as e:  # use OpenCV
-            print(f'WARNING: HUB ops PIL failure {f}: {e}')
+            LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}')
             im = cv2.imread(f)
             im_height, im_width = im.shape[:2]
             r = max_dim / max(im_height, im_width)  # ratio
@@ -1118,7 +1170,7 @@ def create_classification_dataloader(path,
     nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])
     sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
     generator = torch.Generator()
-    generator.manual_seed(0)
+    generator.manual_seed(6148914691236517205 + RANK)
     return InfiniteDataLoader(dataset,
                               batch_size=batch_size,
                               shuffle=shuffle and sampler is None,
@@ -3,7 +3,7 @@
 # Image is CUDA-optimized for YOLOv5 single/multi-GPU training and inference

 # Start FROM NVIDIA PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
-FROM nvcr.io/nvidia/pytorch:22.07-py3
+FROM nvcr.io/nvidia/pytorch:22.08-py3
 RUN rm -rf /opt/pytorch  # remove 1.2GB dir

 # Downloads to user config dir
@@ -16,13 +16,13 @@ import requests
 import torch


-def is_url(url, check_online=True):
-    # Check if online file exists
+def is_url(url, check=True):
+    # Check if string is URL and check if URL exists
     try:
         url = str(url)
         result = urllib.parse.urlparse(url)
         assert all([result.scheme, result.netloc, result.path])  # check if is url
-        return (urllib.request.urlopen(url).getcode() == 200) if check_online else True  # check if exists online
+        return (urllib.request.urlopen(url).getcode() == 200) if check else True  # check if exists online
     except (AssertionError, urllib.request.HTTPError):
         return False

@@ -87,9 +87,7 @@ def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'):
         return file

     # GitHub assets
-    assets = [
-        'yolov5n.pt', 'yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov5n6.pt', 'yolov5s6.pt',
-        'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt']
+    assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')]  # default
     try:
         tag, assets = github_assets(repo, release)
     except Exception:
@@ -107,7 +105,6 @@ def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'):
             safe_download(
                 file,
                 url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
-                url2=f'https://storage.googleapis.com/{repo}/{tag}/{name}',  # backup url (optional)
                 min_bytes=1E5,
                 error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')

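The new comprehension replaces the hand-written asset list and also covers the -cls and -seg checkpoints; expanded, it yields 20 names:

    assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')]
    # ['yolov5n.pt', 'yolov5n6.pt', 'yolov5n-cls.pt', 'yolov5n-seg.pt', 'yolov5s.pt', ...]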
173 utils/general.py
@@ -17,6 +17,7 @@ import signal
 import sys
 import time
 import urllib
+from copy import deepcopy
 from datetime import datetime
 from itertools import repeat
 from multiprocessing.pool import ThreadPool
@@ -33,7 +34,7 @@ import torch
 import torchvision
 import yaml

-from utils import TryExcept
+from utils import TryExcept, emojis
 from utils.downloads import gsutil_getsize
 from utils.metrics import box_iou, fitness

@@ -42,8 +43,8 @@ ROOT = FILE.parents[1]  # YOLOv5 root directory
 RANK = int(os.getenv('RANK', -1))

 # Settings
-DATASETS_DIR = ROOT.parent / 'datasets'  # YOLOv5 datasets directory
 NUM_THREADS = min(8, max(1, os.cpu_count() - 1))  # number of YOLOv5 multiprocessing threads
+DATASETS_DIR = Path(os.getenv('YOLOv5_DATASETS_DIR', ROOT.parent / 'datasets'))  # global datasets directory
 AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true'  # global auto-install mode
 VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true'  # global verbose mode
 FONT = 'Arial.ttf'  # https://ultralytics.com/assets/Arial.ttf
@@ -222,7 +223,7 @@ def init_seeds(seed=0, deterministic=False):
     torch.manual_seed(seed)
     torch.cuda.manual_seed(seed)
     torch.cuda.manual_seed_all(seed)  # for Multi-GPU, exception safe
-    torch.backends.cudnn.benchmark = True  # for faster training
+    # torch.backends.cudnn.benchmark = True  # AutoBatch problem https://github.com/ultralytics/yolov5/issues/9287
     if deterministic and check_version(torch.__version__, '1.12.0'):  # https://github.com/ultralytics/yolov5/pull/8213
         torch.use_deterministic_algorithms(True)
         torch.backends.cudnn.deterministic = True
@@ -247,11 +248,6 @@ def get_latest_run(search_dir='.'):
     return max(last_list, key=os.path.getctime) if last_list else ''


-def emojis(str=''):
-    # Return platform-dependent emoji-safe version of string
-    return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
-
-
 def file_age(path=__file__):
     # Return days since last file update
     dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime))  # delta
@@ -332,7 +328,7 @@ def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=Fals
     # Check version vs. required version
     current, minimum = (pkg.parse_version(x) for x in (current, minimum))
     result = (current == minimum) if pinned else (current >= minimum)  # bool
-    s = f'WARNING: ⚠️ {name}{minimum} is required by YOLOv5, but {name}{current} is currently installed'  # string
+    s = f'WARNING ⚠️ {name}{minimum} is required by YOLOv5, but {name}{current} is currently installed'  # string
     if hard:
         assert result, emojis(s)  # assert min requirements met
     if verbose and not result:
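Only the warning string changed here; the comparison itself is pkg_resources version parsing, roughly:

    import pkg_resources as pkg

    current, minimum = (pkg.parse_version(x) for x in ('1.13.0', '1.7.0'))
    pinned = False
    result = (current == minimum) if pinned else (current >= minimum)   # True: 1.13.0 >= 1.7.0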
@@ -341,40 +337,38 @@ def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=Fals


 @TryExcept()
-def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=()):
-    # Check installed dependencies meet YOLOv5 requirements (pass *.txt file or list of packages)
+def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=''):
+    # Check installed dependencies meet YOLOv5 requirements (pass *.txt file or list of packages or single package str)
     prefix = colorstr('red', 'bold', 'requirements:')
     check_python()  # check python version
-    if isinstance(requirements, (str, Path)):  # requirements.txt file
-        file = Path(requirements)
-        assert file.exists(), f"{prefix} {file.resolve()} not found, check failed."
+    if isinstance(requirements, Path):  # requirements.txt file
+        file = requirements.resolve()
+        assert file.exists(), f"{prefix} {file} not found, check failed."
         with file.open() as f:
             requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude]
-    else:  # list or tuple of packages
-        requirements = [x for x in requirements if x not in exclude]
+    elif isinstance(requirements, str):
+        requirements = [requirements]

-    n = 0  # number of packages updates
-    for i, r in enumerate(requirements):
+    s = ''
+    n = 0
+    for r in requirements:
         try:
             pkg.require(r)
-        except Exception:  # DistributionNotFound or VersionConflict if requirements not met
-            s = f"{prefix} {r} not found and is required by YOLOv5"
-            if install and AUTOINSTALL:  # check environment variable
-                LOGGER.info(f"{s}, attempting auto-update...")
-                try:
-                    assert check_online(), f"'pip install {r}' skipped (offline)"
-                    LOGGER.info(check_output(f'pip install "{r}" {cmds[i] if cmds else ""}', shell=True).decode())
+        except (pkg.VersionConflict, pkg.DistributionNotFound):  # exception if requirements not met
+            s += f'"{r}" '
             n += 1
-                except Exception as e:
-                    LOGGER.warning(f'{prefix} {e}')
-            else:
-                LOGGER.info(f'{s}. Please install and rerun your command.')

-    if n:  # if packages updated
-        source = file.resolve() if 'file' in locals() else requirements
-        s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \
-            f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n"
-        LOGGER.info(s)
+    if s and install and AUTOINSTALL:  # check environment variable
+        LOGGER.info(f"{prefix} YOLOv5 requirement{'s' * (n > 1)} {s}not found, attempting AutoUpdate...")
+        try:
+            assert check_online(), "AutoUpdate skipped (offline)"
+            LOGGER.info(check_output(f'pip install {s} {cmds}', shell=True).decode())
+            source = file if 'file' in locals() else requirements
+            s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \
+                f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n"
+            LOGGER.info(s)
+        except Exception as e:
+            LOGGER.warning(f'{prefix} ❌ {e}')


 def check_img_size(imgsz, s=32, floor=0):
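The rewrite collects every unmet requirement into one quoted string and runs a single pip install instead of one per package. A stripped-down sketch of the new control flow, with example package names only:

    import pkg_resources as pkg
    from subprocess import check_output

    requirements = ['gitpython', 'psutil>=5.0']   # example inputs
    s, n = '', 0
    for r in requirements:
        try:
            pkg.require(r)
        except (pkg.VersionConflict, pkg.DistributionNotFound):
            s += f'"{r}" '                        # accumulate all misses
            n += 1

    if s:                                          # one combined AutoUpdate call
        print(check_output(f'pip install {s}', shell=True).decode())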
@@ -385,7 +379,7 @@ def check_img_size(imgsz, s=32, floor=0):
     imgsz = list(imgsz)  # convert to list if tuple
     new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz]
     if new_size != imgsz:
-        LOGGER.warning(f'WARNING: --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}')
+        LOGGER.warning(f'WARNING ⚠️ --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}')
     return new_size


@@ -400,7 +394,7 @@ def check_imshow():
         cv2.waitKey(1)
         return True
     except Exception as e:
-        LOGGER.warning(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}')
+        LOGGER.warning(f'WARNING ⚠️ Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}')
         return False


@@ -470,8 +464,7 @@ def check_dataset(data, autodownload=True):

     # Read yaml (optional)
     if isinstance(data, (str, Path)):
-        with open(data, errors='ignore') as f:
-            data = yaml.safe_load(f)  # dictionary
+        data = yaml_load(data)  # dictionary

     # Checks
     for k in 'train', 'val', 'names':
@@ -486,7 +479,13 @@ def check_dataset(data, autodownload=True):
         path = (ROOT / path).resolve()
     for k in 'train', 'val', 'test':
         if data.get(k):  # prepend path
-            data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]
+            if isinstance(data[k], str):
+                x = (path / data[k]).resolve()
+                if not x.exists() and data[k].startswith('../'):
+                    x = (path / data[k][3:]).resolve()
+                data[k] = str(x)
+            else:
+                data[k] = [str((path / x).resolve()) for x in data[k]]

     # Parse yaml
     train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download'))
@@ -497,13 +496,12 @@ def check_dataset(data, autodownload=True):
         if not s or not autodownload:
             raise Exception('Dataset not found ❌')
         t = time.time()
-        root = path.parent if 'path' in data else '..'  # unzip directory i.e. '../'
         if s.startswith('http') and s.endswith('.zip'):  # URL
             f = Path(s).name  # filename
             LOGGER.info(f'Downloading {s} to {f}...')
             torch.hub.download_url_to_file(s, f)
-            Path(root).mkdir(parents=True, exist_ok=True)  # create root
-            ZipFile(f).extractall(path=root)  # unzip
+            Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True)  # create root
+            ZipFile(f).extractall(path=DATASETS_DIR)  # unzip
             Path(f).unlink()  # remove zip
             r = None  # success
         elif s.startswith('bash '):  # bash script
@@ -512,7 +510,7 @@ def check_dataset(data, autodownload=True):
         else:  # python script
             r = exec(s, {'yaml': data})  # return None
         dt = f'({round(time.time() - t, 1)}s)'
-        s = f"success ✅ {dt}, saved to {colorstr('bold', root)}" if r in (0, None) else f"failure {dt} ❌"
+        s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f"failure {dt} ❌"
         LOGGER.info(f"Dataset download {s}")
     check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True)  # download fonts
     return data  # dictionary
@@ -537,7 +535,7 @@ def check_amp(model):
     f = ROOT / 'data' / 'images' / 'bus.jpg'  # image to check
     im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if check_online() else np.ones((640, 640, 3))
     try:
-        assert amp_allclose(model, im) or amp_allclose(DetectMultiBackend('yolov5n.pt', device), im)
+        assert amp_allclose(deepcopy(model), im) or amp_allclose(DetectMultiBackend('yolov5n.pt', device), im)
         LOGGER.info(f'{prefix}checks passed ✅')
         return True
     except Exception:
@@ -569,10 +567,10 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry
     def download_one(url, dir):
         # Download 1 file
         success = True
-        f = dir / Path(url).name  # filename
-        if Path(url).is_file():  # exists in current path
-            Path(url).rename(f)  # move to dir
-        elif not f.exists():
+        if Path(url).is_file():
+            f = Path(url)  # filename
+        else:  # does not exist
+            f = dir / Path(url).name
             LOGGER.info(f'Downloading {url} to {f}...')
             for i in range(retry + 1):
                 if curl:
@@ -586,9 +584,9 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry
                 if success:
                     break
                 elif i < retry:
-                    LOGGER.warning(f'Download failure, retrying {i + 1}/{retry} {url}...')
+                    LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')
                 else:
-                    LOGGER.warning(f'Failed to download {url}...')
+                    LOGGER.warning(f'❌ Failed to download {url}...')

         if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
             LOGGER.info(f'Unzipping {f}...')
@@ -727,7 +725,7 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
 def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
     # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
     if clip:
-        clip_coords(x, (h - eps, w - eps))  # warning: inplace clip
+        clip_boxes(x, (h - eps, w - eps))  # warning: inplace clip
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w  # x center
     y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h  # y center
@@ -771,7 +769,23 @@ def resample_segments(segments, n=1000):
     return segments


-def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
+def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
+    # Rescale boxes (xyxy) from img1_shape to img0_shape
+    if ratio_pad is None:  # calculate from img0_shape
+        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
+        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
+    else:
+        gain = ratio_pad[0][0]
+        pad = ratio_pad[1]
+
+    boxes[:, [0, 2]] -= pad[0]  # x padding
+    boxes[:, [1, 3]] -= pad[1]  # y padding
+    boxes[:, :4] /= gain
+    clip_boxes(boxes, img0_shape)
+    return boxes
+
+
+def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None):
     # Rescale coords (xyxy) from img1_shape to img0_shape
     if ratio_pad is None:  # calculate from img0_shape
         gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
@@ -780,15 +794,15 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
         gain = ratio_pad[0][0]
         pad = ratio_pad[1]

-    coords[:, [0, 2]] -= pad[0]  # x padding
-    coords[:, [1, 3]] -= pad[1]  # y padding
-    coords[:, :4] /= gain
-    clip_coords(coords, img0_shape)
-    return coords
+    segments[:, 0] -= pad[0]  # x padding
+    segments[:, 1] -= pad[1]  # y padding
+    segments /= gain
+    clip_segments(segments, img0_shape)
+    return segments


-def clip_coords(boxes, shape):
-    # Clip bounding xyxy bounding boxes to image shape (height, width)
+def clip_boxes(boxes, shape):
+    # Clip boxes (xyxy) to image shape (height, width)
     if isinstance(boxes, torch.Tensor):  # faster individually
         boxes[:, 0].clamp_(0, shape[1])  # x1
         boxes[:, 1].clamp_(0, shape[0])  # y1
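scale_boxes is the renamed scale_coords, boxes-only now that segments get their own function. A worked example of the default ratio_pad=None path:

    import numpy as np

    img1_shape, img0_shape = (640, 640), (480, 640)   # letterboxed (h, w) vs original (h, w)
    gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])                       # 1.0
    pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2   # (0.0, 80.0)

    boxes = np.array([[100., 180., 200., 280.]])      # xyxy inside the 640x640 letterbox
    boxes[:, [0, 2]] -= pad[0]                        # undo x padding
    boxes[:, [1, 3]] -= pad[1]                        # undo y padding
    boxes[:, :4] /= gain                              # -> [[100., 100., 200., 200.]] in the 480x640 image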
@ -799,15 +813,28 @@ def clip_coords(boxes, shape):
|
|||||||
boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
|
boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
|
||||||
|
|
||||||
|
|
||||||
def non_max_suppression(prediction,
|
def clip_segments(boxes, shape):
|
||||||
|
# Clip segments (xy1,xy2,...) to image shape (height, width)
|
||||||
|
if isinstance(boxes, torch.Tensor): # faster individually
|
||||||
|
boxes[:, 0].clamp_(0, shape[1]) # x
|
||||||
|
boxes[:, 1].clamp_(0, shape[0]) # y
|
||||||
|
else: # np.array (faster grouped)
|
||||||
|
boxes[:, 0] = boxes[:, 0].clip(0, shape[1]) # x
|
||||||
|
boxes[:, 1] = boxes[:, 1].clip(0, shape[0]) # y
|
||||||
|
|
||||||
|
|
||||||
|
def non_max_suppression(
|
||||||
|
prediction,
|
||||||
conf_thres=0.25,
|
conf_thres=0.25,
|
||||||
iou_thres=0.45,
|
iou_thres=0.45,
|
||||||
classes=None,
|
classes=None,
|
||||||
agnostic=False,
|
agnostic=False,
|
||||||
multi_label=False,
|
multi_label=False,
|
||||||
labels=(),
|
labels=(),
|
||||||
max_det=300):
|
max_det=300,
|
||||||
"""Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes
|
nm=0, # number of masks
|
||||||
|
):
|
||||||
|
"""Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
list of detections, on (n,6) tensor per image [xyxy, conf, cls]
|
list of detections, on (n,6) tensor per image [xyxy, conf, cls]
|
||||||
@@ -817,7 +844,7 @@ def non_max_suppression(prediction,
         prediction = prediction[0]  # select only inference output

     bs = prediction.shape[0]  # batch size
-    nc = prediction.shape[2] - 5  # number of classes
+    nc = prediction.shape[2] - nm - 5  # number of classes
     xc = prediction[..., 4] > conf_thres  # candidates

     # Checks
@@ -828,13 +855,14 @@ def non_max_suppression(prediction,
     # min_wh = 2  # (pixels) minimum box width and height
     max_wh = 7680  # (pixels) maximum box width and height
     max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
-    time_limit = 0.3 + 0.03 * bs  # seconds to quit after
+    time_limit = 0.5 + 0.05 * bs  # seconds to quit after
     redundant = True  # require redundant detections
     multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
     merge = False  # use merge-NMS

     t = time.time()
-    output = [torch.zeros((0, 6), device=prediction.device)] * bs
+    mi = 5 + nc  # mask start index
+    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
     for xi, x in enumerate(prediction):  # image index, image inference
         # Apply constraints
         # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
@@ -843,7 +871,7 @@ def non_max_suppression(prediction,
         # Cat apriori labels if autolabelling
         if labels and len(labels[xi]):
             lb = labels[xi]
-            v = torch.zeros((len(lb), nc + 5), device=x.device)
+            v = torch.zeros((len(lb), nc + nm + 5), device=x.device)
             v[:, :4] = lb[:, 1:5]  # box
             v[:, 4] = 1.0  # conf
             v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
@@ -856,16 +884,17 @@ def non_max_suppression(prediction,
         # Compute conf
         x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

-        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
-        box = xywh2xyxy(x[:, :4])
+        # Box/Mask
+        box = xywh2xyxy(x[:, :4])  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
+        mask = x[:, mi:]  # zero columns if no masks

         # Detections matrix nx6 (xyxy, conf, cls)
         if multi_label:
-            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
-            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
+            i, j = (x[:, 5:mi] > conf_thres).nonzero(as_tuple=False).T
+            x = torch.cat((box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1)
         else:  # best class only
-            conf, j = x[:, 5:].max(1, keepdim=True)
-            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
+            conf, j = x[:, 5:mi].max(1, keepdim=True)
+            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]

         # Filter by class
         if classes is not None:
@@ -881,6 +910,8 @@ def non_max_suppression(prediction,
             continue
         elif n > max_nms:  # excess boxes
             x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
+        else:
+            x = x[x[:, 4].argsort(descending=True)]  # sort by confidence

         # Batched NMS
         c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
@@ -898,7 +929,7 @@ def non_max_suppression(prediction,

         output[xi] = x[i]
         if (time.time() - t) > time_limit:
-            LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded')
+            LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
             break  # time limit exceeded

     return output
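The new nm parameter threads mask coefficients through NMS for segmentation models while keeping the old detection behavior as the default. A hedged call sketch (tensor sizes are illustrative; nm=32 is the YOLOv5 segmentation-head convention, assumed here rather than shown in this diff):

```python
import torch

nc, nm = 80, 32                           # classes, mask coefficients (illustrative)
pred = torch.rand(1, 25200, 5 + nc + nm)  # fake raw output: (batch, anchors, 5 + nc + nm)

# Detection callers keep nm=0 and get (n, 6) tensors back as before;
# segmentation callers pass nm so each kept row carries its mask coefficients.
out = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45, nm=nm)
print(out[0].shape)  # (n, 6 + nm): xyxy, conf, cls, then nm mask coefficients
```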
@@ -975,7 +1006,7 @@ def apply_classifier(x, model, img, im0):
             d[:, :4] = xywh2xyxy(b).long()

             # Rescale boxes from img_size to im0 size
-            scale_coords(img.shape[2:], d[:, :4], im0[i].shape)
+            scale_boxes(img.shape[2:], d[:, :4], im0[i].shape)

             # Classes
             pred_cls1 = d[:, 5].long()
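Downstream callers (including this repo's main.py) need the same mechanical rename; a hedged before/after sketch with assumed caller variable names:

```python
# Before this update (old YOLOv5 API):
#   from utils.general import scale_coords
#   scale_coords(img.shape[2:], det[:, :4], im0.shape)

# After this update (det, img, im0 are hypothetical caller variables):
from utils.general import scale_boxes
scale_boxes(img.shape[2:], det[:, :4], im0.shape)  # same arguments, new name
```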
utils/loggers/__init__.py
@@ -11,13 +11,13 @@ import pkg_resources as pkg
 import torch
 from torch.utils.tensorboard import SummaryWriter

-from utils.general import colorstr, cv2
+from utils.general import LOGGER, colorstr, cv2
 from utils.loggers.clearml.clearml_utils import ClearmlLogger
 from utils.loggers.wandb.wandb_utils import WandbLogger
 from utils.plots import plot_images, plot_labels, plot_results
 from utils.torch_utils import de_parallel

-LOGGERS = ('csv', 'tb', 'wandb', 'clearml')  # *.csv, TensorBoard, Weights & Biases, ClearML
+LOGGERS = ('csv', 'tb', 'wandb', 'clearml', 'comet')  # *.csv, TensorBoard, Weights & Biases, ClearML, Comet
 RANK = int(os.getenv('RANK', -1))

 try:
@@ -41,6 +41,18 @@ try:
 except (ImportError, AssertionError):
     clearml = None

+try:
+    if RANK not in [0, -1]:
+        comet_ml = None
+    else:
+        import comet_ml
+
+        assert hasattr(comet_ml, '__version__')  # verify package import not local dir
+        from utils.loggers.comet import CometLogger
+
+except (ModuleNotFoundError, ImportError, AssertionError):
+    comet_ml = None
+

 class Loggers():
     # YOLOv5 Loggers class
@@ -80,7 +92,10 @@ class Loggers():
             prefix = colorstr('ClearML: ')
             s = f"{prefix}run 'pip install clearml' to automatically track, visualize and remotely train YOLOv5 🚀 in ClearML"
             self.logger.info(s)
+        if not comet_ml:
+            prefix = colorstr('Comet: ')
+            s = f"{prefix}run 'pip install comet_ml' to automatically track and visualize YOLOv5 🚀 runs in Comet"
+            self.logger.info(s)
         # TensorBoard
         s = self.save_dir
         if 'tb' in self.include and not self.opt.evolve:
@@ -107,6 +122,18 @@ class Loggers():
         else:
             self.clearml = None

+        # Comet
+        if comet_ml and 'comet' in self.include:
+            if isinstance(self.opt.resume, str) and self.opt.resume.startswith("comet://"):
+                run_id = self.opt.resume.split("/")[-1]
+                self.comet_logger = CometLogger(self.opt, self.hyp, run_id=run_id)
+
+            else:
+                self.comet_logger = CometLogger(self.opt, self.hyp)
+
+        else:
+            self.comet_logger = None
+
     @property
     def remote_dataset(self):
         # Get data_dict if custom dataset artifact link is provided
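The run id is recovered from the Comet Run Path by taking the last path segment; a tiny sketch with a hypothetical path:

```python
# Hypothetical resume argument in "comet://<workspace>/<project>/<experiment id>" form
resume = "comet://my-workspace/yolov5/abc123def456"
run_id = resume.split("/")[-1]
print(run_id)  # 'abc123def456'
```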
@@ -115,12 +142,18 @@ class Loggers():
             data_dict = self.clearml.data_dict
         if self.wandb:
             data_dict = self.wandb.data_dict
+        if self.comet_logger:
+            data_dict = self.comet_logger.data_dict

         return data_dict

     def on_train_start(self):
-        # Callback runs on train start
-        pass
+        if self.comet_logger:
+            self.comet_logger.on_train_start()
+
+    def on_pretrain_routine_start(self):
+        if self.comet_logger:
+            self.comet_logger.on_pretrain_routine_start()

     def on_pretrain_routine_end(self, labels, names):
         # Callback runs on pre-train routine end
@@ -131,8 +164,11 @@ class Loggers():
             self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]})
         # if self.clearml:
         #    pass  # ClearML saves these images automatically using hooks
+        if self.comet_logger:
+            self.comet_logger.on_pretrain_routine_end(paths)

-    def on_train_batch_end(self, model, ni, imgs, targets, paths):
+    def on_train_batch_end(self, model, ni, imgs, targets, paths, vals):
+        log_dict = dict(zip(self.keys[0:3], vals))
         # Callback runs on train batch end
         # ni: number integrated batches (since train start)
         if self.plots:
@@ -148,11 +184,21 @@ class Loggers():
             if self.clearml:
                 self.clearml.log_debug_samples(files, title='Mosaics')

+        if self.comet_logger:
+            self.comet_logger.on_train_batch_end(log_dict, step=ni)
+
     def on_train_epoch_end(self, epoch):
         # Callback runs on train epoch end
         if self.wandb:
             self.wandb.current_epoch = epoch + 1

+        if self.comet_logger:
+            self.comet_logger.on_train_epoch_end(epoch)
+
+    def on_val_start(self):
+        if self.comet_logger:
+            self.comet_logger.on_val_start()
+
     def on_val_image_end(self, pred, predn, path, names, im):
         # Callback runs on val image end
         if self.wandb:
@@ -160,7 +206,11 @@ class Loggers():
         if self.clearml:
             self.clearml.log_image_with_boxes(path, pred, names, im)

-    def on_val_end(self):
+    def on_val_batch_end(self, batch_i, im, targets, paths, shapes, out):
+        if self.comet_logger:
+            self.comet_logger.on_val_batch_end(batch_i, im, targets, paths, shapes, out)
+
+    def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
         # Callback runs on val end
         if self.wandb or self.clearml:
             files = sorted(self.save_dir.glob('val*.jpg'))
@@ -169,6 +219,9 @@ class Loggers():
             if self.clearml:
                 self.clearml.log_debug_samples(files, title='Validation')

+        if self.comet_logger:
+            self.comet_logger.on_val_end(nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix)
+
     def on_fit_epoch_end(self, vals, epoch, best_fitness, fi):
         # Callback runs at the end of each fit (train+val) epoch
         x = dict(zip(self.keys, vals))
@@ -199,6 +252,9 @@ class Loggers():
             self.clearml.current_epoch_logged_images = set()  # reset epoch image limit
             self.clearml.current_epoch += 1

+        if self.comet_logger:
+            self.comet_logger.on_fit_epoch_end(x, epoch=epoch)
+
     def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
         # Callback runs on model save event
         if (epoch + 1) % self.opt.save_period == 0 and not final_epoch and self.opt.save_period != -1:
@@ -209,6 +265,9 @@ class Loggers():
                                           model_name='Latest Model',
                                           auto_delete_file=False)

+        if self.comet_logger:
+            self.comet_logger.on_model_save(last, epoch, final_epoch, best_fitness, fi)
+
     def on_train_end(self, last, best, epoch, results):
         # Callback runs on training end, i.e. saving best model
         if self.plots:
@@ -237,10 +296,16 @@ class Loggers():
                                     name='Best Model',
                                     auto_delete_file=False)

+        if self.comet_logger:
+            final_results = dict(zip(self.keys[3:10], results))
+            self.comet_logger.on_train_end(files, self.save_dir, last, best, epoch, final_results)
+
     def on_params_update(self, params: dict):
         # Update hyperparams or configs of the experiment
         if self.wandb:
             self.wandb.wandb_run.config.update(params, allow_val_change=True)
+        if self.comet_logger:
+            self.comet_logger.on_params_update(params)


 class GenericLogger:
@@ -328,7 +393,7 @@ def log_tensorboard_graph(tb, model, imgsz=(640, 640)):
             warnings.simplefilter('ignore')  # suppress jit trace warning
             tb.add_graph(torch.jit.trace(de_parallel(model), im, strict=False), [])
     except Exception as e:
-        print(f'WARNING: TensorBoard graph visualization failure {e}')
+        LOGGER.warning(f'WARNING ⚠️ TensorBoard graph visualization failure {e}')


 def web_project_name(project):
utils/loggers/clearml/clearml_utils.py
@@ -11,6 +11,7 @@ from utils.plots import Annotator, colors
 try:
     import clearml
     from clearml import Dataset, Task

     assert hasattr(clearml, '__version__')  # verify package import not local dir
 except (ImportError, AssertionError):
     clearml = None
utils/loggers/comet/README.md (new file, 256 lines)
@@ -0,0 +1,256 @@
<img src="https://cdn.comet.ml/img/notebook_logo.png">

# YOLOv5 with Comet

This guide covers how to use YOLOv5 with [Comet](https://bit.ly/yolov5-readme-comet).

# About Comet

Comet builds tools that help data scientists, engineers, and team leaders accelerate and optimize machine learning and deep learning models.

Track and visualize model metrics in real time, save your hyperparameters, datasets, and model checkpoints, and visualize your model predictions with [Comet Custom Panels](https://bit.ly/yolov5-colab-comet-panels)!
Comet makes sure you never lose track of your work and makes it easy to share results and collaborate across teams of all sizes!

# Getting Started

## Install Comet

```shell
pip install comet_ml
```

## Configure Comet Credentials

There are two ways to configure Comet with YOLOv5: you can either set your credentials through environment variables or through a configuration file.

**Environment Variables**

```shell
export COMET_API_KEY=<Your Comet API Key>
export COMET_PROJECT_NAME=<Your Comet Project Name> # This will default to 'yolov5'
```

**Comet Configuration File**

Alternatively, create a `.comet.config` file in your working directory and set your credentials there.

```
[comet]
api_key=<Your Comet API Key>
project_name=<Your Comet Project Name> # This will default to 'yolov5'
```

## Run the Training Script

```shell
# Train YOLOv5s on COCO128 for 5 epochs
python train.py --img 640 --batch 16 --epochs 5 --data coco128.yaml --weights yolov5s.pt
```

That's it! Comet will automatically log your hyperparameters, command line arguments, and training and validation metrics. You can visualize and analyze your runs in the Comet UI.

<img width="1920" alt="yolo-ui" src="https://user-images.githubusercontent.com/7529846/187608607-ff89c3d5-1b8b-4743-a974-9275301b0524.png">
# Try out an Example!

Check out an example of a [completed run here](https://www.comet.com/examples/comet-example-yolov5/a0e29e0e9b984e4a822db2a62d0cb357?experiment-tab=chart&showOutliers=true&smoothing=0&transformY=smoothing&xAxis=step&ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration)

Or better yet, try it out yourself in this Colab Notebook:

[Open In Colab](https://colab.research.google.com/drive/1RG0WOQyxlDlo5Km8GogJpIEJlg_5lyYO?usp=sharing)

# Log automatically

By default, Comet will log the following items.

## Metrics

- Box Loss, Object Loss, and Classification Loss for the training and validation data
- mAP_0.5 and mAP_0.5:0.95 metrics for the validation data
- Precision and Recall for the validation data

## Parameters

- Model hyperparameters
- All parameters passed through the command line options

## Visualizations

- Confusion Matrix of the model predictions on the validation data
- Plots for the PR and F1 curves across all classes
- Correlogram of the class labels

# Configure Comet Logging

Comet can be configured to log additional data either through command line flags passed to the training script or through environment variables.

```shell
export COMET_MODE=online # Set whether to run Comet in 'online' or 'offline' mode. Defaults to online
export COMET_MODEL_NAME=<your model name> # Set the name for the saved model. Defaults to yolov5
export COMET_LOG_CONFUSION_MATRIX=false # Set to disable logging a Comet Confusion Matrix. Defaults to true
export COMET_MAX_IMAGE_UPLOADS=<number of allowed images to upload to Comet> # Controls how many total image predictions to log to Comet. Defaults to 100.
export COMET_LOG_PER_CLASS_METRICS=true # Set to log evaluation metrics for each detected class at the end of training. Defaults to false
export COMET_DEFAULT_CHECKPOINT_FILENAME=<your checkpoint filename> # Set this if you would like to resume training from a different checkpoint. Defaults to 'last.pt'
export COMET_LOG_BATCH_LEVEL_METRICS=true # Set this if you would like to log training metrics at the batch level. Defaults to false.
export COMET_LOG_PREDICTIONS=true # Set this to false to disable logging model predictions
```
## Logging Checkpoints with Comet

Logging models to Comet is disabled by default. To enable it, pass the `save-period` argument to the training script. This will save the logged checkpoints to Comet at the interval given by `save-period`.

```shell
python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt \
--save-period 1
```

## Logging Model Predictions

By default, model predictions (images, ground truth labels, and bounding boxes) will be logged to Comet.

You can control the frequency of logged predictions and the associated images by passing the `bbox_interval` command line argument. Predictions can be visualized using Comet's Object Detection Custom Panel. This frequency corresponds to every Nth batch of data per epoch. In the example below, we are logging every 2nd batch of data for each epoch; a worked example of this interval arithmetic follows the command.

**Note:** The YOLOv5 validation dataloader will default to a batch size of 32, so you will have to set the logging frequency accordingly.

Here is an [example project using the Panel](https://www.comet.com/examples/comet-example-yolov5?shareable=YcwMiJaZSXfcEXpGOHDD12vA1&ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration)

```shell
python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt \
--bbox_interval 2
```
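A quick worked example of how `bbox_interval` interacts with the validation batch size (the dataset size is an illustrative assumption):

```python
# Illustrative: 160 validation images at the default batch size of 32
num_batches = 160 // 32           # 5 validation batches per epoch
bbox_interval = 2                 # --bbox_interval 2
logged = [b for b in range(1, num_batches + 1) if b % bbox_interval == 0]
print(logged)                     # [2, 4] -> every 2nd batch is logged
```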
### Controlling the number of Prediction Images logged to Comet

When logging predictions from YOLOv5, Comet will log the images associated with each set of predictions. By default, a maximum of 100 validation images are logged. You can increase or decrease this number using the `COMET_MAX_IMAGE_UPLOADS` environment variable.

```shell
env COMET_MAX_IMAGE_UPLOADS=200 python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt \
--bbox_interval 1
```

### Logging Class Level Metrics

Use the `COMET_LOG_PER_CLASS_METRICS` environment variable to log mAP, precision, recall, and f1 for each class.

```shell
env COMET_LOG_PER_CLASS_METRICS=true python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt
```

## Uploading a Dataset to Comet Artifacts

If you would like to store your data using [Comet Artifacts](https://www.comet.com/docs/v2/guides/data-management/using-artifacts/#learn-more?ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration), you can do so using the `upload_dataset` flag.

The dataset must be organized in the way described in the [YOLOv5 documentation](https://docs.ultralytics.com/tutorials/train-custom-datasets/#3-organize-directories). The dataset config `yaml` file must follow the same format as that of the `coco128.yaml` file.

```shell
python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data coco128.yaml \
--weights yolov5s.pt \
--upload_dataset
```

You can find the uploaded dataset in the Artifacts tab in your Comet Workspace.
<img width="1073" alt="artifact-1" src="https://user-images.githubusercontent.com/7529846/186929193-162718bf-ec7b-4eb9-8c3b-86b3763ef8ea.png">

You can preview the data directly in the Comet UI.
<img width="1082" alt="artifact-2" src="https://user-images.githubusercontent.com/7529846/186929215-432c36a9-c109-4eb0-944b-84c2786590d6.png">

Artifacts are versioned and also support adding metadata about the dataset. Comet will automatically log the metadata from your dataset `yaml` file.
<img width="963" alt="artifact-3" src="https://user-images.githubusercontent.com/7529846/186929256-9d44d6eb-1a19-42de-889a-bcbca3018f2e.png">

### Using a saved Artifact

If you would like to use a dataset from Comet Artifacts, set the `path` variable in your dataset `yaml` file to point to the following Artifact resource URL.

```
# contents of artifact.yaml file
path: "comet://<workspace name>/<artifact name>:<artifact version or alias>"
```

Then pass this file to your training script in the following way:

```shell
python train.py \
--img 640 \
--batch 16 \
--epochs 5 \
--data artifact.yaml \
--weights yolov5s.pt
```

Artifacts also allow you to track the lineage of data as it flows through your experimentation workflow. Here you can see a graph that shows all the experiments that have used your uploaded dataset.
<img width="1391" alt="artifact-4" src="https://user-images.githubusercontent.com/7529846/186929264-4c4014fa-fe51-4f3c-a5c5-f6d24649b1b4.png">

## Resuming a Training Run

If your training run is interrupted for any reason, e.g. a disrupted internet connection, you can resume the run using the `resume` flag and the Comet Run Path.

The Run Path has the following format: `comet://<your workspace name>/<your project name>/<experiment id>`.

This will restore the run to its state before the interruption, which includes restoring the model from a checkpoint, restoring all hyperparameters and training arguments, and downloading Comet dataset Artifacts if they were used in the original run. The resumed run will continue logging to the existing Experiment in the Comet UI.

```shell
python train.py \
--resume "comet://<your run path>"
```

## Hyperparameter Search with the Comet Optimizer

YOLOv5 is also integrated with Comet's Optimizer, making it simple to visualize hyperparameter sweeps in the Comet UI.

### Configuring an Optimizer Sweep

To configure the Comet Optimizer, you will have to create a JSON file with the information about the sweep. An example file has been provided in `utils/loggers/comet/optimizer_config.json`.

```shell
python utils/loggers/comet/hpo.py \
--comet_optimizer_config "utils/loggers/comet/optimizer_config.json"
```

The `hpo.py` script accepts the same arguments as `train.py`. If you wish to pass additional arguments to your sweep, simply add them after the config argument.

```shell
python utils/loggers/comet/hpo.py \
--comet_optimizer_config "utils/loggers/comet/optimizer_config.json" \
--save-period 1 \
--bbox_interval 1
```

### Running a Sweep in Parallel

```shell
comet optimizer -j <set number of workers> utils/loggers/comet/hpo.py \
"utils/loggers/comet/optimizer_config.json"
```

### Visualizing Results

Comet provides a number of ways to visualize the results of your sweep. Take a look at a [project with a completed sweep here](https://www.comet.com/examples/comet-example-yolov5/view/PrlArHGuuhDTKC1UuBmTtOSXD/panels?ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration)

<img width="1626" alt="hyperparameter-yolo" src="https://user-images.githubusercontent.com/7529846/186914869-7dc1de14-583f-4323-967b-c9a66a29e495.png">
utils/loggers/comet/__init__.py (new file, 501 lines)
@@ -0,0 +1,501 @@
import glob
import json
import logging
import os
import sys
from pathlib import Path

logger = logging.getLogger(__name__)

FILE = Path(__file__).resolve()
ROOT = FILE.parents[3]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH

try:
    import comet_ml

    # Project Configuration
    config = comet_ml.config.get_config()
    COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5")
except (ModuleNotFoundError, ImportError):
    comet_ml = None
    COMET_PROJECT_NAME = None

import PIL
import torch
import torchvision.transforms as T
import yaml

from utils.dataloaders import img2label_paths
from utils.general import check_dataset, scale_boxes, xywh2xyxy
from utils.metrics import box_iou

COMET_PREFIX = "comet://"

COMET_MODE = os.getenv("COMET_MODE", "online")

# Model Saving Settings
COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5")

# Dataset Artifact Settings
COMET_UPLOAD_DATASET = os.getenv("COMET_UPLOAD_DATASET", "false").lower() == "true"

# Evaluation Settings
COMET_LOG_CONFUSION_MATRIX = os.getenv("COMET_LOG_CONFUSION_MATRIX", "true").lower() == "true"
COMET_LOG_PREDICTIONS = os.getenv("COMET_LOG_PREDICTIONS", "true").lower() == "true"
COMET_MAX_IMAGE_UPLOADS = int(os.getenv("COMET_MAX_IMAGE_UPLOADS", 100))

# Confusion Matrix Settings
CONF_THRES = float(os.getenv("CONF_THRES", 0.001))
IOU_THRES = float(os.getenv("IOU_THRES", 0.6))

# Batch Logging Settings
COMET_LOG_BATCH_METRICS = os.getenv("COMET_LOG_BATCH_METRICS", "false").lower() == "true"
COMET_BATCH_LOGGING_INTERVAL = os.getenv("COMET_BATCH_LOGGING_INTERVAL", 1)
COMET_PREDICTION_LOGGING_INTERVAL = os.getenv("COMET_PREDICTION_LOGGING_INTERVAL", 1)
COMET_LOG_PER_CLASS_METRICS = os.getenv("COMET_LOG_PER_CLASS_METRICS", "false").lower() == "true"

RANK = int(os.getenv("RANK", -1))

to_pil = T.ToPILImage()
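Note that the boolean flags above are parsed by string comparison; a small sketch of that convention:

```python
import os

# Only a case-insensitive "true" enables a flag; any other value is False.
os.environ["COMET_LOG_PREDICTIONS"] = "True"
enabled = os.getenv("COMET_LOG_PREDICTIONS", "true").lower() == "true"
print(enabled)  # True
```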
class CometLogger:
    """Log metrics, parameters, source code, models and much more
    with Comet
    """

    def __init__(self, opt, hyp, run_id=None, job_type="Training", **experiment_kwargs) -> None:
        self.job_type = job_type
        self.opt = opt
        self.hyp = hyp

        # Comet Flags
        self.comet_mode = COMET_MODE

        self.save_model = opt.save_period > -1
        self.model_name = COMET_MODEL_NAME

        # Batch Logging Settings
        self.log_batch_metrics = COMET_LOG_BATCH_METRICS
        self.comet_log_batch_interval = COMET_BATCH_LOGGING_INTERVAL

        # Dataset Artifact Settings
        self.upload_dataset = self.opt.upload_dataset if self.opt.upload_dataset else COMET_UPLOAD_DATASET
        self.resume = self.opt.resume

        # Default parameters to pass to Experiment objects
        self.default_experiment_kwargs = {
            "log_code": False,
            "log_env_gpu": True,
            "log_env_cpu": True,
            "project_name": COMET_PROJECT_NAME,}
        self.default_experiment_kwargs.update(experiment_kwargs)
        self.experiment = self._get_experiment(self.comet_mode, run_id)

        self.data_dict = self.check_dataset(self.opt.data)
        self.class_names = self.data_dict["names"]
        self.num_classes = self.data_dict["nc"]

        self.logged_images_count = 0
        self.max_images = COMET_MAX_IMAGE_UPLOADS

        if run_id is None:
            self.experiment.log_other("Created from", "YOLOv5")
            if not isinstance(self.experiment, comet_ml.OfflineExperiment):
                workspace, project_name, experiment_id = self.experiment.url.split("/")[-3:]
                self.experiment.log_other(
                    "Run Path",
                    f"{workspace}/{project_name}/{experiment_id}",
                )
            self.log_parameters(vars(opt))
            self.log_parameters(self.opt.hyp)
            self.log_asset_data(
                self.opt.hyp,
                name="hyperparameters.json",
                metadata={"type": "hyp-config-file"},
            )
            self.log_asset(
                f"{self.opt.save_dir}/opt.yaml",
                metadata={"type": "opt-config-file"},
            )

        self.comet_log_confusion_matrix = COMET_LOG_CONFUSION_MATRIX

        if hasattr(self.opt, "conf_thres"):
            self.conf_thres = self.opt.conf_thres
        else:
            self.conf_thres = CONF_THRES
        if hasattr(self.opt, "iou_thres"):
            self.iou_thres = self.opt.iou_thres
        else:
            self.iou_thres = IOU_THRES

        self.log_parameters({"val_iou_threshold": self.iou_thres, "val_conf_threshold": self.conf_thres})

        self.comet_log_predictions = COMET_LOG_PREDICTIONS
        if self.opt.bbox_interval == -1:
            self.comet_log_prediction_interval = 1 if self.opt.epochs < 10 else self.opt.epochs // 10
        else:
            self.comet_log_prediction_interval = self.opt.bbox_interval

        if self.comet_log_predictions:
            self.metadata_dict = {}
            self.logged_image_names = []

        self.comet_log_per_class_metrics = COMET_LOG_PER_CLASS_METRICS

        self.experiment.log_others({
            "comet_mode": COMET_MODE,
            "comet_max_image_uploads": COMET_MAX_IMAGE_UPLOADS,
            "comet_log_per_class_metrics": COMET_LOG_PER_CLASS_METRICS,
            "comet_log_batch_metrics": COMET_LOG_BATCH_METRICS,
            "comet_log_confusion_matrix": COMET_LOG_CONFUSION_MATRIX,
            "comet_model_name": COMET_MODEL_NAME,})

        # Check if running the Experiment with the Comet Optimizer
        if hasattr(self.opt, "comet_optimizer_id"):
            self.experiment.log_other("optimizer_id", self.opt.comet_optimizer_id)
            self.experiment.log_other("optimizer_objective", self.opt.comet_optimizer_objective)
            self.experiment.log_other("optimizer_metric", self.opt.comet_optimizer_metric)
            self.experiment.log_other("optimizer_parameters", json.dumps(self.hyp))
    def _get_experiment(self, mode, experiment_id=None):
        if mode == "offline":
            if experiment_id is not None:
                return comet_ml.ExistingOfflineExperiment(
                    previous_experiment=experiment_id,
                    **self.default_experiment_kwargs,
                )

            return comet_ml.OfflineExperiment(**self.default_experiment_kwargs,)

        else:
            try:
                if experiment_id is not None:
                    return comet_ml.ExistingExperiment(
                        previous_experiment=experiment_id,
                        **self.default_experiment_kwargs,
                    )

                return comet_ml.Experiment(**self.default_experiment_kwargs)

            except ValueError:
                logger.warning("COMET WARNING: "
                               "Comet credentials have not been set. "
                               "Comet will default to offline logging. "
                               "Please set your credentials to enable online logging.")
                return self._get_experiment("offline", experiment_id)

        return

    def log_metrics(self, log_dict, **kwargs):
        self.experiment.log_metrics(log_dict, **kwargs)

    def log_parameters(self, log_dict, **kwargs):
        self.experiment.log_parameters(log_dict, **kwargs)

    def log_asset(self, asset_path, **kwargs):
        self.experiment.log_asset(asset_path, **kwargs)

    def log_asset_data(self, asset, **kwargs):
        self.experiment.log_asset_data(asset, **kwargs)

    def log_image(self, img, **kwargs):
        self.experiment.log_image(img, **kwargs)

    def log_model(self, path, opt, epoch, fitness_score, best_model=False):
        if not self.save_model:
            return

        model_metadata = {
            "fitness_score": fitness_score[-1],
            "epochs_trained": epoch + 1,
            "save_period": opt.save_period,
            "total_epochs": opt.epochs,}

        model_files = glob.glob(f"{path}/*.pt")
        for model_path in model_files:
            name = Path(model_path).name

            self.experiment.log_model(
                self.model_name,
                file_or_folder=model_path,
                file_name=name,
                metadata=model_metadata,
                overwrite=True,
            )

    def check_dataset(self, data_file):
        with open(data_file) as f:
            data_config = yaml.safe_load(f)

        if data_config['path'].startswith(COMET_PREFIX):
            path = data_config['path'].replace(COMET_PREFIX, "")
            data_dict = self.download_dataset_artifact(path)

            return data_dict

        self.log_asset(self.opt.data, metadata={"type": "data-config-file"})

        return check_dataset(data_file)
    def log_predictions(self, image, labelsn, path, shape, predn):
        if self.logged_images_count >= self.max_images:
            return
        detections = predn[predn[:, 4] > self.conf_thres]
        iou = box_iou(labelsn[:, 1:], detections[:, :4])
        mask, _ = torch.where(iou > self.iou_thres)
        if len(mask) == 0:
            return

        filtered_detections = detections[mask]
        filtered_labels = labelsn[mask]

        image_id = path.split("/")[-1].split(".")[0]
        image_name = f"{image_id}_curr_epoch_{self.experiment.curr_epoch}"
        if image_name not in self.logged_image_names:
            native_scale_image = PIL.Image.open(path)
            self.log_image(native_scale_image, name=image_name)
            self.logged_image_names.append(image_name)

        metadata = []
        for cls, *xyxy in filtered_labels.tolist():
            metadata.append({
                "label": f"{self.class_names[int(cls)]}-gt",
                "score": 100,
                "box": {
                    "x": xyxy[0],
                    "y": xyxy[1],
                    "x2": xyxy[2],
                    "y2": xyxy[3]},})
        for *xyxy, conf, cls in filtered_detections.tolist():
            metadata.append({
                "label": f"{self.class_names[int(cls)]}",
                "score": conf * 100,
                "box": {
                    "x": xyxy[0],
                    "y": xyxy[1],
                    "x2": xyxy[2],
                    "y2": xyxy[3]},})

        self.metadata_dict[image_name] = metadata
        self.logged_images_count += 1

        return
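For reference, a sketch of what one metadata_dict entry looks like after log_predictions runs (class name and coordinates are illustrative):

```python
# Hypothetical metadata_dict entry for one logged image:
# ground-truth boxes carry a "-gt" suffix and a fixed score of 100;
# predictions carry the raw class name and confidence scaled to 0-100.
example_entry = [
    {"label": "person-gt", "score": 100, "box": {"x": 10.0, "y": 20.0, "x2": 110.0, "y2": 220.0}},
    {"label": "person", "score": 87.5, "box": {"x": 12.0, "y": 18.0, "x2": 108.0, "y2": 224.0}},
]
```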
    def preprocess_prediction(self, image, labels, shape, pred):
        nl, _ = labels.shape[0], pred.shape[0]

        # Predictions
        if self.opt.single_cls:
            pred[:, 5] = 0

        predn = pred.clone()
        scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1])

        labelsn = None
        if nl:
            tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
            scale_boxes(image.shape[1:], tbox, shape[0], shape[1])  # native-space labels
            labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
            scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1])  # native-space pred

        return predn, labelsn

    def add_assets_to_artifact(self, artifact, path, asset_path, split):
        img_paths = sorted(glob.glob(f"{asset_path}/*"))
        label_paths = img2label_paths(img_paths)

        for image_file, label_file in zip(img_paths, label_paths):
            image_logical_path, label_logical_path = map(lambda x: os.path.relpath(x, path), [image_file, label_file])

            try:
                artifact.add(image_file, logical_path=image_logical_path, metadata={"split": split})
                artifact.add(label_file, logical_path=label_logical_path, metadata={"split": split})
            except ValueError as e:
                logger.error('COMET ERROR: Error adding file to Artifact. Skipping file.')
                logger.error(f"COMET ERROR: {e}")
                continue

        return artifact
    def upload_dataset_artifact(self):
        dataset_name = self.data_dict.get("dataset_name", "yolov5-dataset")
        path = str((ROOT / Path(self.data_dict["path"])).resolve())

        metadata = self.data_dict.copy()
        for key in ["train", "val", "test"]:
            split_path = metadata.get(key)
            if split_path is not None:
                metadata[key] = split_path.replace(path, "")

        artifact = comet_ml.Artifact(name=dataset_name, artifact_type="dataset", metadata=metadata)
        for key in metadata.keys():
            if key in ["train", "val", "test"]:
                if isinstance(self.upload_dataset, str) and (key != self.upload_dataset):
                    continue

                asset_path = self.data_dict.get(key)
                if asset_path is not None:
                    artifact = self.add_assets_to_artifact(artifact, path, asset_path, key)

        self.experiment.log_artifact(artifact)

        return

    def download_dataset_artifact(self, artifact_path):
        logged_artifact = self.experiment.get_artifact(artifact_path)
        artifact_save_dir = str(Path(self.opt.save_dir) / logged_artifact.name)
        logged_artifact.download(artifact_save_dir)

        metadata = logged_artifact.metadata
        data_dict = metadata.copy()
        data_dict["path"] = artifact_save_dir
        data_dict["names"] = {int(k): v for k, v in metadata.get("names").items()}

        data_dict = self.update_data_paths(data_dict)
        return data_dict

    def update_data_paths(self, data_dict):
        path = data_dict.get("path", "")

        for split in ["train", "val", "test"]:
            if data_dict.get(split):
                split_path = data_dict.get(split)
                data_dict[split] = (f"{path}/{split_path}" if isinstance(split, str) else [
                    f"{path}/{x}" for x in split_path])

        return data_dict

    def on_pretrain_routine_end(self, paths):
        if self.opt.resume:
            return

        for path in paths:
            self.log_asset(str(path))

        if self.upload_dataset:
            if not self.resume:
                self.upload_dataset_artifact()

        return
    def on_train_start(self):
        self.log_parameters(self.hyp)

    def on_train_epoch_start(self):
        return

    def on_train_epoch_end(self, epoch):
        self.experiment.curr_epoch = epoch

        return

    def on_train_batch_start(self):
        return

    def on_train_batch_end(self, log_dict, step):
        self.experiment.curr_step = step
        if self.log_batch_metrics and (step % self.comet_log_batch_interval == 0):
            self.log_metrics(log_dict, step=step)

        return

    def on_train_end(self, files, save_dir, last, best, epoch, results):
        if self.comet_log_predictions:
            curr_epoch = self.experiment.curr_epoch
            self.experiment.log_asset_data(self.metadata_dict, "image-metadata.json", epoch=curr_epoch)

        for f in files:
            self.log_asset(f, metadata={"epoch": epoch})
        self.log_asset(f"{save_dir}/results.csv", metadata={"epoch": epoch})

        if not self.opt.evolve:
            model_path = str(best if best.exists() else last)
            name = Path(model_path).name
            if self.save_model:
                self.experiment.log_model(
                    self.model_name,
                    file_or_folder=model_path,
                    file_name=name,
                    overwrite=True,
                )

        # Check if running Experiment with Comet Optimizer
        if hasattr(self.opt, 'comet_optimizer_id'):
            metric = results.get(self.opt.comet_optimizer_metric)
            self.experiment.log_other('optimizer_metric_value', metric)

        self.finish_run()

    def on_val_start(self):
        return

    def on_val_batch_start(self):
        return

    def on_val_batch_end(self, batch_i, images, targets, paths, shapes, outputs):
        if not (self.comet_log_predictions and ((batch_i + 1) % self.comet_log_prediction_interval == 0)):
            return

        for si, pred in enumerate(outputs):
            if len(pred) == 0:
                continue

            image = images[si]
            labels = targets[targets[:, 0] == si, 1:]
            shape = shapes[si]
            path = paths[si]
            predn, labelsn = self.preprocess_prediction(image, labels, shape, pred)
            if labelsn is not None:
                self.log_predictions(image, labelsn, path, shape, predn)

        return

    def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
        if self.comet_log_per_class_metrics:
            if self.num_classes > 1:
                for i, c in enumerate(ap_class):
                    class_name = self.class_names[c]
                    self.experiment.log_metrics(
                        {
                            'mAP@.5': ap50[i],
                            'mAP@.5:.95': ap[i],
                            'precision': p[i],
                            'recall': r[i],
                            'f1': f1[i],
                            'true_positives': tp[i],
                            'false_positives': fp[i],
                            'support': nt[c]},
                        prefix=class_name)

        if self.comet_log_confusion_matrix:
            epoch = self.experiment.curr_epoch
            class_names = list(self.class_names.values())
            class_names.append("background")
            num_classes = len(class_names)

            self.experiment.log_confusion_matrix(
                matrix=confusion_matrix.matrix,
                max_categories=num_classes,
                labels=class_names,
                epoch=epoch,
                column_label='Actual Category',
                row_label='Predicted Category',
                file_name=f"confusion-matrix-epoch-{epoch}.json",
            )

    def on_fit_epoch_end(self, result, epoch):
        self.log_metrics(result, epoch=epoch)

    def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
        if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1:
            self.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)

    def on_params_update(self, params):
        self.log_parameters(params)

    def finish_run(self):
        self.experiment.end()
utils/loggers/comet/comet_utils.py (new file, 150 lines)
@@ -0,0 +1,150 @@
import logging
import os
from urllib.parse import urlparse

try:
    import comet_ml
except (ModuleNotFoundError, ImportError):
    comet_ml = None

import yaml

logger = logging.getLogger(__name__)

COMET_PREFIX = "comet://"
COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5")
COMET_DEFAULT_CHECKPOINT_FILENAME = os.getenv("COMET_DEFAULT_CHECKPOINT_FILENAME", "last.pt")


def download_model_checkpoint(opt, experiment):
    model_dir = f"{opt.project}/{experiment.name}"
    os.makedirs(model_dir, exist_ok=True)

    model_name = COMET_MODEL_NAME
    model_asset_list = experiment.get_model_asset_list(model_name)

    if len(model_asset_list) == 0:
        logger.error(f"COMET ERROR: No checkpoints found for model name : {model_name}")
        return

    model_asset_list = sorted(
        model_asset_list,
        key=lambda x: x["step"],
        reverse=True,
    )
    logged_checkpoint_map = {asset["fileName"]: asset["assetId"] for asset in model_asset_list}

    resource_url = urlparse(opt.weights)
    checkpoint_filename = resource_url.query

    if checkpoint_filename:
        asset_id = logged_checkpoint_map.get(checkpoint_filename)
    else:
        asset_id = logged_checkpoint_map.get(COMET_DEFAULT_CHECKPOINT_FILENAME)
        checkpoint_filename = COMET_DEFAULT_CHECKPOINT_FILENAME

    if asset_id is None:
        logger.error(f"COMET ERROR: Checkpoint {checkpoint_filename} not found in the given Experiment")
        return

    try:
        logger.info(f"COMET INFO: Downloading checkpoint {checkpoint_filename}")
        asset_filename = checkpoint_filename

        model_binary = experiment.get_asset(asset_id, return_type="binary", stream=False)
        model_download_path = f"{model_dir}/{asset_filename}"
        with open(model_download_path, "wb") as f:
            f.write(model_binary)

        opt.weights = model_download_path

    except Exception as e:
        logger.warning("COMET WARNING: Unable to download checkpoint from Comet")
        logger.exception(e)
def set_opt_parameters(opt, experiment):
    """Update the opts Namespace with parameters
    from Comet's ExistingExperiment when resuming a run

    Args:
        opt (argparse.Namespace): Namespace of command line options
        experiment (comet_ml.APIExperiment): Comet API Experiment object
    """
    asset_list = experiment.get_asset_list()
    resume_string = opt.resume

    for asset in asset_list:
        if asset["fileName"] == "opt.yaml":
            asset_id = asset["assetId"]
            asset_binary = experiment.get_asset(asset_id, return_type="binary", stream=False)
            opt_dict = yaml.safe_load(asset_binary)
            for key, value in opt_dict.items():
                setattr(opt, key, value)
            opt.resume = resume_string

    # Save hyperparameters to YAML file
    # Necessary to pass checks in training script
    save_dir = f"{opt.project}/{experiment.name}"
    os.makedirs(save_dir, exist_ok=True)

    hyp_yaml_path = f"{save_dir}/hyp.yaml"
    with open(hyp_yaml_path, "w") as f:
        yaml.dump(opt.hyp, f)
    opt.hyp = hyp_yaml_path


def check_comet_weights(opt):
    """Downloads model weights from Comet and updates the
    weights path to point to saved weights location

    Args:
        opt (argparse.Namespace): Command Line arguments passed
            to YOLOv5 training script

    Returns:
        None/bool: Return True if weights are successfully downloaded
            else return None
    """
    if comet_ml is None:
        return

    if isinstance(opt.weights, str):
        if opt.weights.startswith(COMET_PREFIX):
            api = comet_ml.API()
            resource = urlparse(opt.weights)
            experiment_path = f"{resource.netloc}{resource.path}"
            experiment = api.get(experiment_path)
            download_model_checkpoint(opt, experiment)
            return True

    return None


def check_comet_resume(opt):
    """Restores run parameters to its original state based on the model checkpoint
    and logged Experiment parameters.

    Args:
        opt (argparse.Namespace): Command Line arguments passed
            to YOLOv5 training script

    Returns:
        None/bool: Return True if the run is restored successfully
            else return None
    """
    if comet_ml is None:
        return

    if isinstance(opt.resume, str):
        if opt.resume.startswith(COMET_PREFIX):
            api = comet_ml.API()
            resource = urlparse(opt.resume)
            experiment_path = f"{resource.netloc}{resource.path}"
            experiment = api.get(experiment_path)
            set_opt_parameters(opt, experiment)
            download_model_checkpoint(opt, experiment)

            return True

    return None
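A hedged sketch of how a training entrypoint can wire in these helpers (this mirrors how upstream train.py uses them; parse_opt and the surrounding script are assumed, not shown in this diff):

```python
# Hypothetical wiring inside a train.py-style entrypoint:
from utils.loggers.comet.comet_utils import check_comet_resume, check_comet_weights

opt = parse_opt()                    # assumed argparse Namespace with .resume / .weights
if check_comet_resume(opt):          # restores opt and downloads the checkpoint for comet:// resumes
    print(f"Resuming from Comet checkpoint: {opt.weights}")
elif check_comet_weights(opt):       # swaps comet:// weights for a local download
    print(f"Loaded weights from Comet: {opt.weights}")
```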
utils/loggers/comet/hpo.py (new file, 118 lines)
@@ -0,0 +1,118 @@
|
import argparse
import json
import logging
import os
import sys
from pathlib import Path

import comet_ml

logger = logging.getLogger(__name__)

FILE = Path(__file__).resolve()
ROOT = FILE.parents[3]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH

from train import train
from utils.callbacks import Callbacks
from utils.general import increment_path
from utils.torch_utils import select_device

# Project Configuration
config = comet_ml.config.get_config()
COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5")


def get_args(known=False):
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
    parser.add_argument('--epochs', type=int, default=300, help='total training epochs')
    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
    parser.add_argument('--noplots', action='store_true', help='save no plot files')
    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--quad', action='store_true', help='quad dataloader')
    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
    parser.add_argument('--seed', type=int, default=0, help='Global training seed')
    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')

    # Weights & Biases arguments
    parser.add_argument('--entity', default=None, help='W&B: Entity')
    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')

    # Comet Arguments
    parser.add_argument("--comet_optimizer_config", type=str, help="Comet: Path to a Comet Optimizer Config File.")
    parser.add_argument("--comet_optimizer_id", type=str, help="Comet: ID of the Comet Optimizer sweep.")
    parser.add_argument("--comet_optimizer_objective", type=str, help="Comet: Set to 'minimize' or 'maximize'.")
    parser.add_argument("--comet_optimizer_metric", type=str, help="Comet: Metric to Optimize.")
    parser.add_argument("--comet_optimizer_workers",
                        type=int,
                        default=1,
                        help="Comet: Number of Parallel Workers to use with the Comet Optimizer.")

    return parser.parse_known_args()[0] if known else parser.parse_args()


def run(parameters, opt):
    hyp_dict = {k: v for k, v in parameters.items() if k not in ["epochs", "batch_size"]}

    opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve))
    opt.batch_size = parameters.get("batch_size")
    opt.epochs = parameters.get("epochs")

    device = select_device(opt.device, batch_size=opt.batch_size)
    train(hyp_dict, opt, device, callbacks=Callbacks())


if __name__ == "__main__":
    opt = get_args(known=True)

    opt.weights = str(opt.weights)
    opt.cfg = str(opt.cfg)
    opt.data = str(opt.data)
    opt.project = str(opt.project)

    optimizer_id = os.getenv("COMET_OPTIMIZER_ID")
    if optimizer_id is None:
        with open(opt.comet_optimizer_config) as f:
            optimizer_config = json.load(f)
        optimizer = comet_ml.Optimizer(optimizer_config)
    else:
        optimizer = comet_ml.Optimizer(optimizer_id)

    opt.comet_optimizer_id = optimizer.id
    status = optimizer.status()

    opt.comet_optimizer_objective = status["spec"]["objective"]
    opt.comet_optimizer_metric = status["spec"]["metric"]

    logger.info("COMET INFO: Starting Hyperparameter Sweep")
    for parameter in optimizer.get_parameters():
        run(parameter["parameters"], opt)
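For orientation, a hedged sketch of driving this sweep interactively; it relies only on the comet_ml.Optimizer calls already used in hpo.py above, and the config path is the file added next in this commit.

# Build an Optimizer from the JSON config (shown below) and peek at one trial.
import json

import comet_ml

with open('utils/loggers/comet/optimizer_config.json') as f:
    optimizer = comet_ml.Optimizer(json.load(f))

print(optimizer.id)  # sweep id; export as COMET_OPTIMIZER_ID to reuse it
print(optimizer.status()['spec'])  # objective and metric, as read by hpo.py
for trial in optimizer.get_parameters():
    print(trial['parameters'])  # e.g. {'lr0': 0.1, 'batch_size': 32, ...}
    break  # inspect the first suggestion only
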
209  utils/loggers/comet/optimizer_config.json  Normal file
@ -0,0 +1,209 @@
{
    "algorithm": "random",
    "parameters": {
        "anchor_t": {"type": "discrete", "values": [2, 8]},
        "batch_size": {"type": "discrete", "values": [16, 32, 64]},
        "box": {"type": "discrete", "values": [0.02, 0.2]},
        "cls": {"type": "discrete", "values": [0.2]},
        "cls_pw": {"type": "discrete", "values": [0.5]},
        "copy_paste": {"type": "discrete", "values": [1]},
        "degrees": {"type": "discrete", "values": [0, 45]},
        "epochs": {"type": "discrete", "values": [5]},
        "fl_gamma": {"type": "discrete", "values": [0]},
        "fliplr": {"type": "discrete", "values": [0]},
        "flipud": {"type": "discrete", "values": [0]},
        "hsv_h": {"type": "discrete", "values": [0]},
        "hsv_s": {"type": "discrete", "values": [0]},
        "hsv_v": {"type": "discrete", "values": [0]},
        "iou_t": {"type": "discrete", "values": [0.7]},
        "lr0": {"type": "discrete", "values": [1e-05, 0.1]},
        "lrf": {"type": "discrete", "values": [0.01, 1]},
        "mixup": {"type": "discrete", "values": [1]},
        "momentum": {"type": "discrete", "values": [0.6]},
        "mosaic": {"type": "discrete", "values": [0]},
        "obj": {"type": "discrete", "values": [0.2]},
        "obj_pw": {"type": "discrete", "values": [0.5]},
        "optimizer": {"type": "categorical", "values": ["SGD", "Adam", "AdamW"]},
        "perspective": {"type": "discrete", "values": [0]},
        "scale": {"type": "discrete", "values": [0]},
        "shear": {"type": "discrete", "values": [0]},
        "translate": {"type": "discrete", "values": [0]},
        "warmup_bias_lr": {"type": "discrete", "values": [0, 0.2]},
        "warmup_epochs": {"type": "discrete", "values": [5]},
        "warmup_momentum": {"type": "discrete", "values": [0, 0.95]},
        "weight_decay": {"type": "discrete", "values": [0, 0.001]}
    },
    "spec": {
        "maxCombo": 0,
        "metric": "metrics/mAP_0.5",
        "objective": "maximize"
    },
    "trials": 1
}
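A quick sanity check on this config (a sketch, not part of the commit): the discrete/categorical value lists above define the grid the random sweep samples from, and its size can be counted directly.

# Count the distinct hyperparameter combinations in optimizer_config.json.
import json
from math import prod

with open('utils/loggers/comet/optimizer_config.json') as f:
    cfg = json.load(f)

sizes = [len(p['values']) for p in cfg['parameters'].values()]
print(prod(sizes))  # 2^8 * 3^2 = 2304 combinations
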
utils/metrics.py
@ -28,7 +28,7 @@ def smooth(y, f=0.05):
     return np.convolve(yp, np.ones(nf) / nf, mode='valid')  # y-smoothed


-def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16):
+def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16, prefix=""):
     """ Compute the average precision, given the recall and precision curves.
     Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
     # Arguments
@ -83,10 +83,10 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names
     names = [v for k, v in names.items() if k in unique_classes]  # list: only classes that have data
     names = dict(enumerate(names))  # to dict
     if plot:
-        plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names)
-        plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1')
-        plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision')
-        plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall')
+        plot_pr_curve(px, py, ap, Path(save_dir) / f'{prefix}PR_curve.png', names)
+        plot_mc_curve(px, f1, Path(save_dir) / f'{prefix}F1_curve.png', names, ylabel='F1')
+        plot_mc_curve(px, p, Path(save_dir) / f'{prefix}P_curve.png', names, ylabel='Precision')
+        plot_mc_curve(px, r, Path(save_dir) / f'{prefix}R_curve.png', names, ylabel='Recall')

     i = smooth(f1.mean(0), 0.1).argmax()  # max F1 index
     p, r, f1 = p[:, i], r[:, i], f1[:, i]
@ -170,12 +170,12 @@ class ConfusionMatrix:
             if n and sum(j) == 1:
                 self.matrix[detection_classes[m1[j]], gc] += 1  # correct
             else:
-                self.matrix[self.nc, gc] += 1  # background FP
+                self.matrix[self.nc, gc] += 1  # true background

         if n:
             for i, dc in enumerate(detection_classes):
                 if not any(m1 == i):
-                    self.matrix[dc, self.nc] += 1  # background FN
+                    self.matrix[dc, self.nc] += 1  # predicted background

     def matrix(self):
         return self.matrix
@ -186,7 +186,7 @@ class ConfusionMatrix:
         # fn = self.matrix.sum(0) - tp  # false negatives (missed detections)
         return tp[:-1], fp[:-1]  # remove background class

-    @TryExcept('WARNING: ConfusionMatrix plot failure: ')
+    @TryExcept('WARNING ⚠️ ConfusionMatrix plot failure: ')
     def plot(self, normalize=True, save_dir='', names=()):
         import seaborn as sn

@ -197,6 +197,7 @@ class ConfusionMatrix:
         nc, nn = self.nc, len(names)  # number of classes, names
         sn.set(font_scale=1.0 if nc < 50 else 0.8)  # for label size
         labels = (0 < nn < 99) and (nn == nc)  # apply names to ticklabels
+        ticklabels = (names + ['background']) if labels else "auto"
         with warnings.catch_warnings():
             warnings.simplefilter('ignore')  # suppress empty matrix RuntimeWarning: All-NaN slice encountered
             sn.heatmap(array,
@ -208,8 +209,8 @@ class ConfusionMatrix:
                        fmt='.2f',
                        square=True,
                        vmin=0.0,
-                       xticklabels=names + ['background FP'] if labels else "auto",
-                       yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1))
+                       xticklabels=ticklabels,
+                       yticklabels=ticklabels).set_facecolor((1, 1, 1))
         ax.set_ylabel('True')
         ax.set_ylabel('Predicted')
         ax.set_title('Confusion Matrix')
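A synthetic smoke test of the new prefix argument (illustrative only; it assumes ap_per_class returns the seven-tuple tp, fp, p, r, f1, ap, unique_classes, which this diff does not show):

# Two fake detections of class 0 scored at 10 IoU thresholds; prefix namespaces
# the saved curve files (MaskPR_curve.png etc.) when plot=True.
import numpy as np

from utils.metrics import ap_per_class

tp = np.array([[True] * 10, [False] * 10])
conf = np.array([0.9, 0.4])
pred_cls = np.array([0, 0])
target_cls = np.array([0, 0])

_, _, p, r, f1, ap, cls_idx = ap_per_class(tp, conf, pred_cls, target_cls,
                                           plot=False, names={0: 'person'}, prefix='Mask')
print(ap.shape)  # (1, 10): AP per class per IoU threshold
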
utils/plots.py
@ -20,9 +20,10 @@ import torch
 from PIL import Image, ImageDraw, ImageFont

 from utils import TryExcept, threaded
-from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_coords, increment_path,
+from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_boxes, increment_path,
                            is_ascii, xywh2xyxy, xyxy2xywh)
 from utils.metrics import fitness
+from utils.segment.general import scale_image

 # Settings
 RANK = int(os.getenv('RANK', -1))
@ -113,6 +114,52 @@ class Annotator:
                         thickness=tf,
                         lineType=cv2.LINE_AA)

+    def masks(self, masks, colors, im_gpu=None, alpha=0.5):
+        """Plot masks at once.
+        Args:
+            masks (tensor): predicted masks on cuda, shape: [n, h, w]
+            colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n]
+            im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1]
+            alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque
+        """
+        if self.pil:
+            # convert to numpy first
+            self.im = np.asarray(self.im).copy()
+        if im_gpu is None:
+            # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...)
+            if len(masks) == 0:
+                return
+            if isinstance(masks, torch.Tensor):
+                masks = torch.as_tensor(masks, dtype=torch.uint8)
+                masks = masks.permute(1, 2, 0).contiguous()
+                masks = masks.cpu().numpy()
+            # masks = np.ascontiguousarray(masks.transpose(1, 2, 0))
+            masks = scale_image(masks.shape[:2], masks, self.im.shape)
+            masks = np.asarray(masks, dtype=np.float32)
+            colors = np.asarray(colors, dtype=np.float32)  # shape(n,3)
+            s = masks.sum(2, keepdims=True).clip(0, 1)  # add all masks together
+            masks = (masks @ colors).clip(0, 255)  # (h,w,n) @ (n,3) = (h,w,3)
+            self.im[:] = masks * alpha + self.im * (1 - s * alpha)
+        else:
+            if len(masks) == 0:
+                self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
+            colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0
+            colors = colors[:, None, None]  # shape(n,1,1,3)
+            masks = masks.unsqueeze(3)  # shape(n,h,w,1)
+            masks_color = masks * (colors * alpha)  # shape(n,h,w,3)
+
+            inv_alph_masks = (1 - masks * alpha).cumprod(0)  # shape(n,h,w,1)
+            mcs = (masks_color * inv_alph_masks).sum(0) * 2  # mask color summand shape(n,h,w,3)
+
+            im_gpu = im_gpu.flip(dims=[0])  # flip channel
+            im_gpu = im_gpu.permute(1, 2, 0).contiguous()  # shape(h,w,3)
+            im_gpu = im_gpu * inv_alph_masks[-1] + mcs
+            im_mask = (im_gpu * 255).byte().cpu().numpy()
+            self.im[:] = scale_image(im_gpu.shape, im_mask, self.im.shape)
+        if self.pil:
+            # convert im back to PIL and update draw
+            self.fromarray(self.im)

     def rectangle(self, xy, fill=None, outline=None, width=1):
         # Add rectangle to image (PIL-only)
         self.draw.rectangle(xy, fill, outline, width)
@ -124,6 +171,11 @@ class Annotator:
             xy[1] += 1 - h
         self.draw.text(xy, text, fill=txt_color, font=self.font)

+    def fromarray(self, im):
+        # Update self.im from a numpy array
+        self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
+        self.draw = ImageDraw.Draw(self.im)

     def result(self):
         # Return annotated image as array
         return np.asarray(self.im)
@ -152,7 +204,6 @@ def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detec
                 ax[i].axis('off')

             LOGGER.info(f'Saving {f}... ({n}/{channels})')
-            plt.title('Features')
             plt.savefig(f, dpi=300, bbox_inches='tight')
             plt.close()
             np.save(str(f.with_suffix('.npy')), x[0].cpu().numpy())  # npy save
@ -180,26 +231,31 @@ def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
     return filtfilt(b, a, data)  # forward-backward filter


-def output_to_target(output):
-    # Convert model output to target format [batch_id, class_id, x, y, w, h, conf]
+def output_to_target(output, max_det=300):
+    # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting
     targets = []
     for i, o in enumerate(output):
-        targets.extend([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf] for *box, conf, cls in o.cpu().numpy())
-    return np.array(targets)
+        box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1)
+        j = torch.full((conf.shape[0], 1), i)
+        targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1))
+    return torch.cat(targets, 0).numpy()


 @threaded
-def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=1920, max_subplots=16):
+def plot_images(images, targets, paths=None, fname='images.jpg', names=None):
     # Plot image grid with labels
     if isinstance(images, torch.Tensor):
         images = images.cpu().float().numpy()
     if isinstance(targets, torch.Tensor):
         targets = targets.cpu().numpy()
-    if np.max(images[0]) <= 1:
-        images *= 255  # de-normalise (optional)
+    max_size = 1920  # max image size
+    max_subplots = 16  # max image subplots, i.e. 4x4
     bs, _, h, w = images.shape  # batch size, _, height, width
     bs = min(bs, max_subplots)  # limit plot images
     ns = np.ceil(bs ** 0.5)  # number of subplots (square)
+    if np.max(images[0]) <= 1:
+        images *= 255  # de-normalise (optional)

     # Build Image
     mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
@ -364,7 +420,7 @@ def plot_labels(labels, names=(), save_dir=Path('')):
     ax[0].set_ylabel('instances')
     if 0 < len(names) < 30:
         ax[0].set_xticks(range(len(names)))
-        ax[0].set_xticklabels(names, rotation=90, fontsize=10)
+        ax[0].set_xticklabels(list(names.values()), rotation=90, fontsize=10)
     else:
         ax[0].set_xlabel('classes')
     sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
@ -509,7 +565,7 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False,
     b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
     b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
     xyxy = xywh2xyxy(b).long()
-    clip_coords(xyxy, im.shape)
+    clip_boxes(xyxy, im.shape)
     crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
     if save:
         file.parent.mkdir(parents=True, exist_ok=True)  # make directory
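A hedged sketch of the reworked output_to_target() on fake NMS output (values are illustrative):

# One image's detections as an (n, 6) tensor of xyxy, conf, cls.
import torch

from utils.plots import output_to_target

det = torch.tensor([[10., 20., 110., 220., 0.90, 0.],
                    [30., 40., 80., 90., 0.60, 2.]])
targets = output_to_target([det])  # list with one tensor per image
print(targets)  # rows of [batch_id, class_id, x, y, w, h, conf], as numpy
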
0  utils/segment/__init__.py  Normal file
104  utils/segment/augmentations.py  Normal file
@ -0,0 +1,104 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Image augmentation functions
"""

import math
import random

import cv2
import numpy as np

from ..augmentations import box_candidates
from ..general import resample_segments, segment2box


def mixup(im, labels, segments, im2, labels2, segments2):
    # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
    r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
    im = (im * r + im2 * (1 - r)).astype(np.uint8)
    labels = np.concatenate((labels, labels2), 0)
    segments = np.concatenate((segments, segments2), 0)
    return im, labels, segments


def random_perspective(im,
                       targets=(),
                       segments=(),
                       degrees=10,
                       translate=.1,
                       scale=.1,
                       shear=10,
                       perspective=0.0,
                       border=(0, 0)):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # targets = [cls, xyxy]

    height = im.shape[0] + border[0] * 2  # shape(h,w,c)
    width = im.shape[1] + border[1] * 2

    # Center
    C = np.eye(3)
    C[0, 2] = -im.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -im.shape[0] / 2  # y translation (pixels)

    # Perspective
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width)  # x translation (pixels)
    T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height)  # y translation (pixels)

    # Combined rotation matrix
    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:  # affine
            im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Visualize
    # import matplotlib.pyplot as plt
    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
    # ax[0].imshow(im[:, :, ::-1])  # base
    # ax[1].imshow(im2[:, :, ::-1])  # warped

    # Transform label coordinates
    n = len(targets)
    new_segments = []
    if n:
        new = np.zeros((n, 4))
        segments = resample_segments(segments)  # upsample
        for i, segment in enumerate(segments):
            xy = np.ones((len(segment), 3))
            xy[:, :2] = segment
            xy = xy @ M.T  # transform
            xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2])  # perspective rescale or affine

            # clip
            new[i] = segment2box(xy, width, height)
            new_segments.append(xy)

        # filter candidates
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01)
        targets = targets[i]
        targets[:, 1:5] = new[i]
        new_segments = np.array(new_segments)[i]

    return im, targets, new_segments
330  utils/segment/dataloaders.py  Normal file
@ -0,0 +1,330 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Dataloaders
"""

import os
import random

import cv2
import numpy as np
import torch
from torch.utils.data import DataLoader, distributed

from ..augmentations import augment_hsv, copy_paste, letterbox
from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker
from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn
from ..torch_utils import torch_distributed_zero_first
from .augmentations import mixup, random_perspective

RANK = int(os.getenv('RANK', -1))


def create_dataloader(path,
                      imgsz,
                      batch_size,
                      stride,
                      single_cls=False,
                      hyp=None,
                      augment=False,
                      cache=False,
                      pad=0.0,
                      rect=False,
                      rank=-1,
                      workers=8,
                      image_weights=False,
                      quad=False,
                      prefix='',
                      shuffle=False,
                      mask_downsample_ratio=1,
                      overlap_mask=False):
    if rect and shuffle:
        LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
        shuffle = False
    with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
        dataset = LoadImagesAndLabelsAndMasks(
            path,
            imgsz,
            batch_size,
            augment=augment,  # augmentation
            hyp=hyp,  # hyperparameters
            rect=rect,  # rectangular batches
            cache_images=cache,
            single_cls=single_cls,
            stride=int(stride),
            pad=pad,
            image_weights=image_weights,
            prefix=prefix,
            downsample_ratio=mask_downsample_ratio,
            overlap=overlap_mask)

    batch_size = min(batch_size, len(dataset))
    nd = torch.cuda.device_count()  # number of CUDA devices
    nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])  # number of workers
    sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
    loader = DataLoader if image_weights else InfiniteDataLoader  # only DataLoader allows for attribute updates
    generator = torch.Generator()
    generator.manual_seed(6148914691236517205 + RANK)
    return loader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle and sampler is None,
        num_workers=nw,
        sampler=sampler,
        pin_memory=True,
        collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn,
        worker_init_fn=seed_worker,
        generator=generator,
    ), dataset


class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels):  # for training/testing

    def __init__(
        self,
        path,
        img_size=640,
        batch_size=16,
        augment=False,
        hyp=None,
        rect=False,
        image_weights=False,
        cache_images=False,
        single_cls=False,
        stride=32,
        pad=0,
        prefix="",
        downsample_ratio=1,
        overlap=False,
    ):
        super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls,
                         stride, pad, prefix)
        self.downsample_ratio = downsample_ratio
        self.overlap = overlap

    def __getitem__(self, index):
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        masks = []
        if mosaic:
            # Load mosaic
            img, labels, segments = self.load_mosaic(index)
            shapes = None

            # MixUp augmentation
            if random.random() < hyp["mixup"]:
                img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1)))

        else:
            # Load image
            img, (h0, w0), (h, w) = self.load_image(index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            labels = self.labels[index].copy()
            # [array, array, ....], array.shape=(num_points, 2), xyxyxyxy
            segments = self.segments[index].copy()
            if len(segments):
                for i_s in range(len(segments)):
                    segments[i_s] = xyn2xy(
                        segments[i_s],
                        ratio[0] * w,
                        ratio[1] * h,
                        padw=pad[0],
                        padh=pad[1],
                    )
            if labels.size:  # normalized xywh to pixel xyxy format
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

            if self.augment:
                img, labels, segments = random_perspective(img,
                                                           labels,
                                                           segments=segments,
                                                           degrees=hyp["degrees"],
                                                           translate=hyp["translate"],
                                                           scale=hyp["scale"],
                                                           shear=hyp["shear"],
                                                           perspective=hyp["perspective"])

        nl = len(labels)  # number of labels
        if nl:
            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3)
            if self.overlap:
                masks, sorted_idx = polygons2masks_overlap(img.shape[:2],
                                                           segments,
                                                           downsample_ratio=self.downsample_ratio)
                masks = masks[None]  # (640, 640) -> (1, 640, 640)
                labels = labels[sorted_idx]
            else:
                masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio)

        masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] //
                                                                        self.downsample_ratio, img.shape[1] //
                                                                        self.downsample_ratio))
        # TODO: albumentations support
        if self.augment:
            # Albumentations
            # some augmentations won't change boxes and masks,
            # so leave it as-is for now.
            img, labels = self.albumentations(img, labels)
            nl = len(labels)  # update after albumentations

            # HSV color-space
            augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"])

            # Flip up-down
            if random.random() < hyp["flipud"]:
                img = np.flipud(img)
                if nl:
                    labels[:, 2] = 1 - labels[:, 2]
                    masks = torch.flip(masks, dims=[1])

            # Flip left-right
            if random.random() < hyp["fliplr"]:
                img = np.fliplr(img)
                if nl:
                    labels[:, 1] = 1 - labels[:, 1]
                    masks = torch.flip(masks, dims=[2])

            # Cutouts  # labels = cutout(img, labels, p=0.5)

        labels_out = torch.zeros((nl, 6))
        if nl:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks)

    def load_mosaic(self, index):
        # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
        labels4, segments4 = [], []
        s = self.img_size
        yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border)  # mosaic center x, y

        # 3 additional image indices
        indices = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
        for i, index in enumerate(indices):
            # Load image
            img, _, (h, w) = self.load_image(index)

            # place img in img4
            if i == 0:  # top left
                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
            elif i == 1:  # top right
                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            elif i == 2:  # bottom left
                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
            elif i == 3:  # bottom right
                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
            padw = x1a - x1b
            padh = y1a - y1b

            labels, segments = self.labels[index].copy(), self.segments[index].copy()

            if labels.size:
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
            labels4.append(labels)
            segments4.extend(segments)

        # Concat/clip labels
        labels4 = np.concatenate(labels4, 0)
        for x in (labels4[:, 1:], *segments4):
            np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
        # img4, labels4 = replicate(img4, labels4)  # replicate

        # Augment
        img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"])
        img4, labels4, segments4 = random_perspective(img4,
                                                      labels4,
                                                      segments4,
                                                      degrees=self.hyp["degrees"],
                                                      translate=self.hyp["translate"],
                                                      scale=self.hyp["scale"],
                                                      shear=self.hyp["shear"],
                                                      perspective=self.hyp["perspective"],
                                                      border=self.mosaic_border)  # border to remove
        return img4, labels4, segments4

    @staticmethod
    def collate_fn(batch):
        img, label, path, shapes, masks = zip(*batch)  # transposed
        batched_masks = torch.cat(masks, 0)
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks


def polygon2mask(img_size, polygons, color=1, downsample_ratio=1):
    """
    Args:
        img_size (tuple): The image size.
        polygons (np.ndarray): [N, M], N is the number of polygons,
            M is the number of points (divisible by 2).
    """
    mask = np.zeros(img_size, dtype=np.uint8)
    polygons = np.asarray(polygons)
    polygons = polygons.astype(np.int32)
    shape = polygons.shape
    polygons = polygons.reshape(shape[0], -1, 2)
    cv2.fillPoly(mask, polygons, color=color)
    nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio)
    # NOTE: fillPoly first, then resize, to keep the loss calculation
    # consistent with mask-ratio=1.
    mask = cv2.resize(mask, (nw, nh))
    return mask


def polygons2masks(img_size, polygons, color, downsample_ratio=1):
    """
    Args:
        img_size (tuple): The image size.
        polygons (list[np.ndarray]): each polygon is [N, M],
            N is the number of polygons,
            M is the number of points (divisible by 2).
    """
    masks = []
    for si in range(len(polygons)):
        mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio)
        masks.append(mask)
    return np.array(masks)


def polygons2masks_overlap(img_size, segments, downsample_ratio=1):
    """Return a (640, 640) overlap mask."""
    masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio),
                     dtype=np.int32 if len(segments) > 255 else np.uint8)
    areas = []
    ms = []
    for si in range(len(segments)):
        mask = polygon2mask(
            img_size,
            [segments[si].reshape(-1)],
            downsample_ratio=downsample_ratio,
            color=1,
        )
        ms.append(mask)
        areas.append(mask.sum())
    areas = np.asarray(areas)
    index = np.argsort(-areas)
    ms = np.array(ms)[index]
    for i in range(len(segments)):
        mask = ms[i] * (i + 1)
        masks = masks + mask
        masks = np.clip(masks, a_min=0, a_max=i + 1)
    return masks, index
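A hypothetical invocation of the new segmentation dataloader; the dataset path and hyp values are illustrative stand-ins, not values from this commit.

# Build the mask-aware loader and inspect one batch.
from utils.segment.dataloaders import create_dataloader

hyp = {'mosaic': 1.0, 'mixup': 0.0, 'copy_paste': 0.0, 'degrees': 0.0, 'translate': 0.1,
       'scale': 0.5, 'shear': 0.0, 'perspective': 0.0, 'hsv_h': 0.015, 'hsv_s': 0.7,
       'hsv_v': 0.4, 'flipud': 0.0, 'fliplr': 0.5}
loader, dataset = create_dataloader('../datasets/coco128-seg/images/train2017', 640, 16, 32,
                                    hyp=hyp, augment=True, shuffle=True, overlap_mask=True)
imgs, labels, paths, shapes, masks = next(iter(loader))
print(imgs.shape, labels.shape, masks.shape)  # masks: (bs, 640, 640) at mask_downsample_ratio=1
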
134  utils/segment/general.py  Normal file
@ -0,0 +1,134 @@
import cv2
import numpy as np
import torch
import torch.nn.functional as F


def crop_mask(masks, boxes):
    """
    "Crop" predicted masks by zeroing out everything not in the predicted bbox.
    Vectorized by Chong (thanks Chong).

    Args:
        - masks should be a size [h, w, n] tensor of masks
        - boxes should be a size [n, 4] tensor of bbox coords in relative point form
    """

    n, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(1,1,n)
    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # rows shape(1,w,1)
    c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # cols shape(h,1,1)

    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))


def process_mask_upsample(protos, masks_in, bboxes, shape):
    """
    Crop after upsample.
    proto_out: [mask_dim, mask_h, mask_w]
    out_masks: [n, mask_dim], n is number of masks after nms
    bboxes: [n, 4], n is number of masks after nms
    shape: input_image_size, (h, w)

    return: h, w, n
    """

    c, mh, mw = protos.shape  # CHW
    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
    masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
    masks = crop_mask(masks, bboxes)  # CHW
    return masks.gt_(0.5)


def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Crop before upsample.
    proto_out: [mask_dim, mask_h, mask_w]
    out_masks: [n, mask_dim], n is number of masks after nms
    bboxes: [n, 4], n is number of masks after nms
    shape: input_image_size, (h, w)

    return: h, w, n
    """

    c, mh, mw = protos.shape  # CHW
    ih, iw = shape
    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)  # CHW

    downsampled_bboxes = bboxes.clone()
    downsampled_bboxes[:, 0] *= mw / iw
    downsampled_bboxes[:, 2] *= mw / iw
    downsampled_bboxes[:, 3] *= mh / ih
    downsampled_bboxes[:, 1] *= mh / ih

    masks = crop_mask(masks, downsampled_bboxes)  # CHW
    if upsample:
        masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
    return masks.gt_(0.5)


def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
    """
    img1_shape: model input shape, [h, w]
    img0_shape: origin pic shape, [h, w, 3]
    masks: [h, w, num]
    """
    # Rescale coordinates (xyxy) from im1_shape to im0_shape
    if ratio_pad is None:  # calculate from im0_shape
        gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain = old / new
        pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding
    else:
        pad = ratio_pad[1]
    top, left = int(pad[1]), int(pad[0])  # y, x
    bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])

    if len(masks.shape) < 2:
        raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
    masks = masks[top:bottom, left:right]
    # masks = masks.permute(2, 0, 1).contiguous()
    # masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0]
    # masks = masks.permute(1, 2, 0).contiguous()
    masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))

    if len(masks.shape) == 2:
        masks = masks[:, :, None]
    return masks


def mask_iou(mask1, mask2, eps=1e-7):
    """
    mask1: [N, n] m1 means number of predicted objects
    mask2: [M, n] m2 means number of gt objects
    Note: n means image_w x image_h

    return: masks iou, [N, M]
    """
    intersection = torch.matmul(mask1, mask2.t()).clamp(0)
    union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection  # (area1 + area2) - intersection
    return intersection / (union + eps)


def masks_iou(mask1, mask2, eps=1e-7):
    """
    mask1: [N, n] m1 means number of predicted objects
    mask2: [N, n] m2 means number of gt objects
    Note: n means image_w x image_h

    return: masks iou, (N, )
    """
    intersection = (mask1 * mask2).sum(1).clamp(0)  # (N, )
    union = (mask1.sum(1) + mask2.sum(1))[None] - intersection  # (area1 + area2) - intersection
    return intersection / (union + eps)


def masks2segments(masks, strategy='largest'):
    # Convert masks(n,160,160) into segments(n,xy)
    segments = []
    for x in masks.int().numpy().astype('uint8'):
        c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
        if strategy == 'concat':  # concatenate all segments
            c = np.concatenate([x.reshape(-1, 2) for x in c])
        elif strategy == 'largest':  # select largest segment
            c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
        segments.append(c.astype('float32'))
    return segments
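A toy check of mask_iou() above, using tiny flattened masks (a sketch on synthetic values):

# Two flattened binary masks overlapping in 2 of 6 pixels.
import torch

from utils.segment.general import mask_iou

m1 = torch.tensor([[1., 1., 1., 1., 0., 0.]])
m2 = torch.tensor([[0., 0., 1., 1., 1., 1.]])
print(mask_iou(m1, m2))  # 2 / (4 + 4 - 2) -> tensor([[0.3333]])
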
186  utils/segment/loss.py  Normal file
@ -0,0 +1,186 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from ..general import xywh2xyxy
from ..loss import FocalLoss, smooth_BCE
from ..metrics import bbox_iou
from ..torch_utils import de_parallel
from .general import crop_mask


class ComputeLoss:
    # Compute losses
    def __init__(self, model, autobalance=False, overlap=False):
        self.sort_obj_iou = False
        self.overlap = overlap
        device = next(model.parameters()).device  # get model device
        h = model.hyp  # hyperparameters
        self.device = device

        # Define criteria
        BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device))
        BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))

        # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
        self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0))  # positive, negative BCE targets

        # Focal loss
        g = h['fl_gamma']  # focal loss gamma
        if g > 0:
            BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

        m = de_parallel(model).model[-1]  # Detect() module
        self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02])  # P3-P7
        self.ssi = list(m.stride).index(16) if autobalance else 0  # stride 16 index
        self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
        self.na = m.na  # number of anchors
        self.nc = m.nc  # number of classes
        self.nl = m.nl  # number of layers
        self.nm = m.nm  # number of masks
        self.anchors = m.anchors
        self.device = device

    def __call__(self, preds, targets, masks):  # predictions, targets, model
        p, proto = preds
        bs, nm, mask_h, mask_w = proto.shape  # batch size, number of masks, mask height, mask width
        lcls = torch.zeros(1, device=self.device)
        lbox = torch.zeros(1, device=self.device)
        lobj = torch.zeros(1, device=self.device)
        lseg = torch.zeros(1, device=self.device)
        tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets)  # targets

        # Losses
        for i, pi in enumerate(p):  # layer index, layer predictions
            b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
            tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device)  # target obj

            n = b.shape[0]  # number of targets
            if n:
                pxy, pwh, _, pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1, self.nc, nm), 1)  # subset of predictions

                # Box regression
                pxy = pxy.sigmoid() * 2 - 0.5
                pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
                pbox = torch.cat((pxy, pwh), 1)  # predicted box
                iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze()  # iou(prediction, target)
                lbox += (1.0 - iou).mean()  # iou loss

                # Objectness
                iou = iou.detach().clamp(0).type(tobj.dtype)
                if self.sort_obj_iou:
                    j = iou.argsort()
                    b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j]
                if self.gr < 1:
                    iou = (1.0 - self.gr) + self.gr * iou
                tobj[b, a, gj, gi] = iou  # iou ratio

                # Classification
                if self.nc > 1:  # cls loss (only if multiple classes)
                    t = torch.full_like(pcls, self.cn, device=self.device)  # targets
                    t[range(n), tcls[i]] = self.cp
                    lcls += self.BCEcls(pcls, t)  # BCE

                # Mask regression
                if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
                    masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0]
                marea = xywhn[i][:, 2:].prod(1)  # mask width, height normalized
                mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device))
                for bi in b.unique():
                    j = b == bi  # matching index
                    if self.overlap:
                        mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0)
                    else:
                        mask_gti = masks[tidxs[i]][j]
                    lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j])

            obji = self.BCEobj(pi[..., 4], tobj)
            lobj += obji * self.balance[i]  # obj loss
            if self.autobalance:
                self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()

        if self.autobalance:
            self.balance = [x / self.balance[self.ssi] for x in self.balance]
        lbox *= self.hyp["box"]
        lobj *= self.hyp["obj"]
        lcls *= self.hyp["cls"]
        lseg *= self.hyp["box"] / bs

        loss = lbox + lobj + lcls + lseg
        return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach()

    def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
        # Mask loss for one image
        pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:])  # (n,32) @ (32,80,80) -> (n,80,80)
        loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")
        return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean()

    def build_targets(self, p, targets):
        # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
        na, nt = self.na, targets.shape[0]  # number of anchors, targets
        tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], []
        gain = torch.ones(8, device=self.device)  # normalized to gridspace gain
        ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
        if self.overlap:
            batch = p[0].shape[0]
            ti = []
            for i in range(batch):
                num = (targets[:, 0] == i).sum()  # find number of targets of each image
                ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1)  # (na, num)
            ti = torch.cat(ti, 1)  # (na, nt)
        else:
            ti = torch.arange(nt, device=self.device).float().view(1, nt).repeat(na, 1)
        targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2)  # append anchor indices

        g = 0.5  # bias
        off = torch.tensor(
            [
                [0, 0],
                [1, 0],
                [0, 1],
                [-1, 0],
                [0, -1],  # j,k,l,m
                # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
            ],
            device=self.device).float() * g  # offsets

        for i in range(self.nl):
            anchors, shape = self.anchors[i], p[i].shape
            gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]]  # xyxy gain

            # Match targets to anchors
            t = targets * gain  # shape(3,n,7)
            if nt:
                # Matches
                r = t[..., 4:6] / anchors[:, None]  # wh ratio
                j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t']  # compare
                # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
                t = t[j]  # filter

                # Offsets
                gxy = t[:, 2:4]  # grid xy
                gxi = gain[[2, 3]] - gxy  # inverse
                j, k = ((gxy % 1 < g) & (gxy > 1)).T
                l, m = ((gxi % 1 < g) & (gxi > 1)).T
                j = torch.stack((torch.ones_like(j), j, k, l, m))
                t = t.repeat((5, 1, 1))[j]
                offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
            else:
                t = targets[0]
                offsets = 0

            # Define
            bc, gxy, gwh, at = t.chunk(4, 1)  # (image, class), grid xy, grid wh, anchors
            (a, tidx), (b, c) = at.long().T, bc.long().T  # anchors, image, class
            gij = (gxy - offsets).long()
            gi, gj = gij.T  # grid indices

            # Append
            indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1)))  # image, anchor, grid
            tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
            anch.append(anchors[a])  # anchors
            tcls.append(c)  # class
            tidxs.append(tidx)
            xywhn.append(torch.cat((gxy, gwh), 1) / gain[2:6])  # xywh normalized

        return tcls, tbox, indices, anch, tidxs, xywhn
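A dummy-shape walkthrough of the prototype-mask projection inside single_mask_loss() above (a sketch; shapes mirror the code and values are random):

# Project per-detection coefficients through prototype masks, then take the
# per-mask BCE that single_mask_loss() crops and area-normalizes.
import torch
import torch.nn.functional as F

n, nm, h, w = 3, 32, 80, 80
pred = torch.randn(n, nm)      # mask coefficients per detection
proto = torch.randn(nm, h, w)  # prototypes for one image
gt = torch.randint(0, 2, (n, h, w)).float()

pred_mask = (pred @ proto.view(nm, -1)).view(-1, h, w)  # (n,32)@(32,6400) -> (n,80,80)
loss = F.binary_cross_entropy_with_logits(pred_mask, gt, reduction='none')
print(loss.mean(dim=(1, 2)).shape)  # (n,): per-mask loss before crop/area scaling
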
210  utils/segment/metrics.py  Normal file
@ -0,0 +1,210 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Model validation metrics
"""

import numpy as np

from ..metrics import ap_per_class


def fitness(x):
    # Model fitness as a weighted combination of metrics
    w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9]
    return (x[:, :8] * w).sum(1)

def ap_per_class_box_and_mask(
|
||||||
|
tp_m,
|
||||||
|
tp_b,
|
||||||
|
conf,
|
||||||
|
pred_cls,
|
||||||
|
target_cls,
|
||||||
|
plot=False,
|
||||||
|
save_dir=".",
|
||||||
|
names=(),
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Args:
|
||||||
|
tp_b: tp of boxes.
|
||||||
|
tp_m: tp of masks.
|
||||||
|
other arguments see `func: ap_per_class`.
|
||||||
|
"""
|
||||||
|
results_boxes = ap_per_class(tp_b,
|
||||||
|
conf,
|
||||||
|
pred_cls,
|
||||||
|
target_cls,
|
||||||
|
plot=plot,
|
||||||
|
save_dir=save_dir,
|
||||||
|
names=names,
|
||||||
|
prefix="Box")[2:]
|
||||||
|
results_masks = ap_per_class(tp_m,
|
||||||
|
conf,
|
||||||
|
pred_cls,
|
||||||
|
target_cls,
|
||||||
|
plot=plot,
|
||||||
|
save_dir=save_dir,
|
||||||
|
names=names,
|
||||||
|
prefix="Mask")[2:]
|
||||||
|
|
||||||
|
results = {
|
||||||
|
"boxes": {
|
||||||
|
"p": results_boxes[0],
|
||||||
|
"r": results_boxes[1],
|
||||||
|
"ap": results_boxes[3],
|
||||||
|
"f1": results_boxes[2],
|
||||||
|
"ap_class": results_boxes[4]},
|
||||||
|
"masks": {
|
||||||
|
"p": results_masks[0],
|
||||||
|
"r": results_masks[1],
|
||||||
|
"ap": results_masks[3],
|
||||||
|
"f1": results_masks[2],
|
||||||
|
"ap_class": results_masks[4]}}
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
class Metric:
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.p = [] # (nc, )
|
||||||
|
self.r = [] # (nc, )
|
||||||
|
self.f1 = [] # (nc, )
|
||||||
|
self.all_ap = [] # (nc, 10)
|
||||||
|
self.ap_class_index = [] # (nc, )
|
||||||
|
|
||||||
|
@property
|
||||||
|
def ap50(self):
|
||||||
|
"""AP@0.5 of all classes.
|
||||||
|
Return:
|
||||||
|
(nc, ) or [].
|
||||||
|
"""
|
||||||
|
return self.all_ap[:, 0] if len(self.all_ap) else []
|
||||||
|
|
||||||
|
@property
|
||||||
|
def ap(self):
|
||||||
|
"""AP@0.5:0.95
|
||||||
|
Return:
|
||||||
|
(nc, ) or [].
|
||||||
|
"""
|
||||||
|
return self.all_ap.mean(1) if len(self.all_ap) else []
|
||||||
|
|
||||||
|
@property
|
||||||
|
def mp(self):
|
||||||
|
"""mean precision of all classes.
|
||||||
|
Return:
|
||||||
|
float.
|
||||||
|
"""
|
||||||
|
return self.p.mean() if len(self.p) else 0.0
|
||||||
|
|
||||||
|
@property
|
||||||
|
def mr(self):
|
||||||
|
"""mean recall of all classes.
|
||||||
|
Return:
|
||||||
|
float.
|
||||||
|
"""
|
||||||
|
return self.r.mean() if len(self.r) else 0.0
|
||||||
|
|
||||||
|
@property
|
||||||
|
def map50(self):
|
||||||
|
"""Mean AP@0.5 of all classes.
|
||||||
|
Return:
|
||||||
|
float.
|
||||||
|
"""
|
||||||
|
return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0
|
||||||
|
|
||||||
|
@property
|
||||||
|
def map(self):
|
||||||
|
"""Mean AP@0.5:0.95 of all classes.
|
||||||
|
Return:
|
||||||
|
float.
|
||||||
|
"""
|
||||||
|
return self.all_ap.mean() if len(self.all_ap) else 0.0
|
||||||
|
|
||||||
|
def mean_results(self):
|
||||||
|
"""Mean of results, return mp, mr, map50, map"""
|
||||||
|
return (self.mp, self.mr, self.map50, self.map)
|
||||||
|
|
||||||
|
def class_result(self, i):
|
||||||
|
"""class-aware result, return p[i], r[i], ap50[i], ap[i]"""
|
||||||
|
return (self.p[i], self.r[i], self.ap50[i], self.ap[i])
|
||||||
|
|
||||||
|
def get_maps(self, nc):
|
||||||
|
maps = np.zeros(nc) + self.map
|
||||||
|
for i, c in enumerate(self.ap_class_index):
|
||||||
|
maps[c] = self.ap[i]
|
||||||
|
return maps
|
||||||
|
|
||||||
|
def update(self, results):
|
||||||
|
"""
|
||||||
|
Args:
|
||||||
|
results: tuple(p, r, ap, f1, ap_class)
|
||||||
|
"""
|
||||||
|
p, r, all_ap, f1, ap_class_index = results
|
||||||
|
self.p = p
|
||||||
|
self.r = r
|
||||||
|
self.all_ap = all_ap
|
||||||
|
self.f1 = f1
|
||||||
|
self.ap_class_index = ap_class_index
|
||||||
|
|
||||||
|
|
||||||
|
class Metrics:
|
||||||
|
"""Metric for boxes and masks."""
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.metric_box = Metric()
|
||||||
|
self.metric_mask = Metric()
|
||||||
|
|
||||||
|
def update(self, results):
|
||||||
|
"""
|
||||||
|
Args:
|
||||||
|
results: Dict{'boxes': Dict{}, 'masks': Dict{}}
|
||||||
|
"""
|
||||||
|
self.metric_box.update(list(results["boxes"].values()))
|
||||||
|
self.metric_mask.update(list(results["masks"].values()))
|
||||||
|
|
||||||
|
def mean_results(self):
|
||||||
|
return self.metric_box.mean_results() + self.metric_mask.mean_results()
|
||||||
|
|
||||||
|
def class_result(self, i):
|
||||||
|
return self.metric_box.class_result(i) + self.metric_mask.class_result(i)
|
||||||
|
|
||||||
|
def get_maps(self, nc):
|
||||||
|
return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def ap_class_index(self):
|
||||||
|
# boxes and masks have the same ap_class_index
|
||||||
|
return self.metric_box.ap_class_index
|
||||||
|
|
||||||
|
|
||||||
|
KEYS = [
|
||||||
|
"train/box_loss",
|
||||||
|
"train/seg_loss", # train loss
|
||||||
|
"train/obj_loss",
|
||||||
|
"train/cls_loss",
|
||||||
|
"metrics/precision(B)",
|
||||||
|
"metrics/recall(B)",
|
||||||
|
"metrics/mAP_0.5(B)",
|
||||||
|
"metrics/mAP_0.5:0.95(B)", # metrics
|
||||||
|
"metrics/precision(M)",
|
||||||
|
"metrics/recall(M)",
|
||||||
|
"metrics/mAP_0.5(M)",
|
||||||
|
"metrics/mAP_0.5:0.95(M)", # metrics
|
||||||
|
"val/box_loss",
|
||||||
|
"val/seg_loss", # val loss
|
||||||
|
"val/obj_loss",
|
||||||
|
"val/cls_loss",
|
||||||
|
"x/lr0",
|
||||||
|
"x/lr1",
|
||||||
|
"x/lr2",]
|
||||||
|
|
||||||
|
BEST_KEYS = [
|
||||||
|
"best/epoch",
|
||||||
|
"best/precision(B)",
|
||||||
|
"best/recall(B)",
|
||||||
|
"best/mAP_0.5(B)",
|
||||||
|
"best/mAP_0.5:0.95(B)",
|
||||||
|
"best/precision(M)",
|
||||||
|
"best/recall(M)",
|
||||||
|
"best/mAP_0.5(M)",
|
||||||
|
"best/mAP_0.5:0.95(M)",]
utils/segment/plots.py (new file, 143 lines)
@@ -0,0 +1,143 @@
import contextlib
import math
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

from .. import threaded
from ..general import xywh2xyxy
from ..plots import Annotator, colors


@threaded
def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None):
    # Plot image grid with labels
    if isinstance(images, torch.Tensor):
        images = images.cpu().float().numpy()
    if isinstance(targets, torch.Tensor):
        targets = targets.cpu().numpy()
    if isinstance(masks, torch.Tensor):
        masks = masks.cpu().numpy().astype(int)

    max_size = 1920  # max image size
    max_subplots = 16  # max image subplots, i.e. 4x4
    bs, _, h, w = images.shape  # batch size, _, height, width
    bs = min(bs, max_subplots)  # limit plot images
    ns = np.ceil(bs ** 0.5)  # number of subplots (square)
    if np.max(images[0]) <= 1:
        images *= 255  # de-normalise (optional)

    # Build Image
    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
    for i, im in enumerate(images):
        if i == max_subplots:  # if last batch has fewer images than we expect
            break
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        im = im.transpose(1, 2, 0)
        mosaic[y:y + h, x:x + w, :] = im

    # Resize (optional)
    scale = max_size / ns / max(h, w)
    if scale < 1:
        h = math.ceil(scale * h)
        w = math.ceil(scale * w)
        mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))

    # Annotate
    fs = int((h + w) * ns * 0.01)  # font size
    annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
    for i in range(i + 1):
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2)  # borders
        if paths:
            annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220))  # filenames
        if len(targets) > 0:
            idx = targets[:, 0] == i
            ti = targets[idx]  # image targets

            boxes = xywh2xyxy(ti[:, 2:6]).T
            classes = ti[:, 1].astype('int')
            labels = ti.shape[1] == 6  # labels if no conf column
            conf = None if labels else ti[:, 6]  # check for confidence presence (label vs pred)

            if boxes.shape[1]:
                if boxes.max() <= 1.01:  # if normalized with tolerance 0.01
                    boxes[[0, 2]] *= w  # scale to pixels
                    boxes[[1, 3]] *= h
                elif scale < 1:  # absolute coords need scale if image scales
                    boxes *= scale
                boxes[[0, 2]] += x
                boxes[[1, 3]] += y
                for j, box in enumerate(boxes.T.tolist()):
                    cls = classes[j]
                    color = colors(cls)
                    cls = names[cls] if names else cls
                    if labels or conf[j] > 0.25:  # 0.25 conf thresh
                        label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}'
                        annotator.box_label(box, label, color=color)

            # Plot masks
            if len(masks):
                if masks.max() > 1.0:  # mean that masks are overlap
                    image_masks = masks[[i]]  # (1, 640, 640)
                    nl = len(ti)
                    index = np.arange(nl).reshape(nl, 1, 1) + 1
                    image_masks = np.repeat(image_masks, nl, axis=0)
                    image_masks = np.where(image_masks == index, 1.0, 0.0)
                else:
                    image_masks = masks[idx]

                im = np.asarray(annotator.im).copy()
                for j, box in enumerate(boxes.T.tolist()):
                    if labels or conf[j] > 0.25:  # 0.25 conf thresh
                        color = colors(classes[j])
                        mh, mw = image_masks[j].shape
                        if mh != h or mw != w:
                            mask = image_masks[j].astype(np.uint8)
                            mask = cv2.resize(mask, (w, h))
                            mask = mask.astype(bool)
                        else:
                            mask = image_masks[j].astype(bool)
                        with contextlib.suppress(Exception):
                            im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6
                annotator.fromarray(im)
    annotator.im.save(fname)  # save


def plot_results_with_masks(file="path/to/results.csv", dir="", best=True):
    # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
    save_dir = Path(file).parent if file else Path(dir)
    fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True)
    ax = ax.ravel()
    files = list(save_dir.glob("results*.csv"))
    assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot."
    for f in files:
        try:
            data = pd.read_csv(f)
            index = np.argmax(0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] +
                              0.1 * data.values[:, 11])
            s = [x.strip() for x in data.columns]
            x = data.values[:, 0]
            for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]):
                y = data.values[:, j]
                # y[y == 0] = np.nan  # don't show zero values
                ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2)
                if best:
                    # best
                    ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3)
                    ax[i].set_title(s[j] + f"\n{round(y[index], 5)}")
                else:
                    # last
                    ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3)
                    ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}")
                # if j in [8, 9, 10]:  # share train and val loss y axes
                #     ax[i].get_shared_y_axes().join(ax[i], ax[i - 5])
        except Exception as e:
            print(f"Warning: Plotting error for {f}: {e}")
    ax[1].legend()
    fig.savefig(save_dir / "results.png", dpi=200)
    plt.close()
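
A minimal hedged call for the function above; the CSV path is a placeholder for a real segmentation run's output directory, not a path defined by this commit.

# Placeholder path; point it at a directory containing results*.csv files.
plot_results_with_masks(file="runs/train-seg/exp/results.csv", best=True)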
utils/torch_utils.py
@@ -47,7 +47,7 @@ def smartCrossEntropyLoss(label_smoothing=0.0):
     if check_version(torch.__version__, '1.10.0'):
         return nn.CrossEntropyLoss(label_smoothing=label_smoothing)
     if label_smoothing > 0:
-        LOGGER.warning(f'WARNING: label smoothing {label_smoothing} requires torch>=1.10.0')
+        LOGGER.warning(f'WARNING ⚠️ label smoothing {label_smoothing} requires torch>=1.10.0')
     return nn.CrossEntropyLoss()
@@ -251,6 +251,7 @@ def fuse_conv_and_bn(conv, bn):
                           kernel_size=conv.kernel_size,
                           stride=conv.stride,
                           padding=conv.padding,
+                          dilation=conv.dilation,
                           groups=conv.groups,
                           bias=True).requires_grad_(False).to(conv.weight.device)
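
The added dilation=conv.dilation argument means fused convolutions now carry the original dilation. A hedged sketch of the effect; the layer shapes are illustrative, not taken from this repo.

# Illustrative only: before this fix, fusing a dilated conv dropped its dilation.
import torch.nn as nn

from utils.torch_utils import fuse_conv_and_bn

conv = nn.Conv2d(3, 16, 3, padding=2, dilation=2, bias=False)
bn = nn.BatchNorm2d(16)
fused = fuse_conv_and_bn(conv, bn)
assert fused.dilation == conv.dilation  # (2, 2) is now preserved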
utils/triton.py (new file, 85 lines)
@@ -0,0 +1,85 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
""" Utils to interact with the Triton Inference Server
"""

import typing
from urllib.parse import urlparse

import torch


class TritonRemoteModel:
    """ A wrapper over a model served by the Triton Inference Server. It can
    be configured to communicate over GRPC or HTTP. It accepts Torch Tensors
    as input and returns them as outputs.
    """

    def __init__(self, url: str):
        """
        Keyword arguments:
        url: Fully qualified address of the Triton server - for e.g. grpc://localhost:8000
        """

        parsed_url = urlparse(url)
        if parsed_url.scheme == "grpc":
            from tritonclient.grpc import InferenceServerClient, InferInput

            self.client = InferenceServerClient(parsed_url.netloc)  # Triton GRPC client
            model_repository = self.client.get_model_repository_index()
            self.model_name = model_repository.models[0].name
            self.metadata = self.client.get_model_metadata(self.model_name, as_json=True)

            def create_input_placeholders() -> typing.List[InferInput]:
                return [
                    InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']]

        else:
            from tritonclient.http import InferenceServerClient, InferInput

            self.client = InferenceServerClient(parsed_url.netloc)  # Triton HTTP client
            model_repository = self.client.get_model_repository_index()
            self.model_name = model_repository[0]['name']
            self.metadata = self.client.get_model_metadata(self.model_name)

            def create_input_placeholders() -> typing.List[InferInput]:
                return [
                    InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']]

        self._create_input_placeholders_fn = create_input_placeholders

    @property
    def runtime(self):
        """Returns the model runtime"""
        return self.metadata.get("backend", self.metadata.get("platform"))

    def __call__(self, *args, **kwargs) -> typing.Union[torch.Tensor, typing.Tuple[torch.Tensor, ...]]:
        """ Invokes the model. Parameters can be provided via args or kwargs.
        args, if provided, are assumed to match the order of inputs of the model.
        kwargs are matched with the model input names.
        """
        inputs = self._create_inputs(*args, **kwargs)
        response = self.client.infer(model_name=self.model_name, inputs=inputs)
        result = []
        for output in self.metadata['outputs']:
            tensor = torch.as_tensor(response.as_numpy(output['name']))
            result.append(tensor)
        return result[0] if len(result) == 1 else result

    def _create_inputs(self, *args, **kwargs):
        args_len, kwargs_len = len(args), len(kwargs)
        if not args_len and not kwargs_len:
            raise RuntimeError("No inputs provided.")
        if args_len and kwargs_len:
            raise RuntimeError("Cannot specify args and kwargs at the same time")

        placeholders = self._create_input_placeholders_fn()
        if args_len:
            if args_len != len(placeholders):
                raise RuntimeError(f"Expected {len(placeholders)} inputs, got {args_len}.")
            for input, value in zip(placeholders, args):
                input.set_data_from_numpy(value.cpu().numpy())
        else:
            for input in placeholders:
                value = kwargs[input.name]
                input.set_data_from_numpy(value.cpu().numpy())
        return placeholders
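
A hedged client sketch for the wrapper above; the URL and input shape are placeholders and assume a single-input, single-output model is already loaded on a reachable Triton server.

# Hypothetical usage; requires tritonclient installed and a live server.
import torch

model = TritonRemoteModel("grpc://localhost:8000")
x = torch.zeros(1, 3, 640, 640)  # dummy batch shaped like the model input
y = model(x)  # positional args map to the model's declared inputs, in order
print(model.runtime)  # backend/platform reported by the server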