# Source code for cv2ext.tracking.trackers._klt

# Copyright (c) 2024 Justin Davis (davisjustin302@gmail.com)
#
# MIT License

from __future__ import annotations

from typing import TYPE_CHECKING

import cv2
import numpy as np

from cv2ext.bboxes import constrain
from cv2ext.tracking._interface import AbstractMultiTracker, AbstractTracker

if TYPE_CHECKING:
    from typing_extensions import Self


class KLTTracker(AbstractTracker):
    """
    Class for tracking a single object with the KLT algorithm.

    ORB keypoints found inside the initial bounding box are followed from
    frame to frame with ``cv2.calcOpticalFlowPyrLK``. The reported bounding
    box is the bounding rectangle of the keypoints which were tracked
    successfully into the newest frame.
    """

    def __init__(
        self: Self,
        num_features: int = 500,
        window_size: tuple[int, int] = (15, 15),
        max_level: int = 2,
        criteria: tuple[int, int, float] = (
            cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
            10,
            0.03,
        ),
    ) -> None:
        """
        Create a new KLTTracker object.

        Parameters
        ----------
        num_features : int
            The number of features to track.
            By default, this is set to 500.
        window_size : tuple[int, int]
            The size of the window used for tracking.
            By default, this is set to (15, 15).
        max_level : int
            The maximum pyramid level for tracking.
            By default, this is set to 2.
        criteria : tuple[int, int, float]
            The criteria used for tracking.
            By default, this is set to
            (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03).

        """
        self._window_size = window_size
        self._max_level = max_level
        self._criteria = criteria
        # keyword arguments forwarded to cv2.calcOpticalFlowPyrLK
        self._lk_params = {
            "winSize": self._window_size,
            "maxLevel": self._max_level,
            "criteria": self._criteria,
        }
        self._orb: cv2.ORB = cv2.ORB_create(nfeatures=num_features)  # type: ignore[attr-defined]

        # state storage
        self._prev_frame: np.ndarray = np.zeros((1, 1))
        self._prev_bbox: tuple[int, int, int, int] = (0, 0, 0, 0)
        # start with an empty (0, 2) point set so that update() called
        # before init() reports a failure instead of handing an invalid
        # array to OpenCV
        self._prev_keypoints: np.ndarray = np.empty((0, 2), dtype=np.float32)

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Convert a 3-channel BGR image to grayscale, pass others through."""
        if image.ndim == 3 and image.shape[2] == 3:
            return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return image

    def _detect_keypoints(self: Self, image: np.ndarray) -> np.ndarray:
        """
        Detect ORB keypoints in the image.

        Parameters
        ----------
        image : np.ndarray
            The image to detect keypoints in.

        Returns
        -------
        np.ndarray
            The (x, y) keypoint locations as a float32 array of shape (N, 2).
            The shape is (0, 2) when no keypoints are detected.

        """
        keypoints = self._orb.detect(image, None)
        np_keypoints: np.ndarray = np.asarray(
            [kp.pt for kp in keypoints],
            dtype=np.float32,
        )
        # an empty detection yields shape (0,); force two columns so
        # callers can always index the x/y columns
        return np_keypoints.reshape(-1, 2)

    def init(self: Self, image: np.ndarray, bbox: tuple[int, int, int, int]) -> None:
        """
        Initialize the tracker.

        Parameters
        ----------
        image : np.ndarray
            The image to track the object in.
        bbox : tuple[int, int, int, int]
            The bounding box of the object to track.
            In format: (x1, y1, x2, y2)

        """
        image = self._to_gray(image)

        height, width = image.shape[:2]
        self._prev_bbox = constrain(bbox, (width, height))
        self._prev_frame = image

        # only keep keypoints which lie inside the object's bounding box,
        # otherwise every feature in the frame would be tracked
        keypoints = self._detect_keypoints(image)
        x1, y1, x2, y2 = self._prev_bbox
        inside = (
            (keypoints[:, 0] >= x1)
            & (keypoints[:, 0] <= x2)
            & (keypoints[:, 1] >= y1)
            & (keypoints[:, 1] <= y2)
        )
        self._prev_keypoints = keypoints[inside]

    def update(self: Self, image: np.ndarray) -> tuple[bool, tuple[int, int, int, int]]:
        """
        Update the tracker.

        Parameters
        ----------
        image : np.ndarray
            The image to track the object in.

        Returns
        -------
        bool
            Whether the update was successful. False when there are no
            keypoints to track or none could be followed into the new
            frame.
        tuple[int, int, int, int]
            The bounding box of the object. In format: (x1, y1, x2, y2)
            On failure this is the last known bounding box.

        """
        image = self._to_gray(image)

        # nothing to track, report the last known box
        if self._prev_keypoints.size == 0:
            self._prev_frame = image
            return False, self._prev_bbox

        data = cv2.calcOpticalFlowPyrLK(  # type: ignore[call-overload]
            self._prev_frame,
            image,
            self._prev_keypoints,
            None,
            **self._lk_params,
        )
        current_keypoints = data[0]
        status = data[1]

        # the frame advances whether or not tracking succeeds
        self._prev_frame = image

        if current_keypoints is None or status is None:
            return False, self._prev_bbox

        # keep only the keypoints for which the flow was found
        mask = status.ravel() == 1
        current_keypoints = current_keypoints[mask]
        if current_keypoints.size == 0:
            return False, self._prev_bbox

        x, y, w, h = cv2.boundingRect(current_keypoints)
        bbox = (x, y, x + w, y + h)

        self._prev_keypoints = current_keypoints
        self._prev_bbox = bbox

        return True, bbox
class KLTMultiTracker(AbstractMultiTracker):
    """
    Class for tracking multiple objects with the KLT algorithm.

    ORB keypoints are detected once over the whole initial image and then
    assigned to each bounding box. Every box's keypoints are followed from
    frame to frame with ``cv2.calcOpticalFlowPyrLK`` and the box is updated
    to the bounding rectangle of its successfully tracked keypoints.
    """

    def __init__(
        self: Self,
        num_features: int = 750,
        window_size: tuple[int, int] = (15, 15),
        max_level: int = 2,
        criteria: tuple[int, int, float] = (
            cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
            10,
            0.03,
        ),
    ) -> None:
        """
        Create a new KLTMultiTracker object.

        Parameters
        ----------
        num_features : int
            The number of features to track.
            By default, this is set to 750.
        window_size : tuple[int, int]
            The size of the window used for tracking.
            By default, this is set to (15, 15).
        max_level : int
            The maximum pyramid level for tracking.
            By default, this is set to 2.
        criteria : tuple[int, int, float]
            The criteria used for tracking.
            By default, this is set to
            (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03).

        """
        self._window_size = window_size
        self._max_level = max_level
        self._criteria = criteria
        # keyword arguments forwarded to cv2.calcOpticalFlowPyrLK
        self._lk_params = {
            "winSize": self._window_size,
            "maxLevel": self._max_level,
            "criteria": self._criteria,
        }
        self._orb: cv2.ORB = cv2.ORB_create(nfeatures=num_features)  # type: ignore[attr-defined]

        # state storage
        self._prev_frame: np.ndarray = np.zeros((1, 1))
        self._prev_bboxes: list[tuple[int, int, int, int]] = []
        self._prev_keypoints: list[np.ndarray] = []

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Convert a 3-channel BGR image to grayscale, pass others through."""
        if image.ndim == 3 and image.shape[2] == 3:
            return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return image

    def _detect_keypoints(self: Self, image: np.ndarray) -> np.ndarray:
        """
        Detect ORB keypoints in the image.

        Parameters
        ----------
        image : np.ndarray
            The image to detect keypoints in.

        Returns
        -------
        np.ndarray
            The (x, y) keypoint locations as a float32 array of shape (N, 2).
            The shape is (0, 2) when no keypoints are detected.

        """
        keypoints = self._orb.detect(image, None)
        np_keypoints: np.ndarray = np.asarray(
            [kp.pt for kp in keypoints],
            dtype=np.float32,
        )
        # an empty detection yields shape (0,); force two columns so the
        # per-box column indexing in init() cannot raise IndexError
        return np_keypoints.reshape(-1, 2)

    def init(
        self: Self,
        image: np.ndarray,
        bboxes: list[tuple[int, int, int, int]],
    ) -> None:
        """
        Initialize the tracker.

        Parameters
        ----------
        image : np.ndarray
            The image to track the object in.
        bboxes : list[tuple[int, int, int, int]]
            The bounding boxes of the objects to track.
            In format: (x1, y1, x2, y2)

        """
        # store image and convert accordingly
        image = self._to_gray(image)
        self._prev_frame = image

        # ensure constraint on bounding boxes
        height, width = image.shape[:2]
        bboxes = [constrain(bbox, (width, height)) for bbox in bboxes]
        self._prev_bboxes = bboxes

        # detect once over the full image, then match keypoints to boxes
        global_keypoints = self._detect_keypoints(image)
        xs = global_keypoints[:, 0]
        ys = global_keypoints[:, 1]
        self._prev_keypoints = [
            global_keypoints[(xs >= x1) & (xs <= x2) & (ys >= y1) & (ys <= y2)]
            for x1, y1, x2, y2 in bboxes
        ]

    def update(
        self: Self,
        image: np.ndarray,
    ) -> list[tuple[bool, tuple[int, int, int, int]]]:
        """
        Update the tracker.

        Parameters
        ----------
        image : np.ndarray
            The image to track the object in.

        Returns
        -------
        list[tuple[bool, tuple[int, int, int, int]]]
            A list of outputs of bool, tuple[int, int, int, int]
            Whether or not the track was successful and the bounding box.
            bbox is in format: (x1, y1, x2, y2)
            For a failed track the last known bounding box is returned.

        """
        # convert frame if needed
        image = self._to_gray(image)

        results: list[tuple[bool, tuple[int, int, int, int]]] = []
        for i, (prev_kp, bbox) in enumerate(
            zip(self._prev_keypoints, self._prev_bboxes),
        ):
            # if no keypoints simply skip
            if prev_kp.size == 0:
                results.append((False, bbox))
                continue

            # compute new box using optical flow
            new_kp, status, _ = cv2.calcOpticalFlowPyrLK(  # type: ignore[call-overload]
                self._prev_frame,
                image,
                prev_kp,
                None,
                **self._lk_params,
            )

            # guard before touching status so a missing result is reported
            # as a failed track instead of raising
            if new_kp is None or status is None:
                results.append((False, bbox))
                continue

            # mask based on valid keypoints
            new_kp = new_kp[status.ravel() == 1]
            if new_kp.size == 0:
                # stored keypoints and box are left untouched on failure
                results.append((False, bbox))
                continue

            # create the new bbox
            x, y, w, h = cv2.boundingRect(new_kp)
            new_bbox = (x, y, x + w, y + h)

            # update state
            self._prev_keypoints[i] = new_kp
            self._prev_bboxes[i] = new_bbox

            results.append((True, new_bbox))

        # update frame state and return
        self._prev_frame = image

        return results