# Source code for nicetoolbox.detectors.method_detectors.mmpose.pose_utils

"""
Pose estimation utilities. # TODO: Move to a more appropriate location?
"""

import logging
from typing import Optional

import numpy as np
import scipy.interpolate as interp


def _fill_short_gaps(series, max_empty):
    """Linearly fill short NaN runs of a 1-D array in place; non-NaN samples are never modified."""
    missing = np.isnan(series)
    valid_idx = np.flatnonzero(~missing)
    # Interpolation needs a valid sample on both sides of a gap.
    if valid_idx.size < 2 or valid_idx.size == series.shape[0]:
        return

    # Distance (in frames) between each pair of neighbouring valid samples.
    gaps = np.diff(valid_idx)

    # For every missing frame, locate the pair of valid samples that brackets it.
    missing_idx = np.flatnonzero(missing)
    right = np.searchsorted(valid_idx, missing_idx)
    # Drop leading/trailing NaNs: they have no valid sample on one side.
    interior = (right > 0) & (right < valid_idx.size)
    missing_idx = missing_idx[interior]
    right = right[interior]

    # Keep only frames whose own bracketing pair is close enough. Deciding per gap
    # (instead of chaining the starts of short gaps) guarantees that a long gap
    # lying between two short ones is left untouched.
    fill_idx = missing_idx[gaps[right - 1] <= max_empty]
    if fill_idx.size:
        series[fill_idx] = np.interp(fill_idx, valid_idx, series[valid_idx])


def interpolate_data(
    data: np.ndarray, is_3d: bool = True, max_empty: int = 10
) -> np.ndarray:  # TODO make max_empty 1/3 of FPS
    """
    Fill short runs of missing (NaN) keypoint coordinates by linear interpolation
    along the frame axis.

    Every (person, camera, keypoint, coordinate) series is processed independently.
    A run of NaN frames is filled only when it is bounded by valid samples on both
    sides and those two samples are at most ``max_empty`` frames apart. Longer
    runs, as well as leading and trailing NaNs, are left as NaN. Samples that are
    not NaN are never overwritten.

    Args:
        data (ndarray): The input data array with shape
            (num_persons, num_cameras, num_frames, num_keypoints, _). Index 0 and 1
            of the last axis are x and y; index 2 is z when ``is_3d`` is True.
            Any other entries of the last axis are not touched.
            The array is modified in place.
        is_3d (bool, optional): Whether index 2 of the last axis is also
            interpolated. Defaults to True.
        max_empty (int, optional): Largest index distance between the two valid
            frames surrounding a gap for the gap to be filled. Because this is a
            distance between valid frames, at most ``max_empty - 1`` consecutive
            missing frames are filled. Defaults to 10.

    Returns:
        ndarray: ``data`` itself (same object), with short gaps filled.

    """
    num_people, num_cameras, _, num_keypoints, _ = data.shape
    num_coords = 3 if is_3d else 2

    for i in range(num_people):
        for j in range(num_cameras):
            for k in range(num_keypoints):
                for c in range(num_coords):
                    # Integer/slice indexing returns a view, so the helper's
                    # in-place writes land directly in `data`.
                    _fill_short_gaps(data[i, j, :, k, c], max_empty)

    return data


def create_iou_all_pairs(data):
    """
    Compute the intersection over union (IoU) of subject bounding boxes for every
    pair of subjects, per camera and frame.

    Args:
        data (np.ndarray): Bounding box array with shape
            (#Subject, #Camera, #Frame, 'full_body', BBox), where each BBox
            is represented as [x1, y1, x2, y2, conf]. Only index 0 of the
            'full_body' axis is read and the confidence value is ignored.
            Corner order does not matter: coordinates are normalised so that
            x1 <= x2 and y1 <= y2 before areas are computed.

    Returns:
        np.ndarray: float32 array laid out as (subject, camera, frame, with_subject).
            Entry ``[a, c, f, b]`` is the IoU of subjects ``a`` and ``b``.
            Pairs whose union area is zero yield NaN (0 / 0).

    Raises:
        ValueError: If ``data`` is not 5-dimensional or its last dimension does
            not have exactly 5 entries.

    """
    if data.ndim != 5:
        msg = f"Expected a 5-D array (subject, camera, frame, 'full_body', bbox), got {data.ndim} dimension(s)."
        logging.error(msg)
        raise ValueError(msg)
    if data.shape[-1] != 5:
        msg = "Last dim must contain bbox values [x1,y1,x2,y2] and confidence score."
        logging.error(msg)
        raise ValueError(msg)

    # boxes: (S, C, F, 4) with coords (x1, y1, x2, y2) in the last dim;
    # index 0 on the 4th axis selects the "full_body" label.
    boxes = data[..., 0, :4].astype(np.float32)
    x1_raw, y1_raw, x2_raw, y2_raw = boxes[..., 0], boxes[..., 1], boxes[..., 2], boxes[..., 3]
    # Normalise corner order so width/height cannot be negative because of swapped corners.
    x1 = np.minimum(x1_raw, x2_raw)
    x2 = np.maximum(x1_raw, x2_raw)
    y1 = np.minimum(y1_raw, y2_raw)
    y2 = np.maximum(y1_raw, y2_raw)

    w = x2 - x1
    h = y2 - y1
    invalid_mask = (w <= 0) | (h <= 0)
    if np.any(invalid_mask):
        logging.error(
            "Found %d boxes with non-positive area. The area will be saved as zero for those cases.",
            int(np.sum(invalid_mask)),
        )
    # Areas (safe)
    w = np.maximum(0.0, w)
    h = np.maximum(0.0, h)
    # Area per subject/camera/frame
    area = w * h  # (S,C,F)

    # Pairwise intersections across subjects -> (S,S,C,F).
    # Broadcasting (S,1,C,F) against (1,S,C,F) builds, for every subject pair,
    # the corners of the intersection rectangle:
    #    |    S1              S2
    # ================================
    # S1 |    S1          max(S1, S2)
    # S2 | max(S2, S1)        S2
    #
    # The intersection bbox of a pair is (inter_x1, inter_y1) (inter_x2, inter_y2).
    inter_x1 = np.maximum(x1[:, None, :, :], x1[None, :, :, :])
    inter_y1 = np.maximum(y1[:, None, :, :], y1[None, :, :, :])
    inter_x2 = np.minimum(x2[:, None, :, :], x2[None, :, :, :])
    inter_y2 = np.minimum(y2[:, None, :, :], y2[None, :, :, :])

    # Width/height of the intersection; a negative difference means the boxes
    # do not overlap, so it is clamped to 0.
    inter_w = np.maximum(0.0, inter_x2 - inter_x1)
    inter_h = np.maximum(0.0, inter_y2 - inter_y1)
    inter = inter_w * inter_h  # (S,S,C,F)

    # Union = sum of both areas minus the overlap counted twice -> (S,S,C,F)
    union = area[:, None, :, :] + area[None, :, :, :] - inter
    # A zero union gives 0/0; silence the warning and let the result be NaN.
    with np.errstate(divide="ignore", invalid="ignore"):
        iou_sscf = inter / union

    # Reorder to (S, C, F, S)
    iou_scfs = np.transpose(iou_sscf, (0, 2, 3, 1)).astype(np.float32)

    return iou_scfs


def merge_2d_for_pose_overlay(
    interpolated: np.ndarray,
    projected: Optional[np.ndarray] = None,
    raw: Optional[np.ndarray] = None,
) -> np.ndarray:
    """
    Merge up to three 2D pose layers into one array for OpenCV overlay videos.

    Priority per keypoint: ``projected`` when it is given and its x and y are both
    finite, otherwise ``interpolated``, and finally ``raw`` for keypoints whose
    merged x or y is still not finite. The whole last-axis entry (x, y and
    score/conf) is taken from whichever layer supplies the keypoint.

    Args:
        interpolated: Temporally interpolated 2D poses, shape (..., 3) with
            x, y, score/conf in the last dimension. Not modified.
        projected: Optional layer of the same shape that overrides
            ``interpolated`` wherever its x and y are finite.
        raw: Optional raw detector output of the same shape, used only as a
            fallback where the merged x or y is not finite.

    Returns:
        A new float32 array holding the merged layers.
    """

    def _xy_is_finite(points: np.ndarray) -> np.ndarray:
        # True where both x (index 0) and y (index 1) are finite.
        return np.isfinite(points[..., 0]) & np.isfinite(points[..., 1])

    # Work in float64 on a private copy so the caller's array is never touched.
    merged = np.array(interpolated, dtype=np.float64, copy=True)

    if projected is not None:
        proj = np.asarray(projected, dtype=np.float64)
        # The mask gets a trailing axis so it selects the whole (x, y, score) entry.
        merged = np.where(_xy_is_finite(proj)[..., np.newaxis], proj, merged)

    if raw is not None:
        detections = np.asarray(raw, dtype=np.float64)
        # Fall back to raw only where the merged result is unusable and raw is usable.
        take_raw = ~_xy_is_finite(merged) & _xy_is_finite(detections)
        merged = np.where(take_raw[..., np.newaxis], detections, merged)

    return merged.astype(np.float32, copy=False)