Source code for nicetoolbox.detectors.method_detectors.mmpose.pose_utils

"""
Pose estimation utilities. # TODO: Move to a more appropriate location?
"""

import logging
from typing import Optional

import numpy as np
import scipy.interpolate as interp


def _frames_in_short_gaps(observed, max_empty):
    """Locate the missing frames that lie inside gaps short enough to bridge.

    Args:
        observed (ndarray): 1-D boolean mask, True where a frame holds a measurement.
        max_empty (int): Largest index distance between two neighbouring observed
            frames that is still bridged.

    Returns:
        tuple[ndarray, ndarray]: Sorted indices of the observed frames, and the
        indices of the missing frames that should be filled (possibly empty).
    """
    observed_idx = np.flatnonzero(observed)
    nothing_to_fill = np.empty(0, dtype=np.intp)
    if observed_idx.size < 2:
        # Interpolation needs an observed frame on both sides of a gap.
        return observed_idx, nothing_to_fill

    spans = np.diff(observed_idx)
    # span == 1 means adjacent observed frames (nothing missing in between);
    # span > max_empty means the gap is too long to be trusted.
    bridgeable = (spans > 1) & (spans <= max_empty)
    if not bridgeable.any():
        return observed_idx, nothing_to_fill

    starts = observed_idx[:-1][bridgeable]
    stops = observed_idx[1:][bridgeable]
    fill_idx = np.concatenate([np.arange(start + 1, stop) for start, stop in zip(starts, stops)])
    return observed_idx, fill_idx


def interpolate_data(data, is_3d=True, max_empty=10):  # TODO make max_empty 1/3 of FPS
    """
    Linearly interpolate short runs of missing (NaN) keypoint coordinates over time.

    For every (person, camera, keypoint) track, missing frames that are enclosed by
    two observed frames are filled by linear interpolation between those two
    frames. Long gaps, as well as leading and trailing missing frames, stay NaN.
    Observed values are never modified.

    A frame counts as observed only if all interpolated coordinates (x, y and,
    when ``is_3d`` is set, z) are non-NaN. Channels beyond the interpolated
    coordinates (e.g. a confidence score) are left untouched.

    Args:
        data (ndarray): The input data array with shape
            (num_persons, num_cameras, num_frames, num_keypoints, _).
            It is modified in place.
        is_3d (bool, optional): If True, the first three channels of the last axis
            are interpolated, otherwise only the first two. Defaults to True.
        max_empty (int, optional): Largest frame-index distance between two
            neighbouring observed frames that is still bridged, i.e. a gap may
            contain at most ``max_empty - 1`` missing frames. Defaults to 10.

    Returns:
        ndarray: The same array object as ``data``, with short gaps filled.
    """
    num_people, num_cameras, _, num_keypoints, _ = data.shape
    num_coords = 3 if is_3d else 2

    for i in range(num_people):
        for j in range(num_cameras):
            for k in range(num_keypoints):
                # Basic indexing returns a view, so writes below land in `data`.
                track = data[i, j, :, k, :num_coords]
                observed = ~np.isnan(track).any(axis=1)
                if observed.all():
                    continue

                observed_idx, fill_idx = _frames_in_short_gaps(observed, max_empty)
                if fill_idx.size == 0:
                    continue

                # Using every observed frame as a knot makes each filled frame
                # depend only on its two enclosing observed frames; a short gap
                # is never bridged "through" a neighbouring long gap.
                for c in range(num_coords):
                    track[fill_idx, c] = np.interp(fill_idx, observed_idx, track[observed_idx, c])
    return data
def create_iou_all_pairs(data):
    """
    Compute the intersection over union (IoU) between the bounding boxes of every
    pair of subjects, independently for each camera and frame.

    Box corners are normalised first, so a box given as (x2, y2, x1, y1) is treated
    like its (x1, y1, x2, y2) equivalent. Boxes with non-positive width or height
    are reported through ``logging.error`` and contribute zero area. A pair whose
    union area is zero (e.g. a degenerate box compared with itself) yields NaN.

    Args:
        data (np.ndarray): Bounding box array with shape
            (#Subject, #Camera, #Frame, 'full_body', BBox), where each BBox is
            represented as [x1, y1, x2, y2, conf]. Only index 0 of the
            'full_body' axis is read; the confidence value is ignored.

    Returns:
        iou_array (np.ndarray): float32 IoU laid out as
        (subject, camera, frame, with_subject).

    Raises:
        ValueError: If ``data`` is not 5-dimensional or its last axis does not
            have length 5.
    """
    if data.ndim != 5:
        message = f"Expected a 5-D array (subject, camera, frame, label, bbox), got {data.ndim}-D."
        logging.error(message)
        raise ValueError(message)
    if data.shape[-1] != 5:
        message = "Last dim must contain bbox values [x1,y1,x2,y2] and confidence score."
        logging.error(message)
        raise ValueError(message)

    # boxes: (S, C, F, 4) with coords (x1, y1, x2, y2) in the last dim; index 0
    # on the 4th axis of `data` selects the "full_body" label.
    boxes = data[..., 0, :4].astype(np.float32)
    x1_raw, y1_raw, x2_raw, y2_raw = boxes[..., 0], boxes[..., 1], boxes[..., 2], boxes[..., 3]

    # Normalise corner order so that (x1, y1) is always the min corner.
    x1 = np.minimum(x1_raw, x2_raw)
    x2 = np.maximum(x1_raw, x2_raw)
    y1 = np.minimum(y1_raw, y2_raw)
    y2 = np.maximum(y1_raw, y2_raw)

    w = x2 - x1
    h = y2 - y1

    invalid_mask = (w <= 0) | (h <= 0)
    if np.any(invalid_mask):
        logging.error(
            "Found %d boxes with non-positive area. The area will be saved as zero for those cases.",
            np.count_nonzero(invalid_mask),
        )

    # Areas (safe): clamp so degenerate boxes contribute exactly zero.
    w = np.maximum(0.0, w)
    h = np.maximum(0.0, h)
    area = w * h  # (S, C, F)

    # Pairwise intersections across subjects -> (S, S, C, F).
    # Broadcasting a (S, 1, C, F) view against a (1, S, C, F) view yields, for
    # every ordered subject pair (a, b), the corners of their intersection box:
    # the larger of the two min corners and the smaller of the two max corners.
    inter_x1 = np.maximum(x1[:, None, :, :], x1[None, :, :, :])
    inter_y1 = np.maximum(y1[:, None, :, :], y1[None, :, :, :])
    inter_x2 = np.minimum(x2[:, None, :, :], x2[None, :, :, :])
    inter_y2 = np.minimum(y2[:, None, :, :], y2[None, :, :, :])

    # A negative extent means the boxes do not overlap; clamp it to zero.
    inter_w = np.maximum(0.0, inter_x2 - inter_x1)
    inter_h = np.maximum(0.0, inter_y2 - inter_y1)
    inter = inter_w * inter_h  # (S, S, C, F)

    # Union = sum of both areas minus the area counted twice.
    union = area[:, None, :, :] + area[None, :, :, :] - inter

    # 0 / 0 for pairs with empty union is allowed to produce NaN silently.
    with np.errstate(divide="ignore", invalid="ignore"):
        iou_sscf = inter / union

    # Reorder to (S, C, F, S) so the compared subject is the last axis.
    iou_scfs = np.transpose(iou_sscf, (0, 2, 3, 1)).astype(np.float32)
    return iou_scfs
def merge_2d_for_pose_overlay(
    interpolated: np.ndarray,
    projected: Optional[np.ndarray] = None,
    raw: Optional[np.ndarray] = None,
) -> np.ndarray:
    """
    Combine up to three 2D pose layers into one array for overlay rendering.

    Priority per keypoint: ``projected`` wins wherever its x and y are both
    finite; otherwise the ``interpolated`` entry is kept; wherever the result
    still lacks a finite x/y pair, ``raw`` is used if its x and y are finite.

    The layers are expected to share a shape of (..., 3) holding x, y and a
    score/confidence value in the last dimension. The whole last-axis entry is
    taken from the chosen layer.

    Returns:
        np.ndarray: A new float32 array; the inputs are not modified.
    """

    def xy_is_finite(layer):
        # True where both the x and the y channel hold a finite number.
        return np.logical_and(np.isfinite(layer[..., 0]), np.isfinite(layer[..., 1]))

    merged = np.array(interpolated, dtype=np.float64)

    if projected is not None:
        proj_layer = np.asarray(projected, dtype=np.float64)
        merged = np.where(xy_is_finite(proj_layer)[..., np.newaxis], proj_layer, merged)

    if raw is None:
        return merged.astype(np.float32, copy=False)

    raw_layer = np.asarray(raw, dtype=np.float64)
    raw_usable = xy_is_finite(raw_layer)
    still_missing = np.logical_not(xy_is_finite(merged))
    take_raw = np.logical_and(still_missing, raw_usable)
    merged = np.where(take_raw[..., np.newaxis], raw_layer, merged)
    return merged.astype(np.float32, copy=False)