"""
GazeDistance feature detector class for 2 person gaze interaction components.
"""
import logging
import os
import numpy as np
from ....utils import linear_algebra as alg
from ..base_feature import BaseFeature
from ..gaze_interaction import utils as gaze_interaction_utils
class GazeDistance(BaseFeature):
    """
    Feature detector that computes the ``gaze_interaction`` component.

    The detector consumes the output of the gaze method detector and, when more
    than one camera is available, the 3d landmarks of the body-joints method
    detector. For a 2-person setting it computes the distance between the gaze
    line of one person and the face centre (mean of the face landmarks) of the
    other person, decides whether each gaze is directed at the other face, and
    whether the gaze is mutual.

    Component: gaze_interaction

    Attributes:
        components (list): Names of the components this class is responsible
            for. ``gaze_interaction`` consists of:
              - distance_gaze, distances from the gaze (of person 1) to the face
                (of person 2) and vice versa
              - gaze_look_at, boolean array indicating whether the gaze is
                directed at the face
              - gaze_mutual, boolean array indicating whether the gaze is mutual
        algorithm (str): The name of the algorithm used to compute the
            component; also used as the output file name.
        gaze_detector_file_list (list): One list of gaze detector output file
            paths per camera tag.
        threshold_look_at (float): Maximum gaze-to-face distance for which the
            gaze still counts as directed at the face.
    """

    components = ["gaze_interaction"]
    algorithm = "gaze_distance"

    # Substrings used to group the gaze detector output files by camera.
    _camera_tags = ("cam3", "cam4")

    def __init__(self, config, io, data):
        """
        Set up the feature detector and locate the gaze detector output files.

        Args:
            config (dict): Configuration of the feature detector. Must contain
                'input_detector_names' (an iterable of (component, algorithm)
                pairs, one of which mentions "gaze") and 'threshold_look_at'.
            io (class): Input/output handler providing
                'get_detector_output_folder', which returns the output folder
                of the gaze detector.
            data (class): The data class.

        Raises:
            IndexError: If 'input_detector_names' holds no gaze entry.
        """
        super().__init__(config, io, data, requires_out_folder=False)

        # Pick the first (component, algorithm) pair that refers to the gaze
        # detector.
        gaze_entries = [
            name
            for name in config["input_detector_names"]
            if any("gaze" in part for part in name)
        ]
        if not gaze_entries:
            raise IndexError(
                "config['input_detector_names'] contains no gaze detector entry."
            )
        gaze_component, gaze_algorithm = gaze_entries[0]

        gaze_out_folder = io.get_detector_output_folder(
            gaze_component, gaze_algorithm, "output"
        )

        # List the folder once and split the files by camera tag.
        filenames = os.listdir(gaze_out_folder)
        self.gaze_detector_file_list = [
            [os.path.join(gaze_out_folder, f) for f in filenames if tag in f]
            for tag in self._camera_tags
        ]

        self.threshold_look_at = config["threshold_look_at"]
        logging.info(f"Feature detector for component {self.components} initialized.")

    def _load_gaze_and_head(self):
        """
        Load the gaze vectors and the per-subject face centre from the inputs.

        With a single camera the 2d gaze and the 2d landmarks stored in the gaze
        file are used; otherwise the 3d gaze is combined with the
        ``face_landmarks`` keypoints of the 3d landmarks file. A ``*_filtered``
        gaze array is preferred when the gaze file describes one.

        Returns:
            tuple: ``(dim, gaze, gaze_description, head, keypoints_description)``
            where ``dim`` is "2d" or "3d".
        """
        # NOTE: allow_pickle is needed because 'data_description' is stored as a
        # pickled dict. Only load files produced by trusted pipeline runs.
        gaze_path = self.get_input(self.input_files, "gaze", listdir=False)
        with np.load(gaze_path, allow_pickle=True) as gaze_data:
            description = gaze_data["data_description"].item()
            camera_names = description["landmarks_2d"]["axis1"]
            dim = "2d" if len(camera_names) == 1 else "3d"

            filtered_name = f"{dim}_filtered"
            data_name = filtered_name if filtered_name in description else dim
            gaze = gaze_data[data_name]
            gaze_description = description[data_name]

            if dim == "2d":
                # Face centre: mean over the landmark axis (second to last).
                head = gaze_data["landmarks_2d"].mean(axis=-2)
                keypoints_description = description["landmarks_2d"]

        if dim == "3d":
            keypoints_path = self.get_input(
                self.input_files, "landmarks", listdir=False
            )
            with np.load(keypoints_path, allow_pickle=True) as keypoints_data:
                keypoints = keypoints_data["3d"]
                keypoints_description = keypoints_data["data_description"].item()[
                    "3d"
                ]
            # Boolean mask over axis 3 selecting the face landmark keypoints.
            indices = [
                "face_landmarks" in key for key in keypoints_description["axis3"]
            ]
            head = keypoints[:, :, :, indices].mean(axis=-2)

        return dim, gaze, gaze_description, head, keypoints_description

    @staticmethod
    def _to_pairwise_layout(arr):
        """
        Expand a per-subject array so its last axis addresses the target subject.

        The values of subject 0 are placed behind a zero block (slot of subject
        1) and the values of subject 1 in front of a zero block (slot of subject
        0); the slot a subject would have towards itself stays zero/False.
        """
        return np.stack(
            (
                np.concatenate((np.zeros_like(arr[0]), arr[0]), axis=-1),
                np.concatenate((arr[1], np.zeros_like(arr[1])), axis=-1),
            ),
            axis=0,
        )

    def compute(self):
        """
        Compute the gaze_interaction component and save it as compressed .npz.

        For both subjects the distance between the own gaze line (starting at
        the own face centre) and the face centre of the other subject is
        computed with ``alg.distance_line_point``. A gaze counts as "look at"
        when that distance is at most ``threshold_look_at`` and as mutual when
        both subjects look at each other.

        The saved file contains (with ``dim`` being "2d" or "3d"):
          - distance_gaze_{dim}: distances from the gaze of one person to the
            face of the other person, and vice versa
          - gaze_look_at_{dim}: boolean array indicating whether the gaze is
            directed at the face
          - gaze_mutual_{dim}: boolean array indicating whether the gaze is
            mutual
          - data_description: dictionary describing the axes of the arrays above

        Returns:
            list or None: ``[distances, look_at, mutual]`` in their compact
            (not pairwise expanded) form for visualization, or None when fewer
            than two subjects are available.
        """
        (
            dim,
            gaze,
            gaze_description,
            head,
            keypoints_description,
        ) = self._load_gaze_and_head()

        assert (
            gaze_description["axis0"] == keypoints_description["axis0"]
        ), "Gaze and landmark data describe different subjects."
        subject_description = gaze_description["axis0"]

        if len(subject_description) < 2:
            logging.info(
                f"The selected data shows {len(subject_description)} subjects. "
                "Gaze interaction can not be calculated."
            )
            return None

        # Index 0 / 1 on the first axis address the two subjects.
        distance_p1 = alg.distance_line_point(head[0], gaze[0], head[1])
        distance_p2 = alg.distance_line_point(head[1], gaze[1], head[0])
        distances_face = np.stack((distance_p1, distance_p2), axis=0)

        # calculate look_at and mutual_gaze
        look_at = distances_face <= self.threshold_look_at
        mutual = np.all(look_at, axis=0, keepdims=True)

        visualization_data = [distances_face, look_at, mutual]

        # Expand to the pairwise layout; mutual is duplicated so that it has one
        # entry per subject like the other two arrays.
        arrays = {
            f"distance_gaze_{dim}": self._to_pairwise_layout(distances_face),
            f"gaze_look_at_{dim}": self._to_pairwise_layout(look_at),
            f"gaze_mutual_{dim}": self._to_pairwise_layout(
                np.concatenate((mutual, mutual), axis=0)
            ),
        }
        axis3_prefixes = {
            f"distance_gaze_{dim}": "to_face",
            f"gaze_look_at_{dim}": "look_at",
            f"gaze_mutual_{dim}": "with",
        }

        # save as npz file
        data_description = {
            key: dict(
                axis0=subject_description,
                axis1=[dim],
                axis2=gaze_description["axis2"],
                axis3=[f"{prefix}_{subj}" for subj in subject_description],
            )
            for key, prefix in axis3_prefixes.items()
        }
        out_dict = dict(arrays, data_description=data_description)

        filepath = os.path.join(
            self.result_folders["gaze_interaction"], f"{self.algorithm}.npz"
        )
        np.savez_compressed(filepath, **out_dict)

        logging.info(
            f"Computation of feature detector for {self.components} completed."
        )

        return visualization_data

    def visualization(self, data):
        """
        Create visualizations for the computed gaze interaction features.

        The distances, the look-at flags and the mutual-gaze flags are
        concatenated along the last axis and handed to
        ``gaze_interaction_utils.visualize_gaze_interaction`` together with the
        category names, the visualization folder and the subject descriptions.
        Nothing is done when ``data`` is None.

        Args:
            data (list or None): Output of the compute method containing:
                - the distances of the gaze to the face
                - a boolean array indicating whether the gaze is directed at
                  the face
                - a boolean array indicating whether the gaze is mutual.

        Returns:
            None
        """
        if data is None:
            return

        logging.info(
            f"Visualizing the feature detector output {self.components}. "
            "This may take longer due to the evolving linegraph video creation."
        )

        distances, look_at, mutual = data
        # Duplicate mutual so it has one entry per subject like the others.
        mutual = np.concatenate((mutual, mutual), axis=0)

        # NOTE: the binary series are passed on unscaled (0/1); they are not
        # rescaled to the value range of the distances.
        input_data = np.concatenate((distances, look_at, mutual), axis=-1)
        categories = ["distance_gaze_face", "gaze_look_at", "gaze_mutual"]

        gaze_interaction_utils.visualize_gaze_interaction(
            input_data, categories, self.viz_folder, self.subjects_descr
        )

        logging.info(
            f"Visualization of feature detector {self.components} completed."
        )

    def post_compute(self, data):
        """No post-processing is performed for this feature detector."""