* Add project code * Logger improvements * Improvements to web demo code * added create_wlasl_landmarks_dataset.py and xtract_mediapipe_landmarks.py * Fix rotation augmentation * fixed error in docstring, and removed unnecessary replace -1 -> 0 * Readme updates * Share base notebooks * Add notebooks and unify for different datasets * requirements update * fixes * Make evaluate more deterministic * Allow training with clearml * refactor preprocessing and apply linter * Minor fixes * Minor notebook tweaks * Readme updates * Fix PR comments * Remove unneeded code * Add banner to Readme --------- Co-authored-by: Gabriel Lema <gabriel.lema@xmartlabs.com>
242 lines
11 KiB
Python
242 lines
11 KiB
Python
|
|
from typing import Tuple
|
|
import pandas as pd
|
|
from utils import get_logger
|
|
|
|
|
|
BODY_IDENTIFIERS = [
|
|
"nose",
|
|
"neck",
|
|
"rightEye",
|
|
"leftEye",
|
|
"rightEar",
|
|
"leftEar",
|
|
"rightShoulder",
|
|
"leftShoulder",
|
|
"rightElbow",
|
|
"leftElbow",
|
|
"rightWrist",
|
|
"leftWrist"
|
|
]
|
|
|
|
|
|
def normalize_body_full(df: pd.DataFrame) -> Tuple[pd.DataFrame, list]:
|
|
"""
|
|
Normalizes the body position data using the Bohacek-normalization algorithm.
|
|
|
|
:param df: pd.DataFrame to be normalized
|
|
:return: pd.DataFrame with normalized values for body pose
|
|
"""
|
|
logger = get_logger(__name__)
|
|
|
|
# TODO: Fix division by zero
|
|
|
|
normalized_df = pd.DataFrame(columns=df.columns)
|
|
invalid_row_indexes = []
|
|
body_landmarks = {"X": [], "Y": []}
|
|
|
|
# Construct the relevant identifiers
|
|
for identifier in BODY_IDENTIFIERS:
|
|
body_landmarks["X"].append(identifier + "_X")
|
|
body_landmarks["Y"].append(identifier + "_Y")
|
|
|
|
# Iterate over all of the records in the dataset
|
|
for index, row in df.iterrows():
|
|
|
|
sequence_size = len(row["leftEar_Y"])
|
|
valid_sequence = True
|
|
original_row = row
|
|
|
|
last_starting_point, last_ending_point = None, None
|
|
|
|
# Treat each element of the sequence (analyzed frame) individually
|
|
for sequence_index in range(sequence_size):
|
|
|
|
# Prevent from even starting the analysis if some necessary elements are not present
|
|
if (row["leftShoulder_X"][sequence_index] == 0 or row["rightShoulder_X"][sequence_index] == 0) and \
|
|
(row["neck_X"][sequence_index] == 0 or row["nose_X"][sequence_index] == 0):
|
|
if not last_starting_point:
|
|
valid_sequence = False
|
|
continue
|
|
|
|
else:
|
|
starting_point, ending_point = last_starting_point, last_ending_point
|
|
|
|
else:
|
|
|
|
# NOTE:
|
|
#
|
|
# While in the paper, it is written that the head metric is calculated by halving the shoulder distance,
|
|
# this is meant for the distance between the very ends of one's shoulder, as literature studying body
|
|
# metrics and ratios generally states. The Vision Pose Estimation API, however, seems to be predicting
|
|
# rather the center of one's shoulder. Based on our experiments and manual reviews of the data,
|
|
# employing
|
|
# this as just the plain shoulder distance seems to be more corresponding to the desired metric.
|
|
#
|
|
# Please, review this if using other third-party pose estimation libraries.
|
|
|
|
if row["leftShoulder_X"][sequence_index] != 0 and row["rightShoulder_X"][sequence_index] != 0:
|
|
left_shoulder = (row["leftShoulder_X"][sequence_index], row["leftShoulder_Y"][sequence_index])
|
|
right_shoulder = (row["rightShoulder_X"][sequence_index], row["rightShoulder_Y"][sequence_index])
|
|
shoulder_distance = ((((left_shoulder[0] - right_shoulder[0]) ** 2) + (
|
|
(left_shoulder[1] - right_shoulder[1]) ** 2)) ** 0.5)
|
|
head_metric = shoulder_distance
|
|
else:
|
|
neck = (row["neck_X"][sequence_index], row["neck_Y"][sequence_index])
|
|
nose = (row["nose_X"][sequence_index], row["nose_Y"][sequence_index])
|
|
neck_nose_distance = ((((neck[0] - nose[0]) ** 2) + ((neck[1] - nose[1]) ** 2)) ** 0.5)
|
|
head_metric = neck_nose_distance
|
|
|
|
# Set the starting and ending point of the normalization bounding box
|
|
starting_point = [row["neck_X"][sequence_index] - 3 * head_metric,
|
|
row["leftEye_Y"][sequence_index] + (head_metric / 2)]
|
|
ending_point = [row["neck_X"][sequence_index] + 3 * head_metric, starting_point[1] - 6 * head_metric]
|
|
|
|
last_starting_point, last_ending_point = starting_point, ending_point
|
|
|
|
# Ensure that all of the bounding-box-defining coordinates are not out of the picture
|
|
if starting_point[0] < 0:
|
|
starting_point[0] = 0
|
|
if starting_point[1] < 0:
|
|
starting_point[1] = 0
|
|
if ending_point[0] < 0:
|
|
ending_point[0] = 0
|
|
if ending_point[1] < 0:
|
|
ending_point[1] = 0
|
|
|
|
# Normalize individual landmarks and save the results
|
|
for identifier in BODY_IDENTIFIERS:
|
|
key = identifier + "_"
|
|
|
|
# Prevent from trying to normalize incorrectly captured points
|
|
if row[key + "X"][sequence_index] == 0:
|
|
continue
|
|
|
|
normalized_x = (row[key + "X"][sequence_index] - starting_point[0]) / (ending_point[0] -
|
|
starting_point[0])
|
|
normalized_y = (row[key + "Y"][sequence_index] - ending_point[1]) / (starting_point[1] -
|
|
ending_point[1])
|
|
|
|
row[key + "X"][sequence_index] = normalized_x
|
|
row[key + "Y"][sequence_index] = normalized_y
|
|
|
|
if valid_sequence:
|
|
normalized_df = normalized_df.append(row, ignore_index=True)
|
|
else:
|
|
logger.warning(" BODY LANDMARKS: One video instance could not be normalized.")
|
|
normalized_df = normalized_df.append(original_row, ignore_index=True)
|
|
invalid_row_indexes.append(index)
|
|
|
|
logger.info("The normalization of body is finished.")
|
|
logger.info("\t-> Original size:", df.shape[0])
|
|
logger.info("\t-> Normalized size:", normalized_df.shape[0])
|
|
logger.info("\t-> Problematic videos:", len(invalid_row_indexes))
|
|
|
|
return normalized_df, invalid_row_indexes
|
|
|
|
|
|
def normalize_single_dict(row: dict):
|
|
"""
|
|
Normalizes the skeletal data for a given sequence of frames with signer's body pose data. The normalization follows
|
|
the definition from our paper.
|
|
|
|
:param row: Dictionary containing key-value pairs with joint identifiers and corresponding lists (sequences) of
|
|
that particular joints coordinates
|
|
:return: Dictionary with normalized skeletal data (following the same schema as input data)
|
|
"""
|
|
|
|
sequence_size = len(row["leftEar"])
|
|
valid_sequence = True
|
|
original_row = row
|
|
logger = get_logger(__name__)
|
|
|
|
last_starting_point, last_ending_point = None, None
|
|
|
|
# Treat each element of the sequence (analyzed frame) individually
|
|
for sequence_index in range(sequence_size):
|
|
left_shoulder = (row["leftShoulder"][sequence_index][0], row["leftShoulder"][sequence_index][1])
|
|
right_shoulder = (row["rightShoulder"][sequence_index][0], row["rightShoulder"][sequence_index][1])
|
|
neck = (row["neck"][sequence_index][0], row["neck"][sequence_index][1])
|
|
nose = (row["nose"][sequence_index][0], row["nose"][sequence_index][1])
|
|
# Prevent from even starting the analysis if some necessary elements are not present
|
|
if (left_shoulder[0] == 0 or right_shoulder[0] == 0
|
|
or (left_shoulder[0] == right_shoulder[0] and left_shoulder[1] == right_shoulder[1])) and (
|
|
neck[0] == 0 or nose[0] == 0 or (neck[0] == nose[0] and neck[1] == nose[1])):
|
|
if not last_starting_point:
|
|
valid_sequence = False
|
|
continue
|
|
|
|
else:
|
|
starting_point, ending_point = last_starting_point, last_ending_point
|
|
|
|
else:
|
|
|
|
# NOTE:
|
|
#
|
|
# While in the paper, it is written that the head metric is calculated by halving the shoulder distance,
|
|
# this is meant for the distance between the very ends of one's shoulder, as literature studying body
|
|
# metrics and ratios generally states. The Vision Pose Estimation API, however, seems to be predicting
|
|
# rather the center of one's shoulder. Based on our experiments and manual reviews of the data, employing
|
|
# this as just the plain shoulder distance seems to be more corresponding to the desired metric.
|
|
#
|
|
# Please, review this if using other third-party pose estimation libraries.
|
|
|
|
if left_shoulder[0] != 0 and right_shoulder[0] != 0 and \
|
|
(left_shoulder[0] != right_shoulder[0] or left_shoulder[1] != right_shoulder[1]):
|
|
shoulder_distance = ((((left_shoulder[0] - right_shoulder[0]) ** 2) + (
|
|
(left_shoulder[1] - right_shoulder[1]) ** 2)) ** 0.5)
|
|
head_metric = shoulder_distance
|
|
else:
|
|
neck_nose_distance = ((((neck[0] - nose[0]) ** 2) + ((neck[1] - nose[1]) ** 2)) ** 0.5)
|
|
head_metric = neck_nose_distance
|
|
|
|
# Set the starting and ending point of the normalization bounding box
|
|
# starting_point = [row["neck"][sequence_index][0] - 3 * head_metric,
|
|
# row["leftEye"][sequence_index][1] + (head_metric / 2)]
|
|
starting_point = [row["neck"][sequence_index][0] - 3 * head_metric,
|
|
row["leftEye"][sequence_index][1] + head_metric]
|
|
ending_point = [row["neck"][sequence_index][0] + 3 * head_metric, starting_point[1] - 6 * head_metric]
|
|
|
|
last_starting_point, last_ending_point = starting_point, ending_point
|
|
|
|
# Ensure that all of the bounding-box-defining coordinates are not out of the picture
|
|
if starting_point[0] < 0:
|
|
starting_point[0] = 0
|
|
if starting_point[1] < 0:
|
|
starting_point[1] = 0
|
|
if ending_point[0] < 0:
|
|
ending_point[0] = 0
|
|
if ending_point[1] < 0:
|
|
ending_point[1] = 0
|
|
|
|
# Normalize individual landmarks and save the results
|
|
for identifier in BODY_IDENTIFIERS:
|
|
key = identifier
|
|
|
|
# Prevent from trying to normalize incorrectly captured points
|
|
if row[key][sequence_index][0] == 0:
|
|
continue
|
|
|
|
if (ending_point[0] - starting_point[0]) == 0 or (starting_point[1] - ending_point[1]) == 0:
|
|
logger.warning("Problematic normalization")
|
|
valid_sequence = False
|
|
break
|
|
|
|
normalized_x = (row[key][sequence_index][0] - starting_point[0]) / (ending_point[0] - starting_point[0])
|
|
normalized_y = (row[key][sequence_index][1] - ending_point[1]) / (starting_point[1] - ending_point[1])
|
|
|
|
row[key][sequence_index] = list(row[key][sequence_index])
|
|
|
|
row[key][sequence_index][0] = normalized_x
|
|
row[key][sequence_index][1] = normalized_y
|
|
|
|
if valid_sequence:
|
|
return row
|
|
|
|
else:
|
|
return original_row
|
|
|
|
|
|
if __name__ == "__main__":
|
|
pass
|