* Add project code * Logger improvements * Improvements to web demo code * added create_wlasl_landmarks_dataset.py and xtract_mediapipe_landmarks.py * Fix rotation augmentation * fixed error in docstring, and removed unnecessary replace -1 -> 0 * Readme updates * Share base notebooks * Add notebooks and unify for different datasets * requirements update * fixes * Make evaluate more deterministic * Allow training with clearml * refactor preprocessing and apply linter * Minor fixes * Minor notebook tweaks * Readme updates * Fix PR comments * Remove unneeded code * Add banner to Readme --------- Co-authored-by: Gabriel Lema <gabriel.lema@xmartlabs.com>
196 lines
8.0 KiB
Python
196 lines
8.0 KiB
Python
|
|
import pandas as pd
|
|
from utils import get_logger
|
|
|
|
|
|
# Names of the landmarks tracked for a single hand: the wrist first, then the
# four joints of every finger listed from the tip inwards, and finally the
# thumb, whose joints carry different names.
HAND_IDENTIFIERS = (
    ["wrist"]
    + [
        finger + joint
        for finger in ("index", "middle", "ring", "little")
        for joint in ("Tip", "DIP", "PIP", "MCP")
    ]
    + ["thumbTip", "thumbIP", "thumbMP", "thumbCMC"]
)
|
|
|
|
|
|
def normalize_hands_full(df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalizes the hands position data using the Bohacek-normalization algorithm.

    For every record, hand and frame, a square bounding box (the landmarks' extent plus a 10 % margin of the longer
    side) is fitted around the non-zero landmarks, and each non-zero landmark is re-expressed relative to that box.
    Frames without any usable landmark are reported through the logger and left untouched; landmarks whose X value is
    0 are treated as not captured and are left untouched as well.

    Side effects: the columns of `df` are renamed in place ("_left_" -> "_0_", "_right_" -> "_1_"), and the per-frame
    sequences stored in the cells are written to in place (presumably the very same objects held by `df`, so the input
    data changes too -- confirm before relying on the input staying intact).

    :param df: pd.DataFrame to be normalized; expected to hold, for every hand landmark, the columns
        "<identifier>_<hand>_X" and "<identifier>_<hand>_Y" whose cells are mutable per-frame sequences
    :return: pd.DataFrame with normalized values for hand pose (same columns as the renamed input, fresh 0..n-1 index)
    """

    logger = get_logger(__name__)

    df.columns = [item.replace("_left_", "_0_").replace("_right_", "_1_") for item in list(df.columns)]

    # Determine how many hands are present in the dataset
    range_hand_size = 2 if "wrist_1_X" in df.columns else 1

    # Construct the relevant column names once, per hand and per axis
    hand_columns = {
        hand_index: [
            (identifier + "_" + str(hand_index) + "_X", identifier + "_" + str(hand_index) + "_Y")
            for identifier in HAND_IDENTIFIERS
        ]
        for hand_index in range(range_hand_size)
    }

    # Rows are gathered in a list and turned into a DataFrame once at the end: DataFrame.append was removed in
    # pandas 2.0, and appending row by row copied the whole frame on every iteration
    normalized_rows = []

    # Iterate over all of the records in the dataset
    for index, row in df.iterrows():
        # Treat each hand individually
        for hand_index in range(range_hand_size):
            columns = hand_columns[hand_index]
            sequence_size = len(row["wrist_" + str(hand_index) + "_X"])

            # Treat each element of the sequence (analyzed frame) individually
            for sequence_index in range(sequence_size):
                # Retrieve all of the X and Y values of the current frame (0 marks a value that was not captured)
                landmarks_x_values = [row[x_key][sequence_index] for x_key, _ in columns
                                      if row[x_key][sequence_index] != 0]
                landmarks_y_values = [row[y_key][sequence_index] for _, y_key in columns
                                      if row[y_key][sequence_index] != 0]

                # Prevent from even starting the analysis if some necessary elements are not present
                if not landmarks_x_values or not landmarks_y_values:
                    logger.warning(
                        " HAND LANDMARKS: One frame could not be normalized as there is no data present. Record: "
                        f"{index}, Frame: {sequence_index}")
                    continue

                min_x, max_x = min(landmarks_x_values), max(landmarks_x_values)
                min_y, max_y = min(landmarks_y_values), max(landmarks_y_values)

                # Calculate the deltas: 10 % margin on the longer side, the shorter side is padded up to a square
                width, height = max_x - min_x, max_y - min_y
                if width > height:
                    delta_x = 0.1 * width
                    delta_y = delta_x + ((width - height) / 2)
                else:
                    delta_y = 0.1 * height
                    delta_x = delta_y + ((height - width) / 2)

                # Set the starting and ending point of the normalization bounding box
                starting_point = (min_x - delta_x, min_y - delta_y)
                ending_point = (max_x + delta_x, max_y + delta_y)

                span_x = ending_point[0] - starting_point[0]
                span_y = starting_point[1] - ending_point[1]

                # A degenerate box (e.g. a single captured point) cannot be normalized without dividing by zero
                if span_x == 0 or span_y == 0:
                    continue

                # Normalize individual landmarks and save the results
                for x_key, y_key in columns:
                    x_sequence, y_sequence = row[x_key], row[y_key]

                    # Prevent from trying to normalize incorrectly captured points
                    if x_sequence[sequence_index] == 0:
                        continue

                    normalized_x = (x_sequence[sequence_index] - starting_point[0]) / span_x
                    # NOTE(review): Y is measured from the box's ending edge (max Y maps to 0, min Y to 1), which is
                    # the opposite orientation to normalize_single_dict; kept as is because downstream data may
                    # depend on it -- confirm whether the flip is intended
                    normalized_y = (y_sequence[sequence_index] - ending_point[1]) / span_y

                    x_sequence[sequence_index] = normalized_x
                    y_sequence[sequence_index] = normalized_y

        normalized_rows.append(row)

    return pd.DataFrame(normalized_rows, columns=df.columns).reset_index(drop=True)
|
|
|
|
|
|
def normalize_single_dict(row: dict):
    """
    Normalizes the hand pose data of one sequence of frames. The normalization follows the definition from our
    paper: per hand and per frame, a bounding box with a margin is fitted around the non-zero landmarks and every
    landmark whose X value is non-zero is re-expressed relative to that box.

    :param row: Dictionary mapping joint identifiers (e.g. "wrist_0") to lists (sequences) holding that joint's
        coordinates for every frame, with X at position 0 and Y at position 1
    :return: The same dictionary, updated in place, with normalized hand data (same schema as the input data)
    """

    # The second hand is only processed when its wrist is part of the data
    hand_count = 2 if "wrist_1" in row else 1

    for hand in range(hand_count):
        suffix = "_" + str(hand)
        joint_keys = [name + suffix for name in HAND_IDENTIFIERS]

        # Every frame of the sequence is normalized on its own
        for frame in range(len(row["wrist" + suffix])):
            # Collect the coordinates captured in this frame; zeros are skipped
            xs = [row[joint][frame][0] for joint in joint_keys if row[joint][frame][0] != 0]
            ys = [row[joint][frame][1] for joint in joint_keys if row[joint][frame][1] != 0]

            # Nothing to normalize against when either axis has no data
            if not (xs and ys):
                continue

            x_min, x_max = min(xs), max(xs)
            y_min, y_max = min(ys), max(ys)
            width = x_max - x_min
            height = y_max - y_min

            # Margin: 10 % of the longer side; the shorter side receives extra padding on top of that
            if width > height:
                pad_x = 0.1 * width
                pad_y = pad_x + ((width - height) / 2)
            else:
                pad_y = 0.1 * height
                pad_x = pad_y + ((height - width) / 2)

            # Corners of the normalization bounding box
            x_start, y_start = x_min - pad_x, y_min - pad_y
            x_end, y_end = x_max + pad_x, y_max + pad_y
            span_x = x_end - x_start
            span_y = y_end - y_start

            # A zero-sized box would mean dividing by zero, so no landmark of this frame is touched
            if span_x == 0 or span_y == 0:
                continue

            for joint in joint_keys:
                point = row[joint][frame]

                # Landmarks with X equal to 0 are left as they are
                if point[0] == 0:
                    continue

                # The point is stored back as a list so that it can hold the new values
                rescaled = list(point)
                rescaled[0] = (point[0] - x_start) / span_x
                rescaled[1] = (point[1] - y_start) / span_y
                row[joint][frame] = rescaled

    return row
|
|
|
|
|
|
# This module only provides functions to import; executing it directly does nothing.
if __name__ == "__main__":
    pass
|