* Add project code * Logger improvements * Improvements to web demo code * added create_wlasl_landmarks_dataset.py and xtract_mediapipe_landmarks.py * Fix rotation augmentation * fixed error in docstring, and removed unnecessary replace -1 -> 0 * Readme updates * Share base notebooks * Add notebooks and unify for different datasets * requirements update * fixes * Make evaluate more deterministic * Allow training with clearml * refactor preprocessing and apply linter * Minor fixes * Minor notebook tweaks * Readme updates * Fix PR comments * Remove unneeded code * Add banner to Readme --------- Co-authored-by: Gabriel Lema <gabriel.lema@xmartlabs.com>
134 lines
4.9 KiB
Python
134 lines
4.9 KiB
Python
import pandas as pd
|
|
import ast
|
|
import torch
|
|
import random
|
|
import numpy as np
|
|
from torch.nn.utils.rnn import pad_sequence
|
|
from random import randrange
|
|
|
|
from augmentations import augment_arm_joint_rotate, augment_rotate, augment_shear
|
|
from normalization.body_normalization import BODY_IDENTIFIERS
|
|
from augmentations.augment import HAND_IDENTIFIERS
|
|
|
|
|
|
def load_dataset(file_location: str) -> tuple:
    """Load a landmarks CSV file into one coordinate array per row.

    The file is read with pandas. Column names containing ``_left_`` or
    ``_right_`` are rewritten to ``_0_`` / ``_1_`` before any lookup. For
    every identifier in ``BODY_IDENTIFIERS + HAND_IDENTIFIERS`` the CSV must
    provide the columns ``<identifier>_X`` and ``<identifier>_Y``; each cell
    is the text of a Python literal sequence (parsed with
    ``ast.literal_eval``) holding one value per frame.

    Args:
        file_location: Path of the UTF-8 encoded CSV file.

    Returns:
        A ``(data, labels)`` pair. ``data`` is a list with one ``np.ndarray``
        of shape ``(frames, landmarks, 2)`` per CSV row, where index 0 of the
        last axis is X and index 1 is Y. ``labels`` is the ``labels`` column
        converted to a list.
    """
    # Load the dataset csv file
    df = pd.read_csv(file_location, encoding="utf-8")
    df.columns = [item.replace("_left_", "_0_").replace("_right_", "_1_") for item in df.columns]

    # TEMP (marker kept from the original code)
    labels = df["labels"].to_list()

    # The identifier list does not change between rows, so build it once.
    identifiers = BODY_IDENTIFIERS + HAND_IDENTIFIERS

    data = []

    for _, row in df.iterrows():
        # The frame count of a row is taken from its "leftEar_X" cell.
        frame_count = len(ast.literal_eval(row["leftEar_X"]))
        current_row = np.empty(shape=(frame_count, len(identifiers), 2))

        for index, identifier in enumerate(identifiers):
            current_row[:, index, 0] = ast.literal_eval(row[identifier + "_X"])
            current_row[:, index, 1] = ast.literal_eval(row[identifier + "_Y"])

        data.append(current_row)

    return data, labels
|
|
|
|
|
|
def tensor_to_dictionary(landmarks_tensor: torch.Tensor) -> dict:
    """Split a landmarks tensor into a dictionary keyed by identifier.

    The tensor is converted with ``.numpy()`` and sliced along its second
    axis: the slice at position ``i`` is stored under the ``i``-th name of
    ``BODY_IDENTIFIERS + HAND_IDENTIFIERS``.

    Args:
        landmarks_tensor: Tensor with at least two dimensions whose second
            axis follows the identifier order above
            (presumably ``(frames, landmarks, 2)`` -- confirm with callers).

    Returns:
        Dict mapping each identifier to ``array[:, position]``.
    """
    frames = landmarks_tensor.numpy()
    identifiers = BODY_IDENTIFIERS + HAND_IDENTIFIERS

    return {name: frames[:, position] for position, name in enumerate(identifiers)}
|
|
|
|
|
|
def dictionary_to_tensor(landmarks_dict: dict) -> torch.Tensor:
    """Pack a per-identifier landmarks dictionary into a single tensor.

    Args:
        landmarks_dict: Mapping with one entry per name in
            ``BODY_IDENTIFIERS + HAND_IDENTIFIERS``. Each entry is a sequence
            of frames, and every frame is indexable with ``[0]`` and ``[1]``.
            The number of frames is read from the ``"leftEar"`` entry.

    Returns:
        Tensor of shape ``(frames, landmarks, 2)`` built from a NumPy array;
        the last axis holds element 0 and element 1 of each frame.
    """
    frame_count = len(landmarks_dict["leftEar"])
    identifiers = BODY_IDENTIFIERS + HAND_IDENTIFIERS
    packed = np.empty(shape=(frame_count, len(identifiers), 2))

    for position, name in enumerate(identifiers):
        # Fill the two coordinate slots of this landmark one axis at a time.
        for axis in (0, 1):
            packed[:, position, axis] = [frame[axis] for frame in landmarks_dict[name]]

    return torch.from_numpy(packed)
|
|
|
|
|
|
def random_augmentation(augmentations, augmentations_prob, depth_map):
    """Apply at most one randomly chosen augmentation to ``depth_map``.

    Nothing is applied when ``augmentations`` is falsy, or when a fresh
    ``random.random()`` draw is not below ``augmentations_prob`` (the draw is
    only made when ``augmentations`` is truthy). Otherwise ``randrange(4)``
    picks one of four calls:

    0. ``augment_arm_joint_rotate(depth_map, 0.3, (-4, 4))``
    1. ``augment_shear(depth_map, "perspective", (0, 0.1))``
    2. ``augment_shear(depth_map, "squeeze", (0, 0.15))``
    3. ``augment_rotate(depth_map, (-13, 13))``

    Args:
        augmentations: Flag enabling augmentation (evaluated for truthiness).
        augmentations_prob: Threshold compared against ``random.random()``.
        depth_map: Sample handed to the augmentation functions
            (presumably a per-landmark dictionary -- confirm with callers).

    Returns:
        The result of the selected augmentation, or ``depth_map`` unchanged.
    """
    if not augmentations:
        return depth_map
    if not (random.random() < augmentations_prob):
        return depth_map

    # Dispatch table: position == value drawn by randrange.
    transforms = (
        lambda sample: augment_arm_joint_rotate(sample, 0.3, (-4, 4)),
        lambda sample: augment_shear(sample, "perspective", (0, 0.1)),
        lambda sample: augment_shear(sample, "squeeze", (0, 0.15)),
        lambda sample: augment_rotate(sample, (-13, 13)),
    )
    choice = randrange(4)

    return transforms[choice](depth_map)
|
|
|
|
|
|
def _pad_and_mask(sequences):
    """Pad variable-length tensors to a common length and build their mask.

    Args:
        sequences: Non-empty list of tensors; ``shape[0]`` of each tensor is
            its sequence length.

    Returns:
        ``(padded, mask)``. ``padded`` is the output of
        ``pad_sequence(sequences, batch_first=True)``. ``mask`` is a
        ``torch.Tensor`` of shape ``(batch, max_length)`` holding 0.0 at
        positions covered by real data and 1.0 at padded positions.
    """
    lengths = [sequence.shape[0] for sequence in sequences]
    longest = max(lengths)

    # False marks real frames, True marks padding added up to `longest`.
    mask = [[False] * length + [True] * (longest - length) for length in lengths]

    return pad_sequence(sequences, batch_first=True), torch.Tensor(mask)


def collate_fn_triplet_padd(batch):
    """Collate a batch of (anchor, positive, negative) variable-length triplets.

    Anchors, positives and negatives are padded independently, each group to
    the longest sequence of that group within the batch.

    Args:
        batch: Non-empty list with one element per sample; ``element[0]``,
            ``element[1]`` and ``element[2]`` are the anchor, positive and
            negative tensors, whose first dimension is the sequence length.

    Returns:
        Tuple ``(anchor_batch, positive_batch, negative_batch, anchor_mask,
        positive_mask, negative_mask)``. The ``*_batch`` tensors are padded
        with ``batch_first=True``; each ``*_mask`` is a ``torch.Tensor`` of
        shape ``(batch, max_length)`` with 1.0 at padded positions and 0.0
        elsewhere.

    Raises:
        ValueError: If ``batch`` is empty (``max()`` of an empty list).
    """
    anchor_batch, anchor_mask = _pad_and_mask([element[0] for element in batch])
    positive_batch, positive_mask = _pad_and_mask([element[1] for element in batch])
    negative_batch, negative_mask = _pad_and_mask([element[2] for element in batch])

    return (
        anchor_batch,
        positive_batch,
        negative_batch,
        anchor_mask,
        positive_mask,
        negative_mask,
    )
|
|
|
|
|
|
def collate_fn_padd(batch):
    """Collate a batch of (sequence, label) samples of variable length.

    Args:
        batch: Non-empty list with one element per sample; ``element[0]`` is
            a tensor whose first dimension is the sequence length and
            ``element[1]`` is a numeric label.

    Returns:
        Tuple ``(padded, labels, mask)``. ``padded`` is the output of
        ``pad_sequence(..., batch_first=True)`` over the sequences;
        ``labels`` is ``torch.Tensor`` built from the labels (so they are
        stored as floats); ``mask`` is a ``torch.Tensor`` of shape
        ``(batch, max_length)`` with 1.0 at padded positions and 0.0 at
        positions holding real data.

    Raises:
        ValueError: If ``batch`` is empty (``max()`` of an empty list).
    """
    sequences = [element[0] for element in batch]

    # MASKING: False marks real frames, True marks padding up to `longest`.
    lengths = [sequence.shape[0] for sequence in sequences]
    longest = max(lengths)
    mask = [[False] * length + [True] * (longest - length) for length in lengths]

    # PADDING
    padded = pad_sequence(sequences, batch_first=True)

    labels = torch.Tensor([element[1] for element in batch])

    return padded, labels, torch.Tensor(mask)
|