"""Apply hand normalization to the SPOTER training CSV and save the result.

Reads ``spoter_train.csv`` from ``DATASET_PATH``, sets the metadata columns
aside, parses every remaining cell from its string form with
``ast.literal_eval``, runs ``normalize_hands_full`` on the frame, re-attaches
the ``labels`` and ``sign`` columns and writes ``spoter_train_norm.csv``.
"""
import os
import ast

import pandas as pd

from normalization.hand_normalization import normalize_hands_full
from normalization.body_normalization import normalize_body_full

DATASET_PATH = './data/processed'

# Load the dataset and show a quick preview of its contents and schema.
source_csv = os.path.join(DATASET_PATH, "spoter_train.csv")
df = pd.read_csv(source_csv, encoding="utf-8")
print(df.head())
print(df.columns)

# Disabled: video-size metadata handling.
# video_size_heights = df["video_height"].to_list()
# video_size_widths = df["video_width"].to_list()
# del df["video_height"]
# del df["video_width"]

# Set aside the metadata that must survive normalization; ``pop`` returns the
# column and removes it from the frame in one step.
labels = df.pop("labels").to_list()
signs = df.pop("sign").to_list()

# These columns are removed and never written back to the output.
for dropped_column in ("path", "participant_id", "sequence_id"):
    del df[dropped_column]


def convert(x):
    """Parse the string form of ``x`` as a Python literal and return it."""
    return ast.literal_eval(str(x))


# Every remaining column is converted cell by cell from text to a literal.
for column_name in df.columns:
    df[column_name] = df[column_name].apply(convert)

# Perform the normalizations (only the hand normalization is active).
df = normalize_hands_full(df)

# Disabled: body normalization and the matching clean-up of the metadata
# lists for rows it reports as invalid.
# NOTE(review): ``video_fps`` is not defined anywhere in this script - fix
# before re-enabling.
# df, invalid_row_indexes = normalize_body_full(df)
# labels = [t for i, t in enumerate(labels) if i not in invalid_row_indexes]
# video_fps = [t for i, t in enumerate(video_fps) if i not in invalid_row_indexes]

# Return the metadata back to the dataset and persist the result.
df["labels"] = labels
df["sign"] = signs

target_csv = os.path.join(DATASET_PATH, "spoter_train_norm.csv")
df.to_csv(target_csv, encoding="utf-8", index=False)