"""Normalize the landmark columns of the WLASL100 training CSV.

The script reads ``WLASL100_train.csv`` from ``DATASET_PATH``, sets the
metadata columns aside, parses every remaining cell with
``ast.literal_eval``, runs the hand and body normalization helpers over the
frame, re-attaches the ``labels`` and ``fps`` columns and writes the result
to ``wlasl_train_norm.csv`` in the same directory.
"""

import ast
import os

import pandas as pd

from normalization.body_normalization import normalize_body_full
from normalization.hand_normalization import normalize_hands_full

DATASET_PATH = './data/wlasl'

# Load the dataset and show a quick preview of its contents.
train_csv_path = os.path.join(DATASET_PATH, "WLASL100_train.csv")
df = pd.read_csv(train_csv_path, encoding="utf-8")
print(df.head())
print(df.columns)

# Pull the video dimensions out of the frame. They are kept as plain lists
# but are not used again in this script.
video_size_heights = df.pop("video_height").to_list()
video_size_widths = df.pop("video_width").to_list()

# Set aside the metadata that is re-attached after normalization.
labels = df.pop("labels").to_list()
video_fps = df.pop("fps").to_list()

# Discard the remaining metadata columns so that only the columns to be
# parsed and normalized are left.
for unused_column in ("split", "video_id", "label_name", "length"):
    del df[unused_column]


def convert(x):
    """Parse the string form of *x* as a Python literal and return it."""
    literal_source = str(x)
    return ast.literal_eval(literal_source)


# Turn every remaining cell from its textual form into a Python object
# (the cells are expected to be strings holding lists).
for column_name in df.columns:
    parsed_column = df[column_name].apply(convert)
    df[column_name] = parsed_column

# Run the hand normalization first, then the body normalization.
df = normalize_hands_full(df)
df, invalid_row_indexes = normalize_body_full(df)

# NOTE(review): invalid_row_indexes is not used below. If normalize_body_full
# removes those rows from df (cannot be told from this file), `labels` and
# `video_fps` would first need the entries at those positions filtered out,
# otherwise the list assignments below no longer match the row count.

# Put the metadata back and write the normalized dataset to disk.
df["labels"] = labels
df["fps"] = video_fps

output_csv_path = os.path.join(DATASET_PATH, "wlasl_train_norm.csv")
df.to_csv(output_csv_path, encoding="utf-8", index=False)