55 lines
1.4 KiB
Python
55 lines
1.4 KiB
Python
import os
|
|
import ast
|
|
import pandas as pd
|
|
|
|
from normalization.hand_normalization import normalize_hands_full
|
|
from normalization.body_normalization import normalize_body_full
|
|
|
|
# Directory the dataset CSV is read from (and the normalized CSV is written to)
DATASET_PATH = './data/processed'

# Load the dataset
_train_csv_path = os.path.join(DATASET_PATH, "spoter_train.csv")
df = pd.read_csv(_train_csv_path, encoding="utf-8")

# Print the first rows, then the column names, one after the other
print(df.head(), df.columns, sep="\n")
|
|
|
|
# Retrieve metadata (currently disabled)
# video_size_heights = df["video_height"].to_list()
# video_size_widths = df["video_width"].to_list()

# Delete redundant (non-related) properties (currently disabled)
# del df["video_height"]
# del df["video_width"]

# Temporarily remove other relevant metadata: keep the label and sign values
# as plain lists before their columns are dropped from the frame
labels, signs = (df[name].to_list() for name in ("labels", "sign"))

# Drop, in place, every column that must not go through the literal parsing
# and normalization steps that follow
df.drop(
    columns=["labels", "sign", "path", "participant_id", "sequence_id"],
    inplace=True,
)
|
|
|
|
# Convert the strings into lists
|
|
|
|
|
|
def convert(x):
    """Parse the string form of ``x`` as a Python literal and return the value.

    ``x`` is passed through ``str`` first, so values that are not strings are
    accepted as well. Parsing is delegated to ``ast.literal_eval``; any
    exception it raises for text that is not a valid literal propagates to
    the caller unchanged.
    """
    text = str(x)
    return ast.literal_eval(text)
|
|
|
|
|
|
# Replace each remaining column with its cells parsed by convert(), so the
# frame holds Python objects instead of their string representations
for column_name in df.columns:
    df[column_name] = df[column_name].apply(convert)
|
|
|
|
# Perform the normalizations (only the hand normalization is active)
df = normalize_hands_full(df)
# df, invalid_row_indexes = normalize_body_full(df)

# Clear lists of items from deleted rows (disabled together with the body
# normalization above, which is what would produce invalid_row_indexes)
# labels = [t for i, t in enumerate(labels) if i not in invalid_row_indexes]
# video_fps = [t for i, t in enumerate(video_fps) if i not in invalid_row_indexes]

# Return the metadata back to the dataset
df = df.assign(labels=labels, sign=signs)

# Write the normalized frame next to the input file, without the index column
_norm_csv_path = os.path.join(DATASET_PATH, "spoter_train_norm.csv")
df.to_csv(_norm_csv_path, encoding="utf-8", index=False)
|