diff --git a/dataset.py b/dataset.py
new file mode 100644
index 0000000..5b3ff7f
--- /dev/null
+++ b/dataset.py
@@ -0,0 +1,46 @@
+import json
+import os
+
+import pandas as pd
+import torch
+from PIL import Image
+
+
+class WLASLDataset(torch.utils.data.Dataset):
+    """Rows of the CSV at ``csv_file`` restricted to one subset.
+
+    Args:
+        csv_file: CSV with at least ``subset`` and ``video_id`` columns.
+        video_dir: Directory of the videos (stored, not read yet).
+        subset: Value of the ``subset`` column to keep.
+        keypoints_file: Keypoints file path; only its existence is checked.
+        transform: Optional transform (stored, not applied yet).
+    """
+
+    def __init__(self, csv_file: str, video_dir: str, subset: str = "train",
+                 keypoints_file: str = "keypoints.csv", transform=None):
+        df = pd.read_csv(csv_file)
+        # filter wlasl data by subset
+        self.df = df[df["subset"] == subset]
+        self.video_dir = video_dir
+        self.transform = transform
+        self.subset = subset
+        self.keypoints_file = keypoints_file
+
+    def __len__(self):
+        """Return the number of rows in the selected subset."""
+        return len(self.df)
+
+    def __getitem__(self, index):
+        """Look up the video id at ``index``; returns ``None`` for now.
+
+        Keypoint loading/extraction is still to be written.
+        """
+        video_id = self.df.iloc[index]["video_id"]
+
+        # check if keypoints file exists
+        if not os.path.exists(self.keypoints_file):
+            # create empty dataframe
+            keypoints_df = pd.DataFrame(columns=["video_id", "keypoints"])
+
+        # TODO: check if keypoints are available else extract from video
diff --git a/identifiers.py b/identifiers.py
new file mode 100644
index 0000000..916aadf
--- /dev/null
+++ b/identifiers.py
@@ -0,0 +1,61 @@
+# Pose Landmarks
+POSE_LANDMARKS = {
+    "nose": 0,
+    "left_eye_inner": 1,
+    "left_eye": 2,
+    "left_eye_outer": 3,
+    "right_eye_inner": 4,
+    "right_eye": 5,
+    "right_eye_outer": 6,
+    "left_ear": 7,
+    "right_ear": 8,
+    "mouth_left": 9,
+    "mouth_right": 10,
+    "left_shoulder": 11,
+    "right_shoulder": 12,
+    "left_elbow": 13,
+    "right_elbow": 14,
+    "left_wrist": 15,
+    "right_wrist": 16,
+    "left_pinky": 17,
+    "right_pinky": 18,
+    "left_index": 19,
+    "right_index": 20,
+    "left_thumb": 21,
+    "right_thumb": 22,
+    "left_hip": 23,
+    "right_hip": 24,
+    "left_knee": 25,
+    "right_knee": 26,
+    "left_ankle": 27,
+    "right_ankle": 28,
+    "left_heel": 29,
+    "right_heel": 30,
+    "left_foot_index": 31,
+    "right_foot_index": 32,
+}
+
+# Hand Landmarks
+HAND_LANDMARKS = {
+    "wrist": 0,
+    "thumb_cmc": 1,
+    "thumb_mcp": 2,
+    "thumb_ip": 3,
+    "thumb_tip": 4,
+    "index_finger_mcp": 5,
+    "index_finger_pip": 6,
+    "index_finger_dip": 7,
+    "index_finger_tip": 8,
+    "middle_finger_mcp": 9,
+    "middle_finger_pip": 10,
+    "middle_finger_dip": 11,
+    "middle_finger_tip": 12,
+    "ring_finger_mcp": 13,
+    "ring_finger_pip": 14,
+    "ring_finger_dip": 15,
+    "ring_finger_tip": 16,
+    "pinky_mcp": 17,
+    "pinky_pip": 18,
+    "pinky_dip": 19,
+    "pinky_tip": 20,
+}
diff --git a/keypoint_extractor.py b/keypoint_extractor.py
new file mode 100644
index 0000000..15938f6
--- /dev/null
+++ b/keypoint_extractor.py
@@ -0,0 +1,72 @@
+import cv2
+import mediapipe as mp
+
+
+class KeypointExtractor:
+    """Run MediaPipe hand and pose detection and draw the landmarks."""
+
+    def __init__(self):
+        self.mp_drawing = mp.solutions.drawing_utils
+
+        # hands extractor
+        self.hands = mp.solutions.hands.Hands(
+            min_detection_confidence=0.5,
+            min_tracking_confidence=0.5,
+            max_num_hands=2,
+        )
+
+        # pose extractor
+        self.pose = mp.solutions.pose.Pose(
+            min_detection_confidence=0.5,
+            min_tracking_confidence=0.5,
+            model_complexity=2,
+        )
+
+    def extract(self, image, video):
+        """Placeholder for video-level extraction; does nothing yet."""
+        # load video
+        pass
+
+    def extract_from_frame(self, image):
+        """Detect hands and pose in a BGR frame and return an annotated copy.
+
+        Args:
+            image: Frame in BGR channel order (e.g. from ``cv2.imread``).
+
+        Returns:
+            A new BGR image with the detected landmarks drawn on it.
+        """
+        # MediaPipe works on RGB; convert once and feed both models.
+        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+        # Read-only while the models run, writeable again before drawing:
+        # OpenCV cannot draw into a read-only array.
+        rgb_image.flags.writeable = False
+        hand_results = self.hands.process(rgb_image)
+        pose_results = self.pose.process(rgb_image)
+        rgb_image.flags.writeable = True
+
+        # multi_hand_landmarks is None when no hand is detected.
+        for hand_landmarks in hand_results.multi_hand_landmarks or ():
+            self.mp_drawing.draw_landmarks(
+                rgb_image, hand_landmarks, mp.solutions.hands.HAND_CONNECTIONS)
+
+        if pose_results.pose_landmarks is not None:
+            self.mp_drawing.draw_landmarks(
+                rgb_image, pose_results.pose_landmarks,
+                mp.solutions.pose.POSE_CONNECTIONS)
+
+        return cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
+
+
+if __name__ == "__main__":
+    # Demo: annotate the sample photo; guarded so importing has no side effects.
+    image = cv2.imread('data/test_photo.jpg')
+    if image is None:
+        raise FileNotFoundError('data/test_photo.jpg')
+
+    ke = KeypointExtractor()
+    image = ke.extract_from_frame(image)
+
+    # save image
+    cv2.imwrite('test_output.jpg', image)
diff --git a/test_output.jpg b/test_output.jpg
new file mode 100644
index 0000000..a4dca74
Binary files /dev/null and b/test_output.jpg differ