Implement KeypointExtractor

This commit is contained in:
Victor Mylle 2023-02-16 17:56:37 +00:00
parent 970dd19702
commit ad7b160c92
4 changed files with 147 additions and 0 deletions

28
dataset.py Normal file
View File

@ -0,0 +1,28 @@
import json
import os

import pandas as pd
import torch
from PIL import Image
class WLASLDataset(torch.utils.data.Dataset):
    """WLASL sign-language video dataset, filtered to a single subset.

    Reads the WLASL metadata CSV and keeps only rows whose ``subset``
    column matches the requested split (e.g. ``"train"``).

    Args:
        csv_file: Path to the WLASL metadata CSV; must contain at least
            the columns ``video_id`` and ``subset``.
        video_dir: Directory holding the video files (stored for later use).
        subset: Which split to keep; rows with a different ``subset`` value
            are dropped.
        keypoints_file: Path to a CSV cache of precomputed keypoints.
        transform: Optional transform applied to samples (stored, not yet
            used by the visible code).
    """

    def __init__(self, csv_file: str, video_dir: str, subset: str = "train",
                 keypoints_file: str = "keypoints.csv", transform=None):
        self.df = pd.read_csv(csv_file)
        # Filter WLASL data by subset so indexing only sees this split.
        self.df = self.df[self.df["subset"] == subset]
        self.video_dir = video_dir
        self.transform = transform
        self.subset = subset
        self.keypoints_file = keypoints_file

    def __len__(self) -> int:
        # Number of clips remaining after the subset filter.
        return len(self.df)

    def __getitem__(self, index):
        video_id = self.df.iloc[index]["video_id"]
        # Check if the keypoints cache file exists; create an empty frame
        # lazily on first access. (Fix: `os` was used here without being
        # imported, which raised NameError at runtime.)
        if not os.path.exists(self.keypoints_file):
            # Empty cache: no keypoints have been extracted yet.
            keypoints_df = pd.DataFrame(columns=["video_id", "keypoints"])
        # Check if keypoints are available, else extract from video.
        # TODO(review): this method is incomplete in this commit — it looks
        # up `video_id` but returns nothing yet; the cache lookup/extraction
        # and the actual sample return still need to be implemented.

61
identifiers.py Normal file
View File

@ -0,0 +1,61 @@
# Pose landmarks: name -> index, in MediaPipe Pose output order (0..32).
POSE_LANDMARKS = {
    name: index
    for index, name in enumerate((
        "nose",
        "left_eye_inner",
        "left_eye",
        "left_eye_outer",
        "right_eye_inner",
        "right_eye",
        "right_eye_outer",
        "left_ear",
        "right_ear",
        "mouth_left",
        "mouth_right",
        "left_shoulder",
        "right_shoulder",
        "left_elbow",
        "right_elbow",
        "left_wrist",
        "right_wrist",
        "left_pinky",
        "right_pinky",
        "left_index",
        "right_index",
        "left_thumb",
        "right_thumb",
        "left_hip",
        "right_hip",
        "left_knee",
        "right_knee",
        "left_ankle",
        "right_ankle",
        "left_heel",
        "right_heel",
        "left_foot_index",
        "right_foot_index",
    ))
}
# Hand landmarks: name -> index, in MediaPipe Hands output order (0..20).
HAND_LANDMARKS = {
    name: index
    for index, name in enumerate((
        "wrist",
        "thumb_cmc",
        "thumb_mcp",
        "thumb_ip",
        "thumb_tip",
        "index_finger_mcp",
        "index_finger_pip",
        "index_finger_dip",
        "index_finger_tip",
        "middle_finger_mcp",
        "middle_finger_pip",
        "middle_finger_dip",
        "middle_finger_tip",
        "ring_finger_mcp",
        "ring_finger_pip",
        "ring_finger_dip",
        "ring_finger_tip",
        "pinky_mcp",
        "pinky_pip",
        "pinky_dip",
        "pinky_tip",
    ))
}

58
keypoint_extractor.py Normal file
View File

@ -0,0 +1,58 @@
import mediapipe as mp
import cv2
class KeypointExtractor:
    """Detects hand and pose landmarks in images using MediaPipe.

    Holds one MediaPipe Hands model (up to two hands) and one Pose model
    (model_complexity=2, the most accurate variant).
    """

    def __init__(self):
        self.mp_drawing = mp.solutions.drawing_utils
        # Hands extractor: tracks at most two hands per frame.
        self.hands = mp.solutions.hands.Hands(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
            max_num_hands=2,
        )
        # Pose extractor: complexity 2 trades speed for accuracy.
        self.pose = mp.solutions.pose.Pose(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
            model_complexity=2,
        )

    def extract(self, image, video):
        # TODO: load the video and run extract_from_frame per frame.
        pass

    def extract_from_frame(self, image):
        """Detect hands + pose in one BGR frame.

        Args:
            image: BGR image array as returned by ``cv2.imread``.

        Returns:
            A BGR copy of the frame with hand and pose landmarks drawn.
        """
        # MediaPipe expects RGB input; convert once and reuse for both
        # models (the original converted the same frame three times).
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Mark read-only during inference so MediaPipe can avoid a copy.
        rgb.flags.writeable = False
        hand_results = self.hands.process(rgb)
        pose_results = self.pose.process(rgb)
        # Fix: the array must be writeable again BEFORE drawing; the
        # original set writeable=False right before draw_landmarks, which
        # fails on a read-only array.
        rgb.flags.writeable = True
        # Fix: multi_hand_landmarks is None when no hands were detected;
        # iterating it unguarded raised TypeError.
        if hand_results.multi_hand_landmarks:
            for hand_landmarks in hand_results.multi_hand_landmarks:
                self.mp_drawing.draw_landmarks(
                    rgb, hand_landmarks, mp.solutions.hands.HAND_CONNECTIONS)
        # draw_landmarks is a no-op when pose_landmarks is None, so no
        # guard is needed here.
        self.mp_drawing.draw_landmarks(
            rgb, pose_results.pose_landmarks, mp.solutions.pose.POSE_CONNECTIONS)
        return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
def _main():
    """Smoke test: annotate a sample photo and write the result to disk."""
    extractor = KeypointExtractor()
    image = cv2.imread('data/test_photo.jpg')
    # cv2.imread returns None (no exception) when the file is missing;
    # fail loudly instead of crashing inside the extractor.
    if image is None:
        raise FileNotFoundError('data/test_photo.jpg')
    annotated = extractor.extract_from_frame(image)
    # Save the annotated image.
    cv2.imwrite('test_output.jpg', annotated)


# Fix: the original ran this at import time; guard it so importing the
# module (e.g. from dataset.py) does not trigger file I/O and inference.
if __name__ == "__main__":
    _main()

BIN
test_output.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 MiB