Merge branch 'dev' of https://gitlab.ilabt.imec.be/wesign/sign-predictor into dev
This commit is contained in:
@@ -57,7 +57,7 @@ class FingerSpellingDataset(torch.utils.data.Dataset):
|
||||
video_name = self.data[index]
|
||||
|
||||
# get the keypoints for the video
|
||||
keypoints_df = self.keypoint_extractor.extract_keypoints_from_video(video_name, normalize=True)
|
||||
keypoints_df = self.keypoint_extractor.extract_keypoints_from_video(video_name, normalize="minxmax")
|
||||
|
||||
# filter the keypoints by the identified subset
|
||||
if self.keypoints_to_keep:
|
||||
|
||||
@@ -27,14 +27,16 @@ class KeypointExtractor:
|
||||
|
||||
def extract_keypoints_from_video(self,
|
||||
video: str,
|
||||
normalize: bool = False,
|
||||
normalize: str = None,
|
||||
draw: bool = False,
|
||||
) -> pd.DataFrame:
|
||||
"""extract_keypoints_from_video this function extracts keypoints from a video and stores them in a dataframe
|
||||
|
||||
:param video: the video to extract keypoints from
|
||||
:type video: str
|
||||
:return: dataframe with keypoints
|
||||
:param normalize: the hand normalization algorithm to use, defaults to None
|
||||
:type normalize: str, optional
|
||||
:return: dataframe with keypoints in absolute pixels
|
||||
:rtype: pd.DataFrame
|
||||
"""
|
||||
|
||||
@@ -53,7 +55,7 @@ class KeypointExtractor:
|
||||
# create dataframe from cache
|
||||
df = pd.DataFrame(np.load(self.cache_folder + "/" + video + ".npy", allow_pickle=True), columns=self.columns)
|
||||
if normalize:
|
||||
df = self.normalize_hands(df)
|
||||
df = self.normalize_hands(df, norm_algorithm=normalize)
|
||||
return df
|
||||
|
||||
# open video
|
||||
@@ -97,7 +99,15 @@ class KeypointExtractor:
|
||||
data = [k1 + (k2 or [0] * 42) + (k3 or [0] * 42)]
|
||||
new_df = pd.DataFrame(data, columns=self.columns)
|
||||
keypoints_df = pd.concat([keypoints_df, new_df], ignore_index=True)
|
||||
|
||||
|
||||
# get frame width and height
|
||||
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
# convert to pixels
|
||||
keypoints_df.iloc[:, ::2] *= frame_width
|
||||
keypoints_df.iloc[:, 1::2] *= frame_height
|
||||
|
||||
# close video
|
||||
cap.release()
|
||||
|
||||
@@ -105,7 +115,7 @@ class KeypointExtractor:
|
||||
np.save(self.cache_folder + "/" + video + ".npy", keypoints_df.to_numpy())
|
||||
|
||||
if normalize:
|
||||
keypoints_df = self.normalize_hands(keypoints_df)
|
||||
keypoints_df = self.normalize_hands(keypoints_df, norm_algorithm=normalize)
|
||||
|
||||
if draw:
|
||||
return keypoints_df, output_frames
|
||||
@@ -132,17 +142,19 @@ class KeypointExtractor:
|
||||
# self.mp_drawing.draw_landmarks(draw_image, results.face_landmarks, self.mp_holistic.FACEMESH_CONTOURS)
|
||||
self.mp_drawing.draw_landmarks(draw_image, results.left_hand_landmarks, self.mp_holistic.HAND_CONNECTIONS)
|
||||
self.mp_drawing.draw_landmarks(draw_image, results.right_hand_landmarks, self.mp_holistic.HAND_CONNECTIONS)
|
||||
|
||||
img_width, img_height = image.shape[1], image.shape[0]
|
||||
|
||||
# create bounding box around hands
|
||||
if results.left_hand_landmarks:
|
||||
x = [landmark.x for landmark in results.left_hand_landmarks.landmark]
|
||||
y = [landmark.y for landmark in results.left_hand_landmarks.landmark]
|
||||
draw_image = cv2.rectangle(draw_image, (int(min(x) * 640), int(min(y) * 480)), (int(max(x) * 640), int(max(y) * 480)), (255, 0, 0), 2)
|
||||
draw_image = cv2.rectangle(draw_image, (int(min(x) * img_width), int(min(y) * img_height)), (int(max(x) * img_width), int(max(y) * img_height)), (0, 255, 0), 2)
|
||||
|
||||
if results.right_hand_landmarks:
|
||||
x = [landmark.x for landmark in results.right_hand_landmarks.landmark]
|
||||
y = [landmark.y for landmark in results.right_hand_landmarks.landmark]
|
||||
draw_image = cv2.rectangle(draw_image, (int(min(x) * 640), int(min(y) * 480)), (int(max(x) * 640), int(max(y) * 480)), (255, 0, 0), 2)
|
||||
draw_image = cv2.rectangle(draw_image, (int(min(x) * img_width), int(min(y) * img_height)), (int(max(x) * img_width), int(max(y) * img_height)), (255, 0, 0), 2)
|
||||
|
||||
self.mp_drawing.draw_landmarks(draw_image, results.pose_landmarks, self.mp_holistic.POSE_CONNECTIONS)
|
||||
|
||||
@@ -240,14 +252,21 @@ class KeypointExtractor:
|
||||
min_x, min_y = np.min(hand_coords[:, :, 0], axis=1), np.min(hand_coords[:, :, 1], axis=1)
|
||||
max_x, max_y = np.max(hand_coords[:, :, 0], axis=1), np.max(hand_coords[:, :, 1], axis=1)
|
||||
|
||||
# calculate the deltas
|
||||
# calculate the hand keypoint width and height (NOT the bounding box width and height!)
|
||||
width, height = max_x - min_x, max_y - min_y
|
||||
if width > height:
|
||||
delta_x = 0.1 * width
|
||||
delta_y = delta_x + ((width - height) / 2)
|
||||
else:
|
||||
delta_y = 0.1 * height
|
||||
delta_x = delta_y + ((height - width) / 2)
|
||||
|
||||
# initialize empty arrays for deltas
|
||||
delta_x = np.zeros(width.shape, dtype='float64')
|
||||
delta_y = np.zeros(height.shape, dtype='float64')
|
||||
|
||||
# calculate the deltas
|
||||
mask = width>height
|
||||
# width > height
|
||||
delta_x[mask] = (0.1 * width)[mask]
|
||||
delta_y[mask] = (delta_x + ((width - height) / 2))[mask]
|
||||
# height >= width
|
||||
delta_y[~mask] = (0.1 * height)[~mask]
|
||||
delta_x[~mask] = (delta_y + ((height - width) / 2))[~mask]
|
||||
|
||||
# Set the starting and ending point of the normalization bounding box
|
||||
starting_x, starting_y = min_x - delta_x, min_y - delta_y
|
||||
@@ -255,10 +274,10 @@ class KeypointExtractor:
|
||||
|
||||
# calculate the center of the bounding box and the bounding box dimensions
|
||||
bbox_center_x, bbox_center_y = (starting_x + ending_x) / 2, (starting_y + ending_y) / 2
|
||||
bbox_width, bbox_height = starting_x - ending_x, starting_y - ending_y
|
||||
|
||||
bbox_width, bbox_height = ending_x - starting_x, ending_y - starting_y
|
||||
|
||||
# repeat the center coordinates and bounding box dimensions to match the shape of hand_coords
|
||||
center_x, center_y = center_x.reshape(-1, 1, 1), center_y.reshape(-1, 1, 1)
|
||||
bbox_center_x, bbox_center_y = bbox_center_x.reshape(-1, 1, 1), bbox_center_y.reshape(-1, 1, 1)
|
||||
center_coords = np.concatenate((np.tile(bbox_center_x, (1, 21, 1)), np.tile(bbox_center_y, (1, 21, 1))), axis=2)
|
||||
|
||||
bbox_width, bbox_height = bbox_width.reshape(-1, 1, 1), bbox_height.reshape(-1, 1 ,1)
|
||||
|
||||
Reference in New Issue
Block a user