Model live view
This commit is contained in:
committed by
Robbe De Waele
parent
7653b9b35c
commit
983a216f53
@@ -7,7 +7,7 @@ steps:
|
|||||||
pull: if-not-exists
|
pull: if-not-exists
|
||||||
image: sonarsource/sonar-scanner-cli
|
image: sonarsource/sonar-scanner-cli
|
||||||
commands:
|
commands:
|
||||||
- sonar-scanner -Dsonar.host.url=$SONAR_HOST -Dsonar.login=$SONAR_TOKEN -Dsonar.projectKey=$SONAR_PROJECT_KEY
|
- sonar-scanner -Dsonar.host.url=$SONAR_HOST -Dsonar.login=$SONAR_TOKEN -Dsonar.projectKey=$SONAR_PROJECT_KEY -Dsonar.qualitygate.wait=true
|
||||||
environment:
|
environment:
|
||||||
SONAR_HOST:
|
SONAR_HOST:
|
||||||
from_secret: sonar_host
|
from_secret: sonar_host
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -8,3 +8,4 @@ cache_wlasl/
|
|||||||
__pycache__/
|
__pycache__/
|
||||||
|
|
||||||
checkpoints/
|
checkpoints/
|
||||||
|
.ipynb_checkpoints
|
||||||
120
analyze_model.ipynb
Normal file
120
analyze_model.ipynb
Normal file
File diff suppressed because one or more lines are too long
Binary file not shown.
@@ -4,8 +4,8 @@ import numpy as np
|
|||||||
import torch
|
import torch
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
from identifiers import LANDMARKS
|
from src.identifiers import LANDMARKS
|
||||||
from keypoint_extractor import KeypointExtractor
|
from src.keypoint_extractor import KeypointExtractor
|
||||||
|
|
||||||
|
|
||||||
class FingerSpellingDataset(torch.utils.data.Dataset):
|
class FingerSpellingDataset(torch.utils.data.Dataset):
|
||||||
|
|||||||
@@ -4,8 +4,8 @@ from collections import OrderedDict
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from identifiers import LANDMARKS
|
from src.identifiers import LANDMARKS
|
||||||
from keypoint_extractor import KeypointExtractor
|
from src.keypoint_extractor import KeypointExtractor
|
||||||
|
|
||||||
|
|
||||||
class WLASLDataset(torch.utils.data.Dataset):
|
class WLASLDataset(torch.utils.data.Dataset):
|
||||||
|
|||||||
@@ -151,25 +151,34 @@ class KeypointExtractor:
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
def normalize_hands(self, dataframe: pd.DataFrame) -> pd.DataFrame:
|
def normalize_hands(self, dataframe: pd.DataFrame, norm_algorithm: str="minmax") -> pd.DataFrame:
|
||||||
"""normalize_hand this function normalizes the hand keypoints of a dataframe
|
"""normalize_hand this function normalizes the hand keypoints of a dataframe
|
||||||
|
|
||||||
:param dataframe: the dataframe to normalize
|
:param dataframe: the dataframe to normalize
|
||||||
:type dataframe: pd.DataFrame
|
:type dataframe: pd.DataFrame
|
||||||
|
:param norm_algorithm: the normalization algorithm to use, pick from "minmax" and "bohacek"
|
||||||
|
:type norm_algorithm: str
|
||||||
:return: the normalized dataframe
|
:return: the normalized dataframe
|
||||||
:rtype: pd.DataFrame
|
:rtype: pd.DataFrame
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# normalize left hand
|
if norm_algorithm == "minmax":
|
||||||
dataframe = self.normalize_hand_helper(dataframe, "left_hand")
|
# normalize left hand
|
||||||
|
dataframe = self.normalize_hand_minmax(dataframe, "left_hand")
|
||||||
# normalize right hand
|
# normalize right hand
|
||||||
dataframe = self.normalize_hand_helper(dataframe, "right_hand")
|
dataframe = self.normalize_hand_minmax(dataframe, "right_hand")
|
||||||
|
elif norm_algorithm == "bohacek":
|
||||||
|
# normalize left hand
|
||||||
|
dataframe = self.normalize_hand_bohacek(dataframe, "left_hand")
|
||||||
|
# normalize right hand
|
||||||
|
dataframe = self.normalize_hand_bohacek(dataframe, "right_hand")
|
||||||
|
else:
|
||||||
|
return dataframe
|
||||||
|
|
||||||
return dataframe
|
return dataframe
|
||||||
|
|
||||||
def normalize_hand_helper(self, dataframe: pd.DataFrame, hand: str) -> pd.DataFrame:
|
def normalize_hand_minmax(self, dataframe: pd.DataFrame, hand: str) -> pd.DataFrame:
|
||||||
"""normalize_hand_helper this function normalizes the hand keypoints of a dataframe
|
"""normalize_hand_helper this function normalizes the hand keypoints of a dataframe with respect to the minimum and maximum coordinates
|
||||||
|
|
||||||
:param dataframe: the dataframe to normalize
|
:param dataframe: the dataframe to normalize
|
||||||
:type dataframe: pd.DataFrame
|
:type dataframe: pd.DataFrame
|
||||||
@@ -194,9 +203,66 @@ class KeypointExtractor:
|
|||||||
# calculate the width and height of the bounding box around the hand keypoints
|
# calculate the width and height of the bounding box around the hand keypoints
|
||||||
bbox_width, bbox_height = max_x - min_x, max_y - min_y
|
bbox_width, bbox_height = max_x - min_x, max_y - min_y
|
||||||
|
|
||||||
# repeat the center coordinates and bounding box dimensions to match the shape of hand_coords
|
# repeat the center coordinates and bounding box dimensions to match the shape of hand_coords (numpy magic)
|
||||||
center_coords = np.tile(np.array([center_x, center_y]), (21, 1)).reshape(-1, 21, 2)
|
center_x, center_y = center_x.reshape(-1, 1, 1), center_y.reshape(-1, 1, 1)
|
||||||
bbox_dims = np.tile(np.array([bbox_width, bbox_height]), (21, 1)).reshape(-1, 21, 2)
|
center_coords = np.concatenate((np.tile(center_x, (1, 21, 1)), np.tile(center_y, (1, 21, 1))), axis=2)
|
||||||
|
|
||||||
|
bbox_width, bbox_height = bbox_width.reshape(-1, 1, 1), bbox_height.reshape(-1, 1 ,1)
|
||||||
|
bbox_dims = np.concatenate((np.tile(bbox_width, (1, 21, 1)), np.tile(bbox_height, (1, 21, 1))), axis=2)
|
||||||
|
|
||||||
|
if np.any(bbox_dims == 0):
|
||||||
|
return dataframe
|
||||||
|
# normalize the hand keypoints based on the bounding box around the hand
|
||||||
|
norm_hand_coords = (hand_coords - center_coords) / bbox_dims
|
||||||
|
|
||||||
|
# flatten the normalized hand keypoints array and replace the original hand keypoints with the normalized hand keypoints in the dataframe
|
||||||
|
dataframe.iloc[:, hand_columns] = norm_hand_coords.reshape(-1, 42)
|
||||||
|
|
||||||
|
return dataframe
|
||||||
|
|
||||||
|
def normalize_hand_bohacek(self, dataframe: pd.DataFrame, hand: str) -> pd.DataFrame:
|
||||||
|
"""normalize_hand_helper this function normalizes the hand keypoints of a dataframe using the bohacek normalization algorithm
|
||||||
|
|
||||||
|
:param dataframe: the dataframe to normalize
|
||||||
|
:type dataframe: pd.DataFrame
|
||||||
|
:param hand: the hand to normalize
|
||||||
|
:type hand: str
|
||||||
|
:return: the normalized dataframe
|
||||||
|
:rtype: pd.DataFrame
|
||||||
|
"""
|
||||||
|
# get all columns that belong to the hand (left hand column 66 - 107, right hand column 108 - 149)
|
||||||
|
hand_columns = np.array([i for i in range(66 + (42 if hand == "right_hand" else 0), 108 + (42 if hand == "right_hand" else 0))])
|
||||||
|
|
||||||
|
# get the x, y coordinates of the hand keypoints
|
||||||
|
hand_coords = dataframe.iloc[:, hand_columns].values.reshape(-1, 21, 2)
|
||||||
|
|
||||||
|
# get the min and max x, y coordinates of the hand keypoints
|
||||||
|
min_x, min_y = np.min(hand_coords[:, :, 0], axis=1), np.min(hand_coords[:, :, 1], axis=1)
|
||||||
|
max_x, max_y = np.max(hand_coords[:, :, 0], axis=1), np.max(hand_coords[:, :, 1], axis=1)
|
||||||
|
|
||||||
|
# calculate the deltas
|
||||||
|
width, height = max_x - min_x, max_y - min_y
|
||||||
|
if width > height:
|
||||||
|
delta_x = 0.1 * width
|
||||||
|
delta_y = delta_x + ((width - height) / 2)
|
||||||
|
else:
|
||||||
|
delta_y = 0.1 * height
|
||||||
|
delta_x = delta_y + ((height - width) / 2)
|
||||||
|
|
||||||
|
# Set the starting and ending point of the normalization bounding box
|
||||||
|
starting_x, starting_y = min_x - delta_x, min_y - delta_y
|
||||||
|
ending_x, ending_y = max_x + delta_x, max_y + delta_y
|
||||||
|
|
||||||
|
# calculate the center of the bounding box and the bounding box dimensions
|
||||||
|
bbox_center_x, bbox_center_y = (starting_x + ending_x) / 2, (starting_y + ending_y) / 2
|
||||||
|
bbox_width, bbox_height = starting_x - ending_x, starting_y - ending_y
|
||||||
|
|
||||||
|
# repeat the center coordinates and bounding box dimensions to match the shape of hand_coords
|
||||||
|
center_x, center_y = center_x.reshape(-1, 1, 1), center_y.reshape(-1, 1, 1)
|
||||||
|
center_coords = np.concatenate((np.tile(bbox_center_x, (1, 21, 1)), np.tile(bbox_center_y, (1, 21, 1))), axis=2)
|
||||||
|
|
||||||
|
bbox_width, bbox_height = bbox_width.reshape(-1, 1, 1), bbox_height.reshape(-1, 1 ,1)
|
||||||
|
bbox_dims = np.concatenate((np.tile(bbox_width, (1, 21, 1)), np.tile(bbox_height, (1, 21, 1))), axis=2)
|
||||||
|
|
||||||
if np.any(bbox_dims == 0):
|
if np.any(bbox_dims == 0):
|
||||||
return dataframe
|
return dataframe
|
||||||
|
|||||||
17
src/train.py
17
src/train.py
@@ -13,12 +13,12 @@ import torch.optim as optim
|
|||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from torchvision import transforms
|
from torchvision import transforms
|
||||||
|
|
||||||
from augmentations import MirrorKeypoints
|
from src.augmentations import MirrorKeypoints
|
||||||
from datasets.finger_spelling_dataset import FingerSpellingDataset
|
from src.datasets.finger_spelling_dataset import FingerSpellingDataset
|
||||||
from datasets.wlasl_dataset import WLASLDataset
|
from src.datasets.wlasl_dataset import WLASLDataset
|
||||||
from identifiers import LANDMARKS
|
from src.identifiers import LANDMARKS
|
||||||
from keypoint_extractor import KeypointExtractor
|
from src.keypoint_extractor import KeypointExtractor
|
||||||
from model import SPOTER
|
from src.model import SPOTER
|
||||||
|
|
||||||
|
|
||||||
def train():
|
def train():
|
||||||
@@ -82,9 +82,6 @@ def train():
|
|||||||
pred_correct += 1
|
pred_correct += 1
|
||||||
pred_all += 1
|
pred_all += 1
|
||||||
|
|
||||||
# if i % 100 == 0:
|
|
||||||
# print(f"Epoch: {epoch} | Batch: {i} | Loss: {running_loss.item()} | Train Acc: {(pred_correct / pred_all)}")
|
|
||||||
|
|
||||||
if scheduler:
|
if scheduler:
|
||||||
scheduler.step(running_loss.item() / len(train_loader))
|
scheduler.step(running_loss.item() / len(train_loader))
|
||||||
|
|
||||||
@@ -107,7 +104,7 @@ def train():
|
|||||||
|
|
||||||
|
|
||||||
# save checkpoint
|
# save checkpoint
|
||||||
if val_acc > top_val_acc:
|
if val_acc > top_val_acc and epoch > 55:
|
||||||
top_val_acc = val_acc
|
top_val_acc = val_acc
|
||||||
top_train_acc = train_acc
|
top_train_acc = train_acc
|
||||||
checkpoint_index = epoch
|
checkpoint_index = epoch
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
129
webcam_view.py
Normal file
129
webcam_view.py
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
import cv2
|
||||||
|
import mediapipe as mp
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from src.identifiers import LANDMARKS
|
||||||
|
from src.model import SPOTER
|
||||||
|
|
||||||
|
# Initialize the MediaPipe Holistic model (provides pose and both hands' landmarks)
holistic = mp.solutions.holistic.Holistic(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
    model_complexity=2
)
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
# Initialize video capture object
cap = cv2.VideoCapture(0)

# Sliding window holding the keypoint vectors of the most recent frames
keypoints = []
# Number of consecutive frames passed to the model for one prediction
WINDOW_SIZE = 8

spoter_model = SPOTER(num_classes=5, hidden_dim=len(LANDMARKS) * 2)
# The model stays on the CPU, so map the checkpoint there; without map_location a
# checkpoint saved from a GPU fails to load on a CPU-only machine.
spoter_model.load_state_dict(torch.load('models/spoter_56.pth', map_location="cpu"))
# Inference only: evaluation mode makes layers such as dropout (if the model has any)
# behave deterministically.
spoter_model.eval()

# Class index -> label shown in the overlay
m = {
    0: "A",
    1: "B",
    2: "C",
    3: "D",
    4: "E"
}

# Flat (x, y) column indices of the landmarks the model consumes. They never change,
# so they are computed once instead of on every frame.
landmark_columns = np.array([column for i in LANDMARKS.values() for column in (i * 2, i * 2 + 1)])


def extract_keypoints(landmarks):
    """extract_keypoints this function flattens a MediaPipe landmark list into [x0, y0, x1, y1, ...]

    :param landmarks: the landmark list of a MediaPipe result, or None when nothing was detected
    :return: the flat list of x, y values, or None when landmarks is empty / None
    """
    if not landmarks:
        return None
    return [value for landmark in landmarks.landmark for value in (landmark.x, landmark.y)]


def normalize_hand(frame, data, hand):
    """normalize_hand this function normalizes the keypoints of one hand with respect to their bounding box and draws that box on the frame

    :param frame: the image to draw the bounding box on
    :param data: array of shape (1, n_columns) with the flattened keypoints, modified in place
    :param hand: the hand to normalize, "left_hand" or "right_hand"
    :type hand: str
    :return: the (possibly updated) data and frame
    """
    # left hand lives in columns 66 - 107, right hand in columns 108 - 149
    offset = 42 if hand == "right_hand" else 0
    hand_columns = np.arange(66 + offset, 108 + offset)
    hand_data = data[0, hand_columns].reshape(21, 2)

    min_x, min_y = hand_data.min(axis=0)
    max_x, max_y = hand_data.max(axis=0)
    bbox_width, bbox_height = max_x - min_x, max_y - min_y

    # An undetected hand is filled with zeros, which gives an empty box; leave it
    # untouched instead of dividing by zero.
    if bbox_height == 0 or bbox_width == 0:
        return data, frame

    center = np.array([(min_x + max_x) / 2, (min_y + max_y) / 2])
    bbox_dims = np.array([bbox_width, bbox_height])
    # broadcasting applies the same center / dimensions to all 21 keypoints
    hand_data = (hand_data - center) / bbox_dims

    # add bounding box to frame (keypoint coordinates are scaled by the frame size to get pixels)
    frame_height, frame_width = frame.shape[:2]
    frame = cv2.rectangle(
        frame,
        (int(min_x * frame_width), int(min_y * frame_height)),
        (int(max_x * frame_width), int(max_y * frame_height)),
        (0, 255, 0),
        2,
    )

    data[:, hand_columns] = hand_data.reshape(-1, 42)
    return data, frame


# NOTE(review): indentation was lost in the reviewed copy; the nesting below (prediction
# only runs when new keypoints were appended) is reconstructed from the data flow - confirm.
try:
    while True:
        # Read a frame from the webcam
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe wants RGB input, but cv2.imshow interprets images as BGR. Only the
        # copy handed to MediaPipe is converted, so the displayed colors stay correct.
        results = holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        k1 = extract_keypoints(results.pose_landmarks)
        k2 = extract_keypoints(results.left_hand_landmarks)
        k3 = extract_keypoints(results.right_hand_landmarks)

        # a pose and at least one hand are required; a missing hand is zero-filled
        if k1 and (k2 or k3):
            data = np.array([k1 + (k2 or [0] * 42) + (k3 or [0] * 42)])

            data, frame = normalize_hand(frame, data, "left_hand")
            data, frame = normalize_hand(frame, data, "right_hand")

            # keep only the landmarks the model was built for
            filtered = data[0][landmark_columns]

            # append the new frame and drop everything older than the window
            keypoints.append(filtered)
            del keypoints[:-WINDOW_SIZE]

            if len(keypoints) == WINDOW_SIZE:
                # keypoints to tensor (stack first: building a tensor from a list of arrays is slow)
                keypoints_tensor = torch.from_numpy(np.stack(keypoints)).float()

                # predict without tracking gradients
                with torch.no_grad():
                    outputs = spoter_model(keypoints_tensor).expand(1, -1, -1)

                # softmax
                outputs = torch.nn.functional.softmax(outputs, dim=2)

                # get topk predictions
                topk = torch.topk(outputs, k=3, dim=2)

                # show overlay on frame at top right with confidence scores of topk predictions
                for i, (label, score) in enumerate(zip(topk.indices[0][0], topk.values[0][0])):
                    cv2.putText(frame, f"{m[label.item()]} {score.item():.2f}", (frame.shape[1] - 200, 50 + i * 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        mp_drawing.draw_landmarks(frame, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
        mp_drawing.draw_landmarks(frame, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
        mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)

        # Show the frame
        cv2.imshow('MediaPipe Hands', frame)

        # Wait for key press to exit (27 = Esc)
        if cv2.waitKey(5) & 0xFF == 27:
            break
finally:
    # Release the video capture object and destroy the windows, even if the loop raised
    cap.release()
    cv2.destroyAllWindows()
|
||||||
Reference in New Issue
Block a user