Files
sign-predictor/visualize_data.ipynb
2023-03-05 16:34:38 +00:00

201 lines
5.2 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Turn on automatic module reloading before importing project code.\n",
"# Loading the extension alone does not enable it; `%autoreload 2` reloads all\n",
"# modules before each cell is executed.\n",
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"from src.keypoint_extractor import KeypointExtractor"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"video_name = 'A_robbe.mp4' "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# create the keypoint extractor, pointed at the fingerspelling data directory\n",
"keypoint_extractor = KeypointExtractor('data/fingerspelling/data/')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from IPython.display import HTML\n",
"from base64 import b64encode\n",
"import mediapy as media\n",
"%matplotlib inline\n",
"\n",
"# Run the keypoint extractor on the selected video. With draw=True the call also\n",
"# returns the video frames (presumably annotated with the detected keypoints -\n",
"# confirm in KeypointExtractor); the first return value is not needed here.\n",
"_, video_frames = keypoint_extractor.extract_keypoints_from_video(video_name, draw=True)\n",
"\n",
"# Stack the frames into a single numpy array and show them inline as a GIF\n",
"# played back at 4 frames per second.\n",
"video = np.array(video_frames)\n",
"media.show_video(video, height=400, codec='gif', fps=4)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from src.model import SPOTER\n",
"from src.identifiers import LANDMARKS\n",
"import torch\n",
"\n",
"# hidden_dim is twice the number of entries in LANDMARKS\n",
"# (presumably one x and one y value per landmark - confirm against the model).\n",
"spoter_model = SPOTER(num_classes=5, hidden_dim=len(LANDMARKS) * 2)\n",
"\n",
"# map_location='cpu' lets a checkpoint that was saved on a GPU load on a CPU-only machine.\n",
"# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.\n",
"spoter_model.load_state_dict(torch.load('models/spoter_40.pth', map_location='cpu'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Average number of frames per sample and label frequencies of the validation subset\n",
"from collections import Counter\n",
"\n",
"import numpy as np\n",
"\n",
"from src.keypoint_extractor import KeypointExtractor\n",
"from src.datasets.finger_spelling_dataset import FingerSpellingDataset\n",
"from src.identifiers import LANDMARKS\n",
"\n",
"keypoints_extractor = KeypointExtractor(\"data/fingerspelling/data/\")\n",
"# NOTE: despite the variable name, this loads the \"val\" subset.\n",
"test_set = FingerSpellingDataset(\"data/fingerspelling/data/\", keypoints_extractor, keypoints_identifier=LANDMARKS, subset=\"val\")\n",
"\n",
"# a single pass over the dataset collects both statistics\n",
"frames = []\n",
"labels = []\n",
"for sample, label in test_set:\n",
"    frames.append(sample.shape[0])  # first dimension is taken as the frame count\n",
"    labels.append(label)\n",
"\n",
"print(np.mean(frames))\n",
"\n",
"# get label frequency in the labels list\n",
"# NOTE(review): Counter groups equal, hashable values; if the dataset yields tensor\n",
"# labels they would need converting (e.g. int(label)) first - confirm the label type.\n",
"counter = Counter(labels)\n",
"print(counter)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Hand keypoint visualization"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"def plot_hand_keypoints(dataframe, hand, frame):\n",
"    \"\"\"Scatter-plot the 21 keypoints of one hand for a single frame.\n",
"\n",
"    Args:\n",
"        dataframe: keypoint table; row `frame` and the hand's columns are selected by position.\n",
"        hand: \"right\" selects columns 108-149; any other value selects columns 66-107\n",
"            (presumably the left hand).\n",
"        frame: positional index of the row to plot.\n",
"\n",
"    Returns:\n",
"        The `.style` accessor of the one-row DataFrame that was plotted.\n",
"    \"\"\"\n",
"    # NOTE(review): columns 0-65 presumably hold the non-hand landmarks - confirm against KeypointExtractor.\n",
"    left_hand_start = 66\n",
"    columns_per_hand = 42  # 21 keypoints, 2 values each\n",
"    first_column = left_hand_start + (columns_per_hand if hand == \"right\" else 0)\n",
"    hand_columns = np.arange(first_column, first_column + columns_per_hand)\n",
"\n",
"    # keep the selection as a one-row DataFrame so it can be returned for display\n",
"    frame_df = dataframe.iloc[frame:frame + 1, hand_columns]\n",
"\n",
"    # one row per keypoint: column 0 is used as x, column 1 as y\n",
"    hand_coords = frame_df.values.reshape(21, 2)\n",
"    x_coords = hand_coords[:, 0]\n",
"    y_coords = hand_coords[:, 1]\n",
"\n",
"    # plot the keypoints\n",
"    plt.scatter(x_coords, y_coords)\n",
"    return frame_df.style"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Set the video, the hand (\"right\"; any other value selects the other hand's columns) and the positional frame index to display\n",
"video_name = 'A_victor.mp4'\n",
"hand = \"right\"\n",
"frame = 1\n",
"%reload_ext autoreload"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from src.keypoint_extractor import KeypointExtractor\n",
"import numpy as np\n",
"\n",
"# Create the keypoint extractor, pointed at the fingerspelling data directory\n",
"keypoints_extractor = KeypointExtractor(\"data/fingerspelling/data/\")\n",
"\n",
"\n",
"# Extract the keypoints of the requested video with normalize=False and plot the selected hand\n",
"df = keypoints_extractor.extract_keypoints_from_video(video_name, normalize=False)\n",
"plot_hand_keypoints(df, hand, frame)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Plot the NORMALIZED hand keypoints\n",
"df = keypoints_extractor.extract_keypoints_from_video(video_name, normalize=True)\n",
"plot_hand_keypoints(df, hand, frame)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"vscode": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}