406 lines
12 KiB
Plaintext
406 lines
12 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import sys\n",
|
|
"sys.path.insert(0,'..')\n",
|
|
"from src.keypoint_extractor import KeypointExtractor\n",
|
|
"\n",
|
|
"# reload modules\n",
|
|
"%load_ext autoreload"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"video_name = '../data/basics/data/GOEDENACHT!test!158_20230323093702582261_6FB7Q.mp4'\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# extract keypoints\n",
|
|
"keypoint_extractor = KeypointExtractor()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"from IPython.display import HTML\n",
|
|
"from base64 import b64encode\n",
|
|
"import mediapy as media\n",
|
|
"%matplotlib inline\n",
|
|
"\n",
|
|
"# Define the frames per second (fps) and duration of the video\n",
|
|
"fps = 25\n",
|
|
"duration = 10\n",
|
|
"\n",
|
|
"# Create a dummy video of random noise\n",
|
|
"_, video_frames = keypoint_extractor.extract_keypoints_from_video(video_name, normalize=\"minmax\", draw=True)\n",
|
|
"\n",
|
|
"# Convert the video to a numpy array\n",
|
|
"video = np.array(video_frames)\n",
|
|
"media.show_video(video, height=400, codec='gif', fps=4)\n",
|
|
"\n",
|
|
"# write the video to a file\n",
|
|
"media.write_video('test.mp4',video, fps=10)\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from src.model import SPOTER\n",
|
|
"from src.identifiers import LANDMARKS\n",
|
|
"import torch\n",
|
|
"\n",
|
|
"spoter_model = SPOTER(num_classes=5, hidden_dim=len(LANDMARKS) *2)\n",
|
|
"spoter_model.load_state_dict(torch.load('models/spoter_40.pth'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# get average number of frames in test set\n",
|
|
"from src.keypoint_extractor import KeypointExtractor\n",
|
|
"from src.datasets.finger_spelling_dataset import FingerSpellingDataset\n",
|
|
"from src.identifiers import LANDMARKS\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"keypoints_extractor = KeypointExtractor(\"../data/fingerspelling/data/\")\n",
|
|
"test_set = FingerSpellingDataset(\"../data/fingerspelling/data/\", keypoints_extractor, keypoints_identifier=LANDMARKS, subset=\"val\")\n",
|
|
"\n",
|
|
"frames = []\n",
|
|
"labels = []\n",
|
|
"for sample, label in test_set:\n",
|
|
" frames.append(sample.shape[0])\n",
|
|
" labels.append(label)\n",
|
|
"\n",
|
|
"print(np.mean(frames))\n",
|
|
"# get label frequency in the labels list\n",
|
|
"from collections import Counter\n",
|
|
"\n",
|
|
"counter = Counter(labels)\n",
|
|
"print(counter)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Hand keypoint visualization"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import matplotlib.pyplot as plt\n",
|
|
"\n",
|
|
"def plot_hand_keypoints(dataframe, hand, frame, ret=False):\n",
|
|
" plt.clf()\n",
|
|
" hand_columns = np.array([i for i in range(66 + (42 if hand == \"right\" else 0), 108 + (42 if hand == \"right\" else 0))])\n",
|
|
" \n",
|
|
" # get the x, y coordinates of the hand keypoints\n",
|
|
" frame_df = dataframe.iloc[frame:frame+1, hand_columns]\n",
|
|
" hand_coords = frame_df.values.reshape(21, 2)\n",
|
|
" \n",
|
|
" x_coords = hand_coords[:, ::2] #Even indices\n",
|
|
" y_coords = -hand_coords[:, 1::2] #Uneven indices (negative because pixels start from the top left)\n",
|
|
" if ret:\n",
|
|
" return plt.scatter(x_coords, y_coords, c='b')\n",
|
|
" #Plot the keypoints\n",
|
|
" plt.scatter(x_coords, y_coords)\n",
|
|
" return frame_df.style"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#Set video, hand and frame to display\n",
|
|
"video_name = 'A!1_20230301111632818161_I3EC3.mp4'\n",
|
|
"hand = \"right\"\n",
|
|
"frame = 5\n",
|
|
"%reload_ext autoreload"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from src.keypoint_extractor import KeypointExtractor\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"#Extract keypoints from requested video\n",
|
|
"keypoints_extractor = KeypointExtractor(\"../data/fingerspelling/data/\")\n",
|
|
"\n",
|
|
"#Plot the hand keypoints\n",
|
|
"df = keypoints_extractor.extract_keypoints_from_video(video_name)\n",
|
|
"df.head()\n",
|
|
"plot_hand_keypoints(df, hand, frame)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#Plot the NORMALIZED hand keypoints (using minxmax)\n",
|
|
"df = keypoints_extractor.extract_keypoints_from_video(video_name, normalize=\"minmax\")\n",
|
|
"plt.xlim(-0.5,0.5)\n",
|
|
"plt.ylim(-0.5,0.5)\n",
|
|
"plot_hand_keypoints(df, hand, frame)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#Plot the NORMALIZED hand keypoints (using bohacek)\n",
|
|
"df = keypoints_extractor.extract_keypoints_from_video(video_name, normalize=\"bohacek\")\n",
|
|
"plt.xlim(-0.5,0.5)\n",
|
|
"plt.ylim(-0.5,0.5)\n",
|
|
"plot_hand_keypoints(df, hand, frame)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Pose keypoint visualization"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import matplotlib.pyplot as plt\n",
|
|
"\n",
|
|
"import sys\n",
|
|
"sys.path.insert(0,'..')\n",
|
|
"from src.augmentations import Z_augmentation\n",
|
|
"from src.keypoint_extractor import KeypointExtractor\n",
|
|
"\n",
|
|
"\n",
|
|
"def plot_pose_keypoints(dataframe, frame, ret=False, aug=False):\n",
|
|
" # clear plt\n",
|
|
" plt.clf()\n",
|
|
" pose_columns = np.array([i for i in range(34)])\n",
|
|
"\n",
|
|
" # get the x, y coordinates of the relevant pose keypoints\n",
|
|
" frame_df = dataframe.iloc[frame:frame+1, pose_columns]\n",
|
|
" pose_coords = frame_df.values.reshape(34, 1)\n",
|
|
"\n",
|
|
" if aug:\n",
|
|
" pose_coords = Z_augmentation()(pose_coords, hand_side=\"right\")\n",
|
|
" pose_coords = pose_coords.reshape(17, 2)\n",
|
|
"\n",
|
|
" x_coords = pose_coords[:, ::2] #Even indices\n",
|
|
" y_coords = -pose_coords[:, 1::2] #Uneven indices (negative because pixels start from the top left)\n",
|
|
"\n",
|
|
" # return a scatter plot of the pose keypoints\n",
|
|
" if ret:\n",
|
|
" plt.xlim(-0.5,0.5)\n",
|
|
" plt.ylim(-0.5,0.5)\n",
|
|
" return plt.scatter(x_coords, y_coords, c='r')\n",
|
|
"\n",
|
|
" plt.xlim(-0.5,0.5)\n",
|
|
" plt.ylim(-0.5,0.5)\n",
|
|
" plt.scatter(x_coords, y_coords)\n",
|
|
" return frame_df.style\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#Set video, hand and frame to display\n",
|
|
"video_name = 'Z!26_20230313174809489724_CGG2F.mp4'\n",
|
|
"frame = 2\n",
|
|
"%reload_ext autoreload"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from src.keypoint_extractor import KeypointExtractor\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"#Extract keypoints from requested video\n",
|
|
"keypoints_extractor = KeypointExtractor(\"../data/fingerspelling/data/\")\n",
|
|
"\n",
|
|
"#Plot the hand keypoints\n",
|
|
"df = keypoints_extractor.extract_keypoints_from_video(video_name)\n",
|
|
"df.head()\n",
|
|
"plot_pose_keypoints(df, frame)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"norm_df = keypoints_extractor.extract_keypoints_from_video(video_name, normalize=\"bohacek\")\n",
|
|
"plt.xlim(-0.5,0.5)\n",
|
|
"plt.ylim(-0.5,0.5)\n",
|
|
"plot_pose_keypoints(norm_df, frame)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"norm_df = keypoints_extractor.extract_keypoints_from_video(video_name, normalize=\"bohacek\")\n",
|
|
"\n",
|
|
"# create animation of the keypoints over time\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import matplotlib.animation as animation\n",
|
|
"from IPython.display import HTML\n",
|
|
"\n",
|
|
"fig = plt.figure()\n",
|
|
"plt.xlim(-0.5, 0.5)\n",
|
|
"plt.ylim(-0.5, 0.5)\n",
|
|
"\n",
|
|
"def init():\n",
|
|
" return plot_pose_keypoints(norm_df, 0, ret=True)\n",
|
|
"\n",
|
|
"def animate(i):\n",
|
|
" return plot_pose_keypoints(norm_df, i, ret=True, aug=True)\n",
|
|
"# show animation of the keypoints over time, show one frame every 1000ms and remove the other frames\n",
|
|
"anim = animation.FuncAnimation(fig, animate, init_func=init, frames=norm_df.shape[0], interval=1000, repeat=False, )\n",
|
|
"HTML(anim.to_html5_video())\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"fig = plt.figure()\n",
|
|
"ax = plt.axes(xlim=(-0.5, 0.5), ylim=(-0.5, 0.5))\n",
|
|
"ax.set_aspect('equal')\n",
|
|
"ax.set_title(\"Right Hand\")\n",
|
|
"ax.set_xlabel(\"x\")\n",
|
|
"ax.set_ylabel(\"y\")\n",
|
|
"\n",
|
|
"def init_hand():\n",
|
|
" return plot_hand_keypoints(norm_df, \"right\", 0, ret=True)\n",
|
|
"\n",
|
|
"def animate_hand(i):\n",
|
|
" return plot_hand_keypoints(norm_df, \"right\", i, ret=True)\n",
|
|
"\n",
|
|
"# show animation of the keypoints over time, show one frame every 1000ms and remove the other frames\n",
|
|
"anim = animation.FuncAnimation(fig, animate_hand, init_func=init_hand, frames=norm_df.shape[0], interval=1000, repeat=False, )\n",
|
|
"HTML(anim.to_html5_video())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"fig = plt.figure()\n",
|
|
"ax = plt.axes(xlim=(-0.5, 0.5), ylim=(-0.5, 0.5))\n",
|
|
"ax.set_aspect('equal')\n",
|
|
"ax.set_title(\"Left Hand\")\n",
|
|
"ax.set_xlabel(\"x\")\n",
|
|
"ax.set_ylabel(\"y\")\n",
|
|
"\n",
|
|
"def init_hand():\n",
|
|
" return plot_hand_keypoints(norm_df, \"left\", 0, ret=True)\n",
|
|
"\n",
|
|
"def animate_hand(i):\n",
|
|
" return plot_hand_keypoints(norm_df, \"left\", i, ret=True)\n",
|
|
"\n",
|
|
"# show animation of the keypoints over time, show one frame every 1000ms and remove the other frames\n",
|
|
"anim = animation.FuncAnimation(fig, animate_hand, init_func=init_hand, frames=norm_df.shape[0], interval=1000, repeat=False, )\n",
|
|
"HTML(anim.to_html5_video())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.13"
|
|
},
|
|
"vscode": {
|
|
"interpreter": {
|
|
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
|
|
}
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|