{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Keypoint extraction and visualization\n",
    "\n",
    "Exploration notebook for `KeypointExtractor`: it runs a video through the extractor with `draw=True`, loads a SPOTER checkpoint, prints frame-count and label statistics for the `val` subset of the fingerspelling dataset, and plots / animates hand and pose keypoints.\n",
    "\n",
    "Run the cells top to bottom: later sections reuse `keypoints_extractor` and `norm_df` created in earlier cells."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# reload edited project modules automatically before each cell is executed\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "%matplotlib inline\n",
    "\n",
    "import sys\n",
    "from base64 import b64encode\n",
    "from collections import Counter\n",
    "\n",
    "# the parent directory is put on the path so that the `src` imports below resolve\n",
    "sys.path.insert(0,'..')\n",
    "\n",
    "import matplotlib.animation as animation\n",
    "import matplotlib.pyplot as plt\n",
    "import mediapy as media\n",
    "import numpy as np\n",
    "import torch\n",
    "from IPython.display import HTML\n",
    "\n",
    "from src.augmentations import Z_augmentation\n",
    "from src.datasets.finger_spelling_dataset import FingerSpellingDataset\n",
    "from src.identifiers import LANDMARKS\n",
    "from src.keypoint_extractor import KeypointExtractor\n",
    "from src.model import SPOTER"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Video with drawn keypoints"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "video_name = '../data/basics/data/GOEDENACHT!test!158_20230323093702582261_6FB7Q.mp4'\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# extract keypoints\n",
    "keypoint_extractor = KeypointExtractor()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Frame rates used for the inline preview and for the exported file\n",
    "PREVIEW_FPS = 4\n",
    "EXPORT_FPS = 10\n",
    "\n",
    "# Run the extractor on the video with min-max normalization and draw=True;\n",
    "# the second return value is what gets shown as a video below\n",
    "_, video_frames = keypoint_extractor.extract_keypoints_from_video(video_name, normalize=\"minmax\", draw=True)\n",
    "\n",
    "# Convert the video to a numpy array\n",
    "video = np.array(video_frames)\n",
    "media.show_video(video, height=400, codec='gif', fps=PREVIEW_FPS)\n",
    "\n",
    "# write the video to a file\n",
    "media.write_video('test.mp4', video, fps=EXPORT_FPS)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SPOTER checkpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): the checkpoint path is relative to the notebook directory, unlike the '../data' paths - confirm\n",
    "spoter_model = SPOTER(num_classes=5, hidden_dim=len(LANDMARKS) *2)\n",
    "spoter_model.load_state_dict(torch.load('models/spoter_40.pth'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dataset statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# get average number of frames in the \"val\" subset\n",
    "keypoints_extractor = KeypointExtractor(\"../data/fingerspelling/data/\")\n",
    "val_set = FingerSpellingDataset(\"../data/fingerspelling/data/\", keypoints_extractor, keypoints_identifier=LANDMARKS, subset=\"val\")\n",
    "\n",
    "frame_counts = []\n",
    "labels = []\n",
    "for sample, label in val_set:\n",
    "    frame_counts.append(sample.shape[0])\n",
    "    labels.append(label)\n",
    "\n",
    "print(np.mean(frame_counts))\n",
    "\n",
    "# get label frequency in the labels list\n",
    "counter = Counter(labels)\n",
    "print(counter)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Hand keypoint visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_hand_keypoints(dataframe, hand, frame, ret=False, ax=None):\n",
    "    \"\"\"Scatter-plot the 21 keypoints of one hand for a single frame.\n",
    "\n",
    "    Args:\n",
    "        dataframe: keypoint table with one row per frame. Columns 66-107 are read\n",
    "            for the left hand and columns 108-149 for the right hand, and are\n",
    "            interpreted as interleaved x, y values.\n",
    "        hand: \"right\" selects the right-hand columns; any other value selects\n",
    "            the left-hand columns.\n",
    "        frame: positional index of the row to plot.\n",
    "        ret: if True, return the scatter artist (drawn in blue) instead of the\n",
    "            styled row.\n",
    "        ax: optional axes to draw on. Only scatter points drawn earlier are\n",
    "            removed from it, so its limits, aspect, title and labels survive.\n",
    "            When omitted, the current figure is cleared and used.\n",
    "\n",
    "    Returns:\n",
    "        The scatter artist when `ret` is True, otherwise a Styler of the selected row.\n",
    "    \"\"\"\n",
    "    if ax is None:\n",
    "        # no axes given: wipe the current figure and draw on a fresh axes\n",
    "        plt.clf()\n",
    "        ax = plt.gca()\n",
    "    else:\n",
    "        # keep the caller's axes configuration, drop only the previous points\n",
    "        for old_points in list(ax.collections):\n",
    "            old_points.remove()\n",
    "\n",
    "    first_column = 66 + (42 if hand == \"right\" else 0)\n",
    "    hand_columns = np.arange(first_column, first_column + 42)\n",
    "\n",
    "    # get the x, y coordinates of the hand keypoints\n",
    "    frame_df = dataframe.iloc[frame:frame + 1, hand_columns]\n",
    "    hand_coords = frame_df.values.reshape(21, 2)\n",
    "\n",
    "    x_coords = hand_coords[:, 0]\n",
    "    y_coords = -hand_coords[:, 1]  # negative because pixels start from the top left\n",
    "\n",
    "    if ret:\n",
    "        return ax.scatter(x_coords, y_coords, c='b')\n",
    "    # Plot the keypoints\n",
    "    ax.scatter(x_coords, y_coords)\n",
    "    return frame_df.style"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Set video, hand and frame to display\n",
    "video_name = 'A!1_20230301111632818161_I3EC3.mp4'\n",
    "hand = \"right\"\n",
    "frame = 5\n",
    "%reload_ext autoreload"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Extract keypoints from requested video\n",
    "keypoints_extractor = KeypointExtractor(\"../data/fingerspelling/data/\")\n",
    "\n",
    "#Plot the hand keypoints\n",
    "df = keypoints_extractor.extract_keypoints_from_video(video_name)\n",
    "plot_hand_keypoints(df, hand, frame)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Plot the NORMALIZED hand keypoints (using minmax)\n",
    "df = keypoints_extractor.extract_keypoints_from_video(video_name, normalize=\"minmax\")\n",
    "\n",
    "# the limits are set on an explicit axes that is handed to the plot function;\n",
    "# limits set through plt before the call would be lost when the figure is cleared\n",
    "fig, ax = plt.subplots()\n",
    "ax.set_xlim(-0.5, 0.5)\n",
    "ax.set_ylim(-0.5, 0.5)\n",
    "plot_hand_keypoints(df, hand, frame, ax=ax)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Plot the NORMALIZED hand keypoints (using bohacek)\n",
    "df = keypoints_extractor.extract_keypoints_from_video(video_name, normalize=\"bohacek\")\n",
    "\n",
    "# the limits are set on an explicit axes that is handed to the plot function;\n",
    "# limits set through plt before the call would be lost when the figure is cleared\n",
    "fig, ax = plt.subplots()\n",
    "ax.set_xlim(-0.5, 0.5)\n",
    "ax.set_ylim(-0.5, 0.5)\n",
    "plot_hand_keypoints(df, hand, frame, ax=ax)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pose keypoint visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_pose_keypoints(dataframe, frame, ret=False, aug=False):\n",
    "    \"\"\"Scatter-plot the 17 pose keypoints of a single frame on the current figure.\n",
    "\n",
    "    The current figure is cleared first and both axes are fixed to [-0.5, 0.5].\n",
    "\n",
    "    Args:\n",
    "        dataframe: keypoint table with one row per frame. The first 34 columns\n",
    "            are read and interpreted as interleaved x, y values.\n",
    "        frame: positional index of the row to plot.\n",
    "        ret: if True, return the scatter artist (drawn in red) instead of the\n",
    "            styled row.\n",
    "        aug: if True, pass the coordinates through Z_augmentation with\n",
    "            hand_side=\"right\" before plotting.\n",
    "\n",
    "    Returns:\n",
    "        The scatter artist when `ret` is True, otherwise a Styler of the selected row.\n",
    "    \"\"\"\n",
    "    # clear plt\n",
    "    plt.clf()\n",
    "    pose_columns = np.arange(34)\n",
    "\n",
    "    # get the x, y coordinates of the relevant pose keypoints\n",
    "    frame_df = dataframe.iloc[frame:frame + 1, pose_columns]\n",
    "    pose_coords = frame_df.values.reshape(34, 1)\n",
    "\n",
    "    if aug:\n",
    "        pose_coords = Z_augmentation()(pose_coords, hand_side=\"right\")\n",
    "    pose_coords = pose_coords.reshape(17, 2)\n",
    "\n",
    "    x_coords = pose_coords[:, 0]\n",
    "    y_coords = -pose_coords[:, 1]  # negative because pixels start from the top left\n",
    "\n",
    "    # the same fixed window is used whether or not the artist is returned\n",
    "    plt.xlim(-0.5, 0.5)\n",
    "    plt.ylim(-0.5, 0.5)\n",
    "\n",
    "    # return a scatter plot of the pose keypoints\n",
    "    if ret:\n",
    "        return plt.scatter(x_coords, y_coords, c='r')\n",
    "\n",
    "    plt.scatter(x_coords, y_coords)\n",
    "    return frame_df.style\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Set video and frame to display\n",
    "video_name = 'Z!26_20230313174809489724_CGG2F.mp4'\n",
    "frame = 2\n",
    "%reload_ext autoreload"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Extract keypoints from requested video\n",
    "keypoints_extractor = KeypointExtractor(\"../data/fingerspelling/data/\")\n",
    "\n",
    "#Plot the pose keypoints\n",
    "# NOTE(review): plot_pose_keypoints always fixes both axes to [-0.5, 0.5];\n",
    "# confirm that un-normalized coordinates fall inside that window\n",
    "df = keypoints_extractor.extract_keypoints_from_video(video_name)\n",
    "plot_pose_keypoints(df, frame)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Plot the NORMALIZED pose keypoints (using bohacek)\n",
    "norm_df = keypoints_extractor.extract_keypoints_from_video(video_name, normalize=\"bohacek\")\n",
    "plot_pose_keypoints(norm_df, frame)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create animation of the pose keypoints over time (uses norm_df from the previous cell)\n",
    "fig = plt.figure()\n",
    "\n",
    "def init():\n",
    "    return plot_pose_keypoints(norm_df, 0, ret=True)\n",
    "\n",
    "def animate(i):\n",
    "    return plot_pose_keypoints(norm_df, i, ret=True, aug=True)\n",
    "\n",
    "# show animation of the keypoints over time, show one frame every 1000ms and remove the other frames\n",
    "anim = animation.FuncAnimation(fig, animate, init_func=init, frames=norm_df.shape[0], interval=1000, repeat=False)\n",
    "video_html = anim.to_html5_video()\n",
    "\n",
    "# close the figure once it is encoded so the last frame is not shown again as a static image\n",
    "plt.close(fig)\n",
    "HTML(video_html)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def animate_hand_keypoints(dataframe, hand, title):\n",
    "    \"\"\"Build an HTML5 video that shows the keypoints of one hand frame by frame.\n",
    "\n",
    "    Args:\n",
    "        dataframe: keypoint table passed on to plot_hand_keypoints; one video\n",
    "            frame is rendered per row.\n",
    "        hand: hand selector passed on to plot_hand_keypoints (\"right\" or \"left\").\n",
    "        title: title shown above the axes.\n",
    "\n",
    "    Returns:\n",
    "        An IPython HTML object wrapping the encoded animation.\n",
    "    \"\"\"\n",
    "    fig, ax = plt.subplots()\n",
    "    ax.set_xlim(-0.5, 0.5)\n",
    "    ax.set_ylim(-0.5, 0.5)\n",
    "    ax.set_aspect('equal')\n",
    "    ax.set_title(title)\n",
    "    ax.set_xlabel(\"x\")\n",
    "    ax.set_ylabel(\"y\")\n",
    "\n",
    "    def draw_frame(i):\n",
    "        # drawing on `ax` keeps the limits, title and labels configured above\n",
    "        return plot_hand_keypoints(dataframe, hand, i, ret=True, ax=ax)\n",
    "\n",
    "    def draw_first_frame():\n",
    "        return draw_frame(0)\n",
    "\n",
    "    # show one frame every 1000ms and remove the other frames\n",
    "    anim = animation.FuncAnimation(fig, draw_frame, init_func=draw_first_frame, frames=dataframe.shape[0], interval=1000, repeat=False)\n",
    "    video_html = anim.to_html5_video()\n",
    "\n",
    "    # close the figure once it is encoded so the last frame is not shown again as a static image\n",
    "    plt.close(fig)\n",
    "    return HTML(video_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "animate_hand_keypoints(norm_df, \"right\", \"Right Hand\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "animate_hand_keypoints(norm_df, \"left\", \"Left Hand\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  },
  "vscode": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}