Resolve WES-90 "Integrate signpredictor in courses"

This commit is contained in:
Louis Adriaens
2023-03-18 19:53:17 +00:00
committed by Jerome Coudron
parent 1a75791d62
commit 746906294b
463 changed files with 99422 additions and 1187 deletions

View File

@@ -0,0 +1,147 @@
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System.Linq;
/// <summary>
/// Landmark-index metadata deserialized (via JsonUtility) from the
/// Resources/Models/FingerSpelling/landmarks JSON asset.
/// NOTE: the snake_case field names are deliberate — JsonUtility.FromJson
/// matches them against the JSON keys exactly, so do not rename them.
/// </summary>
public class ModelInfo
{
    // Indices of the pose landmarks the sign-predictor model consumes.
    public List<int> pose_landmarks;
    // Indices of the hand landmarks; the same index list is used for both
    // the left and the right hand (see KeypointManager.addLandmarks).
    public List<int> hand_landmarks;
}
/// <summary>
/// Collects MediaPipe landmark coordinates frame by frame, normalizes the hand
/// coordinates, and maintains a fixed-size sliding window of flattened
/// keypoint frames that is fed to the sign-predictor model.
/// </summary>
public class KeypointManager
{
    // Number of frames kept in the sliding buffer. getAllKeypoints only
    // yields data once this many frames have been collected.
    private const int BufferSize = 10;

    // Landmark indices to extract, loaded from the landmarks JSON resource.
    private ModelInfo model_info;

    // Sliding window of flattened keypoint frames, oldest first.
    private List<List<float>> keypoints_buffer;

    public KeypointManager()
    {
        TextAsset model_info_json = Resources.Load<TextAsset>("Models/FingerSpelling/landmarks");
        this.model_info = JsonUtility.FromJson<ModelInfo>(model_info_json.text);
        this.keypoints_buffer = new List<List<float>>();
    }

    /// <summary>
    /// Normalizes hand coordinates relative to the hand's bounding box:
    /// centered on the box and divided by its width/height, yielding values
    /// roughly in [-0.5, 0.5]. Returns the input unchanged when the box is
    /// degenerate (zero width or height, e.g. an absent hand whose
    /// coordinates are all zero).
    /// </summary>
    /// <param name="hand_x">X coordinates of one hand's landmarks (non-empty).</param>
    /// <param name="hand_y">Y coordinates of one hand's landmarks (non-empty).</param>
    /// <returns>The normalized (x, y) coordinate lists.</returns>
    private (List<float>, List<float>) normalizeHand(List<float> hand_x, List<float> hand_y)
    {
        float min_x = hand_x.Min();
        float min_y = hand_y.Min();
        float max_x = hand_x.Max();
        float max_y = hand_y.Max();
        float width = max_x - min_x;
        float height = max_y - min_y;
        if (width == 0 || height == 0)
        {
            return (hand_x, hand_y);
        }
        float center_x = (min_x + max_x) / 2;
        float center_y = (min_y + max_y) / 2;
        List<float> normalized_x = new List<float>();
        List<float> normalized_y = new List<float>();
        for (int i = 0; i < hand_x.Count; i++)
        {
            normalized_x.Add((hand_x[i] - center_x) / width);
            normalized_y.Add((hand_y[i] - center_y) / height);
        }
        return (normalized_x, normalized_y);
    }

    /// <summary>
    /// Extracts the configured pose and hand landmarks from one frame and
    /// appends them to the sliding buffer, flattened as interleaved x,y pairs
    /// in the order: pose, left hand, right hand. Any landmark list that is
    /// null (not detected this frame) is replaced with zeros so every frame
    /// has the same fixed length.
    /// </summary>
    public void addLandmarks(Mediapipe.NormalizedLandmarkList poseLandmarks, Mediapipe.NormalizedLandmarkList leftHandLandmarks, Mediapipe.NormalizedLandmarkList rightHandLandmarks)
    {
        List<float> pose_x = new List<float>();
        List<float> pose_y = new List<float>();
        List<float> left_hand_x = new List<float>();
        List<float> left_hand_y = new List<float>();
        List<float> right_hand_x = new List<float>();
        List<float> right_hand_y = new List<float>();
        if (poseLandmarks != null)
        {
            foreach (var landmark_index in model_info.pose_landmarks)
            {
                pose_x.Add(poseLandmarks.Landmark[landmark_index].X);
                pose_y.Add(poseLandmarks.Landmark[landmark_index].Y);
            }
        }
        else
        {
            // Pose not detected this frame: pad with zeros to keep the
            // frame's length fixed.
            foreach (var landmark_index in model_info.pose_landmarks)
            {
                pose_x.Add(0);
                pose_y.Add(0);
            }
        }
        foreach (var landmark_index in model_info.hand_landmarks)
        {
            if (leftHandLandmarks == null)
            {
                left_hand_x.Add(0);
                left_hand_y.Add(0);
            }
            else
            {
                left_hand_x.Add(leftHandLandmarks.Landmark[landmark_index].X);
                left_hand_y.Add(leftHandLandmarks.Landmark[landmark_index].Y);
            }
            if (rightHandLandmarks == null)
            {
                right_hand_x.Add(0);
                right_hand_y.Add(0);
            }
            else
            {
                right_hand_x.Add(rightHandLandmarks.Landmark[landmark_index].X);
                right_hand_y.Add(rightHandLandmarks.Landmark[landmark_index].Y);
            }
        }
        // Normalize each hand relative to its own bounding box.
        (left_hand_x, left_hand_y) = normalizeHand(left_hand_x, left_hand_y);
        (right_hand_x, right_hand_y) = normalizeHand(right_hand_x, right_hand_y);
        // Keep the buffer at a fixed size by dropping the oldest frame.
        if (keypoints_buffer.Count >= BufferSize)
        {
            keypoints_buffer.RemoveAt(0);
        }
        // Flatten this frame as interleaved x,y pairs: pose, left hand, right hand.
        List<float> keypoints = new List<float>();
        for (int i = 0; i < pose_x.Count; i++)
        {
            keypoints.Add(pose_x[i]);
            keypoints.Add(pose_y[i]);
        }
        for (int i = 0; i < left_hand_x.Count; i++)
        {
            keypoints.Add(left_hand_x[i]);
            keypoints.Add(left_hand_y[i]);
        }
        for (int i = 0; i < right_hand_x.Count; i++)
        {
            keypoints.Add(right_hand_x[i]);
            keypoints.Add(right_hand_y[i]);
        }
        keypoints_buffer.Add(keypoints);
    }

    /// <summary>
    /// Returns the buffered frames once the buffer is full, or null while
    /// fewer than BufferSize frames have been collected.
    /// NOTE: this returns a reference to the internal buffer, which keeps
    /// being updated by addLandmarks — callers should consume it promptly
    /// rather than holding onto it.
    /// </summary>
    public List<List<float>> getAllKeypoints()
    {
        if (keypoints_buffer.Count < BufferSize)
        {
            return null;
        }
        return keypoints_buffer;
    }
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 40ff941e1b34847bdb160c6950f35aec
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,16 @@
{
"name": "MediaPipeUnityScripts",
"rootNamespace": "",
"references": [
"GUID:04c4d86a70aa56c55a78c61f1ab1a56d"
],
"includePlatforms": [],
"excludePlatforms": [],
"allowUnsafeCode": false,
"overrideReferences": false,
"precompiledReferences": [],
"autoReferenced": true,
"defineConstraints": [],
"versionDefines": [],
"noEngineReferences": false
}

View File

@@ -0,0 +1,7 @@
fileFormatVersion: 2
guid: edc93f477bb73a743a97d6882ed330b3
AssemblyDefinitionImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,8 @@
fileFormatVersion: 2
guid: 1c318b35315a4ef44be1df6f27b8b582
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -154,14 +154,14 @@ RectTransform:
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children:
- {fileID: 1475592761}
- {fileID: 2014139443}
m_Father: {fileID: 884590458}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0.5, y: 0.5}
m_AnchorMax: {x: 0.5, y: 0.5}
m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 100, y: 100}
m_SizeDelta: {x: 1450.822, y: 920.907}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &560904346
MonoBehaviour:
@@ -325,10 +325,8 @@ MonoBehaviour:
m_EditorClassIdentifier:
_configAsset: {fileID: 4900000, guid: 6288c43cdca97374782dac1ea87aa029, type: 3}
_screen: {fileID: 560904347}
_width: 640
_height: 480
_fps: 30
_poseLandmarkListAnnotationController: {fileID: 1475592763}
_poseLandmarkListAnnotationController: {fileID: 0}
_letter: {fileID: 2014139444}
--- !u!1 &884590454
GameObject:
m_ObjectHideFlags: 0
@@ -405,7 +403,7 @@ Canvas:
m_OverrideSorting: 0
m_OverridePixelPerfect: 0
m_SortingBucketNormalizedSize: 0
m_AdditionalShaderChannelsFlag: 0
m_AdditionalShaderChannelsFlag: 25
m_SortingLayerID: 0
m_SortingOrder: 0
m_TargetDisplay: 0
@@ -430,115 +428,6 @@ RectTransform:
m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 0, y: 0}
m_Pivot: {x: 0, y: 0}
--- !u!1001 &937709944
PrefabInstance:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Modification:
m_TransformParent: {fileID: 1475592761}
m_Modifications:
- target: {fileID: 1915238444563462410, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
propertyPath: m_RootOrder
value: 0
objectReference: {fileID: 0}
- target: {fileID: 1915238444563462410, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
propertyPath: m_LocalPosition.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 1915238444563462410, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
propertyPath: m_LocalPosition.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 1915238444563462410, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
propertyPath: m_LocalPosition.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 1915238444563462410, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
propertyPath: m_LocalRotation.w
value: 1
objectReference: {fileID: 0}
- target: {fileID: 1915238444563462410, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
propertyPath: m_LocalRotation.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 1915238444563462410, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
propertyPath: m_LocalRotation.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 1915238444563462410, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
propertyPath: m_LocalRotation.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 1915238444563462410, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 1915238444563462410, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 1915238444563462410, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 1915238444563462411, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
propertyPath: m_Name
value: PoseLandmarkList Annotation
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
--- !u!4 &937709945 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 1915238444563462410, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
m_PrefabInstance: {fileID: 937709944}
m_PrefabAsset: {fileID: 0}
--- !u!1 &1475592760
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 1475592761}
- component: {fileID: 1475592763}
m_Layer: 5
m_Name: AnnotationLayer
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!4 &1475592761
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1475592760}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children:
- {fileID: 937709945}
m_Father: {fileID: 560904345}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &1475592763
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1475592760}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 70c2b36b394190968977c6493e60e0af, type: 3}
m_Name:
m_EditorClassIdentifier:
annotation: {fileID: 2100643019}
_visualizeZ: 0
--- !u!1 &1522608646
GameObject:
m_ObjectHideFlags: 0
@@ -717,14 +606,140 @@ Transform:
m_Father: {fileID: 0}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 50, y: -30, z: 0}
--- !u!114 &2100643019 stripped
MonoBehaviour:
m_CorrespondingSourceObject: {fileID: 1915238444563462421, guid: 4418f6a92856c5b51b58a36e3be7ed5c, type: 3}
m_PrefabInstance: {fileID: 937709944}
--- !u!1 &2014139442
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 2014139443}
- component: {fileID: 2014139445}
- component: {fileID: 2014139444}
m_Layer: 5
m_Name: Text (TMP)
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!224 &2014139443
RectTransform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 2014139442}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children: []
m_Father: {fileID: 560904345}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0.5, y: 0.5}
m_AnchorMax: {x: 0.5, y: 0.5}
m_AnchoredPosition: {x: 219.5, y: 35.5}
m_SizeDelta: {x: 191.357, y: 150.453}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &2014139444
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 2014139442}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 39bac9dd52c31ae7aa01a7383bc44853, type: 3}
m_Script: {fileID: 11500000, guid: f4688fdb7df04437aeb418b961361dc5, type: 3}
m_Name:
m_EditorClassIdentifier:
m_Material: {fileID: 0}
m_Color: {r: 1, g: 1, b: 1, a: 1}
m_RaycastTarget: 1
m_RaycastPadding: {x: 0, y: 0, z: 0, w: 0}
m_Maskable: 1
m_OnCullStateChanged:
m_PersistentCalls:
m_Calls: []
m_text: '?
'
m_isRightToLeft: 0
m_fontAsset: {fileID: 11400000, guid: 8f586378b4e144a9851e7b34d9b748ee, type: 2}
m_sharedMaterial: {fileID: 2180264, guid: 8f586378b4e144a9851e7b34d9b748ee, type: 2}
m_fontSharedMaterials: []
m_fontMaterial: {fileID: 0}
m_fontMaterials: []
m_fontColor32:
serializedVersion: 2
rgba: 4278225934
m_fontColor: {r: 0.055272277, g: 0.5471698, b: 0, a: 1}
m_enableVertexGradient: 0
m_colorMode: 3
m_fontColorGradient:
topLeft: {r: 1, g: 1, b: 1, a: 1}
topRight: {r: 1, g: 1, b: 1, a: 1}
bottomLeft: {r: 1, g: 1, b: 1, a: 1}
bottomRight: {r: 1, g: 1, b: 1, a: 1}
m_fontColorGradientPreset: {fileID: 0}
m_spriteAsset: {fileID: 0}
m_tintAllSprites: 0
m_StyleSheet: {fileID: 0}
m_TextStyleHashCode: -1183493901
m_overrideHtmlColors: 0
m_faceColor:
serializedVersion: 2
rgba: 4294967295
m_fontSize: 79.46
m_fontSizeBase: 79.46
m_fontWeight: 400
m_enableAutoSizing: 0
m_fontSizeMin: 18
m_fontSizeMax: 72
m_fontStyle: 0
m_HorizontalAlignment: 1
m_VerticalAlignment: 256
m_textAlignment: 65535
m_characterSpacing: 0
m_wordSpacing: 0
m_lineSpacing: 0
m_lineSpacingMax: 0
m_paragraphSpacing: 0
m_charWidthMaxAdj: 0
m_enableWordWrapping: 1
m_wordWrappingRatios: 0.4
m_overflowMode: 0
m_linkedTextComponent: {fileID: 0}
parentLinkedComponent: {fileID: 0}
m_enableKerning: 1
m_enableExtraPadding: 0
checkPaddingRequired: 0
m_isRichText: 1
m_parseCtrlCharacters: 1
m_isOrthographic: 1
m_isCullingEnabled: 0
m_horizontalMapping: 0
m_verticalMapping: 0
m_uvLineOffset: 0
m_geometrySortingOrder: 0
m_IsTextObjectScaleStatic: 0
m_VertexBufferAutoSizeReduction: 0
m_useMaxVisibleDescender: 1
m_pageToDisplay: 1
m_margin: {x: 0, y: 0, z: 0, w: 0}
m_isUsingLegacyAnimationComponent: 0
m_isVolumetricText: 0
m_hasFontAssetChanged: 0
m_baseMaterial: {fileID: 0}
m_maskOffset: {x: 0, y: 0, z: 0, w: 0}
--- !u!222 &2014139445
CanvasRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 2014139442}
m_CullTransparentMesh: 1

View File

@@ -0,0 +1,8 @@
fileFormatVersion: 2
guid: 1275314861c48ed40a9f02557c8ca10d
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,19 @@
{
"name": "SignPredictor",
"rootNamespace": "",
"references": [
"GUID:6055be8ebefd69e48b49212b09b47b2f",
"GUID:5c2b5ba89f9e74e418232e154bc5cc7a",
"GUID:04c4d86a70aa56c55a78c61f1ab1a56d",
"GUID:edc93f477bb73a743a97d6882ed330b3"
],
"includePlatforms": [],
"excludePlatforms": [],
"allowUnsafeCode": false,
"overrideReferences": false,
"precompiledReferences": [],
"autoReferenced": true,
"defineConstraints": [],
"versionDefines": [],
"noEngineReferences": false
}

View File

@@ -0,0 +1,7 @@
fileFormatVersion: 2
guid: d0b6b39a21908f94fbbd9f2c196a9725
AssemblyDefinitionImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,343 @@
// Copyright (c) 2021 homuler
//
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.
// ATTENTION!: This code is for a tutorial.
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using TMPro;
using Unity.Barracuda;
using UnityEngine;
using UnityEngine.UI;
using Debug = UnityEngine.Debug;
namespace Mediapipe.Unity.Tutorial
{
public class Wesign_extractor : MonoBehaviour
{
    /// <summary>
    /// Config file to set up the graph
    /// </summary>
    [SerializeField] private TextAsset _configAsset;
    /// <summary>
    /// The screen object on which the video is displayed
    /// </summary>
    [SerializeField] private RawImage _screen;
    /// <summary>
    /// MediaPipe graph
    /// </summary>
    private CalculatorGraph _graph;
    /// <summary>
    /// Resource manager for graph resources
    /// </summary>
    private ResourceManager _resourceManager;
    /// <summary>
    /// Webcam texture
    /// </summary>
    private WebCamTexture _webCamTexture;
    /// <summary>
    /// Input texture that webcam pixels are copied into before being handed
    /// to the MediaPipe graph
    /// </summary>
    private Texture2D _inputTexture;
    /// <summary>
    /// Screen pixel data
    /// </summary>
    private Color32[] _pixelData;
    /// <summary>
    /// Stopwatch to give a timestamp to video frames
    /// </summary>
    private Stopwatch _stopwatch;
    /// <summary>
    /// The mediapipe stream which contains the pose landmarks
    /// </summary>
    private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> posestream;
    /// <summary>
    /// The mediapipe stream which contains the left hand landmarks
    /// </summary>
    private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> leftstream;
    /// <summary>
    /// Presence stream; not used inside this class, kept because it is public
    /// and may be referenced from elsewhere
    /// </summary>
    public OutputStream<DetectionVectorPacket, List<Detection>> _presenceStream;
    /// <summary>
    /// The mediapipe stream which contains the right hand landmarks
    /// </summary>
    private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> rightstream;
    /// <summary>
    /// Keypoint manager which normalizes the hand landmarks and buffers the
    /// per-frame keypoints
    /// </summary>
    private KeypointManager k;
    /// <summary>
    /// The worker on which we schedule the sign-predictor model execution
    /// </summary>
    private IWorker worker;
    /// <summary>
    /// Width of the webcam
    /// </summary>
    private int _width;
    /// <summary>
    /// Height of the webcam
    /// </summary>
    private int _height;
    /// <summary>
    /// The enumerator of the worker which executes the sign predictor model
    /// </summary>
    private IEnumerator enumerator;
    /// <summary>
    /// The latest per-letter probabilities predicted by the model (index 0
    /// maps to 'A'), currently read by Courses
    /// </summary>
    public Dictionary<char, float> letterProbabilities;
    /// <summary>
    /// Whether the resource manager has already been initialized; the
    /// MediaPipe assets only need to be prepared once per process
    /// </summary>
    private static bool resourceManagerIsInitialized = false;
    /// <summary>
    /// An input tensor for the sign predictor
    /// </summary>
    private Tensor inputTensor;
    /// <summary>
    /// Google Mediapipe setup & run
    /// </summary>
    /// <returns> IEnumerator </returns>
    /// <exception cref="System.Exception">Thrown when no webcam device is found</exception>
    private IEnumerator Start()
    {
        Debug.Log("starting ...");
        // Webcam setup
        if (WebCamTexture.devices.Length == 0)
        {
            throw new System.Exception("Web Camera devices are not found");
        }
        // Start the webcam
        WebCamDevice webCamDevice = WebCamTexture.devices[0];
        _webCamTexture = new WebCamTexture(webCamDevice.name);
        _webCamTexture.Play();
        // WebCamTexture reports a small placeholder size until the camera
        // has actually started delivering frames.
        yield return new WaitUntil(() => _webCamTexture.width > 16);
        // Set webcam aspect ratio on the screen, keeping its current height
        _width = _webCamTexture.width;
        _height = _webCamTexture.height;
        float webcamAspect = (float)_webCamTexture.width / (float)_webCamTexture.height;
        _screen.rectTransform.sizeDelta = new Vector2(_screen.rectTransform.sizeDelta.y * webcamAspect, (_screen.rectTransform.sizeDelta.y));
        _screen.texture = _webCamTexture;
        _inputTexture = new Texture2D(_width, _height, TextureFormat.RGBA32, false);
        _pixelData = new Color32[_width * _height];
        if (!resourceManagerIsInitialized)
        {
            _resourceManager = new StreamingAssetsResourceManager();
            yield return _resourceManager.PrepareAssetAsync("pose_detection.bytes");
            yield return _resourceManager.PrepareAssetAsync("pose_landmark_full.bytes");
            yield return _resourceManager.PrepareAssetAsync("face_landmark.bytes");
            yield return _resourceManager.PrepareAssetAsync("hand_landmark_full.bytes");
            yield return _resourceManager.PrepareAssetAsync("face_detection_short_range.bytes");
            yield return _resourceManager.PrepareAssetAsync("hand_recrop.bytes");
            yield return _resourceManager.PrepareAssetAsync("handedness.txt");
            resourceManagerIsInitialized = true;
        }
        _stopwatch = new Stopwatch();
        // Setting up the graph; each output stream has a matching
        // *_presence stream produced by a PacketPresenceCalculator node
        _graph = new CalculatorGraph(_configAsset.text);
        posestream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(_graph, "pose_landmarks", "pose_landmarks_presence");
        leftstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(_graph, "left_hand_landmarks", "left_hand_landmarks_presence");
        rightstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(_graph, "right_hand_landmarks", "right_hand_landmarks_presence");
        posestream.StartPolling().AssertOk();
        leftstream.StartPolling().AssertOk();
        rightstream.StartPolling().AssertOk();
        _graph.StartRun().AssertOk();
        _stopwatch.Start();
        k = new KeypointManager();
        // Load the Barracuda sign-predictor model and create a worker to run it
        var model = ModelLoader.Load(Resources.Load<NNModel>("Models/Fingerspelling/model_A-L"));
        worker = model.CreateWorker();
        StartCoroutine(SignRecognitionCoroutine());
        StartCoroutine(MediapipeCoroutine());
    }
    /// <summary>
    /// Coroutine which feeds webcam frames into the mediapipe pipeline and
    /// forwards the resulting landmarks to the keypoint manager
    /// </summary>
    /// <returns></returns>
    private IEnumerator MediapipeCoroutine()
    {
        while (true)
        {
            _inputTexture.SetPixels32(_webCamTexture.GetPixels32(_pixelData));
            var imageFrame = new ImageFrame(ImageFormat.Types.Format.Srgba, _width, _height, _width * 4, _inputTexture.GetRawTextureData<byte>());
            var currentTimestamp = _stopwatch.ElapsedTicks / (System.TimeSpan.TicksPerMillisecond / 1000);
            _graph.AddPacketToInputStream("input_video", new ImageFramePacket(imageFrame, new Timestamp(currentTimestamp))).AssertOk();
            yield return new WaitForEndOfFrame();
            Mediapipe.NormalizedLandmarkList _poseLandmarks = null;
            Mediapipe.NormalizedLandmarkList _leftHandLandmarks = null;
            Mediapipe.NormalizedLandmarkList _rightHandLandmarks = null;
            // Non-blocking polls: each TryGetNext either fills the variable
            // or leaves it null when no packet was available this frame.
            yield return new WaitUntil(() => { posestream.TryGetNext(out _poseLandmarks, false); return true;});
            yield return new WaitUntil(() => { leftstream.TryGetNext(out _leftHandLandmarks, false); return true; });
            yield return new WaitUntil(() => { rightstream.TryGetNext(out _rightHandLandmarks, false); return true; });
            k.addLandmarks(_poseLandmarks, _leftHandLandmarks, _rightHandLandmarks);
        }
    }
    /// <summary>
    /// Coroutine which calls the sign predictor model and publishes the
    /// per-letter probabilities in letterProbabilities
    /// </summary>
    /// <returns></returns>
    private IEnumerator SignRecognitionCoroutine()
    {
        while (true)
        {
            List<List<float>> input = k.getAllKeypoints();
            if (input != null)
            {
                int frameCount = input.Count;
                int keypoints_per_frame = input[0].Count;
                // Create a tensor with the input
                inputTensor = new Tensor(frameCount, keypoints_per_frame);
                // Fill the tensor with the input
                for (int i = 0; i < frameCount; i++)
                {
                    for (int j = 0; j < keypoints_per_frame; j++)
                    {
                        inputTensor[i, j] = input[i][j];
                    }
                }
                // Execute the model a few layers at a time, yielding
                // periodically so one inference does not stall a frame
                int stepsPerFrame = 190;
                enumerator = worker.StartManualSchedule(inputTensor);
                int step = 0;
                while (enumerator.MoveNext())
                {
                    if (++step % stepsPerFrame == 0)
                    {
                        yield return null;
                    }
                }
                var output = worker.PeekOutput();
                inputTensor.Dispose();
                // Get the output as an array
                float[] outputArray = output.ToReadOnlyArray();
                // Softmax the raw scores, subtracting the max for numerical stability
                float max = outputArray.Max();
                float[] softmaxedOutput = outputArray.Select(x => Mathf.Exp(x - max)).ToArray();
                float sum = softmaxedOutput.Sum();
                float[] softmaxedOutput2 = softmaxedOutput.Select(x => x / sum).ToArray();
                // Publish the per-letter probabilities (index 0 -> 'A'),
                // currently used by Courses
                letterProbabilities = new Dictionary<char, float>();
                for (int i = 0; i < softmaxedOutput2.Length; i++)
                {
                    letterProbabilities.Add((char)('A' + i), softmaxedOutput2[i]);
                }
            }
            else
            {
                // Not enough buffered frames yet; wait until next frame
                yield return null;
            }
        }
    }
    /// <summary>
    /// Proper destruction of the Mediapipe graph and Barracuda resources
    /// </summary>
    private void OnDestroy()
    {
        if (_webCamTexture != null)
        {
            _webCamTexture.Stop();
        }
        if (_graph != null)
        {
            try
            {
                _graph.CloseInputStream("input_video").AssertOk();
                _graph.WaitUntilDone().AssertOk();
            }
            finally
            {
                _graph.Dispose();
            }
        }
        // inputTensor must still be disposed, if it exists
        inputTensor?.Dispose();
        // worker is null when Start() aborted before the model was loaded
        // (e.g. no webcam device) -- guard against a NullReferenceException
        worker?.Dispose();
    }
}
}

View File

@@ -32,6 +32,10 @@ output_stream: "face_landmarks"
output_stream: "left_hand_landmarks"
output_stream: "right_hand_landmarks"
output_stream: "pose_landmarks_presence"
output_stream: "left_hand_landmarks_presence"
output_stream: "right_hand_landmarks_presence"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
@@ -91,4 +95,22 @@ node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:segmentation_mask_rotated"
output_stream: "IMAGE:segmentation_mask"
}
node {
calculator: "PacketPresenceCalculator"
input_stream: "PACKET:pose_landmarks"
output_stream: "PRESENCE:pose_landmarks_presence"
}
node {
calculator: "PacketPresenceCalculator"
input_stream: "PACKET:left_hand_landmarks"
output_stream: "PRESENCE:left_hand_landmarks_presence"
}
node {
calculator: "PacketPresenceCalculator"
input_stream: "PACKET:right_hand_landmarks"
output_stream: "PRESENCE:right_hand_landmarks_presence"
}

View File

@@ -1,203 +0,0 @@
// Copyright (c) 2021 homuler
//
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.
// ATTENTION!: This code is for a tutorial.
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using Unity.VisualScripting;
using UnityEngine;
using UnityEngine.UI;
using Mediapipe.Unity.CoordinateSystem;
namespace Mediapipe.Unity.Tutorial
{
public class Wesign_extractor : MonoBehaviour
{
    /// <summary>
    /// Config file to set up the graph
    /// </summary>
    [SerializeField] private TextAsset _configAsset;
    /// <summary>
    /// The screen object on which the video is displayed
    /// </summary>
    [SerializeField] private RawImage _screen;
    /// <summary>
    /// width of the screen
    /// </summary>
    [SerializeField] private int _width;
    /// <summary>
    /// height of the screen
    /// </summary>
    [SerializeField] private int _height;
    /// <summary>
    /// fps of the screen
    /// </summary>
    [SerializeField] private int _fps;
    /// <summary>
    /// Landmark annotation controller to show the landmarks on the screen
    /// </summary>
    [SerializeField] private PoseLandmarkListAnnotationController _poseLandmarkListAnnotationController;
    /// <summary>
    /// MediaPipe graph
    /// </summary>
    private CalculatorGraph _graph;
    /// <summary>
    /// Resource manager for graph resources
    /// </summary>
    private ResourceManager _resourceManager;
    /// <summary>
    /// Webcam texture
    /// </summary>
    private WebCamTexture _webCamTexture;
    /// <summary>
    /// Input texture
    /// </summary>
    private Texture2D _inputTexture;
    /// <summary>
    /// Screen pixel data
    /// </summary>
    private Color32[] _pixelData;
    /// <summary>
    /// Stopwatch to give a timestamp to video frames
    /// </summary>
    private Stopwatch _stopwatch;
    /// <summary>
    /// Google Mediapipe setup & run
    /// </summary>
    /// <returns> IEnumerator </returns>
    /// <exception cref="System.Exception"></exception>
    private IEnumerator Start()
    {
        // Webcam setup
        if (WebCamTexture.devices.Length == 0)
        {
            throw new System.Exception("Web Camera devices are not found");
        }
        var webCamDevice = WebCamTexture.devices[0];
        _webCamTexture = new WebCamTexture(webCamDevice.name, _width, _height, _fps);
        _webCamTexture.Play();
        // WebCamTexture reports a small placeholder size until the camera
        // has actually started delivering frames.
        yield return new WaitUntil(() => _webCamTexture.width > 16);
        _screen.rectTransform.sizeDelta = new Vector2(_width, _height);
        _screen.texture = _webCamTexture;
        // TODO this method is kinda meh you should use ImageFrame
        _inputTexture = new Texture2D(_width, _height, TextureFormat.RGBA32, false);
        _pixelData = new Color32[_width * _height];
        // Prepare the MediaPipe model assets the graph needs.
        //_resourceManager = new LocalResourceManager();
        _resourceManager = new StreamingAssetsResourceManager();
        yield return _resourceManager.PrepareAssetAsync("pose_detection.bytes");
        yield return _resourceManager.PrepareAssetAsync("pose_landmark_full.bytes");
        yield return _resourceManager.PrepareAssetAsync("face_landmark.bytes");
        yield return _resourceManager.PrepareAssetAsync("hand_landmark_full.bytes");
        yield return _resourceManager.PrepareAssetAsync("face_detection_short_range.bytes");
        yield return _resourceManager.PrepareAssetAsync("hand_recrop.bytes");
        yield return _resourceManager.PrepareAssetAsync("handedness.txt");
        _stopwatch = new Stopwatch();
        // Setting up the graph
        _graph = new CalculatorGraph(_configAsset.text);
        var posestream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(_graph, "pose_landmarks");
        var leftstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(_graph, "left_hand_landmarks");
        var rightstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(_graph, "right_hand_landmarks");
        posestream.StartPolling().AssertOk();
        leftstream.StartPolling().AssertOk();
        rightstream.StartPolling().AssertOk();
        _graph.StartRun().AssertOk();
        _stopwatch.Start();
        // Main loop: feed each webcam frame into the graph, then poll the
        // three landmark streams, draw the pose annotation and log the
        // first landmark of each list.
        while (true)
        {
            _inputTexture.SetPixels32(_webCamTexture.GetPixels32(_pixelData));
            var imageFrame = new ImageFrame(ImageFormat.Types.Format.Srgba, _width, _height, _width * 4, _inputTexture.GetRawTextureData<byte>());
            var currentTimestamp = _stopwatch.ElapsedTicks / (System.TimeSpan.TicksPerMillisecond / 1000);
            _graph.AddPacketToInputStream("input_video", new ImageFramePacket(imageFrame, new Timestamp(currentTimestamp))).AssertOk();
            yield return new WaitForEndOfFrame();
            //posestream.TryGetNext(out var poseLandmarks);
            if (posestream.TryGetNext(out var poseLandmarks))
            {
                if (poseLandmarks != null)
                {
                    // Draw the poseLandmarks on the screen
                    _poseLandmarkListAnnotationController.DrawNow(poseLandmarks);
                    var x = poseLandmarks.Landmark[0];
                    UnityEngine.Debug.Log($"Pose Coordinates: {x}");
                }
            }
            if (leftstream.TryGetNext(out var leftLandmarks))
            {
                if (leftLandmarks != null)
                {
                    var x = leftLandmarks.Landmark[0];
                    UnityEngine.Debug.Log($"Pose left Coordinates: {x}");
                }
            }
            if (rightstream.TryGetNext(out var rightLandmarks))
            {
                if (rightLandmarks != null)
                {
                    var x = rightLandmarks.Landmark[0];
                    UnityEngine.Debug.Log($"Pose right Coordinates: {x}");
                }
            }
        }
    }
    /// <summary>
    /// Propper destruction on the Mediapipegraph
    /// </summary>
    private void OnDestroy()
    {
        if (_webCamTexture != null)
        {
            _webCamTexture.Stop();
        }
        if (_graph != null)
        {
            try
            {
                // Close the input side first so WaitUntilDone can drain the
                // graph before disposal.
                _graph.CloseInputStream("input_video").AssertOk();
                _graph.WaitUntilDone().AssertOk();
            }
            finally
            {
                _graph.Dispose();
            }
        }
    }
}
}