Resolve WES-100 "Natml integration"
This commit is contained in:
@@ -1,11 +1,5 @@
|
||||
using DigitalRuby.Tween;
|
||||
using Mediapipe.Unity.Tutorial;
|
||||
using System;
|
||||
using System.Collections;
|
||||
using TMPro;
|
||||
using UnityEngine;
|
||||
using UnityEngine.Events;
|
||||
using UnityEngine.UI;
|
||||
|
||||
/// <summary>
|
||||
/// Class to display feedback during a course
|
||||
|
||||
@@ -3,12 +3,12 @@
|
||||
"rootNamespace": "",
|
||||
"references": [
|
||||
"GUID:6055be8ebefd69e48b49212b09b47b2f",
|
||||
"GUID:5c2b5ba89f9e74e418232e154bc5cc7a",
|
||||
"GUID:04c4d86a70aa56c55a78c61f1ab1a56d",
|
||||
"GUID:edc93f477bb73a743a97d6882ed330b3",
|
||||
"GUID:58e104b97fb3752438ada2902a36dcbf",
|
||||
"GUID:7f2d0ee6dd21e1d4eb25b71b7a749d25",
|
||||
"GUID:f55a02e98b01bc849b30d9650ccd8f15"
|
||||
"GUID:f55a02e98b01bc849b30d9650ccd8f15",
|
||||
"GUID:d23f64cfd3b314bb4a18a8284c99bf5e"
|
||||
],
|
||||
"includePlatforms": [],
|
||||
"excludePlatforms": [],
|
||||
|
||||
@@ -1,334 +1,362 @@
|
||||
// Copyright (c) 2021 homuler
|
||||
//
|
||||
// Use of this source code is governed by an MIT-style
|
||||
// license that can be found in the LICENSE file or at
|
||||
// https://opensource.org/licenses/MIT.
|
||||
|
||||
// ATTENTION!: This code is for a tutorial.
|
||||
|
||||
using Mediapipe;
|
||||
using Mediapipe.Unity;
|
||||
using NatML;
|
||||
using NatML.Features;
|
||||
using NatML.Internal;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using Unity.Barracuda;
|
||||
using System.Threading.Tasks;
|
||||
using UnityEngine;
|
||||
using UnityEngine.UI;
|
||||
|
||||
namespace Mediapipe.Unity.Tutorial
|
||||
/// <summary>
|
||||
///
|
||||
/// </summary>
|
||||
public class SignPredictor : MonoBehaviour
|
||||
{
|
||||
public class SignPredictor : MonoBehaviour
|
||||
/// <summary>
|
||||
/// Predictor class which is used to predict the sign using an MLEdgeModel
|
||||
/// </summary>
|
||||
public class NatMLSignPredictor : IMLPredictor<List<float>>
|
||||
{
|
||||
/// <summary>
|
||||
/// ModelList, used to change model using ModelIndex
|
||||
/// The MLEdgeModel used for predictions
|
||||
/// </summary>
|
||||
public ModelList modelList;
|
||||
private readonly MLEdgeModel edgeModel;
|
||||
|
||||
/// <summary>
|
||||
/// Reference to the model info file
|
||||
/// The type used to create features which are input for the model
|
||||
/// </summary>
|
||||
public TextAsset modelInfoFile;
|
||||
private MLFeatureType featureType;
|
||||
|
||||
/// <summary>
|
||||
/// Config file to set up the graph
|
||||
/// Creation of a NatMLSignPredictor instance
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private TextAsset configAsset;
|
||||
|
||||
/// <summary>
|
||||
/// Index to indicate which camera is being used
|
||||
/// </summary>
|
||||
private int camdex = 0;
|
||||
|
||||
/// <summary>
|
||||
/// The screen object on which the video is displayed
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private RawImage screen;
|
||||
|
||||
/// <summary>
|
||||
/// A secondary optional screen object on which the video is displayed
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private RawImage screen2;
|
||||
|
||||
/// <summary>
|
||||
/// MediaPipe graph
|
||||
/// </summary>
|
||||
private CalculatorGraph graph;
|
||||
|
||||
/// <summary>
|
||||
/// Resource manager for graph resources
|
||||
/// </summary>
|
||||
private ResourceManager resourceManager;
|
||||
|
||||
/// <summary>
|
||||
/// Webcam texture
|
||||
/// </summary>
|
||||
private WebCamTexture webcamTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Input texture
|
||||
/// </summary>
|
||||
private Texture2D inputTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Screen pixel data
|
||||
/// </summary>
|
||||
private Color32[] pixelData;
|
||||
|
||||
/// <summary>
|
||||
/// Stopwatch to give a timestamp to video frames
|
||||
/// </summary>
|
||||
private Stopwatch stopwatch;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the pose landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> posestream;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the left hand landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> leftstream;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the right hand landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> rightstream;
|
||||
|
||||
/// <summary>
|
||||
/// Create presence stream
|
||||
/// </summary>
|
||||
public OutputStream<DetectionVectorPacket, List<Detection>> presenceStream;
|
||||
|
||||
/// <summary>
|
||||
/// A keypointmanager which does normalization stuff, keeps track of the landmarks
|
||||
/// </summary>
|
||||
private KeypointManager keypointManager;
|
||||
|
||||
/// <summary>
|
||||
/// The worker on which we schedule the signpredictor model execution
|
||||
/// </summary>
|
||||
private IWorker worker;
|
||||
|
||||
/// <summary>
|
||||
/// Width of the webcam
|
||||
/// </summary>
|
||||
private int width;
|
||||
|
||||
/// <summary>
|
||||
/// Height of the webcam
|
||||
/// </summary>
|
||||
private int height;
|
||||
|
||||
/// <summary>
|
||||
/// The enumerator of the worker which executes the sign predictor model
|
||||
/// </summary>
|
||||
private IEnumerator enumerator;
|
||||
|
||||
/// <summary>
|
||||
/// The prediction of the sign predictor model
|
||||
/// </summary>
|
||||
public Dictionary<string, float> learnableProbabilities;
|
||||
|
||||
/// <summary>
|
||||
/// Bool indicating whether or not the resource manager has already been initialized
|
||||
/// </summary>
|
||||
private static bool resourceManagerIsInitialized = false;
|
||||
|
||||
/// <summary>
|
||||
/// an inputTensor for the sign predictor
|
||||
/// </summary>
|
||||
private Tensor inputTensor;
|
||||
|
||||
public List<Listener> listeners = new List<Listener>();
|
||||
|
||||
/// <summary>
|
||||
/// Google Mediapipe setup & run
|
||||
/// </summary>
|
||||
/// <returns>IEnumerator</returns>
|
||||
/// <exception cref="System.Exception"></exception>
|
||||
private IEnumerator Start()
|
||||
/// <param name="edgeModel"></param>
|
||||
public NatMLSignPredictor(MLEdgeModel edgeModel)
|
||||
{
|
||||
// Webcam setup
|
||||
if (WebCamTexture.devices.Length == 0)
|
||||
{
|
||||
throw new System.Exception("Web Camera devices are not found");
|
||||
}
|
||||
// Start the webcam
|
||||
WebCamDevice webCamDevice = WebCamTexture.devices[0];
|
||||
webcamTexture = new WebCamTexture(webCamDevice.name);
|
||||
|
||||
webcamTexture.Play();
|
||||
|
||||
|
||||
yield return new WaitUntil(() => webcamTexture.width > 16);
|
||||
|
||||
// Set webcam aspect ratio
|
||||
width = webcamTexture.width;
|
||||
height = webcamTexture.height;
|
||||
float webcamAspect = (float)webcamTexture.width / (float)webcamTexture.height;
|
||||
screen.rectTransform.sizeDelta = new Vector2(screen.rectTransform.sizeDelta.y * webcamAspect, (screen.rectTransform.sizeDelta.y));
|
||||
screen.texture = webcamTexture;
|
||||
if (screen2 != null)
|
||||
{
|
||||
screen2.rectTransform.sizeDelta = new Vector2(screen2.rectTransform.sizeDelta.y * webcamAspect, (screen2.rectTransform.sizeDelta.y));
|
||||
}
|
||||
|
||||
if (modelList.GetCurrentModel() != null)
|
||||
{
|
||||
// TODO this method is kinda meh you should use
|
||||
inputTexture = new Texture2D(width, height, TextureFormat.RGBA32, false);
|
||||
pixelData = new Color32[width * height];
|
||||
|
||||
if (!resourceManagerIsInitialized)
|
||||
{
|
||||
resourceManager = new StreamingAssetsResourceManager();
|
||||
yield return resourceManager.PrepareAssetAsync("pose_detection.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("pose_landmark_full.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("face_landmark.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("hand_landmark_full.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("face_detection_short_range.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("hand_recrop.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("handedness.txt");
|
||||
resourceManagerIsInitialized = true;
|
||||
}
|
||||
|
||||
stopwatch = new Stopwatch();
|
||||
|
||||
// Setting up the graph
|
||||
graph = new CalculatorGraph(configAsset.text);
|
||||
|
||||
posestream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "pose_landmarks", "pose_landmarks_presence");
|
||||
leftstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "left_hand_landmarks", "left_hand_landmarks_presence");
|
||||
rightstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "right_hand_landmarks", "right_hand_landmarks_presence");
|
||||
|
||||
posestream.StartPolling().AssertOk();
|
||||
leftstream.StartPolling().AssertOk();
|
||||
rightstream.StartPolling().AssertOk();
|
||||
|
||||
graph.StartRun().AssertOk();
|
||||
stopwatch.Start();
|
||||
|
||||
|
||||
keypointManager = new KeypointManager(modelInfoFile);
|
||||
// check if model exists at path
|
||||
//var model = ModelLoader.Load(Resources.Load<NNModel>("Models/Fingerspelling/model_A-L"));
|
||||
worker = modelList.GetCurrentModel().CreateWorker();
|
||||
|
||||
StartCoroutine(SignRecognitionCoroutine());
|
||||
StartCoroutine(MediapipeCoroutine());
|
||||
}
|
||||
}
|
||||
/// <summary>
|
||||
/// Called at the start of course/Minigame, will set the model before the start of SIgnPredictor is called.
|
||||
/// </summary>
|
||||
/// <param name="index">The index of the model to be used</param>
|
||||
public void SetModel(ModelIndex index)
|
||||
{
|
||||
this.modelList.SetCurrentModel(index);
|
||||
this.edgeModel = edgeModel;
|
||||
featureType = edgeModel.inputs[0];
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Coroutine which executes the mediapipe pipeline
|
||||
/// Predicts the sign using the MLEdgeModel
|
||||
/// </summary>
|
||||
/// <param name="inputs"></param>
|
||||
/// <returns></returns>
|
||||
private IEnumerator MediapipeCoroutine()
|
||||
public List<float> Predict(params MLFeature[] inputs)
|
||||
{
|
||||
while (true)
|
||||
List<float> predictions = null;
|
||||
IMLEdgeFeature iedgeFeature = (IMLEdgeFeature)inputs[0];
|
||||
MLEdgeFeature edgeFeature = iedgeFeature.Create(featureType);
|
||||
MLFeatureCollection<MLEdgeFeature> result = edgeModel.Predict(edgeFeature);
|
||||
if (0 < result.Count)
|
||||
{
|
||||
inputTexture.SetPixels32(webcamTexture.GetPixels32(pixelData));
|
||||
var imageFrame = new ImageFrame(ImageFormat.Types.Format.Srgba, width, height, width * 4, inputTexture.GetRawTextureData<byte>());
|
||||
var currentTimestamp = stopwatch.ElapsedTicks / (System.TimeSpan.TicksPerMillisecond / 1000);
|
||||
graph.AddPacketToInputStream("input_video", new ImageFramePacket(imageFrame, new Timestamp(currentTimestamp))).AssertOk();
|
||||
//Debug.Log(Time.timeAsDouble + " Added new packet to mediapipe graph");
|
||||
yield return new WaitForEndOfFrame();
|
||||
|
||||
NormalizedLandmarkList _poseLandmarks = null;
|
||||
NormalizedLandmarkList _leftHandLandmarks = null;
|
||||
NormalizedLandmarkList _rightHandLandmarks = null;
|
||||
|
||||
//Debug.Log("Extracting keypoints");
|
||||
|
||||
yield return new WaitUntil(() => { posestream.TryGetNext(out _poseLandmarks, false); return true; });
|
||||
yield return new WaitUntil(() => { leftstream.TryGetNext(out _leftHandLandmarks, false); return true; });
|
||||
yield return new WaitUntil(() => { rightstream.TryGetNext(out _rightHandLandmarks, false); return true; });
|
||||
//Debug.Log(Time.timeAsDouble + " Retrieved landmarks ");
|
||||
|
||||
keypointManager.AddLandmarks(_poseLandmarks, _leftHandLandmarks, _rightHandLandmarks);
|
||||
predictions = new MLArrayFeature<float>(result[0]).Flatten().ToArray().ToList();
|
||||
predictions = predictions.ConvertAll((c) => Mathf.Exp(c));
|
||||
float sum = predictions.Sum();
|
||||
predictions = predictions.ConvertAll((c) => c / sum);
|
||||
}
|
||||
edgeFeature.Dispose();
|
||||
result.Dispose();
|
||||
return predictions;
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Coroutine which calls the sign predictor model
|
||||
/// Disposing the MLEdgeModel
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
private IEnumerator SignRecognitionCoroutine()
|
||||
public void Dispose()
|
||||
{
|
||||
while (true)
|
||||
edgeModel.Dispose();
|
||||
}
|
||||
}
|
||||
|
||||
public List<Listener> listeners = new List<Listener>();
|
||||
|
||||
/// <summary>
|
||||
/// Predictor which is used to create the asyncPredictor (should not be used if asyncPredictor exists)
|
||||
/// </summary>
|
||||
private NatMLSignPredictor predictor;
|
||||
|
||||
/// <summary>
|
||||
/// The asynchronous predictor which is used to predict the sign using an MLEdgemodel
|
||||
/// </summary>
|
||||
private MLAsyncPredictor<List<float>> asyncPredictor;
|
||||
|
||||
/// <summary>
|
||||
/// Reference to the model used in the SignPredictor
|
||||
/// </summary>
|
||||
private MLEdgeModel model;
|
||||
|
||||
/// <summary>
|
||||
/// Modellist used to change model using ModelIndex
|
||||
/// </summary>
|
||||
public ModelList modelList;
|
||||
|
||||
/// <summary>
|
||||
/// Chosen model data based on the operating system
|
||||
/// </summary>
|
||||
private MLModelData modelData;
|
||||
|
||||
/// <summary>
|
||||
/// Reference to the model info file
|
||||
/// </summary>
|
||||
public TextAsset modelInfoFile;
|
||||
|
||||
/// <summary>
|
||||
/// Config file to set up the graph
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private TextAsset configAsset;
|
||||
|
||||
/// <summary>
|
||||
/// Index to indicate which camera is being used
|
||||
/// </summary>
|
||||
private int camdex = 0;
|
||||
|
||||
/// <summary>
|
||||
/// The screen object on which the video is displayed
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private RawImage screen;
|
||||
|
||||
/// <summary>
|
||||
/// A secondary optional screen object on which the video is displayed
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private RawImage screen2;
|
||||
|
||||
/// <summary>
|
||||
/// MediaPipe graph
|
||||
/// </summary>
|
||||
private CalculatorGraph graph;
|
||||
|
||||
/// <summary>
|
||||
/// Resource manager for graph resources
|
||||
/// </summary>
|
||||
private ResourceManager resourceManager;
|
||||
|
||||
/// <summary>
|
||||
/// Webcam texture
|
||||
/// </summary>
|
||||
private WebCamTexture webcamTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Input texture
|
||||
/// </summary>
|
||||
private Texture2D inputTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Screen pixel data
|
||||
/// </summary>
|
||||
private Color32[] pixelData;
|
||||
|
||||
/// <summary>
|
||||
/// Stopwatch to give a timestamp to video frames
|
||||
/// </summary>
|
||||
private Stopwatch stopwatch;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the pose landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> posestream;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the left hand landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> leftstream;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the right hand landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> rightstream;
|
||||
|
||||
/// <summary>
|
||||
/// Create presence stream
|
||||
/// </summary>
|
||||
public OutputStream<DetectionVectorPacket, List<Detection>> presenceStream;
|
||||
|
||||
/// <summary>
|
||||
/// A keypointmanager which does normalization stuff, keeps track of the landmarks
|
||||
/// </summary>
|
||||
private KeypointManager keypointManager;
|
||||
|
||||
/// <summary>
|
||||
/// Width of the webcam
|
||||
/// </summary>
|
||||
private int width;
|
||||
|
||||
/// <summary>
|
||||
/// Height of the webcam
|
||||
/// </summary>
|
||||
private int height;
|
||||
|
||||
/// <summary>
|
||||
/// The prediction of the sign predictor model
|
||||
/// </summary>
|
||||
public Dictionary<string, float> learnableProbabilities;
|
||||
|
||||
/// <summary>
|
||||
/// Bool indicating whether or not the resource manager has already been initialized
|
||||
/// </summary>
|
||||
private static bool resourceManagerIsInitialized = false;
|
||||
|
||||
/// <summary>
/// Unity start-up coroutine: opens the first webcam, sizes the preview screen(s), prepares the
/// MediaPipe graph with its landmark output streams, creates the sign prediction model and finally
/// launches the sign recognition and MediaPipe coroutines
/// </summary>
/// <returns>IEnumerator</returns>
/// <exception cref="System.Exception">Thrown when no webcam device is available</exception>
private IEnumerator Start()
{
    // Without a camera there is nothing to feed into the graph
    if (WebCamTexture.devices.Length == 0)
    {
        throw new System.Exception("Web Camera devices are not found");
    }

    // Open and start the first listed webcam
    webcamTexture = new WebCamTexture(WebCamTexture.devices[0].name);
    webcamTexture.Play();

    // Hold until the texture reports a width above 16
    // (presumably a placeholder size is reported before the camera delivers frames - TODO confirm)
    yield return new WaitUntil(() => webcamTexture.width > 16);

    // Remember the frame size and stretch the screen(s) horizontally to the camera's aspect ratio
    width = webcamTexture.width;
    height = webcamTexture.height;
    float aspectRatio = (float)width / (float)height;

    Vector2 screenSize = screen.rectTransform.sizeDelta;
    screen.rectTransform.sizeDelta = new Vector2(screenSize.y * aspectRatio, screenSize.y);
    screen.texture = webcamTexture;

    // The secondary screen is optional; it is only resized, it does not receive the texture here
    if (screen2 != null)
    {
        Vector2 screen2Size = screen2.rectTransform.sizeDelta;
        screen2.rectTransform.sizeDelta = new Vector2(screen2Size.y * aspectRatio, screen2Size.y);
    }

    // TODO (note left unfinished by the original author): this way of copying frames is "kinda meh"
    inputTexture = new Texture2D(width, height, TextureFormat.RGBA32, false);
    pixelData = new Color32[width * height];

    // The graph assets only have to be prepared once per application run (static flag)
    if (!resourceManagerIsInitialized)
    {
        resourceManager = new StreamingAssetsResourceManager();

        string[] graphAssets =
        {
            "pose_detection.bytes",
            "pose_landmark_full.bytes",
            "face_landmark.bytes",
            "hand_landmark_full.bytes",
            "face_detection_short_range.bytes",
            "hand_recrop.bytes",
            "handedness.txt",
        };
        foreach (string assetName in graphAssets)
        {
            yield return resourceManager.PrepareAssetAsync(assetName);
        }

        resourceManagerIsInitialized = true;
    }

    stopwatch = new Stopwatch();

    // Build the graph and attach one polled output stream per landmark list
    graph = new CalculatorGraph(configAsset.text);

    posestream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "pose_landmarks", "pose_landmarks_presence");
    leftstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "left_hand_landmarks", "left_hand_landmarks_presence");
    rightstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "right_hand_landmarks", "right_hand_landmarks_presence");

    posestream.StartPolling().AssertOk();
    leftstream.StartPolling().AssertOk();
    rightstream.StartPolling().AssertOk();

    graph.StartRun().AssertOk();
    stopwatch.Start();

    // The keypoint manager collects the landmarks produced by the graph
    keypointManager = new KeypointManager(modelInfoFile);

    // Do not continue before the model list reports a usable model
    yield return new WaitUntil(() => modelList.HasValidModel());

    // Create the edge model on a worker task and poll for completion from the coroutine
    Task<MLEdgeModel> modelTask = Task.Run(() => MLEdgeModel.Create(modelList.GetCurrentModel()));
    yield return new WaitUntil(() => modelTask.IsCompleted);

    model = modelTask.Result;
    predictor = new NatMLSignPredictor(model);
    asyncPredictor = predictor.ToAsync();

    // Everything is in place: start predicting and start feeding frames
    StartCoroutine(SignRecognitionCoroutine());
    StartCoroutine(MediapipeCoroutine());
}
|
||||
|
||||
/// <summary>
/// Coroutine which executes the mediapipe pipeline: every iteration pushes the current webcam
/// frame into the graph and hands the polled landmark lists to the keypoint manager
/// </summary>
/// <returns>IEnumerator which never finishes on its own (infinite loop)</returns>
private IEnumerator MediapipeCoroutine()
{
    while (true)
    {
        // Copy the current webcam frame into the input texture and wrap its raw bytes in an ImageFrame
        inputTexture.SetPixels32(webcamTexture.GetPixels32(pixelData));
        var imageFrame = new ImageFrame(ImageFormat.Types.Format.Srgba, width, height, width * 4, inputTexture.GetRawTextureData<byte>());

        // Elapsed.Ticks is expressed in TimeSpan ticks, which is the unit the divisor below is
        // defined in (TicksPerMillisecond / 1000 = ticks per microsecond). Stopwatch.ElapsedTicks
        // counts in Stopwatch.Frequency units instead and only matches on platforms where that
        // frequency equals TimeSpan.TicksPerSecond.
        var currentTimestamp = stopwatch.Elapsed.Ticks / (System.TimeSpan.TicksPerMillisecond / 1000);
        graph.AddPacketToInputStream("input_video", new ImageFramePacket(imageFrame, new Timestamp(currentTimestamp))).AssertOk();
        yield return new WaitForEndOfFrame();

        NormalizedLandmarkList _poseLandmarks = null;
        NormalizedLandmarkList _leftHandLandmarks = null;
        NormalizedLandmarkList _rightHandLandmarks = null;

        // Each predicate always returns true, so these waits only poll the stream once and never
        // block on missing data; a landmark list keeps its null value when TryGetNext does not
        // assign one (the return value of TryGetNext is ignored)
        yield return new WaitUntil(() => { posestream.TryGetNext(out _poseLandmarks); return true; });
        yield return new WaitUntil(() => { leftstream.TryGetNext(out _leftHandLandmarks); return true; });
        yield return new WaitUntil(() => { rightstream.TryGetNext(out _rightHandLandmarks); return true; });

        keypointManager.AddLandmarks(_poseLandmarks, _leftHandLandmarks, _rightHandLandmarks);
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Coroutine which calls the sign predictor model
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
private IEnumerator SignRecognitionCoroutine()
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
List<List<float>> inputData = keypointManager.GetKeypoints();
|
||||
if (inputData != null && asyncPredictor.readyForPrediction)
|
||||
{
|
||||
List<List<float>> input = keypointManager.GetKeypoints();
|
||||
if (input != null)
|
||||
// Getting the size of the input data
|
||||
int framecount = inputData.Count;
|
||||
int keypointsPerFrame = inputData[0].Count;
|
||||
|
||||
// Creating ArrayFeature
|
||||
int[] shape = { framecount, keypointsPerFrame };
|
||||
float[] input = new float[framecount * keypointsPerFrame];
|
||||
int i = 0;
|
||||
inputData.ForEach((e) => e.ForEach((f) => input[i++] = f));
|
||||
MLArrayFeature<float> feature = new MLArrayFeature<float>(input, shape);
|
||||
|
||||
// Predicting
|
||||
Task<List<float>> task = Task.Run(async () => await asyncPredictor.Predict(feature));
|
||||
yield return new WaitUntil(() => task.IsCompleted);
|
||||
List<float> result = task.Result;
|
||||
if (0 < result.Count)
|
||||
{
|
||||
|
||||
//UnityEngine.Debug.Log("input: " + input.Count);
|
||||
|
||||
int frameCount = input.Count;
|
||||
int keypoints_per_frame = input[0].Count;
|
||||
|
||||
// Create a tensor with the input
|
||||
inputTensor = new Tensor(frameCount, keypoints_per_frame);
|
||||
|
||||
// Fill the tensor with the input
|
||||
for (int i = 0; i < frameCount; i++)
|
||||
{
|
||||
for (int j = 0; j < keypoints_per_frame; j++)
|
||||
{
|
||||
inputTensor[i, j] = input[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
int stepsPerFrame = 190;
|
||||
enumerator = worker.StartManualSchedule(inputTensor);
|
||||
int step = 0;
|
||||
while (enumerator.MoveNext())
|
||||
{
|
||||
if (++step % stepsPerFrame == 0)
|
||||
{
|
||||
//Debug.Log(Time.timeAsDouble + " : " + step);
|
||||
yield return null;
|
||||
}
|
||||
}
|
||||
|
||||
var output = worker.PeekOutput();
|
||||
|
||||
inputTensor.Dispose();
|
||||
|
||||
// Get the output as an array
|
||||
float[] outputArray = output.ToReadOnlyArray();
|
||||
//Debug.Log($"out = [{outputArray.Aggregate(" ", (t, f) => $"{t}{f} ")}]");
|
||||
|
||||
// Calculate the softmax of the output
|
||||
float max = outputArray.Max();
|
||||
float[] softmaxedOutput = outputArray.Select(x => Mathf.Exp(x - max)).ToArray();
|
||||
float sum = softmaxedOutput.Sum();
|
||||
float[] softmaxedOutput2 = softmaxedOutput.Select(x => x / sum).ToArray();
|
||||
|
||||
// Get the index of the highest probability
|
||||
int maxIndex = softmaxedOutput2.ToList().IndexOf(softmaxedOutput2.Max());
|
||||
|
||||
// Get the letter from the index
|
||||
char letter = (char)(maxIndex + 65);
|
||||
float accuracy = (Mathf.RoundToInt(softmaxedOutput2[maxIndex] * 100));
|
||||
|
||||
// Set the letterProbabilities, currently used by Courses
|
||||
learnableProbabilities = new Dictionary<string, float>();
|
||||
for (int i = 0; i < softmaxedOutput2.Length; i++)
|
||||
|
||||
// Temporary fix
|
||||
List<string> signs = new List<string>()
|
||||
{
|
||||
learnableProbabilities.Add(((char)(i + 65)).ToString(), softmaxedOutput2[i]);
|
||||
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
|
||||
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"
|
||||
};
|
||||
|
||||
|
||||
|
||||
for (int j = 0; j < result.Count; j++)
|
||||
{
|
||||
learnableProbabilities.Add(signs[j].ToUpper(), result[j]);
|
||||
}
|
||||
//Debug.Log($"prob = [{learnableProbabilities.Aggregate(" ", (t, kv) => $"{t}{kv.Key}:{kv.Value} ")}]");
|
||||
foreach(Listener listener in listeners)
|
||||
foreach (Listener listener in listeners)
|
||||
{
|
||||
yield return listener.ProcessIncomingCall();
|
||||
}
|
||||
@@ -339,77 +367,85 @@ namespace Mediapipe.Unity.Tutorial
|
||||
yield return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Proper destruction of the Mediapipe graph: stops the webcam, shuts the graph down and
/// releases the input tensor and the worker
/// </summary>
private void OnDestroy()
{
    if (webcamTexture != null)
    {
        webcamTexture.Stop();
    }

    if (graph != null)
    {
        try
        {
            // Close the input first so the graph can run to completion
            graph.CloseInputStream("input_video").AssertOk();
            graph.WaitUntilDone().AssertOk();
        }
        finally
        {
            // Dispose the graph even when closing or waiting fails
            graph.Dispose();
        }
    }

    // inputTensor must still be disposed, if it exists
    if (inputTensor != null)
    {
        inputTensor.Dispose();
    }

    if (worker != null)
    {
        worker.Dispose();
    }
}
|
||||
|
||||
/// <summary>
/// So long as there are cameras to use, swaps the camera in use to the next one in the device
/// list (wrapping around at the end). Does nothing when no camera is available.
/// </summary>
public void SwapCam()
{
    // Query the device list once; it is reused for the count and the lookup below
    WebCamDevice[] devices = WebCamTexture.devices;
    if (devices.Length == 0)
    {
        return;
    }

    // Stop the old camera.
    // If there was no camera playing before, the texture does not have to be reset, as it wasn't
    // assigned in the first place. The null check covers calls made before a webcam texture exists.
    if (webcamTexture != null && webcamTexture.isPlaying)
    {
        screen.texture = null;
        webcamTexture.Stop();
        webcamTexture = null;
    }

    // Find the new camera
    camdex = (camdex + 1) % devices.Length;

    // Start the new camera
    // NOTE(review): the cached frame size and pixel buffers of this class are not refreshed here -
    // confirm the new camera delivers the same resolution
    webcamTexture = new WebCamTexture(devices[camdex].name);
    screen.texture = webcamTexture;
    webcamTexture.Play();
}
|
||||
/// <summary>
|
||||
/// Swaps the display screens
|
||||
/// </summary>
|
||||
public void SwapScreen()
|
||||
{
|
||||
if(screen2.texture == null && screen.texture != null)
|
||||
{
|
||||
screen2.texture = webcamTexture;
|
||||
screen.texture = null;
|
||||
}
|
||||
else if (screen2.texture != null && screen.texture == null)
|
||||
{
|
||||
screen.texture = webcamTexture;
|
||||
screen2.texture = null;
|
||||
}
|
||||
yield return null;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Proper destruction of the Mediapipe graph: stops the webcam, shuts the graph down, destroys
/// the input texture created by this component and disposes the asynchronous predictor
/// </summary>
private void OnDestroy()
{
    if (webcamTexture != null)
    {
        webcamTexture.Stop();
    }

    if (graph != null)
    {
        try
        {
            // Close the input first so the graph can run to completion
            graph.CloseInputStream("input_video").AssertOk();
            graph.WaitUntilDone().AssertOk();
        }
        finally
        {
            // Dispose the graph even when closing or waiting fails
            graph.Dispose();
        }
    }

    // The input texture is created with `new Texture2D` by this component, so it has to be
    // destroyed explicitly; done after the graph is shut down so nothing reads from it anymore
    if (inputTexture != null)
    {
        Destroy(inputTexture);
        inputTexture = null;
    }

    if (asyncPredictor != null)
    {
        asyncPredictor.Dispose();
    }
}
|
||||
|
||||
/// <summary>
/// So long as there are cameras to use, swaps the camera in use to the next one in the device
/// list (wrapping around at the end). Does nothing when no camera is available.
/// </summary>
public void SwapCam()
{
    // Query the device list once; it is reused for the count and the lookup below
    WebCamDevice[] devices = WebCamTexture.devices;
    if (devices.Length == 0)
    {
        return;
    }

    // Stop the old camera.
    // If there was no camera playing before, the texture does not have to be reset, as it wasn't
    // assigned in the first place. The null check covers calls made before a webcam texture exists.
    if (webcamTexture != null && webcamTexture.isPlaying)
    {
        screen.texture = null;
        webcamTexture.Stop();
        webcamTexture = null;
    }

    // Find the new camera
    camdex = (camdex + 1) % devices.Length;

    // Start the new camera
    // NOTE(review): the cached frame size and pixel buffers of this class are not refreshed here -
    // confirm the new camera delivers the same resolution
    webcamTexture = new WebCamTexture(devices[camdex].name);
    screen.texture = webcamTexture;
    webcamTexture.Play();
}
|
||||
|
||||
/// <summary>
/// Swaps the display screens: moves the webcam texture from the screen that currently shows it
/// to the other one. Nothing happens when there is no secondary screen, or when both or neither
/// of the screens currently have a texture.
/// </summary>
public void SwapScreen()
{
    // The secondary screen is optional, so there may be nothing to swap with
    if (screen2 == null)
    {
        return;
    }

    bool primaryShowing = screen.texture != null;
    bool secondaryShowing = screen2.texture != null;

    if (primaryShowing && !secondaryShowing)
    {
        screen2.texture = webcamTexture;
        screen.texture = null;
    }
    else if (secondaryShowing && !primaryShowing)
    {
        screen.texture = webcamTexture;
        screen2.texture = null;
    }
}
|
||||
/// <summary>
/// Forwards the given index to the model list so that it becomes the list's current model.
/// Only the model list is updated here; nothing else of this component is touched.
/// </summary>
/// <param name="index">The index of the model to be used</param>
public void ChangeModel(ModelIndex index) => modelList.SetCurrentModel(index);
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user