Resolve WES-100 "Natml integration"
This commit is contained in:
@@ -2,7 +2,8 @@
|
||||
"name": "InterfacesScripts",
|
||||
"rootNamespace": "",
|
||||
"references": [
|
||||
"GUID:5c2b5ba89f9e74e418232e154bc5cc7a"
|
||||
"GUID:5c2b5ba89f9e74e418232e154bc5cc7a",
|
||||
"GUID:d23f64cfd3b314bb4a18a8284c99bf5e"
|
||||
],
|
||||
"includePlatforms": [],
|
||||
"excludePlatforms": [],
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 7f2d0ee6dd21e1d4eb25b71b7a749d25
|
||||
folderAsset: yes
|
||||
DefaultImporter:
|
||||
AssemblyDefinitionImporter:
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
|
||||
@@ -6,6 +6,6 @@ using UnityEngine;
|
||||
/// </summary>
|
||||
public enum ModelIndex
|
||||
{
|
||||
FINGERSPELLING,
|
||||
NONE
|
||||
NONE,
|
||||
FINGERSPELLING
|
||||
}
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
using NatML;
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using UnityEngine;
|
||||
using Unity.Barracuda;
|
||||
/// <summary>
|
||||
/// This scriptable will hold tupples of Courseindices and models
|
||||
/// </summary>
|
||||
@@ -22,28 +21,49 @@ public class ModelList : ScriptableObject
|
||||
/// <summary>
|
||||
/// The model itself
|
||||
/// </summary>
|
||||
public NNModel model;
|
||||
public MLModelData modelWINDOWS;
|
||||
/// <summary>
|
||||
/// The model itself
|
||||
/// </summary>
|
||||
public MLModelData modelMAC;
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Index of the currently active model
|
||||
/// </summary>
|
||||
public int currentModelIndex = 0;
|
||||
|
||||
/// <summary>
|
||||
/// A list of all the models
|
||||
/// </summary>
|
||||
public List<ModelTuple> models = new List<ModelTuple>();
|
||||
|
||||
/// <summary>
|
||||
/// Index of the currently active model
|
||||
/// </summary>
|
||||
public int currentModelIndex = 0;
|
||||
|
||||
/// <summary>
|
||||
/// Get a model by modelindex
|
||||
/// </summary>
|
||||
/// <param name="modelIndex">ModelIndex of the model</param>
|
||||
/// <returns>Model associated with this index, null if no model was found</returns>
|
||||
public NNModel GetCurrentModel()
|
||||
public MLModelData GetCurrentModel()
|
||||
{
|
||||
return models.Find(x => x.model == models[currentModelIndex].model)?.model;
|
||||
|
||||
// Select Model based on OS
|
||||
#if (UNITY_STANDALONE_WIN || UNITY_EDITOR_WIN)
|
||||
return models.Find(x => x.modelWINDOWS == models[currentModelIndex].modelWINDOWS)?.modelWINDOWS;
|
||||
#elif (UNITY_STANDALONE_OSX || UNITY_EDITOR_OSX)
|
||||
return models.Find(x => x.modelMAC == models[currentModelIndex].modelMAC)?.modelMAC;
|
||||
#endif
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Function to check if the modelIndex has been set
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
public bool HasValidModel()
|
||||
{
|
||||
return models[currentModelIndex].index != (int)ModelIndex.NONE;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -27,6 +27,7 @@ public class Theme : ScriptableObject
|
||||
/// </summary>
|
||||
public ModelIndex modelIndex;
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// List of all learnable words/letters
|
||||
/// </summary>
|
||||
|
||||
@@ -7,10 +7,4 @@ ScriptedImporter:
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
script: {fileID: 11500000, guid: 683b6cb6d0a474744822c888b46772c9, type: 3}
|
||||
optimizeModel: 1
|
||||
forceArbitraryBatchSize: 1
|
||||
treatErrorsAsWarnings: 0
|
||||
importMode: 1
|
||||
weightsTypeMode: 0
|
||||
activationTypeMode: 0
|
||||
script: {fileID: 11500000, guid: 8264490bef67c46f2982e6dd3f5e46cd, type: 3}
|
||||
|
||||
BIN
Assets/Common/Models/FingerSpelling/model_A-Z2.onnx
Normal file
BIN
Assets/Common/Models/FingerSpelling/model_A-Z2.onnx
Normal file
Binary file not shown.
10
Assets/Common/Models/FingerSpelling/model_A-Z2.onnx.meta
Normal file
10
Assets/Common/Models/FingerSpelling/model_A-Z2.onnx.meta
Normal file
@@ -0,0 +1,10 @@
|
||||
fileFormatVersion: 2
|
||||
guid: fdbf401e965a6bf4a87637cd519f2715
|
||||
ScriptedImporter:
|
||||
internalIDToNameTable: []
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
script: {fileID: 11500000, guid: 8264490bef67c46f2982e6dd3f5e46cd, type: 3}
|
||||
@@ -15,7 +15,7 @@ MonoBehaviour:
|
||||
title: Handalfabet
|
||||
description: Van A tot Z
|
||||
index: 0
|
||||
model: {fileID: 5022602860645237092, guid: e6d85df707405ad4f97c23b07227ee99, type: 3}
|
||||
modelIndex: 1
|
||||
learnables:
|
||||
- name: A
|
||||
image: {fileID: 21300000, guid: 4eb4ef55f866f114dafb722f4bd05c76, type: 3}
|
||||
|
||||
@@ -6,8 +6,8 @@
|
||||
"UnityEditor.TestRunner",
|
||||
"CommonScripts",
|
||||
"InterfacesScripts",
|
||||
"Unity.Barracuda",
|
||||
"SignPredictor"
|
||||
"SignPredictor",
|
||||
"NatML.ML"
|
||||
],
|
||||
"includePlatforms": [
|
||||
"Editor"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
using NatML;
|
||||
using NUnit.Framework;
|
||||
using Unity.Barracuda;
|
||||
using UnityEngine;
|
||||
/// <summary>
|
||||
/// Test the ModelList class
|
||||
@@ -45,7 +45,11 @@ public class ModelListTest
|
||||
ModelIndex value = (ModelIndex)random.Next(modelList.models.Count);
|
||||
modelList.SetCurrentModel(value);
|
||||
|
||||
Assert.AreEqual(modelList.models[modelList.currentModelIndex].model, modelList.GetCurrentModel());
|
||||
#if (UNITY_STANDALONE_WIN || UNITY_EDITOR_WIN)
|
||||
Assert.AreEqual(modelList.models[modelList.currentModelIndex].modelWINDOWS, modelList.GetCurrentModel());
|
||||
#elif (UNITY_STANDALONE_OSX || UNITY_EDITOR_OSX)
|
||||
Assert.AreEqual(modelList.models[modelList.currentModelIndex].modelMAC, modelList.GetCurrentModel());
|
||||
#endif
|
||||
|
||||
// Check if empty model fails gracefully (returns null)
|
||||
Assert.IsNull(ScriptableObject.CreateInstance<ModelList>().GetCurrentModel());
|
||||
@@ -69,7 +73,11 @@ public class ModelListTest
|
||||
ModelList.ModelTuple m = modelList.models[modelList.currentModelIndex];
|
||||
|
||||
Assert.AreEqual(m.index, value);
|
||||
Assert.IsTrue(m.model is NNModel || m.model is null);
|
||||
#if (UNITY_STANDALONE_WIN || UNITY_EDITOR_WIN)
|
||||
Assert.IsTrue(m.modelWINDOWS is MLModelData || m.modelWINDOWS is null);
|
||||
#elif (UNITY_STANDALONE_OSX || UNITY_EDITOR_OSX)
|
||||
Assert.IsTrue(m.modelMAC is MLModelData || m.modelMAC is null);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
ModelList emptyList = ScriptableObject.CreateInstance<ModelList>();
|
||||
|
||||
@@ -5,8 +5,9 @@
|
||||
"Unity.TextMeshPro",
|
||||
"AccountsScripts",
|
||||
"InterfacesScripts",
|
||||
"Tween",
|
||||
"SignPredictor"
|
||||
"SignPredictor",
|
||||
"NatML.ML",
|
||||
"Tween"
|
||||
],
|
||||
"includePlatforms": [],
|
||||
"excludePlatforms": [],
|
||||
|
||||
@@ -152,8 +152,7 @@ public class CoursesController : AbstractFeedback
|
||||
void Start()
|
||||
{
|
||||
StartCourseController();
|
||||
|
||||
signPredictor.SetModel(course.theme.modelIndex);
|
||||
signPredictor.ChangeModel(course.theme.modelIndex);
|
||||
AddSelfAsListener();
|
||||
}
|
||||
/// <summary>
|
||||
|
||||
@@ -38,7 +38,7 @@ RenderSettings:
|
||||
m_ReflectionIntensity: 1
|
||||
m_CustomReflection: {fileID: 0}
|
||||
m_Sun: {fileID: 0}
|
||||
m_IndirectSpecularColor: {r: 0.37311918, g: 0.3807398, b: 0.35872716, a: 1}
|
||||
m_IndirectSpecularColor: {r: 0.37311953, g: 0.38074014, b: 0.3587274, a: 1}
|
||||
m_UseRadianceAmbientProbe: 0
|
||||
--- !u!157 &3
|
||||
LightmapSettings:
|
||||
@@ -6416,472 +6416,3 @@ CanvasRenderer:
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 2039368310}
|
||||
m_CullTransparentMesh: 1
|
||||
--- !u!114 &5233312447201393291
|
||||
MonoBehaviour:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312447201393293}
|
||||
m_Enabled: 1
|
||||
m_EditorHideFlags: 0
|
||||
m_Script: {fileID: 11500000, guid: 67db9e8f0e2ae9c40bc1e2b64352a6b4, type: 3}
|
||||
m_Name:
|
||||
m_EditorClassIdentifier:
|
||||
m_Navigation:
|
||||
m_Mode: 3
|
||||
m_WrapAround: 0
|
||||
m_SelectOnUp: {fileID: 0}
|
||||
m_SelectOnDown: {fileID: 0}
|
||||
m_SelectOnLeft: {fileID: 0}
|
||||
m_SelectOnRight: {fileID: 0}
|
||||
m_Transition: 1
|
||||
m_Colors:
|
||||
m_NormalColor: {r: 1, g: 1, b: 1, a: 1}
|
||||
m_HighlightedColor: {r: 0.9607843, g: 0.9607843, b: 0.9607843, a: 1}
|
||||
m_PressedColor: {r: 0.78431374, g: 0.78431374, b: 0.78431374, a: 1}
|
||||
m_SelectedColor: {r: 0.9607843, g: 0.9607843, b: 0.9607843, a: 1}
|
||||
m_DisabledColor: {r: 0.78431374, g: 0.78431374, b: 0.78431374, a: 0.5019608}
|
||||
m_ColorMultiplier: 1
|
||||
m_FadeDuration: 0.1
|
||||
m_SpriteState:
|
||||
m_HighlightedSprite: {fileID: 0}
|
||||
m_PressedSprite: {fileID: 0}
|
||||
m_SelectedSprite: {fileID: 0}
|
||||
m_DisabledSprite: {fileID: 0}
|
||||
m_AnimationTriggers:
|
||||
m_NormalTrigger: Normal
|
||||
m_HighlightedTrigger: Highlighted
|
||||
m_PressedTrigger: Pressed
|
||||
m_SelectedTrigger: Selected
|
||||
m_DisabledTrigger: Disabled
|
||||
m_Interactable: 1
|
||||
m_TargetGraphic: {fileID: 0}
|
||||
m_FillRect: {fileID: 5233312447919013132}
|
||||
m_HandleRect: {fileID: 0}
|
||||
m_Direction: 0
|
||||
m_MinValue: 0
|
||||
m_MaxValue: 1
|
||||
m_WholeNumbers: 0
|
||||
m_Value: 0
|
||||
m_OnValueChanged:
|
||||
m_PersistentCalls:
|
||||
m_Calls: []
|
||||
--- !u!224 &5233312447201393292
|
||||
RectTransform:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312447201393293}
|
||||
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
|
||||
m_LocalPosition: {x: 0, y: 0, z: 0}
|
||||
m_LocalScale: {x: 1, y: 1, z: 1}
|
||||
m_ConstrainProportionsScale: 0
|
||||
m_Children:
|
||||
- {fileID: 5233312448534255807}
|
||||
- {fileID: 5233312448785575104}
|
||||
m_Father: {fileID: 5233312447513285389}
|
||||
m_RootOrder: 1
|
||||
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
||||
m_AnchorMin: {x: 0, y: 0}
|
||||
m_AnchorMax: {x: 1, y: 0}
|
||||
m_AnchoredPosition: {x: 0, y: 0}
|
||||
m_SizeDelta: {x: 0, y: 50}
|
||||
m_Pivot: {x: 0.5, y: 0}
|
||||
--- !u!1 &5233312447201393293
|
||||
GameObject:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
serializedVersion: 6
|
||||
m_Component:
|
||||
- component: {fileID: 5233312447201393292}
|
||||
- component: {fileID: 5233312447201393291}
|
||||
m_Layer: 5
|
||||
m_Name: Progress
|
||||
m_TagString: Untagged
|
||||
m_Icon: {fileID: 0}
|
||||
m_NavMeshLayer: 0
|
||||
m_StaticEditorFlags: 0
|
||||
m_IsActive: 1
|
||||
--- !u!114 &5233312447513285388
|
||||
MonoBehaviour:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312447513285390}
|
||||
m_Enabled: 1
|
||||
m_EditorHideFlags: 0
|
||||
m_Script: {fileID: 11500000, guid: 44e682a32ee15cc489bf50f3a06f717b, type: 3}
|
||||
m_Name:
|
||||
m_EditorClassIdentifier:
|
||||
feedbackText: {fileID: 0}
|
||||
feedbackProgress: {fileID: 0}
|
||||
feedbackProgressImage: {fileID: 0}
|
||||
signPredictor: {fileID: 1991376311}
|
||||
--- !u!224 &5233312447513285389
|
||||
RectTransform:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312447513285390}
|
||||
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
|
||||
m_LocalPosition: {x: 0, y: 0, z: 0}
|
||||
m_LocalScale: {x: 1, y: 1, z: 1}
|
||||
m_ConstrainProportionsScale: 0
|
||||
m_Children:
|
||||
- {fileID: 5233312448025626847}
|
||||
- {fileID: 5233312447201393292}
|
||||
m_Father: {fileID: 0}
|
||||
m_RootOrder: 5
|
||||
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
||||
m_AnchorMin: {x: 0.5, y: 0}
|
||||
m_AnchorMax: {x: 0.5, y: 0}
|
||||
m_AnchoredPosition: {x: 960, y: 200}
|
||||
m_SizeDelta: {x: 500, y: 150}
|
||||
m_Pivot: {x: 0.5, y: 0}
|
||||
--- !u!1 &5233312447513285390
|
||||
GameObject:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
serializedVersion: 6
|
||||
m_Component:
|
||||
- component: {fileID: 5233312447513285389}
|
||||
- component: {fileID: 5233312447513285388}
|
||||
m_Layer: 5
|
||||
m_Name: Feedback
|
||||
m_TagString: Untagged
|
||||
m_Icon: {fileID: 0}
|
||||
m_NavMeshLayer: 0
|
||||
m_StaticEditorFlags: 0
|
||||
m_IsActive: 1
|
||||
--- !u!224 &5233312447919013132
|
||||
RectTransform:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312447919013135}
|
||||
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
|
||||
m_LocalPosition: {x: 0, y: 0, z: 0}
|
||||
m_LocalScale: {x: 1, y: 1, z: 1}
|
||||
m_ConstrainProportionsScale: 0
|
||||
m_Children: []
|
||||
m_Father: {fileID: 5233312448785575104}
|
||||
m_RootOrder: 0
|
||||
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
||||
m_AnchorMin: {x: 0, y: 0}
|
||||
m_AnchorMax: {x: 0, y: 0}
|
||||
m_AnchoredPosition: {x: 0, y: 0}
|
||||
m_SizeDelta: {x: 10, y: 0}
|
||||
m_Pivot: {x: 0.5, y: 0.5}
|
||||
--- !u!222 &5233312447919013133
|
||||
CanvasRenderer:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312447919013135}
|
||||
m_CullTransparentMesh: 1
|
||||
--- !u!114 &5233312447919013134
|
||||
MonoBehaviour:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312447919013135}
|
||||
m_Enabled: 1
|
||||
m_EditorHideFlags: 0
|
||||
m_Script: {fileID: 11500000, guid: fe87c0e1cc204ed48ad3b37840f39efc, type: 3}
|
||||
m_Name:
|
||||
m_EditorClassIdentifier:
|
||||
m_Material: {fileID: 0}
|
||||
m_Color: {r: 1, g: 0, b: 0, a: 1}
|
||||
m_RaycastTarget: 1
|
||||
m_RaycastPadding: {x: 0, y: 0, z: 0, w: 0}
|
||||
m_Maskable: 1
|
||||
m_OnCullStateChanged:
|
||||
m_PersistentCalls:
|
||||
m_Calls: []
|
||||
m_Sprite: {fileID: 10905, guid: 0000000000000000f000000000000000, type: 0}
|
||||
m_Type: 1
|
||||
m_PreserveAspect: 0
|
||||
m_FillCenter: 1
|
||||
m_FillMethod: 4
|
||||
m_FillAmount: 1
|
||||
m_FillClockwise: 1
|
||||
m_FillOrigin: 0
|
||||
m_UseSpriteMesh: 0
|
||||
m_PixelsPerUnitMultiplier: 1
|
||||
--- !u!1 &5233312447919013135
|
||||
GameObject:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
serializedVersion: 6
|
||||
m_Component:
|
||||
- component: {fileID: 5233312447919013132}
|
||||
- component: {fileID: 5233312447919013133}
|
||||
- component: {fileID: 5233312447919013134}
|
||||
m_Layer: 5
|
||||
m_Name: Fill
|
||||
m_TagString: Untagged
|
||||
m_Icon: {fileID: 0}
|
||||
m_NavMeshLayer: 0
|
||||
m_StaticEditorFlags: 0
|
||||
m_IsActive: 1
|
||||
--- !u!222 &5233312448025626832
|
||||
CanvasRenderer:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312448025626834}
|
||||
m_CullTransparentMesh: 1
|
||||
--- !u!114 &5233312448025626833
|
||||
MonoBehaviour:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312448025626834}
|
||||
m_Enabled: 1
|
||||
m_EditorHideFlags: 0
|
||||
m_Script: {fileID: 11500000, guid: f4688fdb7df04437aeb418b961361dc5, type: 3}
|
||||
m_Name:
|
||||
m_EditorClassIdentifier:
|
||||
m_Material: {fileID: 0}
|
||||
m_Color: {r: 1, g: 1, b: 1, a: 1}
|
||||
m_RaycastTarget: 1
|
||||
m_RaycastPadding: {x: 0, y: 0, z: 0, w: 0}
|
||||
m_Maskable: 1
|
||||
m_OnCullStateChanged:
|
||||
m_PersistentCalls:
|
||||
m_Calls: []
|
||||
m_text: Detecteren ...
|
||||
m_isRightToLeft: 0
|
||||
m_fontAsset: {fileID: 11400000, guid: 8f586378b4e144a9851e7b34d9b748ee, type: 2}
|
||||
m_sharedMaterial: {fileID: 2180264, guid: 8f586378b4e144a9851e7b34d9b748ee, type: 2}
|
||||
m_fontSharedMaterials: []
|
||||
m_fontMaterial: {fileID: 0}
|
||||
m_fontMaterials: []
|
||||
m_fontColor32:
|
||||
serializedVersion: 2
|
||||
rgba: 4282188031
|
||||
m_fontColor: {r: 0.5803922, g: 0.58431375, b: 0.6, a: 1}
|
||||
m_enableVertexGradient: 0
|
||||
m_colorMode: 3
|
||||
m_fontColorGradient:
|
||||
topLeft: {r: 1, g: 1, b: 1, a: 1}
|
||||
topRight: {r: 1, g: 1, b: 1, a: 1}
|
||||
bottomLeft: {r: 1, g: 1, b: 1, a: 1}
|
||||
bottomRight: {r: 1, g: 1, b: 1, a: 1}
|
||||
m_fontColorGradientPreset: {fileID: 0}
|
||||
m_spriteAsset: {fileID: 0}
|
||||
m_tintAllSprites: 0
|
||||
m_StyleSheet: {fileID: 0}
|
||||
m_TextStyleHashCode: -1183493901
|
||||
m_overrideHtmlColors: 0
|
||||
m_faceColor:
|
||||
serializedVersion: 2
|
||||
rgba: 4294967295
|
||||
m_fontSize: 48
|
||||
m_fontSizeBase: 48
|
||||
m_fontWeight: 400
|
||||
m_enableAutoSizing: 0
|
||||
m_fontSizeMin: 18
|
||||
m_fontSizeMax: 72
|
||||
m_fontStyle: 1
|
||||
m_HorizontalAlignment: 2
|
||||
m_VerticalAlignment: 512
|
||||
m_textAlignment: 65535
|
||||
m_characterSpacing: 0
|
||||
m_wordSpacing: 0
|
||||
m_lineSpacing: 0
|
||||
m_lineSpacingMax: 0
|
||||
m_paragraphSpacing: 0
|
||||
m_charWidthMaxAdj: 0
|
||||
m_enableWordWrapping: 1
|
||||
m_wordWrappingRatios: 0.4
|
||||
m_overflowMode: 0
|
||||
m_linkedTextComponent: {fileID: 0}
|
||||
parentLinkedComponent: {fileID: 0}
|
||||
m_enableKerning: 1
|
||||
m_enableExtraPadding: 0
|
||||
checkPaddingRequired: 0
|
||||
m_isRichText: 1
|
||||
m_parseCtrlCharacters: 1
|
||||
m_isOrthographic: 1
|
||||
m_isCullingEnabled: 0
|
||||
m_horizontalMapping: 0
|
||||
m_verticalMapping: 0
|
||||
m_uvLineOffset: 0
|
||||
m_geometrySortingOrder: 0
|
||||
m_IsTextObjectScaleStatic: 0
|
||||
m_VertexBufferAutoSizeReduction: 0
|
||||
m_useMaxVisibleDescender: 1
|
||||
m_pageToDisplay: 1
|
||||
m_margin: {x: 0, y: 0, z: 0, w: 0}
|
||||
m_isUsingLegacyAnimationComponent: 0
|
||||
m_isVolumetricText: 0
|
||||
m_hasFontAssetChanged: 0
|
||||
m_baseMaterial: {fileID: 0}
|
||||
m_maskOffset: {x: 0, y: 0, z: 0, w: 0}
|
||||
--- !u!1 &5233312448025626834
|
||||
GameObject:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
serializedVersion: 6
|
||||
m_Component:
|
||||
- component: {fileID: 5233312448025626847}
|
||||
- component: {fileID: 5233312448025626832}
|
||||
- component: {fileID: 5233312448025626833}
|
||||
m_Layer: 5
|
||||
m_Name: Text
|
||||
m_TagString: Untagged
|
||||
m_Icon: {fileID: 0}
|
||||
m_NavMeshLayer: 0
|
||||
m_StaticEditorFlags: 0
|
||||
m_IsActive: 1
|
||||
--- !u!224 &5233312448025626847
|
||||
RectTransform:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312448025626834}
|
||||
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
|
||||
m_LocalPosition: {x: 0, y: 0, z: 0}
|
||||
m_LocalScale: {x: 1, y: 1, z: 1}
|
||||
m_ConstrainProportionsScale: 0
|
||||
m_Children: []
|
||||
m_Father: {fileID: 5233312447513285389}
|
||||
m_RootOrder: 0
|
||||
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
||||
m_AnchorMin: {x: 0.5, y: 1}
|
||||
m_AnchorMax: {x: 0.5, y: 1}
|
||||
m_AnchoredPosition: {x: 0, y: 0}
|
||||
m_SizeDelta: {x: 500, y: 100}
|
||||
m_Pivot: {x: 0.5, y: 1}
|
||||
--- !u!1 &5233312448534255792
|
||||
GameObject:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
serializedVersion: 6
|
||||
m_Component:
|
||||
- component: {fileID: 5233312448534255807}
|
||||
- component: {fileID: 5233312448534255805}
|
||||
- component: {fileID: 5233312448534255806}
|
||||
m_Layer: 5
|
||||
m_Name: Background
|
||||
m_TagString: Untagged
|
||||
m_Icon: {fileID: 0}
|
||||
m_NavMeshLayer: 0
|
||||
m_StaticEditorFlags: 0
|
||||
m_IsActive: 1
|
||||
--- !u!222 &5233312448534255805
|
||||
CanvasRenderer:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312448534255792}
|
||||
m_CullTransparentMesh: 1
|
||||
--- !u!114 &5233312448534255806
|
||||
MonoBehaviour:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312448534255792}
|
||||
m_Enabled: 1
|
||||
m_EditorHideFlags: 0
|
||||
m_Script: {fileID: 11500000, guid: fe87c0e1cc204ed48ad3b37840f39efc, type: 3}
|
||||
m_Name:
|
||||
m_EditorClassIdentifier:
|
||||
m_Material: {fileID: 0}
|
||||
m_Color: {r: 1, g: 1, b: 1, a: 1}
|
||||
m_RaycastTarget: 1
|
||||
m_RaycastPadding: {x: 0, y: 0, z: 0, w: 0}
|
||||
m_Maskable: 1
|
||||
m_OnCullStateChanged:
|
||||
m_PersistentCalls:
|
||||
m_Calls: []
|
||||
m_Sprite: {fileID: 10907, guid: 0000000000000000f000000000000000, type: 0}
|
||||
m_Type: 1
|
||||
m_PreserveAspect: 0
|
||||
m_FillCenter: 1
|
||||
m_FillMethod: 4
|
||||
m_FillAmount: 1
|
||||
m_FillClockwise: 1
|
||||
m_FillOrigin: 0
|
||||
m_UseSpriteMesh: 0
|
||||
m_PixelsPerUnitMultiplier: 1
|
||||
--- !u!224 &5233312448534255807
|
||||
RectTransform:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312448534255792}
|
||||
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
|
||||
m_LocalPosition: {x: 0, y: 0, z: 0}
|
||||
m_LocalScale: {x: 1, y: 1, z: 1}
|
||||
m_ConstrainProportionsScale: 0
|
||||
m_Children: []
|
||||
m_Father: {fileID: 5233312447201393292}
|
||||
m_RootOrder: 0
|
||||
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
||||
m_AnchorMin: {x: 0, y: 0}
|
||||
m_AnchorMax: {x: 1, y: 1}
|
||||
m_AnchoredPosition: {x: 0, y: 0}
|
||||
m_SizeDelta: {x: 0, y: 0}
|
||||
m_Pivot: {x: 0.5, y: 0.5}
|
||||
--- !u!224 &5233312448785575104
|
||||
RectTransform:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312448785575105}
|
||||
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
|
||||
m_LocalPosition: {x: 0, y: 0, z: 0}
|
||||
m_LocalScale: {x: 1, y: 1, z: 1}
|
||||
m_ConstrainProportionsScale: 0
|
||||
m_Children:
|
||||
- {fileID: 5233312447919013132}
|
||||
m_Father: {fileID: 5233312447201393292}
|
||||
m_RootOrder: 1
|
||||
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
||||
m_AnchorMin: {x: 0, y: 0}
|
||||
m_AnchorMax: {x: 1, y: 1}
|
||||
m_AnchoredPosition: {x: 0, y: 0}
|
||||
m_SizeDelta: {x: 0, y: 0}
|
||||
m_Pivot: {x: 0.5, y: 0.5}
|
||||
--- !u!1 &5233312448785575105
|
||||
GameObject:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
serializedVersion: 6
|
||||
m_Component:
|
||||
- component: {fileID: 5233312448785575104}
|
||||
m_Layer: 5
|
||||
m_Name: Fill Area
|
||||
m_TagString: Untagged
|
||||
m_Icon: {fileID: 0}
|
||||
m_NavMeshLayer: 0
|
||||
m_StaticEditorFlags: 0
|
||||
m_IsActive: 1
|
||||
|
||||
@@ -244,7 +244,7 @@ public class HangmanController : AbstractFeedback
|
||||
{
|
||||
StartController();
|
||||
|
||||
signPredictor.SetModel(ModelIndex.FINGERSPELLING);
|
||||
signPredictor.ChangeModel(ModelIndex.FINGERSPELLING);
|
||||
AddSelfAsListener();
|
||||
}
|
||||
/// <summary>
|
||||
|
||||
@@ -74,12 +74,15 @@ public class KeypointManager
|
||||
}
|
||||
|
||||
|
||||
if (width > height){
|
||||
delta_x = ((float)0.1)*width;
|
||||
delta_y = delta_x + ((width - height)/2);
|
||||
}else{
|
||||
delta_y = ((float)0.1)*height;
|
||||
delta_x = delta_y + ((height - width)/2);
|
||||
if (width > height)
|
||||
{
|
||||
delta_x = ((float)0.1) * width;
|
||||
delta_y = delta_x + ((width - height) / 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
delta_y = ((float)0.1) * height;
|
||||
delta_x = delta_y + ((height - width) / 2);
|
||||
}
|
||||
|
||||
float starting_x = min_x - delta_x;
|
||||
@@ -124,10 +127,10 @@ public class KeypointManager
|
||||
float eye_left_x = pose_x[1];
|
||||
float eye_left_y = pose_y[1];
|
||||
|
||||
float starting_x = shoulder_center_x - (bbox_size/2) * shoulder_distance;
|
||||
float starting_y = eye_left_y - shoulder_distance/2;
|
||||
float starting_x = shoulder_center_x - (bbox_size / 2) * shoulder_distance;
|
||||
float starting_y = eye_left_y - shoulder_distance / 2;
|
||||
|
||||
float ending_x = shoulder_center_x + (bbox_size/2) * shoulder_distance;
|
||||
float ending_x = shoulder_center_x + (bbox_size / 2) * shoulder_distance;
|
||||
float ending_y = starting_y + (bbox_size - ((float)0.5)) * shoulder_distance;
|
||||
|
||||
float bbox_center_x = (starting_x + ending_x) / 2;
|
||||
|
||||
@@ -15,6 +15,8 @@ MonoBehaviour:
|
||||
currentModelIndex: 0
|
||||
models:
|
||||
- index: 0
|
||||
model: {fileID: 5022602860645237092, guid: e6d85df707405ad4f97c23b07227ee99, type: 3}
|
||||
modelWINDOWS: {fileID: 0}
|
||||
modelMAC: {fileID: 0}
|
||||
- index: 1
|
||||
model: {fileID: 0}
|
||||
modelWINDOWS: {fileID: 8538825877217656561, guid: fdbf401e965a6bf4a87637cd519f2715, type: 3}
|
||||
modelMAC: {fileID: 0}
|
||||
|
||||
@@ -1,11 +1,5 @@
|
||||
using DigitalRuby.Tween;
|
||||
using Mediapipe.Unity.Tutorial;
|
||||
using System;
|
||||
using System.Collections;
|
||||
using TMPro;
|
||||
using UnityEngine;
|
||||
using UnityEngine.Events;
|
||||
using UnityEngine.UI;
|
||||
|
||||
/// <summary>
|
||||
/// Class to display feedback during a course
|
||||
|
||||
@@ -3,12 +3,12 @@
|
||||
"rootNamespace": "",
|
||||
"references": [
|
||||
"GUID:6055be8ebefd69e48b49212b09b47b2f",
|
||||
"GUID:5c2b5ba89f9e74e418232e154bc5cc7a",
|
||||
"GUID:04c4d86a70aa56c55a78c61f1ab1a56d",
|
||||
"GUID:edc93f477bb73a743a97d6882ed330b3",
|
||||
"GUID:58e104b97fb3752438ada2902a36dcbf",
|
||||
"GUID:7f2d0ee6dd21e1d4eb25b71b7a749d25",
|
||||
"GUID:f55a02e98b01bc849b30d9650ccd8f15"
|
||||
"GUID:f55a02e98b01bc849b30d9650ccd8f15",
|
||||
"GUID:d23f64cfd3b314bb4a18a8284c99bf5e"
|
||||
],
|
||||
"includePlatforms": [],
|
||||
"excludePlatforms": [],
|
||||
|
||||
@@ -1,334 +1,362 @@
|
||||
// Copyright (c) 2021 homuler
|
||||
//
|
||||
// Use of this source code is governed by an MIT-style
|
||||
// license that can be found in the LICENSE file or at
|
||||
// https://opensource.org/licenses/MIT.
|
||||
|
||||
// ATTENTION!: This code is for a tutorial.
|
||||
|
||||
using Mediapipe;
|
||||
using Mediapipe.Unity;
|
||||
using NatML;
|
||||
using NatML.Features;
|
||||
using NatML.Internal;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using Unity.Barracuda;
|
||||
using System.Threading.Tasks;
|
||||
using UnityEngine;
|
||||
using UnityEngine.UI;
|
||||
|
||||
namespace Mediapipe.Unity.Tutorial
|
||||
/// <summary>
|
||||
///
|
||||
/// </summary>
|
||||
public class SignPredictor : MonoBehaviour
|
||||
{
|
||||
public class SignPredictor : MonoBehaviour
|
||||
/// <summary>
|
||||
/// Predictor class which is used to predict the sign using an MLEdgeModel
|
||||
/// </summary>
|
||||
public class NatMLSignPredictor : IMLPredictor<List<float>>
|
||||
{
|
||||
/// <summary>
|
||||
/// ModelList, used to change model using ModelIndex
|
||||
/// The MLEdgeModel used for predictions
|
||||
/// </summary>
|
||||
public ModelList modelList;
|
||||
private readonly MLEdgeModel edgeModel;
|
||||
|
||||
/// <summary>
|
||||
/// Reference to the model info file
|
||||
/// The type used to create features which are input for the model
|
||||
/// </summary>
|
||||
public TextAsset modelInfoFile;
|
||||
private MLFeatureType featureType;
|
||||
|
||||
/// <summary>
|
||||
/// Config file to set up the graph
|
||||
/// Creation of a NatMLSignPredictor instance
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private TextAsset configAsset;
|
||||
|
||||
/// <summary>
|
||||
/// Index to indicate which camera is being used
|
||||
/// </summary>
|
||||
private int camdex = 0;
|
||||
|
||||
/// <summary>
|
||||
/// The screen object on which the video is displayed
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private RawImage screen;
|
||||
|
||||
/// <summary>
|
||||
/// A secondary optional screen object on which the video is displayed
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private RawImage screen2;
|
||||
|
||||
/// <summary>
|
||||
/// MediaPipe graph
|
||||
/// </summary>
|
||||
private CalculatorGraph graph;
|
||||
|
||||
/// <summary>
|
||||
/// Resource manager for graph resources
|
||||
/// </summary>
|
||||
private ResourceManager resourceManager;
|
||||
|
||||
/// <summary>
|
||||
/// Webcam texture
|
||||
/// </summary>
|
||||
private WebCamTexture webcamTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Input texture
|
||||
/// </summary>
|
||||
private Texture2D inputTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Screen pixel data
|
||||
/// </summary>
|
||||
private Color32[] pixelData;
|
||||
|
||||
/// <summary>
|
||||
/// Stopwatch to give a timestamp to video frames
|
||||
/// </summary>
|
||||
private Stopwatch stopwatch;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the pose landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> posestream;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the left hand landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> leftstream;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the right hand landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> rightstream;
|
||||
|
||||
/// <summary>
|
||||
/// create precense stream
|
||||
/// </summary>
|
||||
public OutputStream<DetectionVectorPacket, List<Detection>> presenceStream;
|
||||
|
||||
/// <summary>
|
||||
/// A keypointmanager which does normalization stuff, keeps track of the landmarks
|
||||
/// </summary>
|
||||
private KeypointManager keypointManager;
|
||||
|
||||
/// <summary>
|
||||
/// The worker on which we schedule the signpredictor model execution
|
||||
/// </summary>
|
||||
private IWorker worker;
|
||||
|
||||
/// <summary>
|
||||
/// Width of th webcam
|
||||
/// </summary>
|
||||
private int width;
|
||||
|
||||
/// <summary>
|
||||
/// Height of the webcam
|
||||
/// </summary>
|
||||
private int height;
|
||||
|
||||
/// <summary>
|
||||
/// The enumerator of the worker which executes the sign predictor model
|
||||
/// </summary>
|
||||
private IEnumerator enumerator;
|
||||
|
||||
/// <summary>
|
||||
/// The prediction of the sign predictor model
|
||||
/// </summary>
|
||||
public Dictionary<string, float> learnableProbabilities;
|
||||
|
||||
/// <summary>
|
||||
/// Bool indicating whether or not the resource manager has already been initialized
|
||||
/// </summary>
|
||||
private static bool resourceManagerIsInitialized = false;
|
||||
|
||||
/// <summary>
|
||||
/// an inputTensor for the sign predictor
|
||||
/// </summary>
|
||||
private Tensor inputTensor;
|
||||
|
||||
public List<Listener> listeners = new List<Listener>();
|
||||
|
||||
/// <summary>
|
||||
/// Google Mediapipe setup & run
|
||||
/// </summary>
|
||||
/// <returns>IEnumerator</returns>
|
||||
/// <exception cref="System.Exception"></exception>
|
||||
private IEnumerator Start()
|
||||
/// <param name="edgeModel"></param>
|
||||
public NatMLSignPredictor(MLEdgeModel edgeModel)
|
||||
{
|
||||
// Webcam setup
|
||||
if (WebCamTexture.devices.Length == 0)
|
||||
{
|
||||
throw new System.Exception("Web Camera devices are not found");
|
||||
}
|
||||
// Start the webcam
|
||||
WebCamDevice webCamDevice = WebCamTexture.devices[0];
|
||||
webcamTexture = new WebCamTexture(webCamDevice.name);
|
||||
|
||||
webcamTexture.Play();
|
||||
|
||||
|
||||
yield return new WaitUntil(() => webcamTexture.width > 16);
|
||||
|
||||
// Set webcam aspect ratio
|
||||
width = webcamTexture.width;
|
||||
height = webcamTexture.height;
|
||||
float webcamAspect = (float)webcamTexture.width / (float)webcamTexture.height;
|
||||
screen.rectTransform.sizeDelta = new Vector2(screen.rectTransform.sizeDelta.y * webcamAspect, (screen.rectTransform.sizeDelta.y));
|
||||
screen.texture = webcamTexture;
|
||||
if (screen2 != null)
|
||||
{
|
||||
screen2.rectTransform.sizeDelta = new Vector2(screen2.rectTransform.sizeDelta.y * webcamAspect, (screen2.rectTransform.sizeDelta.y));
|
||||
}
|
||||
|
||||
if (modelList.GetCurrentModel() != null)
|
||||
{
|
||||
// TODO this method is kinda meh you should use
|
||||
inputTexture = new Texture2D(width, height, TextureFormat.RGBA32, false);
|
||||
pixelData = new Color32[width * height];
|
||||
|
||||
if (!resourceManagerIsInitialized)
|
||||
{
|
||||
resourceManager = new StreamingAssetsResourceManager();
|
||||
yield return resourceManager.PrepareAssetAsync("pose_detection.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("pose_landmark_full.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("face_landmark.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("hand_landmark_full.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("face_detection_short_range.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("hand_recrop.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("handedness.txt");
|
||||
resourceManagerIsInitialized = true;
|
||||
}
|
||||
|
||||
stopwatch = new Stopwatch();
|
||||
|
||||
// Setting up the graph
|
||||
graph = new CalculatorGraph(configAsset.text);
|
||||
|
||||
posestream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "pose_landmarks", "pose_landmarks_presence");
|
||||
leftstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "left_hand_landmarks", "left_hand_landmarks_presence");
|
||||
rightstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "right_hand_landmarks", "right_hand_landmarks_presence");
|
||||
|
||||
posestream.StartPolling().AssertOk();
|
||||
leftstream.StartPolling().AssertOk();
|
||||
rightstream.StartPolling().AssertOk();
|
||||
|
||||
graph.StartRun().AssertOk();
|
||||
stopwatch.Start();
|
||||
|
||||
|
||||
keypointManager = new KeypointManager(modelInfoFile);
|
||||
// check if model exists at path
|
||||
//var model = ModelLoader.Load(Resources.Load<NNModel>("Models/Fingerspelling/model_A-L"));
|
||||
worker = modelList.GetCurrentModel().CreateWorker();
|
||||
|
||||
StartCoroutine(SignRecognitionCoroutine());
|
||||
StartCoroutine(MediapipeCoroutine());
|
||||
}
|
||||
}
|
||||
/// <summary>
|
||||
/// Called at the start of course/Minigame, will set the model before the start of SIgnPredictor is called.
|
||||
/// </summary>
|
||||
/// <param name="index">The index of the model to be used</param>
|
||||
public void SetModel(ModelIndex index)
|
||||
{
|
||||
this.modelList.SetCurrentModel(index);
|
||||
this.edgeModel = edgeModel;
|
||||
featureType = edgeModel.inputs[0];
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Coroutine which executes the mediapipe pipeline
|
||||
/// Predicts the sign using the MLEdgeModel
|
||||
/// </summary>
|
||||
/// <param name="inputs"></param>
|
||||
/// <returns></returns>
|
||||
private IEnumerator MediapipeCoroutine()
|
||||
public List<float> Predict(params MLFeature[] inputs)
|
||||
{
|
||||
while (true)
|
||||
List<float> predictions = null;
|
||||
IMLEdgeFeature iedgeFeature = (IMLEdgeFeature)inputs[0];
|
||||
MLEdgeFeature edgeFeature = iedgeFeature.Create(featureType);
|
||||
MLFeatureCollection<MLEdgeFeature> result = edgeModel.Predict(edgeFeature);
|
||||
if (0 < result.Count)
|
||||
{
|
||||
inputTexture.SetPixels32(webcamTexture.GetPixels32(pixelData));
|
||||
var imageFrame = new ImageFrame(ImageFormat.Types.Format.Srgba, width, height, width * 4, inputTexture.GetRawTextureData<byte>());
|
||||
var currentTimestamp = stopwatch.ElapsedTicks / (System.TimeSpan.TicksPerMillisecond / 1000);
|
||||
graph.AddPacketToInputStream("input_video", new ImageFramePacket(imageFrame, new Timestamp(currentTimestamp))).AssertOk();
|
||||
//Debug.Log(Time.timeAsDouble + " Added new packet to mediapipe graph");
|
||||
yield return new WaitForEndOfFrame();
|
||||
|
||||
NormalizedLandmarkList _poseLandmarks = null;
|
||||
NormalizedLandmarkList _leftHandLandmarks = null;
|
||||
NormalizedLandmarkList _rightHandLandmarks = null;
|
||||
|
||||
//Debug.Log("Extracting keypoints");
|
||||
|
||||
yield return new WaitUntil(() => { posestream.TryGetNext(out _poseLandmarks, false); return true; });
|
||||
yield return new WaitUntil(() => { leftstream.TryGetNext(out _leftHandLandmarks, false); return true; });
|
||||
yield return new WaitUntil(() => { rightstream.TryGetNext(out _rightHandLandmarks, false); return true; });
|
||||
//Debug.Log(Time.timeAsDouble + " Retrieved landmarks ");
|
||||
|
||||
keypointManager.AddLandmarks(_poseLandmarks, _leftHandLandmarks, _rightHandLandmarks);
|
||||
predictions = new MLArrayFeature<float>(result[0]).Flatten().ToArray().ToList();
|
||||
predictions = predictions.ConvertAll((c) => Mathf.Exp(c));
|
||||
float sum = predictions.Sum();
|
||||
predictions = predictions.ConvertAll((c) => c / sum);
|
||||
}
|
||||
edgeFeature.Dispose();
|
||||
result.Dispose();
|
||||
return predictions;
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Coroutine which calls the sign predictor model
|
||||
/// Disposing the MLEdgeModel
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
private IEnumerator SignRecognitionCoroutine()
|
||||
public void Dispose()
|
||||
{
|
||||
while (true)
|
||||
edgeModel.Dispose();
|
||||
}
|
||||
}
|
||||
|
||||
public List<Listener> listeners = new List<Listener>();
|
||||
|
||||
/// <summary>
|
||||
/// Predictor which is used to create the asyncPredictor (should not be used if asyncPredictor exists)
|
||||
/// </summary>
|
||||
private NatMLSignPredictor predictor;
|
||||
|
||||
/// <summary>
|
||||
/// The asynchronous predictor which is used to predict the sign using an MLEdgemodel
|
||||
/// </summary>
|
||||
private MLAsyncPredictor<List<float>> asyncPredictor;
|
||||
|
||||
/// <summary>
|
||||
/// Reference to the model used in the SignPredictor
|
||||
/// </summary>
|
||||
private MLEdgeModel model;
|
||||
|
||||
/// <summary>
|
||||
/// Modellist used to change model using ModelIndex
|
||||
/// </summary>
|
||||
public ModelList modelList;
|
||||
|
||||
/// <summary>
|
||||
/// Chosen model data based on the operating system
|
||||
/// </summary>
|
||||
private MLModelData modelData;
|
||||
|
||||
/// <summary>
|
||||
/// Reference to the model info file
|
||||
/// </summary>
|
||||
public TextAsset modelInfoFile;
|
||||
|
||||
/// <summary>
|
||||
/// Config file to set up the graph
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private TextAsset configAsset;
|
||||
|
||||
/// <summary>
|
||||
/// Index to indicate which camera is being used
|
||||
/// </summary>
|
||||
private int camdex = 0;
|
||||
|
||||
/// <summary>
|
||||
/// The screen object on which the video is displayed
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private RawImage screen;
|
||||
|
||||
/// <summary>
|
||||
/// A secondary optional screen object on which the video is displayed
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private RawImage screen2;
|
||||
|
||||
/// <summary>
|
||||
/// MediaPipe graph
|
||||
/// </summary>
|
||||
private CalculatorGraph graph;
|
||||
|
||||
/// <summary>
|
||||
/// Resource manager for graph resources
|
||||
/// </summary>
|
||||
private ResourceManager resourceManager;
|
||||
|
||||
/// <summary>
|
||||
/// Webcam texture
|
||||
/// </summary>
|
||||
private WebCamTexture webcamTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Input texture
|
||||
/// </summary>
|
||||
private Texture2D inputTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Screen pixel data
|
||||
/// </summary>
|
||||
private Color32[] pixelData;
|
||||
|
||||
/// <summary>
|
||||
/// Stopwatch to give a timestamp to video frames
|
||||
/// </summary>
|
||||
private Stopwatch stopwatch;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the pose landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> posestream;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the left hand landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> leftstream;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the right hand landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> rightstream;
|
||||
|
||||
/// <summary>
|
||||
/// create precense stream
|
||||
/// </summary>
|
||||
public OutputStream<DetectionVectorPacket, List<Detection>> presenceStream;
|
||||
|
||||
/// <summary>
|
||||
/// A keypointmanager which does normalization stuff, keeps track of the landmarks
|
||||
/// </summary>
|
||||
private KeypointManager keypointManager;
|
||||
|
||||
/// <summary>
|
||||
/// Width of th webcam
|
||||
/// </summary>
|
||||
private int width;
|
||||
|
||||
/// <summary>
|
||||
/// Height of the webcam
|
||||
/// </summary>
|
||||
private int height;
|
||||
|
||||
/// <summary>
|
||||
/// The prediction of the sign predictor model
|
||||
/// </summary>
|
||||
public Dictionary<string, float> learnableProbabilities;
|
||||
|
||||
/// <summary>
|
||||
/// Bool indicating whether or not the resource manager has already been initialized
|
||||
/// </summary>
|
||||
private static bool resourceManagerIsInitialized = false;
|
||||
|
||||
/// <summary>
|
||||
/// Google Mediapipe setup & run
|
||||
/// </summary>
|
||||
/// <returns>IEnumerator</returns>
|
||||
/// <exception cref="System.Exception"></exception>
|
||||
private IEnumerator Start()
|
||||
{
|
||||
// Webcam setup
|
||||
if (WebCamTexture.devices.Length == 0)
|
||||
{
|
||||
throw new System.Exception("Web Camera devices are not found");
|
||||
}
|
||||
// Start the webcam
|
||||
WebCamDevice webCamDevice = WebCamTexture.devices[0];
|
||||
webcamTexture = new WebCamTexture(webCamDevice.name);
|
||||
|
||||
webcamTexture.Play();
|
||||
|
||||
yield return new WaitUntil(() => webcamTexture.width > 16);
|
||||
|
||||
// Set webcam aspect ratio
|
||||
width = webcamTexture.width;
|
||||
height = webcamTexture.height;
|
||||
float webcamAspect = (float)webcamTexture.width / (float)webcamTexture.height;
|
||||
screen.rectTransform.sizeDelta = new Vector2(screen.rectTransform.sizeDelta.y * webcamAspect, (screen.rectTransform.sizeDelta.y));
|
||||
screen.texture = webcamTexture;
|
||||
if (screen2 != null)
|
||||
{
|
||||
screen2.rectTransform.sizeDelta = new Vector2(screen2.rectTransform.sizeDelta.y * webcamAspect, (screen2.rectTransform.sizeDelta.y));
|
||||
}
|
||||
|
||||
// TODO this method is kinda meh you should use
|
||||
inputTexture = new Texture2D(width, height, TextureFormat.RGBA32, false);
|
||||
pixelData = new Color32[width * height];
|
||||
|
||||
if (!resourceManagerIsInitialized)
|
||||
{
|
||||
resourceManager = new StreamingAssetsResourceManager();
|
||||
yield return resourceManager.PrepareAssetAsync("pose_detection.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("pose_landmark_full.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("face_landmark.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("hand_landmark_full.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("face_detection_short_range.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("hand_recrop.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("handedness.txt");
|
||||
resourceManagerIsInitialized = true;
|
||||
}
|
||||
|
||||
stopwatch = new Stopwatch();
|
||||
|
||||
// Setting up the graph
|
||||
graph = new CalculatorGraph(configAsset.text);
|
||||
|
||||
posestream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "pose_landmarks", "pose_landmarks_presence");
|
||||
leftstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "left_hand_landmarks", "left_hand_landmarks_presence");
|
||||
rightstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "right_hand_landmarks", "right_hand_landmarks_presence");
|
||||
|
||||
posestream.StartPolling().AssertOk();
|
||||
leftstream.StartPolling().AssertOk();
|
||||
rightstream.StartPolling().AssertOk();
|
||||
|
||||
graph.StartRun().AssertOk();
|
||||
stopwatch.Start();
|
||||
|
||||
// Creating a KeypointManager
|
||||
keypointManager = new KeypointManager(modelInfoFile);
|
||||
|
||||
// Check if a model is ready to load
|
||||
yield return new WaitUntil(() => modelList.HasValidModel());
|
||||
|
||||
// Create Model
|
||||
Task<MLEdgeModel> t = Task.Run(() => MLEdgeModel.Create(modelList.GetCurrentModel()));
|
||||
yield return new WaitUntil(() => t.IsCompleted);
|
||||
model = t.Result;
|
||||
predictor = new NatMLSignPredictor(model);
|
||||
asyncPredictor = predictor.ToAsync();
|
||||
|
||||
// Start the Coroutine
|
||||
StartCoroutine(SignRecognitionCoroutine());
|
||||
StartCoroutine(MediapipeCoroutine());
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Coroutine which executes the mediapipe pipeline
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
private IEnumerator MediapipeCoroutine()
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
inputTexture.SetPixels32(webcamTexture.GetPixels32(pixelData));
|
||||
var imageFrame = new ImageFrame(ImageFormat.Types.Format.Srgba, width, height, width * 4, inputTexture.GetRawTextureData<byte>());
|
||||
var currentTimestamp = stopwatch.ElapsedTicks / (System.TimeSpan.TicksPerMillisecond / 1000);
|
||||
graph.AddPacketToInputStream("input_video", new ImageFramePacket(imageFrame, new Timestamp(currentTimestamp))).AssertOk();
|
||||
yield return new WaitForEndOfFrame();
|
||||
|
||||
NormalizedLandmarkList _poseLandmarks = null;
|
||||
NormalizedLandmarkList _leftHandLandmarks = null;
|
||||
NormalizedLandmarkList _rightHandLandmarks = null;
|
||||
|
||||
yield return new WaitUntil(() => { posestream.TryGetNext(out _poseLandmarks); return true; });
|
||||
yield return new WaitUntil(() => { leftstream.TryGetNext(out _leftHandLandmarks); return true; });
|
||||
yield return new WaitUntil(() => { rightstream.TryGetNext(out _rightHandLandmarks); return true; });
|
||||
|
||||
keypointManager.AddLandmarks(_poseLandmarks, _leftHandLandmarks, _rightHandLandmarks);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Coroutine which calls the sign predictor model
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
private IEnumerator SignRecognitionCoroutine()
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
List<List<float>> inputData = keypointManager.GetKeypoints();
|
||||
if (inputData != null && asyncPredictor.readyForPrediction)
|
||||
{
|
||||
List<List<float>> input = keypointManager.GetKeypoints();
|
||||
if (input != null)
|
||||
// Getting the size of the input data
|
||||
int framecount = inputData.Count;
|
||||
int keypointsPerFrame = inputData[0].Count;
|
||||
|
||||
// Creating ArrayFeature
|
||||
int[] shape = { framecount, keypointsPerFrame };
|
||||
float[] input = new float[framecount * keypointsPerFrame];
|
||||
int i = 0;
|
||||
inputData.ForEach((e) => e.ForEach((f) => input[i++] = f));
|
||||
MLArrayFeature<float> feature = new MLArrayFeature<float>(input, shape);
|
||||
|
||||
// Predicting
|
||||
Task<List<float>> task = Task.Run(async () => await asyncPredictor.Predict(feature));
|
||||
yield return new WaitUntil(() => task.IsCompleted);
|
||||
List<float> result = task.Result;
|
||||
if (0 < result.Count)
|
||||
{
|
||||
|
||||
//UnityEngine.Debug.Log("input: " + input.Count);
|
||||
|
||||
int frameCount = input.Count;
|
||||
int keypoints_per_frame = input[0].Count;
|
||||
|
||||
// Create a tensor with the input
|
||||
inputTensor = new Tensor(frameCount, keypoints_per_frame);
|
||||
|
||||
// Fill the tensor with the input
|
||||
for (int i = 0; i < frameCount; i++)
|
||||
{
|
||||
for (int j = 0; j < keypoints_per_frame; j++)
|
||||
{
|
||||
inputTensor[i, j] = input[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
int stepsPerFrame = 190;
|
||||
enumerator = worker.StartManualSchedule(inputTensor);
|
||||
int step = 0;
|
||||
while (enumerator.MoveNext())
|
||||
{
|
||||
if (++step % stepsPerFrame == 0)
|
||||
{
|
||||
//Debug.Log(Time.timeAsDouble + " : " + step);
|
||||
yield return null;
|
||||
}
|
||||
}
|
||||
|
||||
var output = worker.PeekOutput();
|
||||
|
||||
inputTensor.Dispose();
|
||||
|
||||
// Get the output as an array
|
||||
float[] outputArray = output.ToReadOnlyArray();
|
||||
//Debug.Log($"out = [{outputArray.Aggregate(" ", (t, f) => $"{t}{f} ")}]");
|
||||
|
||||
// Calculate the softmax of the output
|
||||
float max = outputArray.Max();
|
||||
float[] softmaxedOutput = outputArray.Select(x => Mathf.Exp(x - max)).ToArray();
|
||||
float sum = softmaxedOutput.Sum();
|
||||
float[] softmaxedOutput2 = softmaxedOutput.Select(x => x / sum).ToArray();
|
||||
|
||||
// Get the index of the highest probability
|
||||
int maxIndex = softmaxedOutput2.ToList().IndexOf(softmaxedOutput2.Max());
|
||||
|
||||
// Get the letter from the index
|
||||
char letter = (char)(maxIndex + 65);
|
||||
float accuracy = (Mathf.RoundToInt(softmaxedOutput2[maxIndex] * 100));
|
||||
|
||||
// Set the letterProbabilities, currently used by Courses
|
||||
learnableProbabilities = new Dictionary<string, float>();
|
||||
for (int i = 0; i < softmaxedOutput2.Length; i++)
|
||||
|
||||
// Temporary fix
|
||||
List<string> signs = new List<string>()
|
||||
{
|
||||
learnableProbabilities.Add(((char)(i + 65)).ToString(), softmaxedOutput2[i]);
|
||||
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
|
||||
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"
|
||||
};
|
||||
|
||||
|
||||
|
||||
for (int j = 0; j < result.Count; j++)
|
||||
{
|
||||
learnableProbabilities.Add(signs[j].ToUpper(), result[j]);
|
||||
}
|
||||
//Debug.Log($"prob = [{learnableProbabilities.Aggregate(" ", (t, kv) => $"{t}{kv.Key}:{kv.Value} ")}]");
|
||||
foreach(Listener listener in listeners)
|
||||
foreach (Listener listener in listeners)
|
||||
{
|
||||
yield return listener.ProcessIncomingCall();
|
||||
}
|
||||
@@ -339,77 +367,85 @@ namespace Mediapipe.Unity.Tutorial
|
||||
yield return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Propper destruction on the Mediapipegraph
|
||||
/// </summary>
|
||||
private void OnDestroy()
|
||||
{
|
||||
if (webcamTexture != null)
|
||||
{
|
||||
webcamTexture.Stop();
|
||||
}
|
||||
|
||||
if (graph != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
graph.CloseInputStream("input_video").AssertOk();
|
||||
graph.WaitUntilDone().AssertOk();
|
||||
}
|
||||
finally
|
||||
{
|
||||
|
||||
graph.Dispose();
|
||||
}
|
||||
}
|
||||
// inputTensor must still be disposed, if it exists
|
||||
inputTensor?.Dispose();
|
||||
worker?.Dispose();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// So long as there are cameras to use, you swap the camera you are using to another in the list.
|
||||
/// </summary>
|
||||
public void SwapCam()
|
||||
{
|
||||
if (WebCamTexture.devices.Length > 0)
|
||||
{
|
||||
// Stop the old camera
|
||||
// If there was no camera playing before, then you dont have to reset the texture, as it wasn't assigned in the first place.
|
||||
if (webcamTexture.isPlaying)
|
||||
{
|
||||
screen.texture = null;
|
||||
webcamTexture.Stop();
|
||||
webcamTexture = null;
|
||||
}
|
||||
// Find the new camera
|
||||
camdex += 1;
|
||||
camdex %= WebCamTexture.devices.Length;
|
||||
// Start the new camera
|
||||
WebCamDevice device = WebCamTexture.devices[camdex];
|
||||
webcamTexture = new WebCamTexture(device.name);
|
||||
screen.texture = webcamTexture;
|
||||
|
||||
webcamTexture.Play();
|
||||
}
|
||||
}
|
||||
/// <summary>
|
||||
/// Swaps the display screens
|
||||
/// </summary>
|
||||
public void SwapScreen()
|
||||
{
|
||||
if(screen2.texture == null && screen.texture != null)
|
||||
{
|
||||
screen2.texture = webcamTexture;
|
||||
screen.texture = null;
|
||||
}
|
||||
else if (screen2.texture != null && screen.texture == null)
|
||||
{
|
||||
screen.texture = webcamTexture;
|
||||
screen2.texture = null;
|
||||
}
|
||||
yield return null;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Propper destruction on the Mediapipegraph
|
||||
/// </summary>
|
||||
private void OnDestroy()
|
||||
{
|
||||
if (webcamTexture != null)
|
||||
{
|
||||
webcamTexture.Stop();
|
||||
}
|
||||
|
||||
if (graph != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
graph.CloseInputStream("input_video").AssertOk();
|
||||
graph.WaitUntilDone().AssertOk();
|
||||
}
|
||||
finally
|
||||
{
|
||||
graph.Dispose();
|
||||
}
|
||||
}
|
||||
if (asyncPredictor != null)
|
||||
{
|
||||
asyncPredictor.Dispose();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// So long as there are cameras to use, you swap the camera you are using to another in the list.
|
||||
/// </summary>
|
||||
public void SwapCam()
|
||||
{
|
||||
if (WebCamTexture.devices.Length > 0)
|
||||
{
|
||||
// Stop the old camera
|
||||
// If there was no camera playing before, then you dont have to reset the texture, as it wasn't assigned in the first place.
|
||||
if (webcamTexture.isPlaying)
|
||||
{
|
||||
screen.texture = null;
|
||||
webcamTexture.Stop();
|
||||
webcamTexture = null;
|
||||
}
|
||||
// Find the new camera
|
||||
camdex += 1;
|
||||
camdex %= WebCamTexture.devices.Length;
|
||||
// Start the new camera
|
||||
WebCamDevice device = WebCamTexture.devices[camdex];
|
||||
webcamTexture = new WebCamTexture(device.name);
|
||||
screen.texture = webcamTexture;
|
||||
|
||||
webcamTexture.Play();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Swaps the display screens
|
||||
/// </summary>
|
||||
public void SwapScreen()
|
||||
{
|
||||
if (screen2.texture == null && screen.texture != null)
|
||||
{
|
||||
screen2.texture = webcamTexture;
|
||||
screen.texture = null;
|
||||
}
|
||||
else if (screen2.texture != null && screen.texture == null)
|
||||
{
|
||||
screen.texture = webcamTexture;
|
||||
screen2.texture = null;
|
||||
}
|
||||
}
|
||||
public void ChangeModel(ModelIndex index)
|
||||
{
|
||||
this.modelList.SetCurrentModel(index);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -179,7 +179,7 @@ public partial class SpellingBeeController : AbstractFeedback
|
||||
{
|
||||
StartController();
|
||||
|
||||
signPredictor.SetModel(currentTheme.modelIndex);
|
||||
signPredictor.ChangeModel(ModelIndex.FINGERSPELLING);
|
||||
AddSelfAsListener();
|
||||
}
|
||||
/// <summary>
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: f6ebab52a13ea425ba87006839f1d776
|
||||
folderAsset: yes
|
||||
DefaultImporter:
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,148 +0,0 @@
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using Onnx;
|
||||
using UnityEditor;
|
||||
using UnityEngine.Analytics;
|
||||
|
||||
namespace Unity.Barracuda.Editor
|
||||
{
|
||||
internal class BarracudaAnalytics
|
||||
{
|
||||
static bool s_EventRegistered = false;
|
||||
const int k_MaxEventsPerHour = 1000;
|
||||
const int k_MaxNumberOfElements = 1000;
|
||||
const string k_VendorKey = "unity.barracuda";
|
||||
const string k_ImportEventName = "uBarracudaImport";
|
||||
|
||||
static bool EnableAnalytics()
|
||||
{
|
||||
AnalyticsResult result = EditorAnalytics.RegisterEventWithLimit(k_ImportEventName, k_MaxEventsPerHour, k_MaxNumberOfElements, k_VendorKey);
|
||||
if (result == AnalyticsResult.Ok)
|
||||
s_EventRegistered = true;
|
||||
|
||||
return s_EventRegistered;
|
||||
}
|
||||
|
||||
struct BarracudaImportAnalyticsData
|
||||
{
|
||||
public string model_type;
|
||||
public string original_layers;
|
||||
public string imported_layers;
|
||||
public string import_warnings;
|
||||
}
|
||||
|
||||
public static void SendBarracudaImportEvent(object originalModel, Model importedModel)
|
||||
{
|
||||
//The event shouldn't be able to report if this is disabled but if we know we're not going to report
|
||||
//Lets early out and not waste time gathering all the data
|
||||
if (!EditorAnalytics.enabled)
|
||||
return;
|
||||
|
||||
if (!EnableAnalytics())
|
||||
return;
|
||||
|
||||
|
||||
var data = new BarracudaImportAnalyticsData();
|
||||
|
||||
try
|
||||
{
|
||||
data.original_layers = AnalyzeONNXModel(originalModel);
|
||||
data.imported_layers = AnalyzeNNModel(importedModel);
|
||||
data.model_type = string.IsNullOrEmpty(data.original_layers) ? "NN" : "ONNX";
|
||||
data.import_warnings = AnalyzeWarnings(importedModel);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
D.LogError($"Failed collecting Barracuda analytics: {e}");
|
||||
}
|
||||
|
||||
EditorAnalytics.SendEventWithLimit(k_ImportEventName, data);
|
||||
}
|
||||
|
||||
static string AnalyzeONNXModel(object originalModel)
|
||||
{
|
||||
if (!(originalModel is ModelProto))
|
||||
return "";
|
||||
|
||||
var layers = new Dictionary<string, int>();
|
||||
|
||||
var onnxModel = originalModel as ModelProto;
|
||||
foreach (var node in onnxModel.Graph.Node)
|
||||
{
|
||||
var layerDescription = node.OpType;
|
||||
|
||||
if (!layers.ContainsKey(layerDescription))
|
||||
layers[layerDescription] = 1;
|
||||
else
|
||||
layers[layerDescription] += 1;
|
||||
}
|
||||
|
||||
return DictionaryToJson(layers);
|
||||
}
|
||||
|
||||
static string AnalyzeNNModel(Model importedModel)
|
||||
{
|
||||
var layers = new Dictionary<string, int>();
|
||||
|
||||
foreach (Layer layer in importedModel.layers)
|
||||
{
|
||||
var layerDescription = LayerToString(layer);
|
||||
|
||||
if (!layers.ContainsKey(layerDescription))
|
||||
layers[layerDescription] = 1;
|
||||
else
|
||||
layers[layerDescription] += 1;
|
||||
}
|
||||
|
||||
return DictionaryToJson(layers);
|
||||
}
|
||||
|
||||
static string LayerToString(Layer layer)
|
||||
{
|
||||
var layerDescription = layer.type.ToString();
|
||||
|
||||
if (layer.type == Layer.Type.Conv2D || layer.type == Layer.Type.Conv2DTrans ||
|
||||
layer.type == Layer.Type.Conv3D || layer.type == Layer.Type.Conv3DTrans ||
|
||||
layer.type == Layer.Type.DepthwiseConv2D)
|
||||
{
|
||||
layerDescription += "_" + ConvShapeToString(layer);
|
||||
}
|
||||
|
||||
if (layer.activation != Layer.Activation.None)
|
||||
layerDescription += "_" + layer.activation.ToString();
|
||||
|
||||
return layerDescription;
|
||||
}
|
||||
|
||||
static string ConvShapeToString(Layer layer)
|
||||
{
|
||||
if (layer.type == Layer.Type.Conv2D ||
|
||||
layer.type == Layer.Type.DepthwiseConv2D ||
|
||||
layer.type == Layer.Type.Conv2DTrans)
|
||||
return string.Join("_",
|
||||
layer.datasets.Where(d => d.name.EndsWith("/K")).Select(it =>
|
||||
$"{it.shape.kernelHeight}x{it.shape.kernelWidth}x{it.shape.kernelDepth}x{it.shape.kernelCount}"));
|
||||
|
||||
if (layer.type == Layer.Type.Conv3D ||
|
||||
layer.type == Layer.Type.Conv3DTrans)
|
||||
return string.Join("_",
|
||||
layer.datasets.Where(d => d.name.EndsWith("/K")).Select(it =>
|
||||
$"{it.shape.kernelSpatialDepth}x{it.shape.kernelHeight}x{it.shape.kernelWidth}x{it.shape.kernelDepth}x{it.shape.kernelCount}"));
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
static string AnalyzeWarnings(Model importedModel)
|
||||
{
|
||||
return "[" + string.Join(",",importedModel.Warnings.Select(item => $"'{item.LayerName}:{item.Message}'")) + "]";
|
||||
}
|
||||
|
||||
static string DictionaryToJson(Dictionary<string, int> dict)
|
||||
{
|
||||
var entries = dict.Select(d => $"\"{d.Key}\":{string.Join(",", d.Value)}");
|
||||
return "{" + string.Join(",", entries) + "}";
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 92cb0e57f8c0c4255a2d2d93f844424d
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 2.3 KiB |
@@ -1,106 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 8682ff569c4c7457a8a8e3a527aad537
|
||||
TextureImporter:
|
||||
fileIDToRecycleName: {}
|
||||
externalObjects: {}
|
||||
serializedVersion: 4
|
||||
mipmaps:
|
||||
mipMapMode: 0
|
||||
enableMipMap: 0
|
||||
sRGBTexture: 0
|
||||
linearTexture: 0
|
||||
fadeOut: 0
|
||||
borderMipMap: 0
|
||||
mipMapsPreserveCoverage: 0
|
||||
alphaTestReferenceValue: 0.5
|
||||
mipMapFadeDistanceStart: 1
|
||||
mipMapFadeDistanceEnd: 3
|
||||
bumpmap:
|
||||
convertToNormalMap: 0
|
||||
externalNormalMap: 0
|
||||
heightScale: 0.25
|
||||
normalMapFilter: 0
|
||||
isReadable: 0
|
||||
grayScaleToAlpha: 0
|
||||
generateCubemap: 6
|
||||
cubemapConvolution: 0
|
||||
seamlessCubemap: 0
|
||||
textureFormat: 1
|
||||
maxTextureSize: 2048
|
||||
textureSettings:
|
||||
serializedVersion: 2
|
||||
filterMode: -1
|
||||
aniso: 1
|
||||
mipBias: -1
|
||||
wrapU: 1
|
||||
wrapV: 1
|
||||
wrapW: -1
|
||||
nPOTScale: 0
|
||||
lightmap: 0
|
||||
compressionQuality: 50
|
||||
spriteMode: 0
|
||||
spriteExtrude: 1
|
||||
spriteMeshType: 1
|
||||
alignment: 0
|
||||
spritePivot: {x: 0.5, y: 0.5}
|
||||
spritePixelsToUnits: 100
|
||||
spriteBorder: {x: 0, y: 0, z: 0, w: 0}
|
||||
spriteGenerateFallbackPhysicsShape: 1
|
||||
alphaUsage: 1
|
||||
alphaIsTransparency: 1
|
||||
spriteTessellationDetail: -1
|
||||
textureType: 2
|
||||
textureShape: 1
|
||||
maxTextureSizeSet: 0
|
||||
compressionQualitySet: 0
|
||||
textureFormatSet: 0
|
||||
platformSettings:
|
||||
- buildTarget: DefaultTexturePlatform
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 1
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- buildTarget: Standalone
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 1
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- buildTarget: iPhone
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 1
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- buildTarget: Android
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 1
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
spriteSheet:
|
||||
serializedVersion: 2
|
||||
sprites: []
|
||||
outline: []
|
||||
physicsShape: []
|
||||
spritePackingTag:
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,63 +0,0 @@
|
||||
using System.IO;
|
||||
using Unity.Barracuda.Editor;
|
||||
using UnityEditor;
|
||||
using UnityEngine;
|
||||
#if UNITY_2020_2_OR_NEWER
|
||||
using UnityEditor.AssetImporters;
|
||||
using UnityEditor.Experimental.AssetImporters;
|
||||
#else
|
||||
using UnityEditor.Experimental.AssetImporters;
|
||||
#endif
|
||||
|
||||
namespace Unity.Barracuda
|
||||
{
|
||||
/// <summary>
|
||||
/// Asset Importer of barracuda models.
|
||||
/// </summary>
|
||||
[ScriptedImporter(3, new[] {"nn"})]
|
||||
public class NNModelImporter : ScriptedImporter {
|
||||
private const string iconName = "NNModelIcon";
|
||||
|
||||
private Texture2D iconTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Scripted importer callback
|
||||
/// </summary>
|
||||
/// <param name="ctx">Asset import context</param>
|
||||
public override void OnImportAsset(AssetImportContext ctx)
|
||||
{
|
||||
var model = File.ReadAllBytes(ctx.assetPath);
|
||||
|
||||
// Analyze model and send analytics if enabled
|
||||
var nnModel = ModelLoader.Load(ctx.assetPath, skipWeights:true);
|
||||
BarracudaAnalytics.SendBarracudaImportEvent(null, nnModel);
|
||||
|
||||
var assetData = ScriptableObject.CreateInstance<NNModelData>();
|
||||
assetData.Value = model;
|
||||
assetData.name = "Data";
|
||||
assetData.hideFlags = HideFlags.HideInHierarchy;
|
||||
|
||||
var asset = ScriptableObject.CreateInstance<NNModel>();
|
||||
asset.modelData = assetData;
|
||||
ctx.AddObjectToAsset("main obj", asset, LoadIconTexture());
|
||||
ctx.AddObjectToAsset("model data", assetData);
|
||||
|
||||
ctx.SetMainObject(asset);
|
||||
}
|
||||
|
||||
private Texture2D LoadIconTexture()
|
||||
{
|
||||
if (iconTexture == null)
|
||||
{
|
||||
string[] allCandidates = AssetDatabase.FindAssets(iconName);
|
||||
|
||||
if (allCandidates.Length > 0)
|
||||
{
|
||||
iconTexture = AssetDatabase.LoadAssetAtPath(AssetDatabase.GUIDToAssetPath(allCandidates[0]), typeof(Texture2D)) as Texture2D;
|
||||
}
|
||||
}
|
||||
return iconTexture;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 19ed1486aa27d4903b34839f37b8f69f
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 4.6 KiB |
@@ -1,165 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 44179f4142e33e24ca4feb8dfe55e56c
|
||||
TextureImporter:
|
||||
fileIDToRecycleName: {}
|
||||
externalObjects: {}
|
||||
serializedVersion: 9
|
||||
mipmaps:
|
||||
mipMapMode: 0
|
||||
enableMipMap: 0
|
||||
sRGBTexture: 1
|
||||
linearTexture: 0
|
||||
fadeOut: 0
|
||||
borderMipMap: 0
|
||||
mipMapsPreserveCoverage: 0
|
||||
alphaTestReferenceValue: 0.5
|
||||
mipMapFadeDistanceStart: 1
|
||||
mipMapFadeDistanceEnd: 3
|
||||
bumpmap:
|
||||
convertToNormalMap: 0
|
||||
externalNormalMap: 0
|
||||
heightScale: 0.25
|
||||
normalMapFilter: 0
|
||||
isReadable: 0
|
||||
streamingMipmaps: 0
|
||||
streamingMipmapsPriority: 0
|
||||
grayScaleToAlpha: 0
|
||||
generateCubemap: 6
|
||||
cubemapConvolution: 0
|
||||
seamlessCubemap: 0
|
||||
textureFormat: 1
|
||||
maxTextureSize: 2048
|
||||
textureSettings:
|
||||
serializedVersion: 2
|
||||
filterMode: -1
|
||||
aniso: -1
|
||||
mipBias: -100
|
||||
wrapU: -1
|
||||
wrapV: -1
|
||||
wrapW: -1
|
||||
nPOTScale: 1
|
||||
lightmap: 0
|
||||
compressionQuality: 50
|
||||
spriteMode: 0
|
||||
spriteExtrude: 1
|
||||
spriteMeshType: 1
|
||||
alignment: 0
|
||||
spritePivot: {x: 0.5, y: 0.5}
|
||||
spritePixelsToUnits: 100
|
||||
spriteBorder: {x: 0, y: 0, z: 0, w: 0}
|
||||
spriteGenerateFallbackPhysicsShape: 1
|
||||
alphaUsage: 1
|
||||
alphaIsTransparency: 0
|
||||
spriteTessellationDetail: -1
|
||||
textureType: 0
|
||||
textureShape: 1
|
||||
singleChannelComponent: 0
|
||||
maxTextureSizeSet: 0
|
||||
compressionQualitySet: 0
|
||||
textureFormatSet: 0
|
||||
platformSettings:
|
||||
- serializedVersion: 2
|
||||
buildTarget: DefaultTexturePlatform
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- serializedVersion: 2
|
||||
buildTarget: Standalone
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- serializedVersion: 2
|
||||
buildTarget: iPhone
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- serializedVersion: 2
|
||||
buildTarget: tvOS
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- serializedVersion: 2
|
||||
buildTarget: Android
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- serializedVersion: 2
|
||||
buildTarget: PS4
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- serializedVersion: 2
|
||||
buildTarget: Windows Store Apps
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- serializedVersion: 2
|
||||
buildTarget: WebGL
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
spriteSheet:
|
||||
serializedVersion: 2
|
||||
sprites: []
|
||||
outline: []
|
||||
physicsShape: []
|
||||
bones: []
|
||||
spriteID:
|
||||
vertices: []
|
||||
indices:
|
||||
edges: []
|
||||
weights: []
|
||||
spritePackingTag:
|
||||
pSDRemoveMatte: 0
|
||||
pSDShowRemoveMatteOption: 0
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,106 +0,0 @@
|
||||
using UnityEngine;
|
||||
using UnityEditor;
|
||||
#if UNITY_2020_2_OR_NEWER
|
||||
using UnityEditor.AssetImporters;
|
||||
using UnityEditor.Experimental.AssetImporters;
|
||||
#else
|
||||
using UnityEditor.Experimental.AssetImporters;
|
||||
#endif
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Runtime.CompilerServices;
|
||||
using Unity.Barracuda.Editor;
|
||||
using Unity.Barracuda.ONNX;
|
||||
|
||||
[assembly: InternalsVisibleToAttribute("Barracuda.EditorTests")]
|
||||
[assembly: InternalsVisibleToAttribute("Unity.Barracuda.Tests")]
|
||||
|
||||
namespace Unity.Barracuda
|
||||
{
|
||||
/// <summary>
|
||||
/// Asset Importer for Open Neural Network Exchange (ONNX) files.
|
||||
/// For more information about ONNX file format see: https://github.com/onnx/onnx
|
||||
/// </summary>
|
||||
[ScriptedImporter(34, new[] { "onnx" })]
|
||||
public class ONNXModelImporter : ScriptedImporter
|
||||
{
|
||||
// Configuration
|
||||
/// <summary>
|
||||
/// Enable ONNX model optimization during import. Set via importer UI
|
||||
/// </summary>
|
||||
public bool optimizeModel = true;
|
||||
|
||||
/// <summary>
|
||||
/// Fix batch size for ONNX models. Set via importer UI
|
||||
/// </summary>
|
||||
public bool forceArbitraryBatchSize = true;
|
||||
|
||||
/// <summary>
|
||||
/// Treat errors as warnings. Set via importer UI
|
||||
/// </summary>
|
||||
public bool treatErrorsAsWarnings = false;
|
||||
|
||||
[SerializeField, HideInInspector]
|
||||
internal ONNXModelConverter.ImportMode importMode = ONNXModelConverter.ImportMode.Standard;
|
||||
|
||||
[SerializeField, HideInInspector]
|
||||
internal ONNXModelConverter.DataTypeMode weightsTypeMode = ONNXModelConverter.DataTypeMode.Default;
|
||||
[SerializeField, HideInInspector]
|
||||
internal ONNXModelConverter.DataTypeMode activationTypeMode = ONNXModelConverter.DataTypeMode.Default;
|
||||
|
||||
internal const string iconName = "ONNXModelIcon";
|
||||
|
||||
|
||||
private Texture2D m_IconTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Scripted importer callback
|
||||
/// </summary>
|
||||
/// <param name="ctx">Asset import context</param>
|
||||
public override void OnImportAsset(AssetImportContext ctx)
|
||||
{
|
||||
ONNXModelConverter.ModelImported += BarracudaAnalytics.SendBarracudaImportEvent;
|
||||
var converter = new ONNXModelConverter(optimizeModel, treatErrorsAsWarnings, forceArbitraryBatchSize, importMode);
|
||||
|
||||
var model = converter.Convert(ctx.assetPath);
|
||||
|
||||
if (weightsTypeMode == ONNXModelConverter.DataTypeMode.ForceHalf)
|
||||
model.ConvertWeights(DataType.Half);
|
||||
else if (weightsTypeMode == ONNXModelConverter.DataTypeMode.ForceFloat)
|
||||
model.ConvertWeights(DataType.Float);
|
||||
|
||||
NNModelData assetData = ScriptableObject.CreateInstance<NNModelData>();
|
||||
using (var memoryStream = new MemoryStream())
|
||||
using (var writer = new BinaryWriter(memoryStream))
|
||||
{
|
||||
ModelWriter.Save(writer, model);
|
||||
assetData.Value = memoryStream.ToArray();
|
||||
}
|
||||
assetData.name = "Data";
|
||||
assetData.hideFlags = HideFlags.HideInHierarchy;
|
||||
|
||||
NNModel asset = ScriptableObject.CreateInstance<NNModel>();
|
||||
asset.modelData = assetData;
|
||||
|
||||
ctx.AddObjectToAsset("main obj", asset, LoadIconTexture());
|
||||
ctx.AddObjectToAsset("model data", assetData);
|
||||
|
||||
ctx.SetMainObject(asset);
|
||||
}
|
||||
|
||||
// Icon helper
|
||||
private Texture2D LoadIconTexture()
|
||||
{
|
||||
if (m_IconTexture == null)
|
||||
{
|
||||
string[] allCandidates = AssetDatabase.FindAssets(iconName);
|
||||
|
||||
if (allCandidates.Length > 0)
|
||||
{
|
||||
m_IconTexture = AssetDatabase.LoadAssetAtPath(AssetDatabase.GUIDToAssetPath(allCandidates[0]), typeof(Texture2D)) as Texture2D;
|
||||
}
|
||||
}
|
||||
return m_IconTexture;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 683b6cb6d0a474744822c888b46772c9
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,461 +0,0 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using UnityEditor;
|
||||
#if UNITY_2020_2_OR_NEWER
|
||||
using UnityEditor.AssetImporters;
|
||||
using UnityEditor.Experimental.AssetImporters;
|
||||
#else
|
||||
using UnityEditor.Experimental.AssetImporters;
|
||||
#endif
|
||||
using UnityEngine;
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Reflection;
|
||||
using Unity.Barracuda.ONNX;
|
||||
using ImportMode=Unity.Barracuda.ONNX.ONNXModelConverter.ImportMode;
|
||||
using DataTypeMode=Unity.Barracuda.ONNX.ONNXModelConverter.DataTypeMode;
|
||||
|
||||
namespace Unity.Barracuda.Editor
|
||||
{
|
||||
/// <summary>
|
||||
/// Asset Importer Editor of ONNX models
|
||||
/// </summary>
|
||||
[CustomEditor(typeof(ONNXModelImporter))]
|
||||
[CanEditMultipleObjects]
|
||||
public class ONNXModelImporterEditor : ScriptedImporterEditor
|
||||
{
|
||||
static PropertyInfo s_InspectorModeInfo;
|
||||
static ONNXModelImporterEditor()
|
||||
{
|
||||
s_InspectorModeInfo = typeof(SerializedObject).GetProperty("inspectorMode", BindingFlags.NonPublic | BindingFlags.Instance);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Scripted importer editor UI callback
|
||||
/// </summary>
|
||||
public override void OnInspectorGUI()
|
||||
{
|
||||
var onnxModelImporter = target as ONNXModelImporter;
|
||||
if (onnxModelImporter == null)
|
||||
return;
|
||||
|
||||
InspectorMode inspectorMode = InspectorMode.Normal;
|
||||
if (s_InspectorModeInfo != null)
|
||||
inspectorMode = (InspectorMode)s_InspectorModeInfo.GetValue(assetSerializedObject);
|
||||
|
||||
serializedObject.Update();
|
||||
|
||||
bool debugView = inspectorMode != InspectorMode.Normal;
|
||||
SerializedProperty iterator = serializedObject.GetIterator();
|
||||
for (bool enterChildren = true; iterator.NextVisible(enterChildren); enterChildren = false)
|
||||
{
|
||||
if (iterator.propertyPath != "m_Script")
|
||||
EditorGUILayout.PropertyField(iterator, true);
|
||||
}
|
||||
|
||||
// Additional options exposed from ImportMode
|
||||
SerializedProperty importModeProperty = serializedObject.FindProperty(nameof(onnxModelImporter.importMode));
|
||||
bool skipMetadataImport = ((ImportMode)importModeProperty.intValue).HasFlag(ImportMode.SkipMetadataImport);
|
||||
if (EditorGUILayout.Toggle("Skip Metadata Import", skipMetadataImport) != skipMetadataImport)
|
||||
{
|
||||
importModeProperty.intValue ^= (int)ImportMode.SkipMetadataImport;
|
||||
}
|
||||
|
||||
if (debugView)
|
||||
{
|
||||
importModeProperty.intValue = (int)(ImportMode)EditorGUILayout.EnumFlagsField("Import Mode", (ImportMode)importModeProperty.intValue);
|
||||
|
||||
SerializedProperty weightsTypeMode = serializedObject.FindProperty(nameof(onnxModelImporter.weightsTypeMode));
|
||||
SerializedProperty activationTypeMode = serializedObject.FindProperty(nameof(onnxModelImporter.activationTypeMode));
|
||||
weightsTypeMode.intValue = (int)(DataTypeMode)EditorGUILayout.EnumPopup("Weights type", (DataTypeMode)weightsTypeMode.intValue);
|
||||
activationTypeMode.intValue = (int)(DataTypeMode)EditorGUILayout.EnumPopup("Activation type", (DataTypeMode)activationTypeMode.intValue);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (onnxModelImporter.optimizeModel)
|
||||
EditorGUILayout.HelpBox("Model optimizations are on\nRemove and re-import model if you observe incorrect behavior", MessageType.Info);
|
||||
|
||||
if (onnxModelImporter.importMode == ImportMode.Legacy)
|
||||
EditorGUILayout.HelpBox("Legacy importer is in use", MessageType.Warning);
|
||||
}
|
||||
|
||||
serializedObject.ApplyModifiedProperties();
|
||||
|
||||
ApplyRevertGUI();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Asset Importer Editor of NNModel (the serialized file generated by ONNXModelImporter)
|
||||
/// </summary>
|
||||
[CustomEditor(typeof(NNModel))]
|
||||
public class NNModelEditor : UnityEditor.Editor
|
||||
{
|
||||
// Use a static store for the foldouts, so it applies to all inspectors
|
||||
static Dictionary<string, bool> s_UIHelperFoldouts = new Dictionary<string, bool>();
|
||||
|
||||
private Model m_Model;
|
||||
private List<string> m_Inputs = new List<string>();
|
||||
private List<string> m_InputsDesc = new List<string>();
|
||||
private List<string> m_Outputs = new List<string>();
|
||||
private List<string> m_OutputsDesc = new List<string>();
|
||||
private List<string> m_Memories = new List<string>();
|
||||
private List<string> m_MemoriesDesc = new List<string>();
|
||||
private List<string> m_Layers = new List<string>();
|
||||
private List<string> m_LayersDesc = new List<string>();
|
||||
private List<string> m_Constants = new List<string>();
|
||||
private List<string> m_ConstantsDesc = new List<string>();
|
||||
|
||||
Dictionary<string, string> m_Metadata = new Dictionary<string, string>();
|
||||
Vector2 m_MetadataScrollPosition = Vector2.zero;
|
||||
// warnings
|
||||
private Dictionary<string, string> m_WarningsNeutral = new Dictionary<string, string>();
|
||||
private Dictionary<string, string> m_WarningsInfo = new Dictionary<string, string>();
|
||||
private Dictionary<string, string> m_WarningsWarning = new Dictionary<string, string>();
|
||||
private Dictionary<string, string> m_WarningsError = new Dictionary<string, string>();
|
||||
private Vector2 m_WarningsNeutralScrollPosition = Vector2.zero;
|
||||
private Vector2 m_WarningsInfoScrollPosition = Vector2.zero;
|
||||
private Vector2 m_WarningsWarningScrollPosition = Vector2.zero;
|
||||
private Vector2 m_WarningsErrorScrollPosition = Vector2.zero;
|
||||
|
||||
|
||||
private long m_NumEmbeddedWeights;
|
||||
private long m_NumConstantWeights;
|
||||
private long m_TotalWeightsSizeInBytes;
|
||||
|
||||
private Vector2 m_InputsScrollPosition = Vector2.zero;
|
||||
private Vector2 m_OutputsScrollPosition = Vector2.zero;
|
||||
private Vector2 m_MemoriesScrollPosition = Vector2.zero;
|
||||
private Vector2 m_LayerScrollPosition = Vector2.zero;
|
||||
private Vector2 m_ConstantScrollPosition = Vector2.zero;
|
||||
private const float k_Space = 5f;
|
||||
|
||||
private Texture2D m_IconTexture;
|
||||
private Texture2D LoadIconTexture()
|
||||
{
|
||||
if (m_IconTexture != null)
|
||||
return m_IconTexture;
|
||||
|
||||
string[] allCandidates = AssetDatabase.FindAssets(ONNXModelImporter.iconName);
|
||||
if (allCandidates.Length > 0)
|
||||
m_IconTexture = AssetDatabase.LoadAssetAtPath(AssetDatabase.GUIDToAssetPath(allCandidates[0]), typeof(Texture2D)) as Texture2D;
|
||||
|
||||
return m_IconTexture;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Editor static preview rendering callback
|
||||
/// </summary>
|
||||
/// <param name="assetPath">Asset path</param>
|
||||
/// <param name="subAssets">Child assets</param>
|
||||
/// <param name="width">width</param>
|
||||
/// <param name="height">height</param>
|
||||
/// <returns></returns>
|
||||
public override Texture2D RenderStaticPreview(string assetPath, UnityEngine.Object[] subAssets, int width, int height)
|
||||
{
|
||||
Texture2D icon = LoadIconTexture();
|
||||
if (icon == null)
|
||||
return null;
|
||||
Texture2D tex = new Texture2D(width, height);
|
||||
EditorUtility.CopySerialized(icon, tex);
|
||||
return tex;
|
||||
}
|
||||
|
||||
private void AddDimension(StringBuilder stringBuilder, string name, int value, bool lastDim=false)
|
||||
{
|
||||
string strValue = (value >= 1) ? value.ToString() : "*";
|
||||
stringBuilder.AppendFormat("{0}:{1}", name, strValue);
|
||||
if (!lastDim)
|
||||
stringBuilder.Append(", ");
|
||||
}
|
||||
|
||||
private string GetUIStringFromShape(int[] shape)
|
||||
{
|
||||
StringBuilder stringBuilder = new StringBuilder("shape: (", 50);
|
||||
if (shape.Length == 8)
|
||||
{
|
||||
bool is8D = (shape[0] > 1 || shape[1] > 1 || shape[3] > 1 || shape[4] > 1);
|
||||
if (is8D) AddDimension(stringBuilder, "s", shape[0]);
|
||||
if (is8D) AddDimension(stringBuilder, "r", shape[1]);
|
||||
AddDimension(stringBuilder, "n", shape[2]);
|
||||
if (is8D) AddDimension(stringBuilder, "t", shape[3]);
|
||||
if (is8D) AddDimension(stringBuilder, "d", shape[4]);
|
||||
AddDimension(stringBuilder, "h", shape[5]);
|
||||
AddDimension(stringBuilder, "w", shape[6]);
|
||||
AddDimension(stringBuilder, "c", shape[7], true);
|
||||
}
|
||||
else
|
||||
{
|
||||
UnityEngine.Debug.Assert(shape.Length == 4);
|
||||
AddDimension(stringBuilder, "n", shape[0]);
|
||||
AddDimension(stringBuilder, "h", shape[1]);
|
||||
AddDimension(stringBuilder, "w", shape[2]);
|
||||
AddDimension(stringBuilder, "c", shape[3], true);
|
||||
}
|
||||
stringBuilder.Append(")");
|
||||
return stringBuilder.ToString();
|
||||
}
|
||||
|
||||
void OnEnable()
|
||||
{
|
||||
var nnModel = target as NNModel;
|
||||
if (nnModel == null)
|
||||
return;
|
||||
if (nnModel.modelData == null)
|
||||
return;
|
||||
|
||||
m_Model = nnModel.GetDeserializedModel();
|
||||
if (m_Model == null)
|
||||
return;
|
||||
|
||||
m_Inputs = m_Model.inputs.Select(i => i.name).ToList();
|
||||
m_InputsDesc = m_Model.inputs.Select(i => GetUIStringFromShape(i.shape)).ToList();
|
||||
m_Outputs = m_Model.outputs.ToList();
|
||||
|
||||
bool allKnownInputShapes = true;
|
||||
var inputShapes = new Dictionary<string, TensorShape>();
|
||||
foreach (var i in m_Model.inputs)
|
||||
{
|
||||
allKnownInputShapes = allKnownInputShapes && ModelAnalyzer.IsInputShapeAcceptablyKnowForShapeInference(i);
|
||||
if (!allKnownInputShapes)
|
||||
break;
|
||||
inputShapes.Add(i.name, new TensorShape(i.shape));
|
||||
}
|
||||
if (allKnownInputShapes)
|
||||
{
|
||||
m_OutputsDesc = m_Model.outputs.Select(i => {
|
||||
string output = "shape: (n:*, h:*, w:*, c:*)";
|
||||
try
|
||||
{
|
||||
TensorShape shape;
|
||||
if (ModelAnalyzer.TryGetOutputTensorShape(m_Model, inputShapes, i, out shape))
|
||||
output = GetUIStringFromShape(shape.ToArray());
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Debug.LogError($"Unexpected error while evaluating model output {i}. {e}");
|
||||
}
|
||||
return output; }).ToList();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_OutputsDesc = m_Model.outputs.Select(i => "shape: (n:*, h:*, w:*, c:*)").ToList();
|
||||
}
|
||||
|
||||
m_Memories = m_Model.memories.Select(i => i.input).ToList();
|
||||
m_MemoriesDesc = m_Model.memories.Select(i => $"shape:{i.shape.ToString()} output:{i.output}").ToList();
|
||||
|
||||
var layers = m_Model.layers.Where(i => i.type != Layer.Type.Load);
|
||||
var constants = m_Model.layers.Where(i => i.type == Layer.Type.Load);
|
||||
|
||||
m_Layers = layers.Select(i => i.type.ToString()).ToList();
|
||||
m_LayersDesc = layers.Select(i => i.ToString()).ToList();
|
||||
m_Constants = constants.Select(i => i.type.ToString()).ToList();
|
||||
m_ConstantsDesc = constants.Select(i => i.ToString()).ToList();
|
||||
|
||||
m_NumEmbeddedWeights = layers.Sum(l => (long)l.datasets.Sum(ds => (long)ds.length));
|
||||
m_NumConstantWeights = constants.Sum(l => (long)l.datasets.Sum(ds => (long)ds.length));
|
||||
|
||||
// weights are not loaded for UI, recompute size
|
||||
m_TotalWeightsSizeInBytes = 0;
|
||||
for (var l = 0; l < m_Model.layers.Count; ++l)
|
||||
for (var d = 0; d < m_Model.layers[l].datasets.Length; ++d)
|
||||
m_TotalWeightsSizeInBytes += m_Model.layers[l].datasets[d].length * m_Model.layers[l].datasets[d].itemSizeInBytes;
|
||||
|
||||
m_Metadata = new Dictionary<string, string>(m_Model.Metadata);
|
||||
|
||||
for (int i = 0; i < m_Model.Warnings.Count; i++)
|
||||
{
|
||||
var warning = m_Model.Warnings[i].LayerName;
|
||||
var warningDesc = m_Model.Warnings[i].Message;
|
||||
MessageType messageType = MessageType.Warning;
|
||||
if(warningDesc.StartsWith("MessageType"))
|
||||
{
|
||||
messageType = (MessageType)(warningDesc[12] - '0');
|
||||
warningDesc = warningDesc.Substring(13);
|
||||
}
|
||||
|
||||
switch (messageType)
|
||||
{
|
||||
case MessageType.None:
|
||||
m_WarningsNeutral[warning] = warningDesc;
|
||||
break;
|
||||
case MessageType.Info:
|
||||
m_WarningsInfo[warning] = warningDesc;
|
||||
break;
|
||||
case MessageType.Warning:
|
||||
m_WarningsWarning[warning] = warningDesc;
|
||||
break;
|
||||
case MessageType.Error:
|
||||
m_WarningsError[warning] = warningDesc;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void OpenNNModelAsTempFileButton(NNModel nnModel)
|
||||
{
|
||||
if (nnModel == null)
|
||||
return;
|
||||
if (nnModel.modelData == null)
|
||||
return;
|
||||
|
||||
if (GUILayout.Button("Open imported NN model as temp file"))
|
||||
{
|
||||
string tempPath = Application.temporaryCachePath;
|
||||
string filePath = Path.Combine(tempPath, nnModel.name);
|
||||
string filePathWithExtension = Path.ChangeExtension(filePath, "nn");
|
||||
File.WriteAllBytes(filePathWithExtension, nnModel.modelData.Value);
|
||||
System.Diagnostics.Process.Start(filePathWithExtension);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Editor UI rendering callback
|
||||
/// </summary>
|
||||
public override void OnInspectorGUI()
|
||||
{
|
||||
if (m_Model == null)
|
||||
return;
|
||||
|
||||
// HACK: When inspector settings are applied and the file is re-imported there doesn't seem to be a clean way to
|
||||
// get a notification from Unity, so we detect this change
|
||||
var nnModel = target as NNModel;
|
||||
if (nnModel && m_Model != nnModel.GetDeserializedModel())
|
||||
OnEnable(); // Model data changed underneath while inspector was active, so reload
|
||||
|
||||
GUI.enabled = true;
|
||||
OpenNNModelAsTempFileButton(nnModel);
|
||||
GUILayout.Label($"Source: {m_Model.IrSource}");
|
||||
GUILayout.Label($"Version: {m_Model.IrVersion}");
|
||||
GUILayout.Label($"Producer Name: {m_Model.ProducerName}");
|
||||
|
||||
if (m_Metadata.Any())
|
||||
{
|
||||
ListUIHelper($"Metadata {m_Metadata.Count}",
|
||||
m_Metadata.Keys.ToList(), m_Metadata.Values.ToList(), ref m_MetadataScrollPosition);
|
||||
}
|
||||
|
||||
if(m_WarningsError.Any())
|
||||
{
|
||||
ListUIHelper($"Errors {m_WarningsError.Count.ToString()}", m_WarningsError.Keys.ToList(), m_WarningsError.Values.ToList(), ref m_WarningsErrorScrollPosition);
|
||||
EditorGUILayout.HelpBox("Model contains errors. Behavior might be incorrect", MessageType.Error, true);
|
||||
}
|
||||
if(m_WarningsWarning.Any())
|
||||
{
|
||||
ListUIHelper($"Warnings {m_WarningsWarning.Count.ToString()}", m_WarningsWarning.Keys.ToList(), m_WarningsWarning.Values.ToList(), ref m_WarningsWarningScrollPosition);
|
||||
EditorGUILayout.HelpBox("Model contains warnings. Behavior might be incorrect", MessageType.Warning, true);
|
||||
}
|
||||
if(m_WarningsInfo.Any())
|
||||
{
|
||||
ListUIHelper($"Information: ", m_WarningsInfo.Keys.ToList(), m_WarningsInfo.Values.ToList(), ref m_WarningsInfoScrollPosition);
|
||||
EditorGUILayout.HelpBox("Model contains import information.", MessageType.Info, true);
|
||||
}
|
||||
if(m_WarningsNeutral.Any())
|
||||
{
|
||||
ListUIHelper($"Comments: ", m_WarningsNeutral.Keys.ToList(), m_WarningsNeutral.Values.ToList(), ref m_WarningsNeutralScrollPosition);
|
||||
}
|
||||
var constantWeightInfo = m_Constants.Count > 0 ? $" using {m_NumConstantWeights:n0} weights" : "";
|
||||
ListUIHelper($"Inputs ({m_Inputs.Count})", m_Inputs, m_InputsDesc, ref m_InputsScrollPosition);
|
||||
ListUIHelper($"Outputs ({m_Outputs.Count})", m_Outputs, m_OutputsDesc, ref m_OutputsScrollPosition);
|
||||
ListUIHelper($"Memories ({m_Memories.Count})", m_Memories, m_MemoriesDesc, ref m_MemoriesScrollPosition);
|
||||
ListUIHelper($"Layers ({m_Layers.Count} using {m_NumEmbeddedWeights:n0} embedded weights)", m_Layers, m_LayersDesc, ref m_LayerScrollPosition, m_Constants.Count == 0 ? 1.5f: 1f);
|
||||
ListUIHelper($"Constants ({m_Constants.Count}{constantWeightInfo})", m_Constants, m_ConstantsDesc, ref m_ConstantScrollPosition);
|
||||
|
||||
GUILayout.Label($"Total weight size: {m_TotalWeightsSizeInBytes:n0} bytes");
|
||||
}
|
||||
|
||||
private static void ListUIHelper(string sectionTitle, IReadOnlyList<string> names, IReadOnlyList<string> descriptions, ref Vector2 scrollPosition, float maxHeightMultiplier = 1f)
|
||||
{
|
||||
int n = names.Count();
|
||||
UnityEngine.Debug.Assert(descriptions.Count == n);
|
||||
if (descriptions.Count < n)
|
||||
return;
|
||||
|
||||
GUILayout.Space(k_Space);
|
||||
if (!s_UIHelperFoldouts.TryGetValue(sectionTitle, out bool foldout))
|
||||
foldout = true;
|
||||
|
||||
foldout = EditorGUILayout.Foldout(foldout, sectionTitle, true, EditorStyles.foldoutHeader);
|
||||
s_UIHelperFoldouts[sectionTitle] = foldout;
|
||||
if (foldout)
|
||||
{
|
||||
// GUILayout.Label(sectionTitle, EditorStyles.boldLabel);
|
||||
float height = Mathf.Min(n * 20f + 2f, 150f * maxHeightMultiplier);
|
||||
if (n == 0)
|
||||
return;
|
||||
|
||||
scrollPosition = GUILayout.BeginScrollView(scrollPosition, GUI.skin.box, GUILayout.MinHeight(height));
|
||||
Event e = Event.current;
|
||||
float lineHeight = 16.0f;
|
||||
|
||||
StringBuilder fullText = new StringBuilder();
|
||||
fullText.Append(sectionTitle);
|
||||
fullText.AppendLine();
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
string name = names[i];
|
||||
string description = descriptions[i];
|
||||
fullText.Append($"{name} {description}");
|
||||
fullText.AppendLine();
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
Rect r = EditorGUILayout.GetControlRect(false, lineHeight);
|
||||
|
||||
string name = names[i];
|
||||
string description = descriptions[i];
|
||||
|
||||
// Context menu, "Copy"
|
||||
if (e.type == EventType.ContextClick && r.Contains(e.mousePosition))
|
||||
{
|
||||
e.Use();
|
||||
var menu = new GenericMenu();
|
||||
|
||||
// need to copy current value to be used in delegate
|
||||
// (C# closures close over variables, not their values)
|
||||
menu.AddItem(new GUIContent($"Copy current line"), false, delegate
|
||||
{
|
||||
EditorGUIUtility.systemCopyBuffer = $"{name} {description}";
|
||||
});
|
||||
menu.AddItem(new GUIContent($"Copy section"), false, delegate
|
||||
{
|
||||
EditorGUIUtility.systemCopyBuffer = fullText.ToString();
|
||||
});
|
||||
menu.ShowAsContext();
|
||||
}
|
||||
|
||||
// Color even line for readability
|
||||
if (e.type == EventType.Repaint)
|
||||
{
|
||||
GUIStyle st = "CN EntryBackEven";
|
||||
if ((i & 1) == 0)
|
||||
st.Draw(r, false, false, false, false);
|
||||
}
|
||||
|
||||
// layer name on the right side
|
||||
Rect locRect = r;
|
||||
locRect.xMax = locRect.xMin;
|
||||
GUIContent gc = new GUIContent(name.ToString(CultureInfo.InvariantCulture));
|
||||
|
||||
// calculate size so we can left-align it
|
||||
Vector2 size = EditorStyles.miniBoldLabel.CalcSize(gc);
|
||||
locRect.xMax += size.x;
|
||||
GUI.Label(locRect, gc, EditorStyles.miniBoldLabel);
|
||||
locRect.xMax += 2;
|
||||
|
||||
// message
|
||||
Rect msgRect = r;
|
||||
msgRect.xMin = locRect.xMax;
|
||||
GUI.Label(msgRect, new GUIContent(description.ToString(CultureInfo.InvariantCulture)), EditorStyles.miniLabel);
|
||||
}
|
||||
|
||||
GUILayout.EndScrollView();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 08ecb3218a86c6741aed5b2a299b203b
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"name": "Unity.Barracuda.Editor",
|
||||
"references": [
|
||||
"Unity.Barracuda",
|
||||
"Unity.Barracuda.ONNX"
|
||||
],
|
||||
"optionalUnityReferences": [],
|
||||
"includePlatforms": [
|
||||
"Editor"
|
||||
],
|
||||
"excludePlatforms": [],
|
||||
"allowUnsafeCode": false,
|
||||
"overrideReferences": false,
|
||||
"precompiledReferences": [],
|
||||
"autoReferenced": true,
|
||||
"defineConstraints": []
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 9f1e7d835703842dda0e25142ed6c3c9
|
||||
AssemblyDefinitionImporter:
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,8 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: a03a1fa0e3b784e19a9e9d31b945b252
|
||||
folderAsset: yes
|
||||
DefaultImporter:
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,8 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 5bec48e8f6ff349488387cf35fbae752
|
||||
folderAsset: yes
|
||||
DefaultImporter:
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,7 +0,0 @@
|
||||
using System.Reflection;
|
||||
|
||||
// DON'T EDIT
|
||||
// Will be replaced by Tools/Build/build.py
|
||||
[assembly: AssemblyVersion("3.0.0.0")]
|
||||
[assembly: AssemblyFileVersion("3.0.0.0")]
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: f7f9574517c146ada866c486dc392731
|
||||
timeCreated: 1533296387
|
||||
@@ -1,8 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 12a6bedd18899cd4189f66d8188f29ff
|
||||
folderAsset: yes
|
||||
DefaultImporter:
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 67f00a1befd4144eca5685250d893f09
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,194 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq; // ToList()
|
||||
using UnityEngine;
|
||||
using UnityEngine.Assertions;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
|
||||
internal class BarracudaBackendsFactory
|
||||
{
|
||||
public static WorkerFactory.Type ResolveAutoType(WorkerFactory.Type type)
|
||||
{
|
||||
if (type != WorkerFactory.Type.Auto)
|
||||
return type;
|
||||
return GetBestTypeForDevice(WorkerFactory.Device.Auto);
|
||||
}
|
||||
|
||||
internal static WorkerFactory.Type GetBestTypeForDevice(WorkerFactory.Device device)
|
||||
{
|
||||
switch (device)
|
||||
{
|
||||
case WorkerFactory.Device.Auto:
|
||||
case WorkerFactory.Device.GPU:
|
||||
return WorkerFactory.Type.ComputePrecompiled;
|
||||
default:
|
||||
return WorkerFactory.Type.CSharpBurst;
|
||||
}
|
||||
}
|
||||
|
||||
internal static WorkerFactory.Type ValidateType(WorkerFactory.Type type)
|
||||
{
|
||||
type = ResolveAutoType(type);
|
||||
Assert.AreNotEqual(type, WorkerFactory.Type.Auto);
|
||||
|
||||
if (WorkerFactory.IsType(type, WorkerFactory.Device.GPU) && !ComputeShaderSingleton.Instance.supported)
|
||||
{
|
||||
type = WorkerFactory.Type.PixelShader;
|
||||
}
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
private static IOps CreateOps(WorkerFactory.Type type, ITensorAllocator allocator, bool verbose)
|
||||
{
|
||||
switch(type)
|
||||
{
|
||||
case WorkerFactory.Type.ComputePrecompiled:
|
||||
return new PrecompiledComputeOps(allocator, verbose);
|
||||
|
||||
case WorkerFactory.Type.Compute:
|
||||
return new ComputeOps(allocator, verbose);
|
||||
|
||||
case WorkerFactory.Type.ComputeRef:
|
||||
return new ReferenceComputeOps(allocator);
|
||||
|
||||
case WorkerFactory.Type.PixelShader:
|
||||
return new PixelShaderOps(allocator);
|
||||
|
||||
case WorkerFactory.Type.CSharpBurst:
|
||||
return new BurstCPUOps(allocator);
|
||||
|
||||
case WorkerFactory.Type.CSharp:
|
||||
return new UnsafeArrayCPUOps(allocator);
|
||||
|
||||
default:
|
||||
return new ReferenceCPUOps(allocator);
|
||||
}
|
||||
}
|
||||
|
||||
internal static IWorker CreateWorker(WorkerFactory.Type type, Model model, string[] additionalOutputs, string[] trimOutputs, WorkerFactory.WorkerConfiguration workerConfiguration, IModelExecutionsReporter modelExecutionsReporter = null)
|
||||
{
|
||||
type = ResolveAutoType(type);
|
||||
var compareAgainstType = ResolveAutoType(workerConfiguration.compareAgainstType);
|
||||
Assert.AreNotEqual(type, WorkerFactory.Type.Auto);
|
||||
Assert.AreNotEqual(compareAgainstType, WorkerFactory.Type.Auto);
|
||||
|
||||
bool compare = type != compareAgainstType;
|
||||
|
||||
if (WorkerFactory.IsType(type, WorkerFactory.Device.GPU) && !SystemInfo.supportsComputeShaders && !Application.isEditor)
|
||||
{
|
||||
type = WorkerFactory.Type.PixelShader;
|
||||
}
|
||||
|
||||
IVars vars;
|
||||
// PixelShader worker uses Blit/Textures, cannot re-use vars unless the dispatch mechanism allows rendering to sub part of the texture
|
||||
if ((type == WorkerFactory.Type.PixelShader) || (compareAgainstType == WorkerFactory.Type.PixelShader))
|
||||
vars = new GenericVarsWithReuse();
|
||||
else
|
||||
{
|
||||
if (WorkerFactory.IsType(type, WorkerFactory.Device.GPU) || WorkerFactory.IsType(compareAgainstType, WorkerFactory.Device.GPU))
|
||||
vars = new ComputeVarsWithSharedModel();
|
||||
else
|
||||
vars = new DefaultVars();
|
||||
}
|
||||
|
||||
ITensorAllocator allocator = vars.GetAllocator();
|
||||
if ((type == WorkerFactory.Type.PixelShader) || (compareAgainstType == WorkerFactory.Type.PixelShader))
|
||||
allocator = new TensorCachingByShapeAllocator();
|
||||
|
||||
if (workerConfiguration.verbose)
|
||||
D.Log($"Storage type: {vars.GetType()}. Allocator type: {allocator.GetType()}.");
|
||||
|
||||
IOps ops = CreateOps(type, allocator, workerConfiguration.verbose);
|
||||
|
||||
if (compare)
|
||||
ops = new CompareOps(ops,
|
||||
CreateOps(compareAgainstType, allocator, workerConfiguration.verbose), workerConfiguration.compareLogLevel, workerConfiguration.compareEpsilon);
|
||||
|
||||
if (workerConfiguration.verbose || modelExecutionsReporter != null)
|
||||
ops = new VerboseOps(ops, workerConfiguration.verbose);
|
||||
|
||||
if (Application.isEditor || modelExecutionsReporter != null)
|
||||
ops = new StatsOps(ops);
|
||||
|
||||
model = ValidateModel(
|
||||
PatchModel(model, additionalOutputs, trimOutputs));
|
||||
|
||||
ops.SetModelExecutionsReporter(modelExecutionsReporter);
|
||||
return new GenericWorker(model, ops, vars, workerConfiguration.verbose, workerConfiguration.takeoverWeights);
|
||||
}
|
||||
|
||||
internal static Model PatchModel(Model model, string[] additionalOutputs, string[] trimOutputs = null)
|
||||
{
|
||||
bool trimModel = trimOutputs != null;
|
||||
|
||||
if (trimOutputs != null)
|
||||
{
|
||||
foreach (var o in trimOutputs.Except(model.outputs))
|
||||
if (additionalOutputs == null || !additionalOutputs.Contains(o))
|
||||
D.LogWarning($"Output specified in trimOutputs was not found in the model: {o}");
|
||||
|
||||
var newModel = model.ShallowCopy();
|
||||
newModel.outputs = trimOutputs.Intersect(model.outputs).ToList();
|
||||
model = newModel;
|
||||
}
|
||||
|
||||
if (additionalOutputs != null)
|
||||
{
|
||||
foreach (var o in additionalOutputs.Except(model.layers.Select(l => l.name)))
|
||||
D.LogWarning($"Layer specified in additionalOutputs was not found in the model: {o}");
|
||||
|
||||
// 'new' means that output name does not yet exist in model.outputs
|
||||
// 'valid' means that output name matches one of the existing model.layer names
|
||||
var newAndValidAdditionalOutputs =
|
||||
additionalOutputs.Except(model.outputs).Intersect(model.layers.Select(l => l.name));
|
||||
|
||||
var newModel = model.ShallowCopy();
|
||||
newModel.outputs.AddRange(newAndValidAdditionalOutputs);
|
||||
model = newModel;
|
||||
}
|
||||
|
||||
if (trimModel)
|
||||
{
|
||||
var newModel = model.ShallowCopy();
|
||||
var upstream = ModelAnalyzer.FindUpstreamLayers(model, newModel.outputs.ToArray());
|
||||
foreach (var l in model.layers)
|
||||
if (!upstream.Contains(l))
|
||||
newModel.layers.Remove(l);
|
||||
|
||||
model = newModel;
|
||||
}
|
||||
|
||||
model = ModelOptimizer.RemoveNoop(model);
|
||||
|
||||
return model;
|
||||
}
|
||||
|
||||
internal static Model ValidateModel(Model model)
|
||||
{
|
||||
// validate, model contains no broken links
|
||||
var brokenLinks = ModelAnalyzer.FindBrokenLinks(model);
|
||||
if (brokenLinks.Length > 0)
|
||||
D.LogWarning($"Model contains {brokenLinks.Length} broken links: {string.Join(",", brokenLinks)}");
|
||||
|
||||
// validate, all model outputs are unique
|
||||
// https://stackoverflow.com/questions/18547354/c-sharp-linq-find-duplicates-in-list
|
||||
var duplicateOutputs = model.outputs.GroupBy(x => x)
|
||||
.Where(g => g.Count() > 1)
|
||||
.Select(y => y.Key);
|
||||
foreach (var o in duplicateOutputs)
|
||||
D.LogWarning($"Output is specified more than once in the model: {o}");
|
||||
|
||||
// validate, model contains no unconnected layers
|
||||
var unconnectedOutputs = ModelAnalyzer.FindUnconnectedOutputs(model);
|
||||
foreach (var o in unconnectedOutputs)
|
||||
D.LogWarning($"Layer is specified as output, but is missing in the model: {o}");
|
||||
|
||||
return model;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 355dc370391814b1c874848bb843b91c
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,245 +0,0 @@
|
||||
using System.Threading;
|
||||
using UnityEngine;
|
||||
using Unity.Jobs;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
// BarracudaBurstCPU.Core.cs -- definition of class BurstCPUOps, Pin(), BurstTensorData
|
||||
// BarracudaBurstCPU.Ops.cs -- impl. IOps, job schedulers
|
||||
// BarracudaBurstCPU.Jobs.cs -- impl. jobs
|
||||
|
||||
/// <summary>
|
||||
/// Burst specific internal `Tensor` data storage
|
||||
/// </summary>
|
||||
public class BurstTensorData : UnsafeArrayTensorData, IDependableTensorData
|
||||
{
|
||||
private JobHandle m_ReadFence;
|
||||
private JobHandle m_WriteFence;
|
||||
private bool m_SafeToDispose = true;
|
||||
|
||||
/// <inheritdoc/>
|
||||
public JobHandle fence { get { return m_ReadFence; } set { m_ReadFence = value; m_WriteFence = value; m_SafeToDispose = false; } }
|
||||
|
||||
/// <inheritdoc/>
|
||||
public JobHandle reuse { get { return m_WriteFence; } set { m_WriteFence = BurstCPUOps.Dependencies(value, m_WriteFence); m_SafeToDispose = false; } }
|
||||
|
||||
/// <inheritdoc/>
|
||||
public unsafe void* rawPtr => array.RawAddressAt(offset);
|
||||
|
||||
/// <summary>
|
||||
/// Creates new array
|
||||
/// </summary>
|
||||
/// <param name="count">count</param>
|
||||
public BurstTensorData(int count, DataType dataType) : base(count, dataType)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Creates new array
|
||||
/// </summary>
|
||||
/// <param name="shape">shape</param>
|
||||
public BurstTensorData(TensorShape shape, DataType dataType) : base(shape, dataType)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Uses shared array
|
||||
/// </summary>
|
||||
/// <param name="sharedArray">shared array</param>
|
||||
public BurstTensorData(ArrayTensorData sharedArray) : base(sharedArray)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Uses shared array
|
||||
/// </summary>
|
||||
/// <param name="sharedArray">shared array</param>
|
||||
public BurstTensorData(SharedArrayTensorData sharedArray) : base(sharedArray)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Uses unsafe array
|
||||
/// </summary>
|
||||
/// <param name="unsafeArray">unsafe array</param>
|
||||
public BurstTensorData(UnsafeArrayTensorData unsafeArray) : base(unsafeArray.array, unsafeArray.offset, unsafeArray.count, unsafeArray.m_Readonly)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Finalizer
|
||||
/// </summary>
|
||||
~BurstTensorData()
|
||||
{
|
||||
if (!m_SafeToDispose)
|
||||
D.LogWarning($"Found unreferenced, but undisposed Tensor data that potentially participates in an unfinished job and might lead to hazardous memory overwrites: {ToString()}");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Dispose contents
|
||||
/// </summary>
|
||||
public override void Dispose()
|
||||
{
|
||||
// It isn't safe to Complete jobs from a finalizer thread, so
|
||||
if (Thread.CurrentThread == BurstCPUOps.MainThread)
|
||||
CompleteAllPendingOperations();
|
||||
|
||||
base.Dispose();
|
||||
}
|
||||
|
||||
internal void CompleteAllPendingOperations()
|
||||
{
|
||||
fence.Complete();
|
||||
reuse.Complete();
|
||||
m_SafeToDispose = true;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Reserve (allocate) storage for `count` elements
|
||||
/// </summary>
|
||||
/// <param name="count">count</param>
|
||||
public override void Reserve(int count)
|
||||
{
|
||||
if (count > maxCapacity)
|
||||
{
|
||||
// going to reallocate memory in base.Reserve()
|
||||
// thus need to finish current work
|
||||
CompleteAllPendingOperations();
|
||||
}
|
||||
|
||||
base.Reserve(count);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Upload data to internal storage
|
||||
/// </summary>
|
||||
/// <param name="data">data</param>
|
||||
/// <param name="shape">shape</param>
|
||||
/// <param name="managedBufferStartIndex">`data` start index</param>
|
||||
public override void Upload(float[] data, TensorShape shape, int managedBufferStartIndex = 0)
|
||||
{
|
||||
CompleteAllPendingOperations();
|
||||
base.Upload(data, shape, managedBufferStartIndex);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Return data from internal storage
|
||||
/// </summary>
|
||||
/// <param name="shape">shape</param>
|
||||
/// <returns>managed array</returns>
|
||||
public override float[] Download(TensorShape shape)
|
||||
{
|
||||
// Download() as optimization gives direct access to the internal buffer
|
||||
// thus need to prepare internal buffer for potential writes
|
||||
CompleteAllPendingOperations();
|
||||
return base.Download(shape);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Return shared array from internal storage
|
||||
/// </summary>
|
||||
/// <returns>shared array from internal storage</returns>
|
||||
public override BarracudaArray SharedAccess(out int offset)
|
||||
{
|
||||
// SharedAccess() by design gives direct access to the interna
|
||||
// thus need to prepare internal buffer for potential writes
|
||||
CompleteAllPendingOperations();
|
||||
return base.SharedAccess(out offset);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Schedule async internal data download
|
||||
/// </summary>
|
||||
/// <param name="count">count to download</param>
|
||||
/// <returns>`true` if download is completed</returns>
|
||||
public override bool ScheduleAsyncDownload(int count)
|
||||
{
|
||||
return fence.IsCompleted;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Object summary as string
|
||||
/// </summary>
|
||||
/// <returns>object summary</returns>
|
||||
public override string ToString()
|
||||
{
|
||||
string readyToRead = m_SafeToDispose ? "true": "unknown";
|
||||
string readyForReuse = m_SafeToDispose ? "true": "unknown";
|
||||
try
|
||||
{
|
||||
readyToRead = fence.IsCompleted.ToString();
|
||||
readyForReuse = reuse.IsCompleted.ToString();
|
||||
}
|
||||
catch (UnityException) {}
|
||||
return string.Format("(CPU burst: {0} length: {1} offset: {2} uploaded: {3} ready-to-read: {4} ready-for-reuse: {5})",
|
||||
GetHashCode(), m_Array?.Length, m_Offset, m_Count, readyToRead, readyForReuse);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Burst specific implementation of `IOps`
|
||||
/// </summary>
|
||||
public partial class BurstCPUOps : UnsafeArrayCPUOps
|
||||
{
|
||||
/// <summary>
|
||||
/// Create `BurstCPUOps`
|
||||
/// </summary>
|
||||
/// <param name="allocator">allocator</param>
|
||||
public BurstCPUOps(ITensorAllocator allocator = null)
|
||||
: base(allocator)
|
||||
{
|
||||
if (PreferBLAS == BLAS.Native && !blas.IsNative())
|
||||
PreferBLAS = BLAS.Disabled;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Pin `Tensor` to Burst backend device, if `uploadCache` is false, data is not uploaded to device
|
||||
/// </summary>
|
||||
/// <param name="X">`Tensor`</param>
|
||||
/// <param name="uploadCache">`bool`</param>
|
||||
/// <returns>`BurstTensorData`</returns>
|
||||
new public static BurstTensorData Pin(Tensor X, bool uploadCache = true)
|
||||
{
|
||||
X.FlushCache(uploadCache);
|
||||
|
||||
var onDevice = X.tensorOnDevice as BurstTensorData;
|
||||
if (onDevice == null)
|
||||
{
|
||||
// try to adopt CPU arrays
|
||||
var asUnsafeArray = X.tensorOnDevice as UnsafeArrayTensorData;
|
||||
var asSharedArray = X.tensorOnDevice as SharedArrayTensorData;
|
||||
var asArray = X.tensorOnDevice as ArrayTensorData;
|
||||
if (asUnsafeArray != null) X.AttachToDevice(new BurstTensorData(asUnsafeArray));
|
||||
else if (asSharedArray != null) X.AttachToDevice(new BurstTensorData(asSharedArray));
|
||||
else if (asArray != null) X.AttachToDevice(new BurstTensorData(asArray));
|
||||
else
|
||||
{
|
||||
if (uploadCache)
|
||||
X.UploadToDevice(new BurstTensorData(X.shape, X.dataType)); // device is not compatible, create new array and upload
|
||||
else
|
||||
X.AllocateOnDevice(new BurstTensorData(X.shape, X.dataType)); // device is not compatible, create new array but do not upload
|
||||
}
|
||||
}
|
||||
|
||||
return X.tensorOnDevice as BurstTensorData;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Prepare `Tensor` for use with Burst backend
|
||||
/// </summary>
|
||||
/// <param name="X">`Tensor`</param>
|
||||
/// <returns>`Tensor`</returns>
|
||||
public override Tensor Prepare(Tensor X)
|
||||
{
|
||||
Pin(X);
|
||||
return X;
|
||||
}
|
||||
|
||||
public override Tensor PrepareNoAlloc(Tensor X)
|
||||
{
|
||||
Pin(X, uploadCache: false);
|
||||
return X;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Barracuda
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: f44c1c453c1754aaeb1e8608df82452b
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,471 +0,0 @@
|
||||
using UnityEngine;
|
||||
using UnityEngine.Assertions;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Unity.Collections;
|
||||
using Unity.Collections.LowLevel.Unsafe;
|
||||
using Unity.Jobs;
|
||||
using Unity.Mathematics;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
//#region Job output context helper
|
||||
|
||||
internal static class BurstSchedulingHelper
|
||||
{
|
||||
#region Private scheduling helpers with pointer aliasing verification
|
||||
|
||||
private static unsafe JobHandle ScheduleXSBOInternal<T>(T jobData,
|
||||
JobHandle fenceBeforeJobStart,
|
||||
void* ptrX,
|
||||
void* ptrS,
|
||||
void* ptrB,
|
||||
void* ptrO,
|
||||
int arrayLength, int innerloopBatchCount)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXSBO
|
||||
{
|
||||
T jobDataInternalCopy = jobData;
|
||||
jobDataInternalCopy.X = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrX};
|
||||
jobDataInternalCopy.S = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrS};
|
||||
jobDataInternalCopy.B = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrB};
|
||||
jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
|
||||
return jobDataInternalCopy.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
|
||||
}
|
||||
|
||||
private static unsafe JobHandle ScheduleXBOInternal<T>(T jobData,
|
||||
JobHandle fenceBeforeJobStart,
|
||||
void* ptrX,
|
||||
void* ptrB,
|
||||
void* ptrO,
|
||||
int arrayLength, int innerloopBatchCount)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXBO
|
||||
{
|
||||
T jobDataInternalCopy = jobData;
|
||||
jobDataInternalCopy.X = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrX};
|
||||
jobDataInternalCopy.B = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrB};
|
||||
jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
|
||||
return jobDataInternalCopy.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
|
||||
}
|
||||
|
||||
private static unsafe JobHandle ScheduleXOInternal<T>(T jobData,
|
||||
JobHandle fenceBeforeJobStart,
|
||||
void* ptrX,
|
||||
void* ptrO,
|
||||
int arrayLength, int innerloopBatchCount)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXO
|
||||
{
|
||||
T jobDataInternalCopy = jobData;
|
||||
jobDataInternalCopy.X = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrX};
|
||||
jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
|
||||
return jobDataInternalCopy.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
|
||||
}
|
||||
|
||||
private static unsafe JobHandle ScheduleXOInternal<T>(T jobData,
|
||||
JobHandle fenceBeforeJobStart,
|
||||
void* ptrX,
|
||||
void* ptrO)
|
||||
where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationXO
|
||||
{
|
||||
Assert.IsTrue(ptrO != ptrX);
|
||||
T jobDataInternalCopy = jobData;
|
||||
jobDataInternalCopy.X = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrX};
|
||||
jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
|
||||
return jobDataInternalCopy.Schedule(fenceBeforeJobStart);
|
||||
}
|
||||
|
||||
private static unsafe JobHandle ScheduleOInternal<T>(T jobData,
|
||||
JobHandle fenceBeforeJobStart,
|
||||
void* ptrO)
|
||||
where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationO
|
||||
{
|
||||
T jobDataInternalCopy = jobData;
|
||||
jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
|
||||
return jobDataInternalCopy.Schedule(fenceBeforeJobStart);
|
||||
}
|
||||
|
||||
private static unsafe JobHandle ScheduleOInternal<T>(T jobData,
|
||||
JobHandle fenceBeforeJobStart,
|
||||
void* ptrO,
|
||||
int arrayLength, int innerloopBatchCount)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationO
|
||||
{
|
||||
T jobDataInternalCopy = jobData;
|
||||
jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
|
||||
return jobDataInternalCopy.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Private fencing helper for readability
|
||||
private static JobHandle GetFenceBeforeJobStartXSBO(
|
||||
IDependableMemoryResource pinX,
|
||||
IDependableMemoryResource pinS,
|
||||
IDependableMemoryResource pinB,
|
||||
IDependableMemoryResource pinO)
|
||||
{
|
||||
return BurstCPUOps.Dependencies(pinX.fence, pinS.fence, pinB.fence, pinO.reuse);
|
||||
}
|
||||
|
||||
private static JobHandle GetFenceBeforeJobStartXBO(
|
||||
IDependableMemoryResource pinX,
|
||||
IDependableMemoryResource pinB,
|
||||
IDependableMemoryResource pinO)
|
||||
{
|
||||
return BurstCPUOps.Dependencies(pinX.fence, pinB.fence, pinO.reuse);
|
||||
}
|
||||
|
||||
private static JobHandle GetFenceBeforeJobStartXO(
|
||||
IDependableMemoryResource pinX,
|
||||
IDependableMemoryResource pinO)
|
||||
{
|
||||
return BurstCPUOps.Dependencies(pinX.fence, pinO.reuse);
|
||||
}
|
||||
|
||||
private static void SetXSBOFences(this JobHandle jobFence,
|
||||
IDependableMemoryResource pinX,
|
||||
IDependableMemoryResource pinS,
|
||||
IDependableMemoryResource pinB,
|
||||
IDependableMemoryResource pinO)
|
||||
{
|
||||
pinX.reuse = jobFence;
|
||||
pinS.reuse = jobFence;
|
||||
pinB.reuse = jobFence;
|
||||
pinO.fence = jobFence;
|
||||
}
|
||||
|
||||
private static void SetXBOFences(this JobHandle jobFence,
|
||||
IDependableMemoryResource pinX,
|
||||
IDependableMemoryResource pinB,
|
||||
IDependableMemoryResource pinO)
|
||||
{
|
||||
pinX.reuse = jobFence;
|
||||
pinB.reuse = jobFence;
|
||||
pinO.fence = jobFence;
|
||||
}
|
||||
|
||||
private static void SetXOFences(this JobHandle jobFence,
|
||||
IDependableMemoryResource pinX,
|
||||
IDependableMemoryResource pinO)
|
||||
{
|
||||
pinX.reuse = jobFence;
|
||||
pinO.fence = jobFence;
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region Immediate scheduling helper
|
||||
internal enum FencingHelperMode
|
||||
{
|
||||
UpdateResourcesFencesOnScheduling,
|
||||
CustomResourcesFencesHandling,
|
||||
}
|
||||
|
||||
internal static unsafe JobHandle ScheduleXSBO<T>(this T jobData,
|
||||
IDependableMemoryResource rX,
|
||||
IDependableMemoryResource rS,
|
||||
IDependableMemoryResource rB,
|
||||
IDependableMemoryResource rO,
|
||||
int arrayLength, int innerloopBatchCount,
|
||||
FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXSBO
|
||||
{
|
||||
var fenceBeforeJobStart = GetFenceBeforeJobStartXSBO(rX, rS, rB, rO);
|
||||
|
||||
JobHandle jobFence;
|
||||
{
|
||||
jobFence = ScheduleXSBOInternal(jobData, fenceBeforeJobStart, rX.rawPtr, rS.rawPtr, rB.rawPtr, rO.rawPtr, arrayLength, innerloopBatchCount);
|
||||
}
|
||||
|
||||
if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
jobFence.SetXSBOFences(rX, rS, rB, rO);
|
||||
}
|
||||
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
internal static unsafe JobHandle ScheduleXBO<T>(this T jobData,
|
||||
IDependableMemoryResource X,
|
||||
IDependableMemoryResource B,
|
||||
IDependableMemoryResource O,
|
||||
int arrayLength, int innerloopBatchCount,
|
||||
FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXBO
|
||||
{
|
||||
var fenceBeforeJobStart = GetFenceBeforeJobStartXBO(X, B, O);
|
||||
|
||||
JobHandle jobFence;
|
||||
{
|
||||
jobFence = ScheduleXBOInternal(jobData, fenceBeforeJobStart, X.rawPtr, B.rawPtr, O.rawPtr, arrayLength, innerloopBatchCount);
|
||||
}
|
||||
|
||||
if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
jobFence.SetXBOFences(X, B, O);
|
||||
}
|
||||
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
internal static unsafe JobHandle ScheduleO<T>(this T jobData,
|
||||
IDependableMemoryResource O,
|
||||
FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationO
|
||||
{
|
||||
var fenceBeforeJobStart = O.reuse;
|
||||
|
||||
JobHandle jobFence;
|
||||
{
|
||||
jobFence = ScheduleOInternal(jobData, fenceBeforeJobStart, O.rawPtr);
|
||||
}
|
||||
|
||||
if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
O.fence = jobFence;
|
||||
}
|
||||
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
internal static unsafe JobHandle ScheduleXO<T>(this T jobData,
|
||||
IDependableMemoryResource X,
|
||||
IDependableMemoryResource O,
|
||||
int arrayLength, int innerloopBatchCount,
|
||||
FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXO
|
||||
{
|
||||
var fenceBeforeJobStart = GetFenceBeforeJobStartXO(X, O);
|
||||
|
||||
JobHandle jobFence;
|
||||
{
|
||||
jobFence = ScheduleXOInternal(jobData, fenceBeforeJobStart, X.rawPtr, O.rawPtr, arrayLength, innerloopBatchCount);
|
||||
}
|
||||
|
||||
if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
jobFence.SetXOFences(X, O);
|
||||
}
|
||||
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
internal static unsafe JobHandle ScheduleO<T>(this T jobData,
|
||||
BurstTensorData pinO,
|
||||
int offsetO,
|
||||
int arrayLength, int innerloopBatchCount,
|
||||
FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationO
|
||||
{
|
||||
var fenceBeforeJobStart = pinO.reuse;
|
||||
|
||||
JobHandle jobFence;
|
||||
{
|
||||
void* ptrO = pinO.array.RawAddressAt(pinO.offset+offsetO);
|
||||
jobFence = ScheduleOInternal(jobData, fenceBeforeJobStart, ptrO, arrayLength, innerloopBatchCount);
|
||||
}
|
||||
|
||||
if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
pinO.fence = jobFence;
|
||||
}
|
||||
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
internal static unsafe JobHandle ScheduleXO<T>(this T jobData,
|
||||
BurstTensorData pinX,
|
||||
int offsetX,
|
||||
BurstTensorData pinO,
|
||||
int offsetO,
|
||||
FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationXO
|
||||
{
|
||||
var fenceBeforeJobStart = GetFenceBeforeJobStartXO(pinX, pinO);
|
||||
|
||||
JobHandle jobFence;
|
||||
{
|
||||
void* ptrX = pinX.array.RawAddressAt(pinX.offset+offsetX);
|
||||
void* ptrO = pinO.array.RawAddressAt(pinO.offset+offsetO);
|
||||
jobFence = ScheduleXOInternal(jobData, fenceBeforeJobStart, ptrX, ptrO);
|
||||
}
|
||||
|
||||
if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
jobFence.SetXOFences(pinX, pinO);
|
||||
}
|
||||
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
internal static unsafe JobHandle ScheduleXO<T>(this T jobData,
|
||||
IDependableMemoryResource X,
|
||||
IDependableMemoryResource O,
|
||||
FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationXO
|
||||
{
|
||||
var fenceBeforeJobStart = GetFenceBeforeJobStartXO(X, O);
|
||||
|
||||
JobHandle jobFence;
|
||||
{
|
||||
jobFence = ScheduleXOInternal(jobData, fenceBeforeJobStart, X.rawPtr, O.rawPtr);
|
||||
}
|
||||
|
||||
if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
jobFence.SetXOFences(X, O);
|
||||
}
|
||||
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
|
||||
#region Schedulling helper for parrallel jobs
|
||||
|
||||
internal struct ParallelJobsContext : IDisposable
|
||||
{
|
||||
internal static Dictionary<IDependableMemoryResource, JobHandle> s_ReadDependencyTracker =
|
||||
new Dictionary<IDependableMemoryResource, JobHandle>(100);
|
||||
|
||||
private readonly IDependableMemoryResource outputResource;
|
||||
private JobHandle combinedJobFence;
|
||||
|
||||
public ParallelJobsContext(IDependableMemoryResource output)
|
||||
{
|
||||
outputResource = output;
|
||||
combinedJobFence = new JobHandle();
|
||||
Assert.AreEqual(0, s_ReadDependencyTracker.Count,
|
||||
"s_ReadDependencyTracker should be empty meaning ParrallelJobs was not disposed properly.");
|
||||
}
|
||||
|
||||
//For now only CopyStrideJobHelper and tests need ParallelJobsContext. If this code need to be duplicated for more case in the future:
|
||||
//- Maybe add generic version by having CopyStrideJobHelper and other helper struct implement an interface (but beware of GC).
|
||||
//- Or make ParallelJobsContext partial and code generated by jobs template.
|
||||
public JobHandle ScheduleXO(
|
||||
BurstCPUOps.CopyStrideJobHelper jobData,//See comment above.
|
||||
BurstTensorData pinX, int offsetX,
|
||||
BurstTensorData pinO, int offsetO)
|
||||
{
|
||||
Assert.IsTrue(pinO == outputResource);
|
||||
var jobFence = jobData.ScheduleXO(pinX, offsetX, pinO, offsetO, BurstSchedulingHelper.FencingHelperMode.CustomResourcesFencesHandling);
|
||||
TrackJobReadDependencies(pinX, jobFence);
|
||||
AddJobDependencyToOutputFence(jobFence);
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
public JobHandle ScheduleXO<T>(
|
||||
T jobData,
|
||||
BurstTensorData pinX,
|
||||
BurstTensorData pinO,
|
||||
int arrayLength, int innerloopBatchCount)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXO
|
||||
{
|
||||
Assert.IsTrue(pinO == outputResource);
|
||||
var jobFence = jobData.ScheduleXO(pinX, pinO, arrayLength, innerloopBatchCount, BurstSchedulingHelper.FencingHelperMode.CustomResourcesFencesHandling);
|
||||
TrackJobReadDependencies(pinX, jobFence);
|
||||
AddJobDependencyToOutputFence(jobFence);
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
|
||||
public JobHandle ScheduleXBO<T>(
|
||||
T jobData,
|
||||
BurstTensorData pinX,
|
||||
BurstTensorData pinB,
|
||||
BurstTensorData pinO,
|
||||
int arrayLength, int innerloopBatchCount)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXBO
|
||||
{
|
||||
Assert.IsTrue(pinO == outputResource);
|
||||
var jobFence = jobData.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerloopBatchCount, BurstSchedulingHelper.FencingHelperMode.CustomResourcesFencesHandling);
|
||||
TrackJobReadDependencies(pinX, jobFence);
|
||||
TrackJobReadDependencies(pinB, jobFence);
|
||||
AddJobDependencyToOutputFence(jobFence);
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
internal void AddJobDependencyToOutputFence(JobHandle jobFence)
|
||||
{
|
||||
//Once all jobs writing to O will be done, further jobs will be able to read from O.
|
||||
//We combine job fences from all job writing to O here and assign to O.fence in Dispose().
|
||||
combinedJobFence = JobHandle.CombineDependencies(combinedJobFence, jobFence);
|
||||
}
|
||||
|
||||
internal void TrackJobReadDependencies(IDependableMemoryResource T, JobHandle jobFence)
|
||||
{
|
||||
//Once all jobs reading from T will be done, further jobs will be able to write to T.
|
||||
//We combine job fences from all jobs reading from T here and assign to T.reuse in Dispose().
|
||||
if (T != null)
|
||||
{
|
||||
if (s_ReadDependencyTracker.ContainsKey(T))
|
||||
s_ReadDependencyTracker[T] = JobHandle.CombineDependencies(s_ReadDependencyTracker[T], jobFence);
|
||||
else
|
||||
s_ReadDependencyTracker[T] = jobFence;
|
||||
}
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
foreach (var key in s_ReadDependencyTracker.Keys)
|
||||
{
|
||||
key.reuse = s_ReadDependencyTracker[key];
|
||||
}
|
||||
outputResource.fence = combinedJobFence;
|
||||
s_ReadDependencyTracker.Clear();
|
||||
}
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Memory allocation wrapper usable by job fencing helpers
|
||||
|
||||
internal unsafe class FencedMemoryAlloc : IDependableMemoryResource
|
||||
{
|
||||
private JobHandle m_ReadFence;
|
||||
private JobHandle m_WriteFence;
|
||||
private void* data;
|
||||
public void* rawPtr => data;
|
||||
public half* halfdata { get { Assert.AreEqual(DataType.Half, type); return (half*) data; } }
|
||||
public float* floatdata { get { Assert.AreEqual(DataType.Float, type);return (float*) data; } }
|
||||
public DataType type;
|
||||
public int elementCount;
|
||||
public int elementSize;
|
||||
|
||||
/// <inheritdoc/>
|
||||
public JobHandle fence { get { return m_ReadFence; } set { m_ReadFence = value; m_WriteFence = value; } }
|
||||
|
||||
/// <inheritdoc/>
|
||||
public JobHandle reuse { get { return m_WriteFence; } set { m_WriteFence = value; } }
|
||||
|
||||
public void Allocate(int numElement, DataType dataType, int alignment, Allocator allocator)
|
||||
{
|
||||
m_ReadFence = new JobHandle();
|
||||
m_WriteFence = new JobHandle();
|
||||
elementCount = numElement;
|
||||
elementSize = BarracudaArray.DataItemSize(dataType);
|
||||
type = dataType;
|
||||
Assert.IsTrue(data == null, "Please call ClearState() when freeing underlying memory.");
|
||||
Assert.IsTrue(alignment % elementSize == 0);
|
||||
data = UnsafeUtility.Malloc(elementCount * elementSize, alignment, allocator);
|
||||
Assert.IsTrue(data != null);
|
||||
}
|
||||
|
||||
public void ClearState()
|
||||
{
|
||||
m_ReadFence = new JobHandle();
|
||||
m_WriteFence = new JobHandle();
|
||||
elementCount = 0;
|
||||
elementSize = 0;
|
||||
type = DataType.Float;
|
||||
data = null;
|
||||
}
|
||||
|
||||
public FencedMemoryAlloc()
|
||||
{
|
||||
ClearState();
|
||||
}
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
} // namespace Barracuda
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 5071bbeadb81d034f827f20e95c52ee6
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 5211ff135b3b87f42be25a8505a28df7
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: d05274a6ecc82404abe715a573ea8e74
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,864 +0,0 @@
|
||||
// This is auto-generated -- do not modify directly
|
||||
using UnityEngine;
|
||||
using System;
|
||||
using Unity.Burst;
|
||||
using Unity.Burst.Intrinsics;
|
||||
using Unity.Collections;
|
||||
using Unity.Jobs;
|
||||
using Unity.Mathematics;
|
||||
using static Unity.Burst.Intrinsics.X86.Avx;
|
||||
using static Unity.Burst.Intrinsics.X86.Fma;
|
||||
using Unity.Collections.LowLevel.Unsafe;
|
||||
using Unity.Jobs.LowLevel.Unsafe;
|
||||
using FencingHelperMode = Unity.Barracuda.BurstSchedulingHelper.FencingHelperMode;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
public partial class BurstCPUOps
|
||||
{
|
||||
#region Dense/Conv jobs declaration for mode: _Full_Float
|
||||
|
||||
internal partial struct DepthwiseConv2DJobHelper
|
||||
{
|
||||
public JobHandle ScheduleXSBO(Tensor X, Tensor S, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
var pinX = Pin(X);
|
||||
var pinS = Pin(S);
|
||||
var pinB = Pin(B);
|
||||
var pinO = Pin(O, uploadCache: false);
|
||||
return ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
public JobHandle ScheduleXSBO(BurstTensorData pinX, BurstTensorData pinS, BurstTensorData pinB, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
bool AHalf = pinX.array.Type == DataType.Half;
|
||||
bool WHalf = pinS.array.Type == DataType.Half;
|
||||
bool BHalf = pinB.array.Type == DataType.Half;
|
||||
bool OHalf = pinO.array.Type == DataType.Half;
|
||||
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
||||
UnityEngine.Assertions.Assert.AreEqual(WHalf, BHalf);
|
||||
if (AHalf && WHalf)
|
||||
{
|
||||
var job = new DepthwiseConv2DJob_Full_Half();
|
||||
job.data = this;
|
||||
return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
else if (!AHalf && WHalf)
|
||||
{
|
||||
var job = new DepthwiseConv2DJob_ActAsFloat_WeightAsHalf();
|
||||
job.data = this;
|
||||
return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
else if (!AHalf && !WHalf)
|
||||
{
|
||||
var job = new DepthwiseConv2DJob_Full_Float();
|
||||
job.data = this;
|
||||
return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
else //if (AHalf && !WHalf)
|
||||
{
|
||||
UnityEngine.Assertions.Assert.IsTrue(false, "DepthwiseConv2DJob does not support activation as half while weights are floats.");
|
||||
return new JobHandle();
|
||||
}
|
||||
}
|
||||
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct DepthwiseConv2DJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXSBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
||||
public ReadOnlyMemResource S { get; set; } float* Sptr => S.ptrfloat;
|
||||
public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;
|
||||
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
||||
public DepthwiseConv2DJobHelper data;
|
||||
|
||||
const int unrollSize = 16;
|
||||
public void Execute(int y)
|
||||
{
|
||||
int accumulatorMemSize = data.kernelCount * sizeof(float);
|
||||
float* outputAccumulators = (float*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
|
||||
for (int n = 0; n < data.outBatch; ++n)
|
||||
for (int x = 0; x < data.outWidth; ++x)
|
||||
{
|
||||
// reset accumulators to 0
|
||||
UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
|
||||
|
||||
// gather X * K results in accumulators
|
||||
for (int dy = 0; dy < data.kernelHeight; ++dy)
|
||||
{
|
||||
int readY = y * data.strideY + dy - data.padY;
|
||||
if (readY < 0) continue;
|
||||
if (readY >= data.inHeight) continue;
|
||||
|
||||
for (int dx = 0; dx < data.kernelWidth; ++dx)
|
||||
{
|
||||
int readX = x * data.strideX + dx - data.padY;
|
||||
if (readX < 0) continue;
|
||||
if (readX >= data.inWidth) continue;
|
||||
|
||||
float* dst = outputAccumulators;
|
||||
float* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
|
||||
float* kernel = Sptr + dy * data.kernelStrideH + dx * data.kernelStrideW;
|
||||
|
||||
int k = 0;
|
||||
for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++, kernel++)
|
||||
*dst += (float)((*src) * (*kernel));
|
||||
for (; k < data.kernelCount; k++, src++, dst++, kernel++) // remainder of kernelCount loop
|
||||
*dst += (float)((*src) * (*kernel));
|
||||
}
|
||||
}
|
||||
|
||||
{ // write accumulators to memory and add bias
|
||||
int k = 0;
|
||||
float* src = outputAccumulators;
|
||||
float* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
|
||||
float* bias = Bptr;
|
||||
for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++, bias++)
|
||||
*dst = (float)((*src) + (*bias));
|
||||
for (; k < data.kernelCount; k++, src++, dst++, bias++) // remainder of kernelCount loop
|
||||
*dst = (float)((*src) + (*bias));
|
||||
}
|
||||
}
|
||||
|
||||
UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
|
||||
}
|
||||
}
|
||||
|
||||
internal partial struct Dense3JobHelper
|
||||
{
|
||||
public JobHandle ScheduleXSBO(Tensor X, Tensor S, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
var pinX = Pin(X);
|
||||
var pinS = Pin(S);
|
||||
var pinB = Pin(B);
|
||||
var pinO = Pin(O, uploadCache: false);
|
||||
return ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
public JobHandle ScheduleXSBO(BurstTensorData pinX, BurstTensorData pinS, BurstTensorData pinB, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
bool AHalf = pinX.array.Type == DataType.Half;
|
||||
bool WHalf = pinS.array.Type == DataType.Half;
|
||||
bool BHalf = pinB.array.Type == DataType.Half;
|
||||
bool OHalf = pinO.array.Type == DataType.Half;
|
||||
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
||||
UnityEngine.Assertions.Assert.AreEqual(WHalf, BHalf);
|
||||
if (AHalf && WHalf)
|
||||
{
|
||||
var job = new Dense3Job_Full_Half();
|
||||
job.data = this;
|
||||
return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
else if (!AHalf && WHalf)
|
||||
{
|
||||
var job = new Dense3Job_ActAsFloat_WeightAsHalf();
|
||||
job.data = this;
|
||||
return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
else if (!AHalf && !WHalf)
|
||||
{
|
||||
var job = new Dense3Job_Full_Float();
|
||||
job.data = this;
|
||||
return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
else //if (AHalf && !WHalf)
|
||||
{
|
||||
UnityEngine.Assertions.Assert.IsTrue(false, "Dense3Job does not support activation as half while weights are floats.");
|
||||
return new JobHandle();
|
||||
}
|
||||
}
|
||||
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct Dense3Job_Full_Float : IJobParallelFor, IJobResourceDeclarationXSBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
||||
public ReadOnlyMemResource S { get; set; } float* Sptr => S.ptrfloat;
|
||||
public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;
|
||||
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
||||
public Dense3JobHelper data;
|
||||
|
||||
public const int blockSize = 16;
|
||||
public void Execute(int threadID)
|
||||
{
|
||||
float* A = this.Xptr;
|
||||
float* B = this.Sptr;
|
||||
float* C = this.Bptr;
|
||||
float* S = this.Optr;
|
||||
int AM = data.AM;
|
||||
int BM = data.BM;
|
||||
int SM = data.SM;
|
||||
int AN = data.AN;
|
||||
int BN = data.BN;
|
||||
int SN = data.SN;
|
||||
|
||||
int dispatchThreadXY = data.dispatchThreadX * data.dispatchThreadY;
|
||||
|
||||
int batch = (threadID / dispatchThreadXY);
|
||||
int i = (threadID % dispatchThreadXY) % data.dispatchThreadX;
|
||||
int j = (threadID % dispatchThreadXY) / data.dispatchThreadX;
|
||||
|
||||
int batchOffSetA = (batch * AM * AN);
|
||||
int batchOffSetS = (batch * SM * SN);
|
||||
|
||||
int rowA = i * blockSize;
|
||||
int colB = j * blockSize;
|
||||
|
||||
unsafe
|
||||
{
|
||||
float* blockTempA = null;
|
||||
float* blockTempB = null;
|
||||
float* blockTempS = null;
|
||||
|
||||
float* blockS = S + rowA + SM * colB + batchOffSetS;
|
||||
int strideS = SM;
|
||||
|
||||
if (rowA + blockSize > SM || colB + blockSize > SN) // copy remainder of C into zero-padded block
|
||||
{
|
||||
blockTempS = AllocBlock(blockSize, blockSize);
|
||||
strideS = blockSize;
|
||||
blockS = blockTempS;
|
||||
}
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
blockS[x + strideS * y] = (float)((colB + y) < BN ? C[colB + y] : 0.0f);
|
||||
|
||||
for (int l = 0; l < AN; l += blockSize) // inner-loop
|
||||
{
|
||||
float* blockA = A + rowA + AM * l + batchOffSetA;
|
||||
float* blockB = B + l * BN + colB;
|
||||
int strideA = AM;
|
||||
int strideB = BN;
|
||||
|
||||
if (rowA + blockSize > AM || l + blockSize > AN) // copy remainder of A into zero-padded block
|
||||
{
|
||||
if (blockTempA == null)
|
||||
blockTempA = AllocBlock(blockSize, blockSize);
|
||||
strideA = blockSize;
|
||||
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
blockTempA[x + blockSize * y] = (float)(((rowA + x) < AM && (l + y < AN)) ? blockA[x + AM * y] : 0.0f);
|
||||
|
||||
blockA = blockTempA;
|
||||
}
|
||||
|
||||
if (colB + blockSize > BN || l + blockSize > BM) // copy remainder of B into zero-padded block
|
||||
{
|
||||
if (blockTempB == null)
|
||||
blockTempB = AllocBlock(blockSize, blockSize);
|
||||
strideB = blockSize;
|
||||
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
blockTempB[x + blockSize * y] = (float)(((colB + x) < BN && (l + y < BM)) ? blockB[x + BN * y] : 0.0f);
|
||||
|
||||
blockB = blockTempB;
|
||||
}
|
||||
|
||||
MultiplyBlockUnrollHx16(blockA, strideA, blockB, strideB, blockS, strideS);
|
||||
}
|
||||
|
||||
if (blockS == blockTempS) // copy back
|
||||
{
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
{
|
||||
if (((rowA + x) < SM) && ((colB + y) < SN))
|
||||
S[(rowA + x) + SM * (colB + y) + batchOffSetS] = blockTempS[x + blockSize * y];
|
||||
}
|
||||
}
|
||||
|
||||
FreeBlock(blockTempA);
|
||||
FreeBlock(blockTempB);
|
||||
FreeBlock(blockTempS);
|
||||
}
|
||||
}
|
||||
|
||||
static void MultiplyBlockUnrollHx16(float* Ap, int Astride, float* Bp, int Bstride, float* Sp, int Sstride)
|
||||
{
|
||||
for (int i = 0; i < blockSize; i++)
|
||||
{
|
||||
float sum0 = *(Sp + i + Sstride * 0);
|
||||
float sum1 = *(Sp + i + Sstride * 1);
|
||||
float sum2 = *(Sp + i + Sstride * 2);
|
||||
float sum3 = *(Sp + i + Sstride * 3);
|
||||
float sum4 = *(Sp + i + Sstride * 4);
|
||||
float sum5 = *(Sp + i + Sstride * 5);
|
||||
float sum6 = *(Sp + i + Sstride * 6);
|
||||
float sum7 = *(Sp + i + Sstride * 7);
|
||||
float sum8 = *(Sp + i + Sstride * 8);
|
||||
float sum9 = *(Sp + i + Sstride * 9);
|
||||
float sumA = *(Sp + i + Sstride * 10);
|
||||
float sumB = *(Sp + i + Sstride * 11);
|
||||
float sumC = *(Sp + i + Sstride * 12);
|
||||
float sumD = *(Sp + i + Sstride * 13);
|
||||
float sumE = *(Sp + i + Sstride * 14);
|
||||
float sumF = *(Sp + i + Sstride * 15);
|
||||
|
||||
for (int l = 0; l < blockSize; l++)
|
||||
{
|
||||
float A = *(Ap + i + Astride * l);
|
||||
|
||||
float B0 = *(Bp + l * Bstride + 0);
|
||||
float B1 = *(Bp + l * Bstride + 1);
|
||||
float B2 = *(Bp + l * Bstride + 2);
|
||||
float B3 = *(Bp + l * Bstride + 3);
|
||||
float B4 = *(Bp + l * Bstride + 4);
|
||||
float B5 = *(Bp + l * Bstride + 5);
|
||||
float B6 = *(Bp + l * Bstride + 6);
|
||||
float B7 = *(Bp + l * Bstride + 7);
|
||||
float B8 = *(Bp + l * Bstride + 8);
|
||||
float B9 = *(Bp + l * Bstride + 9);
|
||||
float BA = *(Bp + l * Bstride + 10);
|
||||
float BB = *(Bp + l * Bstride + 11);
|
||||
float BC = *(Bp + l * Bstride + 12);
|
||||
float BD = *(Bp + l * Bstride + 13);
|
||||
float BE = *(Bp + l * Bstride + 14);
|
||||
float BF = *(Bp + l * Bstride + 15);
|
||||
|
||||
|
||||
sum0 += A * B0;
|
||||
sum1 += A * B1;
|
||||
sum2 += A * B2;
|
||||
sum3 += A * B3;
|
||||
sum4 += A * B4;
|
||||
sum5 += A * B5;
|
||||
sum6 += A * B6;
|
||||
sum7 += A * B7;
|
||||
sum8 += A * B8;
|
||||
sum9 += A * B9;
|
||||
sumA += A * BA;
|
||||
sumB += A * BB;
|
||||
sumC += A * BC;
|
||||
sumD += A * BD;
|
||||
sumE += A * BE;
|
||||
sumF += A * BF;
|
||||
}
|
||||
|
||||
*(Sp + i + Sstride * 0 ) = (float)(sum0);
|
||||
*(Sp + i + Sstride * 1 ) = (float)(sum1);
|
||||
*(Sp + i + Sstride * 2 ) = (float)(sum2);
|
||||
*(Sp + i + Sstride * 3 ) = (float)(sum3);
|
||||
*(Sp + i + Sstride * 4 ) = (float)(sum4);
|
||||
*(Sp + i + Sstride * 5 ) = (float)(sum5);
|
||||
*(Sp + i + Sstride * 6 ) = (float)(sum6);
|
||||
*(Sp + i + Sstride * 7 ) = (float)(sum7);
|
||||
*(Sp + i + Sstride * 8 ) = (float)(sum8);
|
||||
*(Sp + i + Sstride * 9 ) = (float)(sum9);
|
||||
*(Sp + i + Sstride * 10) = (float)(sumA);
|
||||
*(Sp + i + Sstride * 11) = (float)(sumB);
|
||||
*(Sp + i + Sstride * 12) = (float)(sumC);
|
||||
*(Sp + i + Sstride * 13) = (float)(sumD);
|
||||
*(Sp + i + Sstride * 14) = (float)(sumE);
|
||||
*(Sp + i + Sstride * 15) = (float)(sumF);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endregion
|
||||
#region Dense/Conv jobs declaration for mode: _ActAsFloat_WeightAsHalf
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct DepthwiseConv2DJob_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXSBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
||||
public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
|
||||
public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
||||
public DepthwiseConv2DJobHelper data;
|
||||
|
||||
const int unrollSize = 16;
|
||||
public void Execute(int y)
|
||||
{
|
||||
int accumulatorMemSize = data.kernelCount * sizeof(float);
|
||||
float* outputAccumulators = (float*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
|
||||
for (int n = 0; n < data.outBatch; ++n)
|
||||
for (int x = 0; x < data.outWidth; ++x)
|
||||
{
|
||||
// reset accumulators to 0
|
||||
UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
|
||||
|
||||
// gather X * K results in accumulators
|
||||
for (int dy = 0; dy < data.kernelHeight; ++dy)
|
||||
{
|
||||
int readY = y * data.strideY + dy - data.padY;
|
||||
if (readY < 0) continue;
|
||||
if (readY >= data.inHeight) continue;
|
||||
|
||||
for (int dx = 0; dx < data.kernelWidth; ++dx)
|
||||
{
|
||||
int readX = x * data.strideX + dx - data.padY;
|
||||
if (readX < 0) continue;
|
||||
if (readX >= data.inWidth) continue;
|
||||
|
||||
float* dst = outputAccumulators;
|
||||
float* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
|
||||
half* kernel = Sptr + dy * data.kernelStrideH + dx * data.kernelStrideW;
|
||||
|
||||
int k = 0;
|
||||
for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++, kernel++)
|
||||
*dst += (float)((*src) * (*kernel));
|
||||
for (; k < data.kernelCount; k++, src++, dst++, kernel++) // remainder of kernelCount loop
|
||||
*dst += (float)((*src) * (*kernel));
|
||||
}
|
||||
}
|
||||
|
||||
{ // write accumulators to memory and add bias
|
||||
int k = 0;
|
||||
float* src = outputAccumulators;
|
||||
float* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
|
||||
half* bias = Bptr;
|
||||
for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++, bias++)
|
||||
*dst = (float)((*src) + (*bias));
|
||||
for (; k < data.kernelCount; k++, src++, dst++, bias++) // remainder of kernelCount loop
|
||||
*dst = (float)((*src) + (*bias));
|
||||
}
|
||||
}
|
||||
|
||||
UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct Dense3Job_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXSBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
||||
public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
|
||||
public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
||||
public Dense3JobHelper data;
|
||||
|
||||
public const int blockSize = 16;
|
||||
public void Execute(int threadID)
|
||||
{
|
||||
float* A = this.Xptr;
|
||||
half* B = this.Sptr;
|
||||
half* C = this.Bptr;
|
||||
float* S = this.Optr;
|
||||
int AM = data.AM;
|
||||
int BM = data.BM;
|
||||
int SM = data.SM;
|
||||
int AN = data.AN;
|
||||
int BN = data.BN;
|
||||
int SN = data.SN;
|
||||
|
||||
int dispatchThreadXY = data.dispatchThreadX * data.dispatchThreadY;
|
||||
|
||||
int batch = (threadID / dispatchThreadXY);
|
||||
int i = (threadID % dispatchThreadXY) % data.dispatchThreadX;
|
||||
int j = (threadID % dispatchThreadXY) / data.dispatchThreadX;
|
||||
|
||||
int batchOffSetA = (batch * AM * AN);
|
||||
int batchOffSetS = (batch * SM * SN);
|
||||
|
||||
int rowA = i * blockSize;
|
||||
int colB = j * blockSize;
|
||||
|
||||
unsafe
|
||||
{
|
||||
float* blockTempA = null;
|
||||
half* blockTempB = null;
|
||||
float* blockTempS = null;
|
||||
|
||||
float* blockS = S + rowA + SM * colB + batchOffSetS;
|
||||
int strideS = SM;
|
||||
|
||||
if (rowA + blockSize > SM || colB + blockSize > SN) // copy remainder of C into zero-padded block
|
||||
{
|
||||
blockTempS = AllocBlock(blockSize, blockSize);
|
||||
strideS = blockSize;
|
||||
blockS = blockTempS;
|
||||
}
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
blockS[x + strideS * y] = (float)((colB + y) < BN ? C[colB + y] : 0.0f);
|
||||
|
||||
for (int l = 0; l < AN; l += blockSize) // inner-loop
|
||||
{
|
||||
float* blockA = A + rowA + AM * l + batchOffSetA;
|
||||
half* blockB = B + l * BN + colB;
|
||||
int strideA = AM;
|
||||
int strideB = BN;
|
||||
|
||||
if (rowA + blockSize > AM || l + blockSize > AN) // copy remainder of A into zero-padded block
|
||||
{
|
||||
if (blockTempA == null)
|
||||
blockTempA = AllocBlock(blockSize, blockSize);
|
||||
strideA = blockSize;
|
||||
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
blockTempA[x + blockSize * y] = (float)(((rowA + x) < AM && (l + y < AN)) ? blockA[x + AM * y] : 0.0f);
|
||||
|
||||
blockA = blockTempA;
|
||||
}
|
||||
|
||||
if (colB + blockSize > BN || l + blockSize > BM) // copy remainder of B into zero-padded block
|
||||
{
|
||||
if (blockTempB == null)
|
||||
blockTempB = AllocBlockHalf(blockSize, blockSize);
|
||||
strideB = blockSize;
|
||||
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
blockTempB[x + blockSize * y] = (half)(((colB + x) < BN && (l + y < BM)) ? blockB[x + BN * y] : 0.0f);
|
||||
|
||||
blockB = blockTempB;
|
||||
}
|
||||
|
||||
MultiplyBlockUnrollHx16(blockA, strideA, blockB, strideB, blockS, strideS);
|
||||
}
|
||||
|
||||
if (blockS == blockTempS) // copy back
|
||||
{
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
{
|
||||
if (((rowA + x) < SM) && ((colB + y) < SN))
|
||||
S[(rowA + x) + SM * (colB + y) + batchOffSetS] = blockTempS[x + blockSize * y];
|
||||
}
|
||||
}
|
||||
|
||||
FreeBlock(blockTempA);
|
||||
FreeBlock(blockTempB);
|
||||
FreeBlock(blockTempS);
|
||||
}
|
||||
}
|
||||
|
||||
static void MultiplyBlockUnrollHx16(float* Ap, int Astride, half* Bp, int Bstride, float* Sp, int Sstride)
|
||||
{
|
||||
for (int i = 0; i < blockSize; i++)
|
||||
{
|
||||
float sum0 = *(Sp + i + Sstride * 0);
|
||||
float sum1 = *(Sp + i + Sstride * 1);
|
||||
float sum2 = *(Sp + i + Sstride * 2);
|
||||
float sum3 = *(Sp + i + Sstride * 3);
|
||||
float sum4 = *(Sp + i + Sstride * 4);
|
||||
float sum5 = *(Sp + i + Sstride * 5);
|
||||
float sum6 = *(Sp + i + Sstride * 6);
|
||||
float sum7 = *(Sp + i + Sstride * 7);
|
||||
float sum8 = *(Sp + i + Sstride * 8);
|
||||
float sum9 = *(Sp + i + Sstride * 9);
|
||||
float sumA = *(Sp + i + Sstride * 10);
|
||||
float sumB = *(Sp + i + Sstride * 11);
|
||||
float sumC = *(Sp + i + Sstride * 12);
|
||||
float sumD = *(Sp + i + Sstride * 13);
|
||||
float sumE = *(Sp + i + Sstride * 14);
|
||||
float sumF = *(Sp + i + Sstride * 15);
|
||||
|
||||
for (int l = 0; l < blockSize; l++)
|
||||
{
|
||||
float A = *(Ap + i + Astride * l);
|
||||
|
||||
float B0 = *(Bp + l * Bstride + 0);
|
||||
float B1 = *(Bp + l * Bstride + 1);
|
||||
float B2 = *(Bp + l * Bstride + 2);
|
||||
float B3 = *(Bp + l * Bstride + 3);
|
||||
float B4 = *(Bp + l * Bstride + 4);
|
||||
float B5 = *(Bp + l * Bstride + 5);
|
||||
float B6 = *(Bp + l * Bstride + 6);
|
||||
float B7 = *(Bp + l * Bstride + 7);
|
||||
float B8 = *(Bp + l * Bstride + 8);
|
||||
float B9 = *(Bp + l * Bstride + 9);
|
||||
float BA = *(Bp + l * Bstride + 10);
|
||||
float BB = *(Bp + l * Bstride + 11);
|
||||
float BC = *(Bp + l * Bstride + 12);
|
||||
float BD = *(Bp + l * Bstride + 13);
|
||||
float BE = *(Bp + l * Bstride + 14);
|
||||
float BF = *(Bp + l * Bstride + 15);
|
||||
|
||||
|
||||
sum0 += A * B0;
|
||||
sum1 += A * B1;
|
||||
sum2 += A * B2;
|
||||
sum3 += A * B3;
|
||||
sum4 += A * B4;
|
||||
sum5 += A * B5;
|
||||
sum6 += A * B6;
|
||||
sum7 += A * B7;
|
||||
sum8 += A * B8;
|
||||
sum9 += A * B9;
|
||||
sumA += A * BA;
|
||||
sumB += A * BB;
|
||||
sumC += A * BC;
|
||||
sumD += A * BD;
|
||||
sumE += A * BE;
|
||||
sumF += A * BF;
|
||||
}
|
||||
|
||||
*(Sp + i + Sstride * 0 ) = (float)(sum0);
|
||||
*(Sp + i + Sstride * 1 ) = (float)(sum1);
|
||||
*(Sp + i + Sstride * 2 ) = (float)(sum2);
|
||||
*(Sp + i + Sstride * 3 ) = (float)(sum3);
|
||||
*(Sp + i + Sstride * 4 ) = (float)(sum4);
|
||||
*(Sp + i + Sstride * 5 ) = (float)(sum5);
|
||||
*(Sp + i + Sstride * 6 ) = (float)(sum6);
|
||||
*(Sp + i + Sstride * 7 ) = (float)(sum7);
|
||||
*(Sp + i + Sstride * 8 ) = (float)(sum8);
|
||||
*(Sp + i + Sstride * 9 ) = (float)(sum9);
|
||||
*(Sp + i + Sstride * 10) = (float)(sumA);
|
||||
*(Sp + i + Sstride * 11) = (float)(sumB);
|
||||
*(Sp + i + Sstride * 12) = (float)(sumC);
|
||||
*(Sp + i + Sstride * 13) = (float)(sumD);
|
||||
*(Sp + i + Sstride * 14) = (float)(sumE);
|
||||
*(Sp + i + Sstride * 15) = (float)(sumF);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endregion
|
||||
#region Dense/Conv jobs declaration for mode: _Full_Half
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct DepthwiseConv2DJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXSBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
|
||||
public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public DepthwiseConv2DJobHelper data;
|
||||
|
||||
const int unrollSize = 16;
|
||||
public void Execute(int y)
|
||||
{
|
||||
int accumulatorMemSize = data.kernelCount * sizeof(half);
|
||||
half* outputAccumulators = (half*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
|
||||
for (int n = 0; n < data.outBatch; ++n)
|
||||
for (int x = 0; x < data.outWidth; ++x)
|
||||
{
|
||||
// reset accumulators to 0
|
||||
UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
|
||||
|
||||
// gather X * K results in accumulators
|
||||
for (int dy = 0; dy < data.kernelHeight; ++dy)
|
||||
{
|
||||
int readY = y * data.strideY + dy - data.padY;
|
||||
if (readY < 0) continue;
|
||||
if (readY >= data.inHeight) continue;
|
||||
|
||||
for (int dx = 0; dx < data.kernelWidth; ++dx)
|
||||
{
|
||||
int readX = x * data.strideX + dx - data.padY;
|
||||
if (readX < 0) continue;
|
||||
if (readX >= data.inWidth) continue;
|
||||
|
||||
half* dst = outputAccumulators;
|
||||
half* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
|
||||
half* kernel = Sptr + dy * data.kernelStrideH + dx * data.kernelStrideW;
|
||||
|
||||
int k = 0;
|
||||
for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++, kernel++)
|
||||
*dst += (half)((*src) * (*kernel));
|
||||
for (; k < data.kernelCount; k++, src++, dst++, kernel++) // remainder of kernelCount loop
|
||||
*dst += (half)((*src) * (*kernel));
|
||||
}
|
||||
}
|
||||
|
||||
{ // write accumulators to memory and add bias
|
||||
int k = 0;
|
||||
half* src = outputAccumulators;
|
||||
half* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
|
||||
half* bias = Bptr;
|
||||
for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++, bias++)
|
||||
*dst = (half)((*src) + (*bias));
|
||||
for (; k < data.kernelCount; k++, src++, dst++, bias++) // remainder of kernelCount loop
|
||||
*dst = (half)((*src) + (*bias));
|
||||
}
|
||||
}
|
||||
|
||||
UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct Dense3Job_Full_Half : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
    public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
    public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
    public Dense3JobHelper data;

    public const int blockSize = 16;

    // Blocked half-precision GEMM with bias: one thread computes one
    // blockSize x blockSize tile of S = A*B + C for one batch.
    // Tiles that overhang a matrix edge are staged through zero-padded
    // scratch blocks so the inner kernel never branches on bounds.
    public void Execute(int threadID)
    {
        half* A = this.Xptr;
        half* B = this.Sptr;
        half* C = this.Bptr;
        half* S = this.Optr;
        int AM = data.AM;
        int BM = data.BM;
        int SM = data.SM;
        int AN = data.AN;
        int BN = data.BN;
        int SN = data.SN;

        // Map the flat thread id onto (batch, tile-row, tile-column).
        int threadsPerBatch = data.dispatchThreadX * data.dispatchThreadY;

        int batch = (threadID / threadsPerBatch);
        int i = (threadID % threadsPerBatch) % data.dispatchThreadX;
        int j = (threadID % threadsPerBatch) / data.dispatchThreadX;

        int batchOffSetA = (batch * AM * AN);
        int batchOffSetS = (batch * SM * SN);

        int rowA = i * blockSize;
        int colB = j * blockSize;

        unsafe
        {
            half* paddedA = null;
            half* paddedB = null;
            half* paddedS = null;

            // Default: accumulate directly into the output tile (column-major, stride SM).
            half* blockS = S + rowA + SM * colB + batchOffSetS;
            int strideS = SM;

            if (rowA + blockSize > SM || colB + blockSize > SN) // tile overhangs S -> stage in scratch
            {
                paddedS = AllocBlockHalf(blockSize, blockSize);
                strideS = blockSize;
                blockS = paddedS;
            }
            // Seed the accumulator tile with the (broadcast) bias; zero where past BN.
            for (int y = 0; y < blockSize; y++)
            for (int x = 0; x < blockSize; x++)
                blockS[x + strideS * y] = (half)((colB + y) < BN ? C[colB + y] : 0.0f);

            for (int l = 0; l < AN; l += blockSize) // march along the shared dimension
            {
                half* blockA = A + rowA + AM * l + batchOffSetA;
                half* blockB = B + l * BN + colB;
                int strideA = AM;
                int strideB = BN;

                if (rowA + blockSize > AM || l + blockSize > AN) // A tile overhangs -> zero-padded copy
                {
                    if (paddedA == null)
                        paddedA = AllocBlockHalf(blockSize, blockSize);
                    strideA = blockSize;

                    for (int y = 0; y < blockSize; y++)
                    for (int x = 0; x < blockSize; x++)
                        paddedA[x + blockSize * y] = (half)(((rowA + x) < AM && (l + y < AN)) ? blockA[x + AM * y] : 0.0f);

                    blockA = paddedA;
                }

                if (colB + blockSize > BN || l + blockSize > BM) // B tile overhangs -> zero-padded copy
                {
                    if (paddedB == null)
                        paddedB = AllocBlockHalf(blockSize, blockSize);
                    strideB = blockSize;

                    for (int y = 0; y < blockSize; y++)
                    for (int x = 0; x < blockSize; x++)
                        paddedB[x + blockSize * y] = (half)(((colB + x) < BN && (l + y < BM)) ? blockB[x + BN * y] : 0.0f);

                    blockB = paddedB;
                }

                MultiplyBlockUnrollHx16(blockA, strideA, blockB, strideB, blockS, strideS);
            }

            if (blockS == paddedS) // result was staged: copy the in-bounds part back to S
            {
                for (int y = 0; y < blockSize; y++)
                for (int x = 0; x < blockSize; x++)
                {
                    if (((rowA + x) < SM) && ((colB + y) < SN))
                        S[(rowA + x) + SM * (colB + y) + batchOffSetS] = paddedS[x + blockSize * y];
                }
            }

            FreeBlock(paddedA);
            FreeBlock(paddedB);
            FreeBlock(paddedS);
        }
    }

    // Inner kernel: S[i, 0..15] += A[i, k] * B[k, 0..15] for a 16-wide tile,
    // with the 16 column accumulators fully unrolled into float registers.
    static void MultiplyBlockUnrollHx16(half* Ap, int Astride, half* Bp, int Bstride, half* Sp, int Sstride)
    {
        for (int i = 0; i < blockSize; i++)
        {
            float sum0 = *(Sp + i + Sstride * 0);
            float sum1 = *(Sp + i + Sstride * 1);
            float sum2 = *(Sp + i + Sstride * 2);
            float sum3 = *(Sp + i + Sstride * 3);
            float sum4 = *(Sp + i + Sstride * 4);
            float sum5 = *(Sp + i + Sstride * 5);
            float sum6 = *(Sp + i + Sstride * 6);
            float sum7 = *(Sp + i + Sstride * 7);
            float sum8 = *(Sp + i + Sstride * 8);
            float sum9 = *(Sp + i + Sstride * 9);
            float sumA = *(Sp + i + Sstride * 10);
            float sumB = *(Sp + i + Sstride * 11);
            float sumC = *(Sp + i + Sstride * 12);
            float sumD = *(Sp + i + Sstride * 13);
            float sumE = *(Sp + i + Sstride * 14);
            float sumF = *(Sp + i + Sstride * 15);

            for (int k = 0; k < blockSize; k++)
            {
                float a = *(Ap + i + Astride * k);

                float b0 = *(Bp + k * Bstride + 0);
                float b1 = *(Bp + k * Bstride + 1);
                float b2 = *(Bp + k * Bstride + 2);
                float b3 = *(Bp + k * Bstride + 3);
                float b4 = *(Bp + k * Bstride + 4);
                float b5 = *(Bp + k * Bstride + 5);
                float b6 = *(Bp + k * Bstride + 6);
                float b7 = *(Bp + k * Bstride + 7);
                float b8 = *(Bp + k * Bstride + 8);
                float b9 = *(Bp + k * Bstride + 9);
                float bA = *(Bp + k * Bstride + 10);
                float bB = *(Bp + k * Bstride + 11);
                float bC = *(Bp + k * Bstride + 12);
                float bD = *(Bp + k * Bstride + 13);
                float bE = *(Bp + k * Bstride + 14);
                float bF = *(Bp + k * Bstride + 15);

                sum0 += a * b0;
                sum1 += a * b1;
                sum2 += a * b2;
                sum3 += a * b3;
                sum4 += a * b4;
                sum5 += a * b5;
                sum6 += a * b6;
                sum7 += a * b7;
                sum8 += a * b8;
                sum9 += a * b9;
                sumA += a * bA;
                sumB += a * bB;
                sumC += a * bC;
                sumD += a * bD;
                sumE += a * bE;
                sumF += a * bF;
            }

            *(Sp + i + Sstride * 0 ) = (half)(sum0);
            *(Sp + i + Sstride * 1 ) = (half)(sum1);
            *(Sp + i + Sstride * 2 ) = (half)(sum2);
            *(Sp + i + Sstride * 3 ) = (half)(sum3);
            *(Sp + i + Sstride * 4 ) = (half)(sum4);
            *(Sp + i + Sstride * 5 ) = (half)(sum5);
            *(Sp + i + Sstride * 6 ) = (half)(sum6);
            *(Sp + i + Sstride * 7 ) = (half)(sum7);
            *(Sp + i + Sstride * 8 ) = (half)(sum8);
            *(Sp + i + Sstride * 9 ) = (half)(sum9);
            *(Sp + i + Sstride * 10) = (half)(sumA);
            *(Sp + i + Sstride * 11) = (half)(sumB);
            *(Sp + i + Sstride * 12) = (half)(sumC);
            *(Sp + i + Sstride * 13) = (half)(sumD);
            *(Sp + i + Sstride * 14) = (half)(sumE);
            *(Sp + i + Sstride * 15) = (half)(sumF);
        }
    }
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 417ca864422a2384ab3013114bf9f845
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 30d1de61c64693a4895a66fecf45a004
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,890 +0,0 @@
|
||||
// This is auto-generated -- do not modify directly
|
||||
using UnityEngine;
|
||||
using System;
|
||||
using Unity.Burst;
|
||||
using Unity.Burst.Intrinsics;
|
||||
using Unity.Collections;
|
||||
using Unity.Jobs;
|
||||
using Unity.Mathematics;
|
||||
using static Unity.Burst.Intrinsics.X86.Avx;
|
||||
using static Unity.Burst.Intrinsics.X86.Fma;
|
||||
using Unity.Collections.LowLevel.Unsafe;
|
||||
using Unity.Jobs.LowLevel.Unsafe;
|
||||
using FencingHelperMode = Unity.Barracuda.BurstSchedulingHelper.FencingHelperMode;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
public partial class BurstCPUOps
|
||||
{
|
||||
#region Reduce jobs declaration for mode: _Full_Float
|
||||
|
||||
internal partial struct ReduceMaxJobHelper
{
    // Dispatches the float or half ReduceMax job variant depending on the
    // data type of the pinned input; input and output precision must match.
    public JobHandle ScheduleXO(BurstTensorData pinX, FencedMemoryAlloc pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool inputIsHalf = pinX.array.Type == DataType.Half;
        bool outputIsHalf = pinO.type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(inputIsHalf, outputIsHalf);
        if (!inputIsHalf)
        {
            var floatJob = new ReduceMaxJob_Full_Float { data = this };
            return floatJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var halfJob = new ReduceMaxJob_Full_Half { data = this };
        return halfJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
internal partial struct ReduceMaxJobHelper
{
    // Convenience overload: pins both tensors, then forwards to the
    // BurstTensorData overload below.
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        return ScheduleXO(Pin(X), Pin(O, uploadCache: false), arrayLength, innerBatchCount, fencingMode);
    }
    // Dispatches the float or half ReduceMax job variant depending on the
    // pinned data type; input and output precision must match.
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool inputIsHalf = pinX.array.Type == DataType.Half;
        bool outputIsHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(inputIsHalf, outputIsHalf);
        if (!inputIsHalf)
        {
            var floatJob = new ReduceMaxJob_Full_Float { data = this };
            return floatJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var halfJob = new ReduceMaxJob_Full_Half { data = this };
        return halfJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ReduceMaxJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public ReduceMaxJobHelper data;

    // One invocation reduces a single (outer, inner) slot: the max over
    // `reduceDim` elements strided by `offsetReduce` in X.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = i / stride;
        int readBase = outer * stride * data.reduceDim + inner;

        float best = float.MinValue;
        for (int z = 0; z < data.reduceDim; ++z)
        {
            float candidate = Xptr[readBase + z * stride];
            best = math.max(best, candidate);
        }
        Optr[outer * stride + inner] = best;
    }
}
|
||||
|
||||
internal partial struct ReduceSumJobHelper
{
    // Convenience overload: pins both tensors, then forwards to the
    // BurstTensorData overload below.
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        return ScheduleXO(Pin(X), Pin(O, uploadCache: false), arrayLength, innerBatchCount, fencingMode);
    }
    // Dispatches the float or half ReduceSum job variant depending on the
    // pinned data type; input and output precision must match.
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool inputIsHalf = pinX.array.Type == DataType.Half;
        bool outputIsHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(inputIsHalf, outputIsHalf);
        if (!inputIsHalf)
        {
            var floatJob = new ReduceSumJob_Full_Float { data = this };
            return floatJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var halfJob = new ReduceSumJob_Full_Half { data = this };
        return halfJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ReduceSumJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public ReduceSumJobHelper data;

    // One invocation reduces a single (outer, inner) slot: the sum over
    // `reduceDim` elements strided by `offsetReduce` in X.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = i / stride;
        int readBase = outer * stride * data.reduceDim + inner;

        float total = 0;
        for (int z = 0; z < data.reduceDim; ++z)
            total += Xptr[readBase + z * stride];
        Optr[outer * stride + inner] = total;
    }
}
|
||||
|
||||
internal partial struct ReduceMeanJobHelper
{
    // Convenience overload: pins both tensors, then forwards to the
    // BurstTensorData overload below.
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        return ScheduleXO(Pin(X), Pin(O, uploadCache: false), arrayLength, innerBatchCount, fencingMode);
    }
    // Dispatches the float or half ReduceMean job variant depending on the
    // pinned data type; input and output precision must match.
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool inputIsHalf = pinX.array.Type == DataType.Half;
        bool outputIsHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(inputIsHalf, outputIsHalf);
        if (!inputIsHalf)
        {
            var floatJob = new ReduceMeanJob_Full_Float { data = this };
            return floatJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var halfJob = new ReduceMeanJob_Full_Half { data = this };
        return halfJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ReduceMeanJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public ReduceMeanJobHelper data;

    // One invocation reduces a single (outer, inner) slot: the arithmetic
    // mean over `reduceDim` elements strided by `offsetReduce` in X.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = i / stride;
        int readBase = outer * stride * data.reduceDim + inner;

        float total = 0;
        for (int z = 0; z < data.reduceDim; ++z)
            total += Xptr[readBase + z * stride];
        Optr[outer * stride + inner] = total / (float)data.reduceDim;
    }
}
|
||||
|
||||
internal partial struct ExpBiasReduceJobHelper
{
    // Dispatches the ExpBiasReduce job variant matching the activation and
    // weight precisions. Half activations over float weights is the one
    // combination with no generated kernel and asserts instead.
    public JobHandle ScheduleXBO(BurstTensorData pinX, FencedMemoryAlloc pinB, FencedMemoryAlloc pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool actHalf = pinX.array.Type == DataType.Half;
        bool weightsHalf = pinB.type == DataType.Half;
        bool outHalf = pinO.type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(actHalf, outHalf);
        if (actHalf && !weightsHalf)
        {
            UnityEngine.Assertions.Assert.IsTrue(false, "ExpBiasReduceJob does not support activation as half while weights are floats.");
            return new JobHandle();
        }
        if (actHalf)
        {
            var halfJob = new ExpBiasReduceJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        if (weightsHalf)
        {
            var mixedJob = new ExpBiasReduceJob_ActAsFloat_WeightAsHalf { data = this };
            return mixedJob.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var floatJob = new ExpBiasReduceJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ExpBiasReduceJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public ExpBiasReduceJobHelper data;

    // One invocation computes sum_z exp(X[...,z,...] - bias) for one
    // (outer, inner) slot; the bias is constant across the reduced axis.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = i / stride;
        int readBase = outer * stride * data.reduceDim + inner;
        float bias = Bptr[outer * stride + inner]; // invariant over z

        float accum = 0.0f;
        for (int z = 0; z < data.reduceDim; ++z)
            accum += math.exp(Xptr[readBase + z * stride] - bias);
        Optr[outer * stride + inner] = accum;
    }
}
|
||||
|
||||
internal partial struct SoftmaxEndJobHelper
{
    // Dispatches the SoftmaxEnd job variant matching the activation and
    // weight precisions. Half activations over float weights is the one
    // combination with no generated kernel and asserts instead.
    public JobHandle ScheduleXSBO(BurstTensorData pinX, FencedMemoryAlloc pinS, FencedMemoryAlloc pinB, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool actHalf = pinX.array.Type == DataType.Half;
        bool weightsHalf = pinS.type == DataType.Half;
        bool biasHalf = pinB.type == DataType.Half;
        bool outHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(actHalf, outHalf);
        UnityEngine.Assertions.Assert.AreEqual(weightsHalf, biasHalf);
        if (actHalf && !weightsHalf)
        {
            UnityEngine.Assertions.Assert.IsTrue(false, "SoftmaxEndJob does not support activation as half while weights are floats.");
            return new JobHandle();
        }
        if (actHalf)
        {
            var halfJob = new SoftmaxEndJob_Full_Half { data = this };
            return halfJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        if (weightsHalf)
        {
            var mixedJob = new SoftmaxEndJob_ActAsFloat_WeightAsHalf { data = this };
            return mixedJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var floatJob = new SoftmaxEndJob_Full_Float { data = this };
        return floatJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct SoftmaxEndJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource S { get; set; } float* Sptr => S.ptrfloat;
    public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public SoftmaxEndJobHelper data;

    // Finishes softmax per element: O[i] = exp(X[i] - max) / sum, where the
    // per-slice max (B) and sum (S) were computed by the preceding reduce jobs.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = (i / stride) / data.reduceDim;
        int reduced = outer * stride + inner; // index into the reduced max/sum buffers

        Optr[i] = math.exp(Xptr[i] - Bptr[reduced]) / Sptr[reduced];
    }
}
|
||||
|
||||
internal partial struct LogSoftmaxEndJobHelper
{
    // Dispatches the LogSoftmaxEnd job variant matching the activation and
    // weight precisions. Half activations over float weights is the one
    // combination with no generated kernel and asserts instead.
    public JobHandle ScheduleXSBO(BurstTensorData pinX, FencedMemoryAlloc pinS, FencedMemoryAlloc pinB, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool actHalf = pinX.array.Type == DataType.Half;
        bool weightsHalf = pinS.type == DataType.Half;
        bool biasHalf = pinB.type == DataType.Half;
        bool outHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(actHalf, outHalf);
        UnityEngine.Assertions.Assert.AreEqual(weightsHalf, biasHalf);
        if (actHalf && !weightsHalf)
        {
            UnityEngine.Assertions.Assert.IsTrue(false, "LogSoftmaxEndJob does not support activation as half while weights are floats.");
            return new JobHandle();
        }
        if (actHalf)
        {
            var halfJob = new LogSoftmaxEndJob_Full_Half { data = this };
            return halfJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        if (weightsHalf)
        {
            var mixedJob = new LogSoftmaxEndJob_ActAsFloat_WeightAsHalf { data = this };
            return mixedJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var floatJob = new LogSoftmaxEndJob_Full_Float { data = this };
        return floatJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct LogSoftmaxEndJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource S { get; set; } float* Sptr => S.ptrfloat;
    public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public LogSoftmaxEndJobHelper data;

    // Finishes log-softmax per element: O[i] = (X[i] - max) - log(sum), where
    // the per-slice max (B) and sum (S) come from the preceding reduce jobs.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = (i / stride) / data.reduceDim;
        int reduced = outer * stride + inner; // index into the reduced max/sum buffers

        Optr[i] = (Xptr[i] - Bptr[reduced]) - math.log(Sptr[reduced]);
    }
}
|
||||
|
||||
internal partial struct MaxPool2DJobHelper
{
    // Convenience overload: pins both tensors, then forwards to the
    // BurstTensorData overload below.
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        return ScheduleXO(Pin(X), Pin(O, uploadCache: false), arrayLength, innerBatchCount, fencingMode);
    }
    // Dispatches the float or half MaxPool2D job variant depending on the
    // pinned data type; input and output precision must match.
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool inputIsHalf = pinX.array.Type == DataType.Half;
        bool outputIsHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(inputIsHalf, outputIsHalf);
        if (!inputIsHalf)
        {
            var floatJob = new MaxPool2DJob_Full_Float { data = this };
            return floatJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var halfJob = new MaxPool2DJob_Full_Half { data = this };
        return halfJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct MaxPool2DJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public MaxPool2DJobHelper data;

    const int unrollSize = 16;

    // Computes one output row `y` of 2D max pooling for every batch and output
    // column. Per-channel maxima are gathered in a temporary accumulator
    // buffer and then written to O; kernel taps outside X are skipped.
    public void Execute(int y)
    {
        int accumulatorMemSize = data.inChannels * sizeof(float);
        float* outputAccumulators = (float*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
        for (int n = 0; n < data.outBatch; ++n)
        for (int x = 0; x < data.outWidth; ++x)
        {
            bool firstNotRejectedPixelInKernel = true;
            // gather max results in accumulators
            for (int dy = 0; dy < data.kernelHeight; ++dy)
            {
                int readY = y * data.strideY + dy - data.padY;
                if (readY < 0) continue;
                if (readY >= data.inHeight) continue;

                for (int dx = 0; dx < data.kernelWidth; ++dx)
                {
                    // BUGFIX: the horizontal offset previously subtracted data.padY;
                    // the X axis must use the horizontal padding data.padX
                    // (asymmetric padding produced shifted reads before).
                    int readX = x * data.strideX + dx - data.padX;
                    if (readX < 0) continue;
                    if (readX >= data.inWidth) continue;

                    float* dst = outputAccumulators;
                    float* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;

                    int k = 0;
                    if (firstNotRejectedPixelInKernel) // first pass, write-through
                    {
                        for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                            for (int q = 0; q < unrollSize; q++, src++, dst++)
                                *dst = *src;
                        for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                            *dst = *src;
                    }
                    else
                    {
                        for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                            for (int q = 0; q < unrollSize; q++, src++, dst++)
                                *dst = (*dst) > (*src) ? (*dst) : (*src);
                        for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                            *dst = (*dst) > (*src) ? (*dst) : (*src);
                    }
                    firstNotRejectedPixelInKernel = false;
                }
            }

            // safety net, if kernel was completely outside of X
            // fill with padding_value (0) to avoid uninitialized memory
            if (firstNotRejectedPixelInKernel)
                UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);

            { // write accumulators to memory
                int k = 0;
                float* src = outputAccumulators;
                float* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
                for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                    for (int q = 0; q < unrollSize; q++, src++, dst++)
                        *dst = *src;
                for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                    *dst = *src;
            }
        }

        UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
    }
}
|
||||
|
||||
internal partial struct AvgPool2DJobHelper
{
    // Convenience overload: pins both tensors, then forwards to the
    // BurstTensorData overload below.
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        return ScheduleXO(Pin(X), Pin(O, uploadCache: false), arrayLength, innerBatchCount, fencingMode);
    }
    // Dispatches the float or half AvgPool2D job variant depending on the
    // pinned data type; input and output precision must match.
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool inputIsHalf = pinX.array.Type == DataType.Half;
        bool outputIsHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(inputIsHalf, outputIsHalf);
        if (!inputIsHalf)
        {
            var floatJob = new AvgPool2DJob_Full_Float { data = this };
            return floatJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var halfJob = new AvgPool2DJob_Full_Half { data = this };
        return halfJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct AvgPool2DJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public AvgPool2DJobHelper data;

    const int unrollSize = 16;

    // Computes one output row `y` of 2D average pooling for every batch and
    // output column. Per-channel sums are gathered in a temporary accumulator
    // and divided by the count of in-bounds kernel taps (exclusive-pad average).
    public void Execute(int y)
    {
        int accumulatorMemSize = data.inChannels * sizeof(float);
        float* outputAccumulators = (float*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);

        for (int n = 0; n < data.outBatch; ++n)
        for (int x = 0; x < data.outWidth; ++x)
        {
            // reset accumulators & counter
            int counter = 0;
            UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);

            // gather sums in accumulators
            for (int dy = 0; dy < data.kernelHeight; ++dy)
            {
                int readY = y * data.strideY + dy - data.padY;
                if (readY < 0) continue;
                if (readY >= data.inHeight) continue;

                for (int dx = 0; dx < data.kernelWidth; ++dx)
                {
                    // BUGFIX: the horizontal offset previously subtracted data.padY;
                    // the X axis must use the horizontal padding data.padX
                    // (asymmetric padding produced shifted reads before).
                    int readX = x * data.strideX + dx - data.padX;
                    if (readX < 0) continue;
                    if (readX >= data.inWidth) continue;

                    float* dst = outputAccumulators;
                    float* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;

                    int k = 0;
                    for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                        for (int q = 0; q < unrollSize; q++, src++, dst++)
                            *dst += *src;
                    for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                        *dst += *src;
                    counter++;
                }
            }

            // safety net, if kernel was completely outside of X
            counter = math.max(1, counter);

            { // write accumulators to memory
                int k = 0;
                float invCounter = 1f / counter;
                float* src = outputAccumulators;
                float* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
                for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                    for (int q = 0; q < unrollSize; q++, src++, dst++)
                        *dst = (float)(*src * invCounter);
                for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                    *dst = (float)(*src * invCounter);
            }
        }

        UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
    }
}
|
||||
|
||||
#endregion
|
||||
#region Reduce jobs declaration for mode: _ActAsFloat_WeightAsHalf
|
||||
|
||||
|
||||
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ExpBiasReduceJob_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public ExpBiasReduceJobHelper data;

    // Mixed-precision variant: float activations, half bias. One invocation
    // computes sum_z exp(X[...,z,...] - bias) for one (outer, inner) slot.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = i / stride;
        int readBase = outer * stride * data.reduceDim + inner;
        float bias = Bptr[outer * stride + inner]; // invariant over z

        float accum = 0.0f;
        for (int z = 0; z < data.reduceDim; ++z)
            accum += math.exp(Xptr[readBase + z * stride] - bias);
        Optr[outer * stride + inner] = accum;
    }
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct SoftmaxEndJob_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public SoftmaxEndJobHelper data;

    // Mixed-precision softmax finish: O[i] = exp(X[i] - max) / sum with half
    // max (B) and sum (S) buffers from the preceding reduce jobs.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = (i / stride) / data.reduceDim;
        int reduced = outer * stride + inner; // index into the reduced max/sum buffers
        float sliceMax = Bptr[reduced];
        float sliceSum = Sptr[reduced];

        Optr[i] = math.exp(Xptr[i] - sliceMax) / sliceSum;
    }
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct LogSoftmaxEndJob_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public LogSoftmaxEndJobHelper data;

    // Mixed-precision log-softmax finish: O[i] = (X[i] - max) - log(sum) with
    // half max (B) and sum (S) buffers from the preceding reduce jobs.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = (i / stride) / data.reduceDim;
        int reduced = outer * stride + inner; // index into the reduced max/sum buffers
        float sliceMax = Bptr[reduced];
        float sliceSum = Sptr[reduced];

        Optr[i] = (Xptr[i] - sliceMax) - math.log(sliceSum);
    }
}
|
||||
|
||||
|
||||
|
||||
#endregion
|
||||
#region Reduce jobs declaration for mode: _Full_Half
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct ReduceMaxJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public ReduceMaxJobHelper data;
|
||||
|
||||
public void Execute(int i)
|
||||
{
|
||||
int x = i % data.offsetReduce;
|
||||
int y = i / data.offsetReduce;
|
||||
|
||||
float maxV = float.MinValue;
|
||||
for (int z = 0; z < data.reduceDim; ++z)
|
||||
{
|
||||
float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
|
||||
maxV = math.max(maxV, v);
|
||||
}
|
||||
Optr[y * data.offsetReduce + x] = (half)maxV;
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct ReduceSumJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public ReduceSumJobHelper data;
|
||||
|
||||
public void Execute(int i)
|
||||
{
|
||||
int x = i % data.offsetReduce;
|
||||
int y = i / data.offsetReduce;
|
||||
|
||||
float sumV = 0;
|
||||
for (int z = 0; z < data.reduceDim; ++z)
|
||||
{
|
||||
float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
|
||||
sumV += v;
|
||||
}
|
||||
Optr[y * data.offsetReduce + x] = (half)(sumV);
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct ReduceMeanJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public ReduceMeanJobHelper data;
|
||||
|
||||
public void Execute(int i)
|
||||
{
|
||||
int x = i % data.offsetReduce;
|
||||
int y = i / data.offsetReduce;
|
||||
|
||||
float sumV = 0;
|
||||
for (int z = 0; z < data.reduceDim; ++z)
|
||||
{
|
||||
float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
|
||||
sumV += v;
|
||||
}
|
||||
Optr[y * data.offsetReduce + x] = (half)(sumV / (float)data.reduceDim);
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct ExpBiasReduceJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public ExpBiasReduceJobHelper data;
|
||||
|
||||
public void Execute(int i)
|
||||
{
|
||||
int x = i % data.offsetReduce;
|
||||
int y = i / data.offsetReduce;
|
||||
|
||||
float accum = 0.0f;
|
||||
for (int z = 0; z < data.reduceDim; ++z)
|
||||
{
|
||||
float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
|
||||
float b = Bptr[y * data.offsetReduce + x];
|
||||
accum += math.exp(v - b);
|
||||
}
|
||||
Optr[y * data.offsetReduce + x] = (half)accum;
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
||||
unsafe struct SoftmaxEndJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXSBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
|
||||
public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public SoftmaxEndJobHelper data;
|
||||
|
||||
public void Execute(int i)
|
||||
{
|
||||
int x = i % data.offsetReduce;
|
||||
int y = ((i / data.offsetReduce) % data.reduceDim);
|
||||
int z = ((i / data.offsetReduce) / data.reduceDim);
|
||||
|
||||
Optr[i] = (half)(math.exp(Xptr[i] - Bptr[z * data.offsetReduce + x]) / Sptr[z * data.offsetReduce + x]);
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
||||
unsafe struct LogSoftmaxEndJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXSBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
|
||||
public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public LogSoftmaxEndJobHelper data;
|
||||
|
||||
public void Execute(int i)
|
||||
{
|
||||
int x = i % data.offsetReduce;
|
||||
int y = ((i / data.offsetReduce) % data.reduceDim);
|
||||
int z = ((i / data.offsetReduce) / data.reduceDim);
|
||||
|
||||
Optr[i] = (half)((Xptr[i] - Bptr[z * data.offsetReduce + x]) - math.log(Sptr[z * data.offsetReduce + x]));
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct MaxPool2DJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public MaxPool2DJobHelper data;
|
||||
|
||||
const int unrollSize = 16;
|
||||
public void Execute(int y)
|
||||
{
|
||||
int accumulatorMemSize = data.inChannels * sizeof(half);
|
||||
half* outputAccumulators = (half*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
|
||||
for (int n = 0; n < data.outBatch; ++n)
|
||||
for (int x = 0; x < data.outWidth; ++x)
|
||||
{
|
||||
bool firstNotRejectedPixelInKernel = true;
|
||||
// gather max results in accumulators
|
||||
for (int dy = 0; dy < data.kernelHeight; ++dy)
|
||||
{
|
||||
int readY = y * data.strideY + dy - data.padY;
|
||||
if (readY < 0) continue;
|
||||
if (readY >= data.inHeight) continue;
|
||||
|
||||
for (int dx = 0; dx < data.kernelWidth; ++dx)
|
||||
{
|
||||
int readX = x * data.strideX + dx - data.padY;
|
||||
if (readX < 0) continue;
|
||||
if (readX >= data.inWidth) continue;
|
||||
|
||||
half* dst = outputAccumulators;
|
||||
half* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
|
||||
|
||||
int k = 0;
|
||||
if (firstNotRejectedPixelInKernel) // first pass, write-through
|
||||
{
|
||||
for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++)
|
||||
*dst = *src;
|
||||
for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
|
||||
*dst = *src;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++)
|
||||
*dst = (*dst) > (*src) ? (*dst) : (*src);
|
||||
for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
|
||||
*dst = (*dst) > (*src) ? (*dst) : (*src);
|
||||
}
|
||||
firstNotRejectedPixelInKernel = false;
|
||||
}
|
||||
}
|
||||
|
||||
// safety net, if kernel was completely outside of X
|
||||
// fill with padding_value (0) to avoid uninitialized memory
|
||||
if (firstNotRejectedPixelInKernel)
|
||||
UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
|
||||
|
||||
{ // write accumulators to memory
|
||||
int k = 0;
|
||||
half* src = outputAccumulators;
|
||||
half* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
|
||||
for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++)
|
||||
*dst = *src;
|
||||
for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
|
||||
*dst = *src;
|
||||
}
|
||||
}
|
||||
|
||||
UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct AvgPool2DJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public AvgPool2DJobHelper data;
|
||||
|
||||
const int unrollSize = 16;
|
||||
public void Execute(int y)
|
||||
{
|
||||
int accumulatorMemSize = data.inChannels * sizeof(half);
|
||||
half* outputAccumulators = (half*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
|
||||
|
||||
for (int n = 0; n < data.outBatch; ++n)
|
||||
for (int x = 0; x < data.outWidth; ++x)
|
||||
{
|
||||
// reset accumulators & counter
|
||||
int counter = 0;
|
||||
UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
|
||||
|
||||
// gather sums in accumulators
|
||||
for (int dy = 0; dy < data.kernelHeight; ++dy)
|
||||
{
|
||||
int readY = y * data.strideY + dy - data.padY;
|
||||
if (readY < 0) continue;
|
||||
if (readY >= data.inHeight) continue;
|
||||
|
||||
for (int dx = 0; dx < data.kernelWidth; ++dx)
|
||||
{
|
||||
int readX = x * data.strideX + dx - data.padY;
|
||||
if (readX < 0) continue;
|
||||
if (readX >= data.inWidth) continue;
|
||||
|
||||
half* dst = outputAccumulators;
|
||||
half* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
|
||||
|
||||
int k = 0;
|
||||
for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++)
|
||||
*dst += *src;
|
||||
for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
|
||||
*dst += *src;
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
|
||||
// safety net, if kernel was completely outside of X
|
||||
counter = math.max(1, counter);
|
||||
|
||||
{ // write accumulators to memory
|
||||
int k = 0;
|
||||
float invCounter = 1f / counter;
|
||||
half* src = outputAccumulators;
|
||||
half* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
|
||||
for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++)
|
||||
*dst = (half)(*src * invCounter);
|
||||
for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
|
||||
*dst = (half)(*src * invCounter);
|
||||
}
|
||||
}
|
||||
|
||||
UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
|
||||
}
|
||||
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: f555ca3db5aa9674f9cdba4d5b715e79
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 1f9c24a13966b425fa5bfd1a4007c3f4
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: dd2cfd0651655b44ca226eb4f0b952aa
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 6bc05bfa1b9544e8a813df0c3eaab6b0
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: badd0d6a0383049eab2cb58e1d0d6fa9
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,143 +0,0 @@
|
||||
using System.Diagnostics;
|
||||
using UnityEngine;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
internal class ComputeDebugUtils
|
||||
{
|
||||
/// <summary>
|
||||
/// DEBUG ONLY: `debugKernels` allow to track out of bound read/write and assertion in kernels.
|
||||
/// When set to true be sure to define KERNEL_ASSERTS or FORCE_DEBUG in the particular kernel(s)
|
||||
/// you want to debug (see in DebugUtils.cginc).
|
||||
/// Production code should not set this to 'true' as this will significantly degrade performances.
|
||||
/// </summary>
|
||||
public static bool debugKernels = false;
|
||||
|
||||
/// <summary>
|
||||
/// DEBUG ONLY: if ComputeDebugUtils.debugKernels is true and debugger is attached, debugger will break when a kernel assertion is catch.
|
||||
/// </summary>
|
||||
public static bool breakOnAssertion = false;
|
||||
|
||||
//Keep in sync with DebugUtils.cginc KERNEL_ASSERT_CONTEXT defines
|
||||
private enum KernelAssertContext
|
||||
{
|
||||
ReadOnlyTensor_Read = 0,
|
||||
ReadWriteTensor_Read = 1,
|
||||
ReadWriteTensor_Write = 2,
|
||||
SharedTensor_Read = 3,
|
||||
Assertion = 4,
|
||||
AssertionWithValue = 5
|
||||
}
|
||||
|
||||
static ComputeDebugUtils()
|
||||
{
|
||||
string[] args = System.Environment.GetCommandLineArgs ();
|
||||
for (int i = 0; i < args.Length; i++) {
|
||||
if (args [i] == "-barracuda-debug-gpu-kernels")
|
||||
{
|
||||
debugKernels = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
[StructLayout(LayoutKind.Sequential, Pack = 1)]
|
||||
public struct KernelAssertInfo
|
||||
{
|
||||
public KernelAssertInfo(uint[] data)
|
||||
{
|
||||
UnityEngine.Debug.Assert(numUintInKernelAssertInfo == data.Length);
|
||||
UnityEngine.Debug.Assert(numUintInKernelAssertInfo == 8,
|
||||
"Please change KernelAssertInfo constructor if altering the struct.");
|
||||
lockValue = data[0];
|
||||
lineNumber = data[1];
|
||||
context = data[2];
|
||||
index = data[3];
|
||||
bufferSize = data[4];
|
||||
debugValue = data[5];
|
||||
padding1 = data[6];
|
||||
padding2 = data[7];
|
||||
}
|
||||
|
||||
public readonly uint lockValue;
|
||||
public readonly uint lineNumber;
|
||||
public readonly uint context;
|
||||
public readonly uint index;
|
||||
public readonly uint bufferSize;
|
||||
public readonly uint debugValue;
|
||||
public readonly uint padding1;
|
||||
public readonly uint padding2;
|
||||
}
|
||||
private static readonly int numUintInKernelAssertInfo = Marshal.SizeOf(typeof(KernelAssertInfo))/sizeof(uint);
|
||||
|
||||
private static ComputeBuffer kernelDebugInfo = null;
|
||||
|
||||
private static void LogAssertion(KernelAssertInfo info, string kernelName)
|
||||
{
|
||||
if (info.lockValue != 0)
|
||||
{
|
||||
string source;
|
||||
switch (info.context)
|
||||
{
|
||||
case (int) KernelAssertContext.ReadOnlyTensor_Read:
|
||||
source = $"Out of bound while Reading a ReadonlyTensor of length {info.bufferSize} at index {info.index} (at Tensor.cginc line {info.lineNumber})";
|
||||
break;
|
||||
case (int) KernelAssertContext.ReadWriteTensor_Read:
|
||||
source = $"Out of bound while Reading a ReadWriteTensor of length {info.bufferSize} at index {info.index} (at Tensor.cginc line {info.lineNumber})";
|
||||
break;
|
||||
case (int) KernelAssertContext.ReadWriteTensor_Write:
|
||||
source = $"Out of bound while Writing to a ReadWriteTensor of length {info.bufferSize} at index {info.index} (at Tensor.cginc line {info.lineNumber})";
|
||||
break;
|
||||
case (int) KernelAssertContext.SharedTensor_Read:
|
||||
source = $"Out of bound while Reading a SharedTensor of length {info.bufferSize} at index {info.index} (at Tensor.cginc line {info.lineNumber})";
|
||||
break;
|
||||
case (int) KernelAssertContext.Assertion:
|
||||
source = $"Assertion at line {info.lineNumber}";
|
||||
break;
|
||||
case (int) KernelAssertContext.AssertionWithValue:
|
||||
source = $"Assertion at line {info.lineNumber}, debug value is {info.debugValue}";
|
||||
break;
|
||||
default:
|
||||
source = "Unknown error";
|
||||
break;
|
||||
}
|
||||
|
||||
string message = $"{source} in kernel {kernelName}.";
|
||||
D.LogError(message);
|
||||
|
||||
if (breakOnAssertion)
|
||||
{
|
||||
Debugger.Break();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static void PrepareDispatch()
|
||||
{
|
||||
//Lazy alloc, will be released by GC.
|
||||
if (debugKernels && kernelDebugInfo == null)
|
||||
{
|
||||
kernelDebugInfo = new ComputeBuffer(1, numUintInKernelAssertInfo*sizeof(uint));
|
||||
}
|
||||
|
||||
if (debugKernels)
|
||||
{
|
||||
Shader.SetGlobalBuffer("KernelAssertInfoBuffer", kernelDebugInfo);
|
||||
kernelDebugInfo.SetData(new uint[numUintInKernelAssertInfo]); //TODO use a kernel to zero out the buffer to avoid a extra sync.
|
||||
}
|
||||
}
|
||||
|
||||
public static void VerifyDispatch(string kernelName)
|
||||
{
|
||||
if (debugKernels)
|
||||
{
|
||||
UnityEngine.Debug.Assert(kernelDebugInfo != null);
|
||||
var data = new uint[numUintInKernelAssertInfo];
|
||||
kernelDebugInfo.GetData(data, 0, 0, numUintInKernelAssertInfo);
|
||||
LogAssertion(new KernelAssertInfo(data), kernelName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 72797c6856a1f9642a53f0b22d65e5dc
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 1126b6ab4d825624a9135b0501f4d793
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 5fea18c74a3be4c7680b4ee28cbe1a86
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,12 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: e7398940fb81d45ee8e648e0b0f467f2
|
||||
timeCreated: 1503433373
|
||||
licenseType: Pro
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 3e48b2167ab1b453bb10a8fdac9dc531
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: c077f9591cc6d4804bc89b66a2a67c0d
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,12 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 3d3848101f7774555899e75a86641621
|
||||
timeCreated: 1506427659
|
||||
licenseType: Pro
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,93 +0,0 @@
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
/// <summary>
|
||||
/// `CompareOps` utilities
|
||||
/// </summary>
|
||||
public class CompareOpsUtils
|
||||
{
|
||||
/// <summary>
|
||||
/// `CompareOps` log level enum
|
||||
/// </summary>
|
||||
public enum LogLevel
|
||||
{
|
||||
/// <summary>
|
||||
/// Warning
|
||||
/// </summary>
|
||||
Warning,
|
||||
|
||||
/// <summary>
|
||||
/// Error
|
||||
/// </summary>
|
||||
Error
|
||||
}
|
||||
|
||||
static internal void CheckSame(Tensor X, Tensor Y, Layer.Type type, LogLevel logLevel, float epsilon=0.0001f, params Tensor[] inputs)
|
||||
{
|
||||
CheckSame(X, Y, type.ToString(), logLevel, epsilon, inputs);
|
||||
}
|
||||
|
||||
static internal void CheckSame(Tensor X, Tensor Y, string opName, LogLevel logLevel, float epsilon=0.0001f, params Tensor[] inputs)
|
||||
{
|
||||
if (!X.Approximately(Y, epsilon))
|
||||
{
|
||||
if (logLevel == LogLevel.Error)
|
||||
{
|
||||
string mainLogMessage = $"Tensors not equal after {opName}, epsilon {epsilon}";
|
||||
D.LogError(mainLogMessage);
|
||||
}
|
||||
else
|
||||
{
|
||||
string mainLogMessage = $"Tensors not equal after {opName} max error: {X.MaxDifference(Y)}";
|
||||
D.LogWarning(mainLogMessage);
|
||||
|
||||
D.Log("First: " + X.shape);
|
||||
D.Log("Second:" + Y.shape);
|
||||
|
||||
X.PrintDataPart(X.channels * X.width * 2);
|
||||
Y.PrintDataPart(Y.channels * Y.width * 2);
|
||||
|
||||
for (var i = 0; i < inputs.Length; i++)
|
||||
{
|
||||
inputs[i].PrintDataPart(32, "input_" + i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
if (X.tensorOnDevice != Y.tensorOnDevice)
|
||||
Y.Dispose();
|
||||
}
|
||||
|
||||
static internal bool CheckApproximately(Tensor X, Tensor Y, int count, float epsilon, Layer.Type type, LogLevel logLevel)
|
||||
{
|
||||
return CheckApproximately(X, Y, count, epsilon, type.ToString(), logLevel);
|
||||
}
|
||||
|
||||
static internal bool CheckApproximately(Tensor X, Tensor Y, int count, float epsilon, string opName, LogLevel logLevel)
|
||||
{
|
||||
if (!X.Approximately(Y, epsilon, count))
|
||||
{
|
||||
string mainLogMessage = $"Tensors not equal after {opName}";
|
||||
if (logLevel == LogLevel.Error)
|
||||
D.LogError(mainLogMessage);
|
||||
else
|
||||
D.LogWarning(mainLogMessage);
|
||||
|
||||
D.Log("First: " + X.shape);
|
||||
D.Log("Second:" + Y.shape);
|
||||
|
||||
if (count < 0)
|
||||
count = X.channels * X.width * 2;
|
||||
X.PrintDataPart(count);
|
||||
Y.PrintDataPart(count);
|
||||
return false;
|
||||
}
|
||||
if (X.tensorOnDevice != Y.tensorOnDevice)
|
||||
Y.Dispose();
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 5e3e5424b979b5c43997409257895b6b
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,132 +0,0 @@
|
||||
using UnityEngine;
|
||||
using UnityEngine.Rendering;
|
||||
|
||||
namespace Unity.Barracuda
|
||||
{
|
||||
/// <summary>
|
||||
/// GPU compute info
|
||||
/// </summary>
|
||||
public class ComputeInfo
|
||||
{
|
||||
/// <summary>
|
||||
/// Channel order enum
|
||||
/// </summary>
|
||||
public enum ChannelsOrder
|
||||
{
|
||||
/// <summary>
|
||||
/// Channels last
|
||||
/// </summary>
|
||||
NHWC,
|
||||
|
||||
/// <summary>
|
||||
/// Channels first
|
||||
/// </summary>
|
||||
NCHW
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// GPU supports shared memory
|
||||
/// </summary>
|
||||
public static bool supportsComputeSharedMemory = true;
|
||||
|
||||
/// <summary>
|
||||
/// GPU supports Dense 32x32 kernels
|
||||
/// </summary>
|
||||
public static bool supportsDense32x32 = true;
|
||||
|
||||
/// <summary>
|
||||
/// GPU supports Dense 64x64 kernels
|
||||
/// </summary>
|
||||
public static bool supportsDense64x64 = true;
|
||||
|
||||
/// <summary>
|
||||
/// GPU supports compute
|
||||
/// </summary>
|
||||
public static bool supportsCompute = true;
|
||||
|
||||
/// <summary>
|
||||
/// Max compute work group size supported by GPU
|
||||
/// </summary>
|
||||
public static uint maxComputeWorkGroupSize = 1024;
|
||||
|
||||
/// <summary>
|
||||
/// GPU vendor
|
||||
/// </summary>
|
||||
public static string graphicsDeviceVendor = "";
|
||||
|
||||
/// <summary>
|
||||
/// Helper for hardware selection
|
||||
/// </summary>
|
||||
public static bool IsMobileGPU() { return
|
||||
(Application.platform == RuntimePlatform.Android) ||
|
||||
(Application.platform == RuntimePlatform.IPhonePlayer) ||
|
||||
graphicsDeviceVendor.Contains("Intel");
|
||||
}
|
||||
public static bool IsiPhoneGPU() { return
|
||||
(Application.platform == RuntimePlatform.IPhonePlayer);
|
||||
}
|
||||
public static bool IsQualcommGPU() { return
|
||||
(Application.platform == RuntimePlatform.Android) && graphicsDeviceVendor.Contains("Qualcomm");
|
||||
}
|
||||
public static bool IsARMGPU() { return
|
||||
(Application.platform == RuntimePlatform.Android) && graphicsDeviceVendor.Contains("ARM");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// EXPERIMENTAL: Select Channel order of the compute backends.
|
||||
/// Production code should stick to default (NHWC) for now.
|
||||
/// </summary>
|
||||
public static ChannelsOrder channelsOrder = ChannelsOrder.NHWC;
|
||||
|
||||
/// <summary>
|
||||
/// Static constructor, initializes and caches data
|
||||
/// </summary>
|
||||
static ComputeInfo()
|
||||
{
|
||||
string[] args = System.Environment.GetCommandLineArgs ();
|
||||
for (int i = 0; i < args.Length; i++) {
|
||||
if (args [i] == "-barracuda-compute-use-nchw")
|
||||
{
|
||||
channelsOrder = ChannelsOrder.NCHW;
|
||||
}
|
||||
}
|
||||
|
||||
supportsCompute = SystemInfo.supportsComputeShaders;
|
||||
|
||||
graphicsDeviceVendor = SystemInfo.graphicsDeviceVendor;
|
||||
|
||||
// TODO switch to SystemInfo.maxComputeWorkGroupSize when we bump min spec to 2019.3
|
||||
if (Application.platform == RuntimePlatform.Android)
|
||||
{
|
||||
maxComputeWorkGroupSize = (SystemInfo.graphicsDeviceType == GraphicsDeviceType.Vulkan) ? 256u : 128u;
|
||||
|
||||
var gpuName = SystemInfo.graphicsDeviceName ?? "";
|
||||
var osName = SystemInfo.operatingSystem ?? "";
|
||||
|
||||
// Known issue with Adreno Vulkan drivers on Android 8.x
|
||||
if (gpuName.Contains("Adreno") && osName.StartsWith("Android OS 8") &&
|
||||
SystemInfo.graphicsDeviceType == GraphicsDeviceType.Vulkan)
|
||||
maxComputeWorkGroupSize = 128u;
|
||||
}
|
||||
else if (Application.platform == RuntimePlatform.IPhonePlayer || Application.platform == RuntimePlatform.tvOS)
|
||||
{
|
||||
var gpuName = SystemInfo.graphicsDeviceName;
|
||||
if (gpuName != null && gpuName.StartsWith("Apple A"))
|
||||
{
|
||||
int gpuNumber = 0, idx = "Apple A".Length;
|
||||
while (idx < gpuName.Length && '0' <= gpuName[idx] && gpuName[idx] <= '9')
|
||||
{
|
||||
gpuNumber = gpuNumber * 10 + gpuName[idx++] - '0';
|
||||
}
|
||||
|
||||
// TODO check on lower end iOS devices
|
||||
maxComputeWorkGroupSize = (gpuNumber <= 10) ? 224u : 256u;
|
||||
}
|
||||
else
|
||||
{
|
||||
maxComputeWorkGroupSize = 256u;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 96aee99fc4154e2a991ac0edd6056c2b
|
||||
timeCreated: 1558541124
|
||||
@@ -1,404 +0,0 @@
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using UnityEngine;
|
||||
using UnityEngine.Profiling;
|
||||
|
||||
namespace Unity.Barracuda
|
||||
{
|
||||
|
||||
internal enum ComputeShaderContext
|
||||
{
|
||||
Reference,
|
||||
Optimized
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Stores compute kernel cache for GPU compute backends
|
||||
/// </summary>
|
||||
public sealed class ComputeShaderSingleton
|
||||
{
|
||||
/// <summary>
|
||||
/// Enable kernel usage tracking
|
||||
/// </summary>
|
||||
public bool EnableDebug = false;
|
||||
|
||||
private static readonly ComputeShaderSingleton instance = new ComputeShaderSingleton ();
|
||||
|
||||
// Maps kernel name -> shader name
|
||||
private Dictionary<string, string> mKernelToShaderName = new Dictionary<string, string>();
|
||||
|
||||
// Maps shader name -> ComputeShader
|
||||
private Dictionary<string, ComputeShader> mShaderNameToComputeShader = new Dictionary<string, ComputeShader>();
|
||||
|
||||
private HashSet<string> mUsedOptimizedKernels = new HashSet<string>();
|
||||
private HashSet<string> mUsedReferenceKernels = new HashSet<string>();
|
||||
|
||||
private ComputeShaderSingleton()
|
||||
{
|
||||
RegisterKernels("Barracuda/TextureUtils",
|
||||
new[] {"TextureToTensor", "TensorToTextureNoLUT", "TensorToTexture3DLUT"});
|
||||
|
||||
RegisterKernels("Barracuda/ActivationA",
|
||||
new[]
|
||||
{
|
||||
"Relu_Flat", "Relu_FlatStrict", "Relu_Loop", "Relu6_Flat", "Relu6_FlatStrict", "Relu6_Loop",
|
||||
"Tanh_Flat", "Tanh_FlatStrict", "Tanh_Loop", "Swish_Flat", "Swish_FlatStrict", "Swish_Loop",
|
||||
"Sigmoid_Flat", "Sigmoid_FlatStrict", "Sigmoid_Loop", "LeakyRelu_Flat", "LeakyRelu_FlatStrict",
|
||||
"LeakyRelu_Loop", "Clip_Flat", "Clip_FlatStrict", "Clip_Loop", "PRelu_Flat", "PRelu_Loop"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/ActivationB",
|
||||
new[]
|
||||
{
|
||||
"Reciprocal_Flat", "Reciprocal_FlatStrict", "Reciprocal_Loop", "Sqrt_Flat", "Sqrt_FlatStrict",
|
||||
"Sqrt_Loop", "HardSigmoid_Flat", "HardSigmoid_FlatStrict", "HardSigmoid_Loop"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/ActivationBase",
|
||||
new string[]
|
||||
{
|
||||
"Abs_Flat", "Abs_FlatStrict", "Abs_Loop", "Neg_Flat", "Neg_FlatStrict", "Neg_Loop", "Ceil_Flat",
|
||||
"Ceil_FlatStrict", "Ceil_Loop", "Floor_Flat", "Floor_FlatStrict", "Floor_Loop",
|
||||
"Round_Flat", "Round_FlatStrict", "Round_Loop", "Selu_Flat",
|
||||
"Selu_FlatStrict", "Selu_Loop", "Softplus_Flat", "Softplus_FlatStrict", "Softplus_Loop", "Elu_Flat",
|
||||
"Elu_FlatStrict", "Elu_Loop", "Exp_Flat", "Exp_FlatStrict", "Exp_Loop", "Log_Flat",
|
||||
"Log_FlatStrict", "Log_Loop", "Pow_Flat", "Pow_FlatStrict", "Pow_Loop", "LogicalNot_Flat",
|
||||
"LogicalNot_FlatStrict", "LogicalNot_Loop", "Sign_Flat", "Sign_FlatStrict", "Sign_Loop",
|
||||
"Acos_Flat", "Acos_FlatStrict", "Acos_Loop",
|
||||
"Acosh_Flat", "Acosh_FlatStrict", "Acosh_Loop", "Asin_Flat", "Asin_FlatStrict", "Asin_Loop",
|
||||
"Asinh_Flat", "Asinh_FlatStrict", "Asinh_Loop", "Atan_Flat", "Atan_FlatStrict", "Atan_Loop",
|
||||
"Atanh_Flat", "Atanh_FlatStrict", "Atanh_Loop", "Cos_Flat", "Cos_FlatStrict", "Cos_Loop",
|
||||
"Cosh_Flat", "Cosh_FlatStrict", "Cosh_Loop", "Sin_Flat", "Sin_FlatStrict", "Sin_Loop", "Sinh_Flat",
|
||||
"Sinh_FlatStrict", "Sinh_Loop", "Tan_Flat", "Tan_FlatStrict", "Tan_Loop", "Erf_Flat", "Erf_FlatStrict", "Erf_Loop",
|
||||
"Relu_NHWC", "Relu_NCHW", "Relu_CNyx_NHWC", "Relu_Nyxc_NHWC", "Relu6_NHWC", "Relu6_NCHW", "Relu6_CNyx_NHWC",
|
||||
"Relu6_Nyxc_NHWC", "PRelu_NHWC", "PRelu_NCHW", "PRelu_CNyx2_NHWC", "Selu_NHWC", "Selu_NCHW",
|
||||
"Selu_CNyx_NHWC", "Selu_Nyxc_NHWC", "Tanh_NHWC", "Tanh_NCHW", "Tanh_CNyx_NHWC", "Tanh_Nyxc_NHWC",
|
||||
"Swish_NHWC", "Swish_NCHW", "Swish_CNyx_NHWC", "Swish_Nyxc_NHWC", "Softplus_NHWC", "Softplus_NCHW",
|
||||
"Softplus_CNyx_NHWC", "Softplus_Nyxc_NHWC", "Sigmoid_NHWC", "Sigmoid_NCHW", "Sigmoid_CNyx_NHWC",
|
||||
"Sigmoid_Nyxc_NHWC", "HardSigmoid_NHWC", "HardSigmoid_NCHW", "HardSigmoid_CNyx_NHWC", "HardSigmoid_Nyxc_NHWC",
|
||||
"Elu_NHWC", "Elu_NCHW", "Elu_CNyx_NHWC", "Elu_Nyxc_NHWC", "LeakyRelu_NHWC",
|
||||
"LeakyRelu_NCHW", "LeakyRelu_CNyx_NHWC", "LeakyRelu_Nyxc_NHWC", "Exp_NHWC", "Exp_NCHW",
|
||||
"Exp_CNyx_NHWC", "Exp_Nyxc_NHWC", "Log_NHWC", "Log_NCHW", "Log_CNyx_NHWC", "Log_Nyxc_NHWC",
|
||||
"Sqrt_NHWC", "Sqrt_NCHW", "Sqrt_CNyx_NHWC", "Sqrt_Nyxc_NHWC", "Pow_NHWC", "Pow_NCHW",
|
||||
"Pow_CNyx_NHWC", "Pow_Nyxc_NHWC",
|
||||
"Clip_NHWC", "Clip_NCHW", "Clip_CNyx_NHWC", "Clip_Nyxc_NHWC", "Acos_NHWC",
|
||||
"Acos_NCHW", "Acos_CNyx_NHWC", "Acos_Nyxc_NHWC", "Acosh_NHWC", "Acosh_NCHW", "Acosh_CNyx_NHWC",
|
||||
"Acosh_Nyxc_NHWC", "Asin_NHWC", "Asin_NCHW", "Asin_CNyx_NHWC", "Asin_Nyxc_NHWC", "Asinh_NHWC",
|
||||
"Asinh_NCHW", "Asinh_CNyx_NHWC", "Asinh_Nyxc_NHWC", "Atan_NHWC", "Atan_NCHW", "Atan_CNyx_NHWC",
|
||||
"Atan_Nyxc_NHWC", "Atanh_NHWC", "Atanh_NCHW", "Atanh_CNyx_NHWC", "Atanh_Nyxc_NHWC", "Cos_NHWC",
|
||||
"Cos_NCHW", "Cos_CNyx_NHWC", "Cos_Nyxc_NHWC", "Cosh_NHWC", "Cosh_NCHW", "Cosh_CNyx_NHWC",
|
||||
"Cosh_Nyxc_NHWC", "Sin_NHWC", "Sin_NCHW", "Sin_CNyx_NHWC", "Sin_Nyxc_NHWC", "Sinh_NHWC",
|
||||
"Sinh_NCHW", "Sinh_CNyx_NHWC", "Sinh_Nyxc_NHWC", "Tan_NHWC", "Tan_NCHW", "Tan_CNyx_NHWC",
|
||||
"Tan_Nyxc_NHWC", "Erf_NHWC", "Erf_NCHW", "Erf_CNyx_NHWC", "Erf_Nyxc_NHWC"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Broadcast_NHWC",
|
||||
new[]
|
||||
{
|
||||
"BroadcastAdd_NHWC", "BroadcastSub_NHWC", "BroadcastMul_NHWC", "BroadcastDiv_NHWC",
|
||||
"BroadcastPow_NHWC", "BroadcastMin_NHWC", "BroadcastMax_NHWC", "BroadcastMean_NHWC",
|
||||
"BroadcastGreater_NHWC", "BroadcastGreaterEqual_NHWC", "BroadcastLess_NHWC",
|
||||
"BroadcastLessEqual_NHWC", "BroadcastEqual_NHWC", "BroadcastLogicalOr_NHWC",
|
||||
"BroadcastLogicalAnd_NHWC", "BroadcastLogicalXor_NHWC", "BroadcastWhere_NHWC",
|
||||
"BroadcastDivExpSub_NHWC", "LogSoftmaxEnd_NHWC"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Broadcast_NCHW",
|
||||
new[]
|
||||
{
|
||||
"BroadcastAdd_NCHW", "BroadcastSub_NCHW", "BroadcastMul_NCHW", "BroadcastDiv_NCHW",
|
||||
"BroadcastPow_NCHW", "BroadcastMin_NCHW", "BroadcastMax_NCHW", "BroadcastMean_NCHW",
|
||||
"BroadcastGreater_NCHW", "BroadcastGreaterEqual_NCHW", "BroadcastLess_NCHW",
|
||||
"BroadcastLessEqual_NCHW", "BroadcastEqual_NCHW", "BroadcastLogicalOr_NCHW",
|
||||
"BroadcastLogicalAnd_NCHW", "BroadcastLogicalXor_NCHW", "BroadcastWhere_NCHW",
|
||||
"BroadcastDivExpSub_NCHW", "LogSoftmaxEnd_NCHW"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Conv2dA_NHWC",
|
||||
new[]
|
||||
{
|
||||
"Conv2D_NHWC", "Conv2D_RegisterBlock4x2_NHWC", "DepthwiseConv2D_NHWC",
|
||||
"Conv2DKernelKxK_StrictC16K64_T16x16_R4x4_NHWC", "Conv2DKernelKxK_T16x16_R4x4_NHWC",
|
||||
"Conv2DKernel1x1_StrictC16K64_T16x16_R4x4_NHWC"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Conv2dA_NCHW",
|
||||
new[]
|
||||
{
|
||||
"Conv2D_NCHW", "Conv2D_RegisterBlock4x2_NCHW", "DepthwiseConv2D_NCHW",
|
||||
"Conv2DKernelKxK_StrictC16K64_T16x16_R4x4_NCHW", "Conv2DKernelKxK_T16x16_R4x4_NCHW",
|
||||
"Conv2DKernel1x1_StrictC16K64_T16x16_R4x4_NCHW"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Conv2dBase",
|
||||
new[]
|
||||
{
|
||||
"Conv2DKernelKxK_StrictC16StrictK64_T8x8_R8x8_NHWC",
|
||||
"Conv2DKernelKxK_StrictC16StrictK64_T8x8_R8x8_NCHW",
|
||||
"Conv2DKernelKxK_StrictC16LaxK64_T8x8_R8x8_NHWC", "Conv2DKernelKxK_StrictC16LaxK64_T8x8_R8x8_NCHW",
|
||||
"Conv2DKernelKxK_StrictC4StrictK16_T2x32_R8x8_NHWC",
|
||||
"Conv2DKernelKxK_StrictC4StrictK16_T2x32_R8x8_NCHW",
|
||||
"Conv2DKernelKxK_LaxC4StrictK16_T2x32_R8x8_NHWC", "Conv2DKernelKxK_LaxC4StrictK16_T2x32_R8x8_NCHW",
|
||||
"Conv2DKernelKxK_StrictC4LaxK16_T2x32_R8x8_NHWC", "Conv2DKernelKxK_StrictC4LaxK16_T2x32_R8x8_NCHW",
|
||||
"Conv2DTrans_NHWC", "Conv2DTrans_NCHW", "Conv2DTrans_KernelCached_K5x5_T16x16_NHWC",
|
||||
"Conv2DTrans_KernelCached_K5x5_T16x16_NCHW", "Conv2DTransFlipKernel", "Conv2DTransPadFill_NHWC",
|
||||
"Conv2DTransPadFill_NCHW", "KernelWinograd_3x3",
|
||||
"Conv2DWinograd_2x2_Kernel3x3_StrictC8StrictK16_T16x16_R4x4_NCHW",
|
||||
"Conv2DWinograd_2x2_Kernel3x3_StrictC8LaxK16_T16x16_R4x4_NCHW"
|
||||
});
|
||||
RegisterKernels("Barracuda/Conv2dMobile",
|
||||
new[]
|
||||
{
|
||||
//"Conv2D_Default_T8x8_R4x4_NHWC",
|
||||
//"Conv2D_Default_T8x8_R4x4_NHWC",
|
||||
"Conv2D_Winograd_2x2_Kernel3x3_LDS_NHWC",
|
||||
"Conv2D_Winograd_2x2_Kernel3x3_LDS_NHWC",
|
||||
//"Conv2D_Winograd_2x2_Kernel3x3_NHWC",
|
||||
//"Conv2D_Winograd_2x2_Kernel3x3_NHWC",
|
||||
//"Conv2D_Kernel1x1_1x4x4_NHWC",
|
||||
//"Conv2D_Kernel1x1_1x4x4_NCHW",
|
||||
"Conv2D_KernelKxK_T16x16_R4x4_NHWC",
|
||||
"Conv2D_KernelKxK_T16x16_R4x4_NCHW",
|
||||
"Conv2D_Kernel1x1_T16x16_R4x4_NHWC",
|
||||
"Conv2D_Kernel1x1_T16x16_R4x4_NCHW",
|
||||
"Conv2D_KernelKxK_T8x8_R4x4_NHWC",
|
||||
"Conv2D_KernelKxK_T8x8_R4x4_NCHW",
|
||||
"Conv2D_Kernel1x1_T8x8_R4x4_NHWC",
|
||||
"Conv2D_Kernel1x1_T8x8_R4x4_NCHW",
|
||||
"DepthwiseConv2D_Default_NHWC",
|
||||
"DepthwiseConv2D_Default_NCHW",
|
||||
"DepthwiseConv2D_Winograd_2x2_Kernel3x3_NHWC",
|
||||
"DepthwiseConv2D_Winograd_2x2_Kernel3x3_NCHW",
|
||||
//"DepthwiseConv2D_Winograd_2x2_Kernel5x5_NHWC",
|
||||
//"DepthwiseConv2D_Winograd_2x2_Kernel5x5_NCHW",
|
||||
//"KernelWinograd_5x5"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Conv3d",
|
||||
new[]
|
||||
{
|
||||
"Conv3D_NHWC", "Conv3D_NCHW", "Conv3DKernelKxK_LaxC8LaxK32_T8x16_R4x4_NHWC",
|
||||
"Conv3DKernelKxK_LaxC8LaxK32_T8x16_R4x4_NCHW", "Conv3DKernelKxK_StrictC8LaxK32_T8x16_R4x4_NHWC",
|
||||
"Conv3DKernelKxK_StrictC8LaxK32_T8x16_R4x4_NCHW",
|
||||
"Conv3DKernelKxK_StrictC8StrictK32_T8x16_R4x4_NHWC",
|
||||
"Conv3DKernelKxK_StrictC8StrictK32_T8x16_R4x4_NCHW"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Dense",
|
||||
new[]
|
||||
{
|
||||
"Dense_L1Cached64", "DenseTiled16x16", "DenseTiled32x32", "DenseTiled64x64", "Dense_T8x8_R4x4",
|
||||
"Dense_T16x16_R4x4", "Dense_Tilled2x2_Cached", "Dense_Tilled4x4_Cached", "MatMulPackB0Bias",
|
||||
"Dense_V_L1Cached64"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/MatMul",
|
||||
new[]
|
||||
{
|
||||
"MultidimMatMul_T16x16_R4x4_AR3_BR2_NHWC", "MultidimMatMul_T16x16_R4x4_AR3_BR2_NCHW",
|
||||
"MultidimMatMul_T8x8_R8x8_AR3_BR2_NHWC", "MultidimMatMul_T8x8_R8x8_AR3_BR2_NCHW",
|
||||
"MultidimMatMul_L1Cached64_AR3_BR2_NHWC", "MultidimMatMul_L1Cached64_AR3_BR2_NCHW"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Dense3",
|
||||
new[]
|
||||
{
|
||||
"Dense3_T8x8_R8x8_NHWC", "Dense3_T8x8_R8x8_NCHW",
|
||||
"Dense3_T8x16_R4x4_NHWC", "Dense3_T8x16_R4x4_NCHW",
|
||||
"Dense3_L1Cached64_NHWC", "Dense3_L1Cached64_NCHW"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Generic",
|
||||
new[]
|
||||
{
|
||||
"ScaleBias_NHWC", "ScaleBias_NCHW", "ScaleBias_CNyx_NHWC", "ScaleBias_CNyx2_NHWC",
|
||||
"ScaleBias_Flat_NHWC", "ScaleBias_Flat_NCHW", "ScaleBias_Loop_NHWC", "ScaleBias_Loop_NCHW",
|
||||
"InstanceNormTail_CNyx2_NHWC", "InstanceNormTail_Flat_NHWC", "InstanceNormTail_Flat_NCHW",
|
||||
"InstanceNormTail_Loop_NHWC", "InstanceNormTail_Loop_NCHW", "Upsample2D_NHWC", "Upsample2D_NCHW",
|
||||
"UpsampleBilinear2D_NHWC", "UpsampleBilinear2D_NCHW", "UpsampleBilinear2D_2x2_NHWC",
|
||||
"UpsampleBilinear2D_2x2_NCHW", "Copy_NHWC", "Copy_NCHW", "ReshapeFromNHWCModel_Flat_NCHW",
|
||||
"ReshapeFromNHWCModel_Loop_NCHW", "TransposeToChannelFirst"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Pad",
|
||||
new[]
|
||||
{
|
||||
"Border2D_NHWC", "Border2D_NCHW", "Pad2DEdge_NHWC", "Pad2DEdge_NCHW", "Pad2DReflect_NHWC",
|
||||
"Pad2DReflect_NCHW", "Pad2DSymmetric_NHWC", "Pad2DSymmetric_NCHW"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Transpose",
|
||||
new[]
|
||||
{
|
||||
"Transpose2D_NHWC","Transpose2D_NCHW","Transpose_NHWC","Transpose_NCHW","Transpose8D"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Pool_NHWC",
|
||||
new[]
|
||||
{
|
||||
"AvgPool2D_NHWC", "MaxPool2D_NHWC", "AvgPool2DReduce_NHWC", "MaxPool2DReduce_NHWC",
|
||||
"GlobalAvgPool2D_NHWC", "GlobalMaxPool2D_NHWC", "AvgVariancePool2DReduce_NHWC",
|
||||
"GlobalAvgVariancePool2D_NHWC"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Pool_NCHW",
|
||||
new[]
|
||||
{
|
||||
"AvgPool2D_NCHW", "MaxPool2D_NCHW", "AvgPool2DReduce_NCHW", "MaxPool2DReduce_NCHW",
|
||||
"GlobalAvgPool2D_NCHW", "GlobalMaxPool2D_NCHW", "AvgVariancePool2DReduce_NCHW",
|
||||
"GlobalAvgVariancePool2D_NCHW"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Reduce",
|
||||
new[]
|
||||
{
|
||||
"PartialReduceMin", "PartialReduceMin_Loop",
|
||||
"GlobalReduceMin", "GlobalReduceMin_Loop",
|
||||
|
||||
"PartialReduceMax", "PartialReduceMax_Loop",
|
||||
"GlobalReduceMax", "GlobalReduceMax_Loop",
|
||||
|
||||
"PartialReduceSum", "PartialReduceSum_Loop",
|
||||
"GlobalReduceSum", "GlobalReduceSum_Loop",
|
||||
|
||||
"PartialReduceMean", "PartialReduceMean_Loop",
|
||||
"GlobalReduceMean", "GlobalReduceMean_Loop",
|
||||
|
||||
"PartialReduceProd", "PartialReduceProd_Loop",
|
||||
"GlobalReduceProd", "GlobalReduceProd_Loop",
|
||||
|
||||
"PartialReduceExpBias", "PartialReduceExpBias_Loop",
|
||||
"GlobalReduceExpBias", "GlobalReduceExpBias_Loop"
|
||||
});
|
||||
RegisterKernels("Barracuda/ReduceSlow",
|
||||
new[]
|
||||
{
|
||||
"ArgMax_NHWC", "ArgMax_NCHW", "ArgMin_NHWC", "ArgMin_NCHW"
|
||||
});
|
||||
}
|
||||
|
||||
private void RegisterKernels(string shaderName, string[] kernels)
|
||||
{
|
||||
foreach (var kernel in kernels)
|
||||
{
|
||||
mKernelToShaderName[kernel] = shaderName;
|
||||
}
|
||||
}
|
||||
|
||||
internal ComputeShader FindComputeShader(ComputeShaderContext ctx, string kernelName)
|
||||
{
|
||||
if (ctx == ComputeShaderContext.Optimized)
|
||||
return FindOptimizedComputeShader(kernelName);
|
||||
|
||||
return FindReferenceComputeShader(kernelName);
|
||||
}
|
||||
|
||||
private ComputeShader FindReferenceComputeShader(string kernelName)
|
||||
{
|
||||
if (EnableDebug) mUsedReferenceKernels.Add(kernelName);
|
||||
|
||||
return FindComputeShader("Barracuda/BarracudaReferenceImpl");
|
||||
}
|
||||
|
||||
private ComputeShader FindOptimizedComputeShader(string kernelName)
|
||||
{
|
||||
string shaderName = null;
|
||||
mKernelToShaderName.TryGetValue(kernelName, out shaderName);
|
||||
|
||||
// Kernel not found
|
||||
if (shaderName == null)
|
||||
return null;
|
||||
|
||||
if (EnableDebug) mUsedOptimizedKernels.Add(kernelName);
|
||||
|
||||
return FindComputeShader(shaderName);
|
||||
}
|
||||
|
||||
private ComputeShader FindComputeShader(string shaderName)
|
||||
{
|
||||
if (!mShaderNameToComputeShader.ContainsKey(shaderName))
|
||||
{
|
||||
Profiler.BeginSample(shaderName);
|
||||
mShaderNameToComputeShader[shaderName] = Resources.Load<ComputeShader>(shaderName);
|
||||
Profiler.EndSample();
|
||||
}
|
||||
|
||||
return mShaderNameToComputeShader[shaderName];
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Warmup reference kernels
|
||||
/// </summary>
|
||||
/// <param name="kernels">list of kernels to warm up</param>
|
||||
/// <returns>IEnumerator</returns>
|
||||
public IEnumerator WarmupReferenceKernels(List<string> kernels)
|
||||
{
|
||||
if (kernels?.Count > 0)
|
||||
FindComputeShader("Barracuda/BarracudaReferenceImpl");
|
||||
|
||||
yield break;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Warmup optimized kernels
|
||||
/// </summary>
|
||||
/// <param name="kernels">list of kernels to warm up</param>
|
||||
/// <returns>IEnumerator</returns>
|
||||
public IEnumerator WarmupOptimizedKernels(List<string> kernels)
|
||||
{
|
||||
foreach (var kernel in kernels)
|
||||
{
|
||||
var shader = mKernelToShaderName[kernel];
|
||||
if (!mShaderNameToComputeShader.ContainsKey(shader))
|
||||
{
|
||||
FindComputeShader(shader);
|
||||
yield return null;
|
||||
}
|
||||
}
|
||||
yield break;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Get used reference kernels list
|
||||
/// </summary>
|
||||
/// <returns>list of kernels</returns>
|
||||
public List<string> GetUsedReferenceKernels()
|
||||
{
|
||||
if (!EnableDebug)
|
||||
{
|
||||
D.LogWarning("List of used kernels was requested while ComputeShaderSingleton.EnableDebug == false");
|
||||
return null;
|
||||
}
|
||||
|
||||
return mUsedReferenceKernels.ToList();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Get used optimized kernels list
|
||||
/// </summary>
|
||||
/// <returns>list of kernels</returns>
|
||||
public List<string> GetUsedOptimizedKernels()
|
||||
{
|
||||
if (!EnableDebug)
|
||||
{
|
||||
D.LogWarning("List of used kernels was requested while ComputeShaderSingleton.EnableDebug == false");
|
||||
return null;
|
||||
}
|
||||
|
||||
return mUsedOptimizedKernels.ToList();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Singleton
|
||||
/// </summary>
|
||||
public static ComputeShaderSingleton Instance {
|
||||
get { return instance; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Check if GPU compute is supported
|
||||
/// </summary>
|
||||
public bool supported { get { return SystemInfo.supportsComputeShaders; } }
|
||||
}
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 815b6432da283415d87dabe9ef715cd9
|
||||
timeCreated: 1495620775
|
||||
licenseType: Pro
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,12 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: f7473266805a8439287433d3dac88945
|
||||
timeCreated: 1506427659
|
||||
licenseType: Pro
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,758 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq; // ToArray(), ToDictionary()
|
||||
|
||||
namespace Unity.Barracuda
|
||||
{
|
||||
internal class LinearLayerFusing
|
||||
{
|
||||
public static bool IsLayerLinear(Layer layer, Dictionary<string, Layer> constantLayers)
|
||||
{
|
||||
var constInputs = layer.inputs.Count(x => constantLayers.ContainsKey(x));
|
||||
bool allConstInputsButOne = (layer.inputs.Length - constInputs) == 1;
|
||||
|
||||
return layer.type == Layer.Type.Dense ||
|
||||
layer.type == Layer.Type.Conv2D || //TODO Conv3D
|
||||
layer.type == Layer.Type.DepthwiseConv2D ||
|
||||
layer.type == Layer.Type.ScaleBias ||
|
||||
IsLayerLinearMathOp(layer) && allConstInputsButOne;
|
||||
}
|
||||
|
||||
public static bool IsLayerLinearMathOp(Layer layer)
|
||||
{
|
||||
return layer.type == Layer.Type.Add ||
|
||||
layer.type == Layer.Type.Mul;
|
||||
}
|
||||
|
||||
public bool AreLayersFusable(Layer l0, Layer l1)
|
||||
{
|
||||
bool conditions = true;
|
||||
if ((l0.type == Layer.Type.DepthwiseConv2D) || (l0.type == Layer.Type.Conv2D) || (l0.type == Layer.Type.ScaleBias) &&
|
||||
(l1.type == Layer.Type.Conv2D) || (l1.type == Layer.Type.DepthwiseConv2D))
|
||||
conditions = conditions && !l1.pad.Any(x => x != 0); // padding breaks bias merging for non-zero bias
|
||||
if (IsLayerLinearMathOp(l0) && (l1.type == Layer.Type.Conv2D))
|
||||
{
|
||||
if (l0.datasets == null || l0.datasets.Length != 1)
|
||||
return false;
|
||||
conditions = conditions && (l0.datasets[0].shape.length == 1) ||
|
||||
(l0.datasets[0].shape.batch == 1 && l0.datasets[0].shape.height == 1 && l0.datasets[0].shape.width == 1 && l0.datasets[0].shape.channels == l1.datasets[0].shape.kernelCount);
|
||||
}
|
||||
if ((l0.type == Layer.Type.Conv2D) && IsLayerLinearMathOp(l1))
|
||||
{
|
||||
if (l1.datasets == null || l1.datasets.Length != 1)
|
||||
return false;
|
||||
conditions = conditions && (l1.datasets[0].shape.length == 1) ||
|
||||
(l1.datasets[0].shape.batch == 1 && l1.datasets[0].shape.height == 1 && l1.datasets[0].shape.width == 1 && l1.datasets[0].shape.channels == l0.datasets[0].shape.kernelCount);
|
||||
}
|
||||
|
||||
return m_LayerFusers.ContainsKey((l0.type, l1.type)) && conditions;
|
||||
}
|
||||
|
||||
private readonly BurstCPUOps m_Ops = new BurstCPUOps();
|
||||
|
||||
private readonly Dictionary<(Layer.Type, Layer.Type), Func<Layer, Layer, Layer>> m_LayerFusers =
|
||||
new Dictionary<(Layer.Type, Layer.Type), Func<Layer, Layer, Layer>>();
|
||||
|
||||
private void Add((Layer.Type, Layer.Type) layersType, Func<Layer, Layer, Layer> opFuseAction)
|
||||
{
|
||||
m_LayerFusers.Add(layersType, opFuseAction);
|
||||
}
|
||||
public LinearLayerFusing()
|
||||
{
|
||||
Add((Layer.Type.Add, Layer.Type.Add), (l0, l1) =>
|
||||
{
|
||||
Tensor bias0 = l0.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(0);
|
||||
|
||||
int rankO = Math.Max(bias0.dimensions, bias1.dimensions);
|
||||
if (l0.axis >= 0 && l1.axis >= 0) // legacy tests don't store constant rank in axis
|
||||
{
|
||||
// broadcast rule
|
||||
int rank0 = l0.axis;
|
||||
List<int> shape0 = Compiler.IRShapeInferenceHelper.ShapeInference.ShapeToOnnxLayout(bias0.shape, rank0);
|
||||
rank0 = Math.Max(rank0, 1);
|
||||
int rank1 = l1.axis;
|
||||
List<int> shape1 = Compiler.IRShapeInferenceHelper.ShapeInference.ShapeToOnnxLayout(bias1.shape, rank1);
|
||||
rank1 = Math.Max(rank1, 1);
|
||||
|
||||
rankO = Math.Max(rank0, rank1);
|
||||
for (int k = 0; k < rankO - rank0; k++)
|
||||
shape0.Insert(0, 1);
|
||||
for (int k = 0; k < rankO - rank1; k++)
|
||||
shape1.Insert(0, 1);
|
||||
|
||||
bias0 = bias0.Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.OnnxLayoutToTensorShape(shape0.ToArray()));
|
||||
bias1 = bias1.Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.OnnxLayoutToTensorShape(shape1.ToArray()));
|
||||
}
|
||||
|
||||
TensorShape biasShape = TensorExtensions.MaxShape(new [] { bias0, bias1 });
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = new Layer.DataSet[1];
|
||||
lmerged.datasets[0].name = l0.datasets[0].name;
|
||||
lmerged.datasets[0].shape = biasShape;
|
||||
lmerged.datasets[0].itemSizeInBytes = 4;
|
||||
lmerged.datasets[0].length = biasShape.length;
|
||||
lmerged.datasets[0].offset = 0;
|
||||
lmerged.weights = new BarracudaArray(biasShape.length);
|
||||
lmerged.axis = rankO;
|
||||
|
||||
Tensor bias = m_Ops.Add(new [] { bias0, bias1 });
|
||||
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, 0, bias.length);
|
||||
|
||||
bias.Dispose();
|
||||
bias0.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Mul, Layer.Type.Mul), (l0, l1) =>
|
||||
{
|
||||
Tensor scale0 = l0.DataSetToTensor(0);
|
||||
Tensor scale1 = l1.DataSetToTensor(0);
|
||||
|
||||
int rankO = Math.Max(scale0.dimensions, scale1.dimensions);
|
||||
if (l0.axis >= 0 && l1.axis >= 0) // legacy tests don't store constant rank in axis
|
||||
{
|
||||
// broadcast rule
|
||||
int rank0 = l0.axis;
|
||||
List<int> shape0 = Compiler.IRShapeInferenceHelper.ShapeInference.ShapeToOnnxLayout(scale0.shape, rank0);
|
||||
rank0 = Math.Max(rank0, 1);
|
||||
int rank1 = l1.axis;
|
||||
List<int> shape1 = Compiler.IRShapeInferenceHelper.ShapeInference.ShapeToOnnxLayout(scale1.shape, rank1);
|
||||
rank1 = Math.Max(rank1, 1);
|
||||
|
||||
rankO = Math.Max(rank0, rank1);
|
||||
for (int k = 0; k < rankO - rank0; k++)
|
||||
shape0.Insert(0, 1);
|
||||
for (int k = 0; k < rankO - rank1; k++)
|
||||
shape1.Insert(0, 1);
|
||||
|
||||
scale0 = scale0.Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.OnnxLayoutToTensorShape(shape0.ToArray()));
|
||||
scale1 = scale1.Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.OnnxLayoutToTensorShape(shape1.ToArray()));
|
||||
}
|
||||
|
||||
TensorShape biasShape = TensorExtensions.MaxShape(new[] { scale0, scale1 });
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = new Layer.DataSet[1];
|
||||
lmerged.datasets[0].name = l0.datasets[0].name;
|
||||
lmerged.datasets[0].shape = biasShape;
|
||||
lmerged.datasets[0].itemSizeInBytes = 4;
|
||||
lmerged.datasets[0].length = biasShape.length;
|
||||
lmerged.datasets[0].offset = 0;
|
||||
lmerged.weights = new BarracudaArray(biasShape.length);
|
||||
lmerged.axis = rankO;
|
||||
|
||||
Tensor bias = m_Ops.Mul(new[] { scale0, scale1 });
|
||||
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, 0, bias.length);
|
||||
|
||||
bias.Dispose();
|
||||
scale0.Dispose();
|
||||
scale1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.ScaleBias, Layer.Type.ScaleBias), (l0, l1) =>
|
||||
{
|
||||
Tensor scale0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor scale1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l0.datasets;
|
||||
lmerged.weights = new BarracudaArray(l0.weights.Length);
|
||||
|
||||
// s1*(s0*x + b0)+b1 = s1*s0*x + s1*b0+b1
|
||||
Tensor scale = m_Ops.Mul(new [] { scale1, scale0});
|
||||
Tensor bias = m_Ops.ScaleBias(bias0, scale1, bias1);
|
||||
|
||||
BarracudaArray.Copy(scale.ToReadOnlyArray(), 0, lmerged.weights, 0, scale.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, scale.length, bias.length);
|
||||
|
||||
scale.Dispose();
|
||||
bias.Dispose();
|
||||
scale0.Dispose();
|
||||
bias0.Dispose();
|
||||
scale1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.ScaleBias, Layer.Type.Dense), (l0, l1) =>
|
||||
{
|
||||
Tensor scale0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor weights1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l1.type);
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l1.datasets;
|
||||
lmerged.weights = new BarracudaArray(l1.weights.Length);
|
||||
|
||||
// b = W1 x b0 + b1
|
||||
Tensor bias = m_Ops.Dense(bias0, weights1, bias1, Layer.FusedActivation.None);
|
||||
|
||||
// W = W1 x s
|
||||
Tensor weights = new Tensor(weights1.shape);
|
||||
for (int x = 0; x < weights1.flatWidth; ++x)
|
||||
for (int i = 0; i < weights1.flatHeight; ++i)
|
||||
{
|
||||
int c = i % bias0.length;
|
||||
float gamma = scale0[c];
|
||||
|
||||
float w = weights1[i, x];
|
||||
weights[i, x] = w * gamma;
|
||||
}
|
||||
|
||||
BarracudaArray.Copy(weights.ToReadOnlyArray(), 0, lmerged.weights, 0, weights.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, weights.length, bias.length);
|
||||
|
||||
bias.Dispose();
|
||||
weights.Dispose();
|
||||
scale0.Dispose();
|
||||
bias0.Dispose();
|
||||
weights1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Dense, Layer.Type.ScaleBias), (l0, l1) =>
|
||||
{
|
||||
Tensor weights0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor scale1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l0.datasets;
|
||||
lmerged.weights = new BarracudaArray(l0.weights.Length);
|
||||
|
||||
// w = s1*w0
|
||||
Tensor weights = m_Ops.Mul(new [] { scale1, weights0 });
|
||||
// b = s1*b0+b1
|
||||
Tensor bias = m_Ops.ScaleBias(bias0, scale1, bias1);
|
||||
|
||||
BarracudaArray.Copy(weights.ToReadOnlyArray(), 0, lmerged.weights, 0, weights.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, weights.length, bias.length);
|
||||
|
||||
weights.Dispose();
|
||||
bias.Dispose();
|
||||
weights0.Dispose();
|
||||
bias0.Dispose();
|
||||
scale1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Mul, Layer.Type.Conv2D), (l0, l1) =>
|
||||
{
|
||||
Tensor scale0 = l0.DataSetToTensor(0);
|
||||
|
||||
Tensor kernel1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l1.type);
|
||||
lmerged.pad = l1.pad;
|
||||
lmerged.stride = l1.stride;
|
||||
lmerged.pool = l1.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l1.datasets;
|
||||
lmerged.weights = new BarracudaArray(l1.weights.Length);
|
||||
|
||||
// k = k * s
|
||||
Tensor kernel = new Tensor(kernel1.shape);
|
||||
|
||||
for (int y = 0; y < kernel1.kernelHeight; ++y)
|
||||
for (int x = 0; x < kernel1.kernelWidth; ++x)
|
||||
for (int c = 0; c < kernel1.kernelDepth; ++c)
|
||||
{
|
||||
float gamma = scale0[scale0.IndexWithBroadcast(0, 0, 0, c)];
|
||||
for (int k = 0; k < kernel1.kernelCount; ++k)
|
||||
{
|
||||
float w = kernel1[y, x, c, k];
|
||||
kernel[y, x, c, k] = gamma * w;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
|
||||
BarracudaArray.Copy(bias1.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias1.length);
|
||||
|
||||
kernel.Dispose();
|
||||
scale0.Dispose();
|
||||
kernel1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Conv2D, Layer.Type.Mul), (l0, l1) =>
|
||||
{
|
||||
Tensor kernel0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor scale1 = l1.DataSetToTensor(0);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.pad = l0.pad;
|
||||
lmerged.stride = l0.stride;
|
||||
lmerged.pool = l0.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l0.datasets;
|
||||
lmerged.weights = new BarracudaArray(l0.weights.Length);
|
||||
|
||||
// k = s1*k0
|
||||
Tensor kernel = m_Ops.Mul(new[] { scale1, kernel0 });
|
||||
// b = s1*b0
|
||||
Tensor bias = m_Ops.Mul(new[] { scale1, bias0 });
|
||||
|
||||
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
|
||||
|
||||
kernel.Dispose();
|
||||
bias.Dispose();
|
||||
kernel0.Dispose();
|
||||
bias0.Dispose();
|
||||
scale1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Add, Layer.Type.Conv2D), (l0, l1) =>
|
||||
{
|
||||
Tensor bias0 = l0.DataSetToTensor(0);
|
||||
|
||||
Tensor kernel1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l1.type);
|
||||
lmerged.pad = l1.pad;
|
||||
lmerged.stride = l1.stride;
|
||||
lmerged.pool = l1.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l1.datasets;
|
||||
lmerged.weights = new BarracudaArray(l1.weights.Length);
|
||||
|
||||
// k = k
|
||||
// b = Sum_k[wk * beta] + b
|
||||
Tensor bias = new Tensor(bias1.shape, bias1.ToReadOnlyArray());
|
||||
for (int y = 0; y < kernel1.kernelHeight; ++y)
|
||||
for (int x = 0; x < kernel1.kernelWidth; ++x)
|
||||
for (int c = 0; c < kernel1.kernelDepth; ++c)
|
||||
{
|
||||
float beta = bias0[bias0.IndexWithBroadcast(0, 0, 0, c)];
|
||||
for (int k = 0; k < kernel1.kernelCount; ++k)
|
||||
{
|
||||
float w = kernel1[y, x, c, k];
|
||||
bias[k] += w * beta;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
BarracudaArray.Copy(kernel1.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel1.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel1.length, bias.length);
|
||||
|
||||
bias.Dispose();
|
||||
bias0.Dispose();
|
||||
kernel1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Conv2D, Layer.Type.Add), (l0, l1) =>
|
||||
{
|
||||
Tensor kernel0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor bias1 = l1.DataSetToTensor(0);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.pad = l0.pad;
|
||||
lmerged.stride = l0.stride;
|
||||
lmerged.pool = l0.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l0.datasets;
|
||||
lmerged.weights = new BarracudaArray(l0.weights.Length);
|
||||
|
||||
// b = b0+b1
|
||||
Tensor bias = m_Ops.Add( new [] { bias0, bias1 });
|
||||
|
||||
BarracudaArray.Copy(kernel0.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel0.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel0.length, bias.length);
|
||||
|
||||
bias.Dispose();
|
||||
kernel0.Dispose();
|
||||
bias0.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Conv2D, Layer.Type.ScaleBias), (l0, l1) =>
|
||||
{
|
||||
Tensor kernel0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor scale1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.pad = l0.pad;
|
||||
lmerged.stride = l0.stride;
|
||||
lmerged.pool = l0.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l0.datasets;
|
||||
lmerged.weights = new BarracudaArray(l0.weights.Length);
|
||||
|
||||
// k = s1*k0
|
||||
Tensor kernel = m_Ops.Mul(new[] { scale1, kernel0 });
|
||||
// b = s1*b0+b1
|
||||
Tensor bias = m_Ops.ScaleBias(bias0, scale1, bias1);
|
||||
|
||||
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
|
||||
|
||||
kernel.Dispose();
|
||||
bias.Dispose();
|
||||
kernel0.Dispose();
|
||||
bias0.Dispose();
|
||||
scale1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.ScaleBias, Layer.Type.Conv2D), (l0, l1) =>
|
||||
{
|
||||
Tensor scale0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor kernel1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l1.type);
|
||||
lmerged.pad = l1.pad;
|
||||
lmerged.stride = l1.stride;
|
||||
lmerged.pool = l1.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l1.datasets;
|
||||
lmerged.weights = new BarracudaArray(l1.weights.Length);
|
||||
|
||||
// k = k * s
|
||||
Tensor kernel = new Tensor(kernel1.shape);
|
||||
// b = Sum_k[wk * beta] + b
|
||||
Tensor bias = new Tensor(bias1.shape, bias1.ToReadOnlyArray());
|
||||
for (int y = 0; y < kernel1.kernelHeight; ++y)
|
||||
for (int x = 0; x < kernel1.kernelWidth; ++x)
|
||||
for (int c = 0; c < kernel1.kernelDepth; ++c)
|
||||
{
|
||||
float beta = bias0[0, 0, 0, c];
|
||||
float gamma = scale0[0, 0, 0, c];
|
||||
for (int k = 0; k < kernel1.kernelCount; ++k)
|
||||
{
|
||||
float w = kernel1[y, x, c, k];
|
||||
kernel[y, x, c, k] = gamma * w;
|
||||
bias[k] += w * beta;
|
||||
}
|
||||
}
|
||||
|
||||
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
|
||||
|
||||
kernel.Dispose();
|
||||
bias.Dispose();
|
||||
scale0.Dispose();
|
||||
bias0.Dispose();
|
||||
kernel1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.DepthwiseConv2D, Layer.Type.ScaleBias), (l0, l1) =>
|
||||
{
|
||||
Tensor kernel0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor scale1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.pad = l0.pad;
|
||||
lmerged.stride = l0.stride;
|
||||
lmerged.pool = l0.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l0.datasets;
|
||||
lmerged.weights = new BarracudaArray(l0.weights.Length);
|
||||
|
||||
// k = s1*k0
|
||||
Tensor kernel = m_Ops.Mul(new[] { scale1, kernel0 });
|
||||
// b = s1*b0+b1
|
||||
Tensor bias = m_Ops.ScaleBias(bias0, scale1, bias1);
|
||||
|
||||
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
|
||||
|
||||
kernel.Dispose();
|
||||
bias.Dispose();
|
||||
kernel0.Dispose();
|
||||
bias0.Dispose();
|
||||
scale1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.ScaleBias, Layer.Type.DepthwiseConv2D), (l0, l1) =>
|
||||
{
|
||||
Tensor scale0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor kernel1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l1.type);
|
||||
lmerged.pad = l1.pad;
|
||||
lmerged.stride = l1.stride;
|
||||
lmerged.pool = l1.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l1.datasets;
|
||||
lmerged.weights = new BarracudaArray(l1.weights.Length);
|
||||
|
||||
// k = k * s
|
||||
Tensor kernel = new Tensor(kernel1.shape);
|
||||
// b = Sum_k[wk * beta] + b
|
||||
Tensor bias = new Tensor(bias1.shape);
|
||||
for (int k = 0; k < kernel1.kernelCount; ++k)
|
||||
{
|
||||
float b = bias1[k];
|
||||
|
||||
float beta = bias0[0, 0, 0, k];
|
||||
float gamma = scale0[0, 0, 0, k];
|
||||
for (int y = 0; y < kernel1.kernelHeight; ++y)
|
||||
for (int x = 0; x < kernel1.kernelWidth; ++x)
|
||||
{
|
||||
float w = kernel1[y, x, 0, k];
|
||||
kernel[y, x, 0, k] = gamma * w;
|
||||
b += w * beta;
|
||||
}
|
||||
|
||||
bias[k] = b;
|
||||
}
|
||||
|
||||
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
|
||||
|
||||
kernel.Dispose();
|
||||
bias.Dispose();
|
||||
scale0.Dispose();
|
||||
bias0.Dispose();
|
||||
kernel1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Dense, Layer.Type.Dense), (l0, l1) =>
|
||||
{
|
||||
var weights0 = l0.DataSetToTensor(0);
|
||||
var bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
var weights1 = l1.DataSetToTensor(0);
|
||||
var bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
TensorShape weightsShape = new TensorShape(weights0.shape.flatHeight, weights1.shape.flatWidth);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l1.type);
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = new Layer.DataSet[2];
|
||||
lmerged.datasets[0].name = weights0.name;
|
||||
lmerged.datasets[0].shape = weightsShape;
|
||||
lmerged.datasets[0].itemSizeInBytes = 4;
|
||||
lmerged.datasets[0].length = weightsShape.length;
|
||||
lmerged.datasets[0].offset = 0;
|
||||
|
||||
lmerged.datasets[1].name = bias0.name;
|
||||
lmerged.datasets[1].shape = bias1.shape;
|
||||
lmerged.datasets[1].itemSizeInBytes = 4;
|
||||
lmerged.datasets[1].length = bias1.length;
|
||||
lmerged.datasets[1].offset = weightsShape.length;
|
||||
lmerged.weights = new BarracudaArray(weightsShape.length + bias1.shape.length);
|
||||
|
||||
// W = W1 x W0
|
||||
Tensor weights = m_Ops.MatMul(weights0, false, weights1, false);
|
||||
// b = W1 x b0 + b1
|
||||
Tensor bias = m_Ops.Dense(bias0, weights1, bias1, Layer.FusedActivation.None);
|
||||
|
||||
BarracudaArray.Copy(weights.ToReadOnlyArray(), 0, lmerged.weights, 0, weights.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, weights.length, bias.length);
|
||||
|
||||
weights.Dispose();
|
||||
bias.Dispose();
|
||||
weights0.Dispose();
|
||||
bias0.Dispose();
|
||||
weights1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Conv2D, Layer.Type.Conv2D), (l0, l1) =>
|
||||
{
|
||||
Tensor kernel0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
var strides0 = l0.stride;
|
||||
var pad0 = l0.pad;
|
||||
|
||||
Tensor kernel1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
var strides1 = l1.stride;
|
||||
var pad1 = l1.pad;
|
||||
|
||||
|
||||
// Y = (X * K0 + b0) * K1 + b1
|
||||
// = (X * K0) * K1 + (b0 * K1 + b1)
|
||||
// = X * (K0 * k1) + (b0 * K1 + b1)
|
||||
// = X * K2 + b2
|
||||
// K2 dimensions:
|
||||
// kernelDepth and kernelCount:
|
||||
// X = [n, . , . , c0], K0 = [ . , . , c0, d0] , K1 = [ . , . , c1, d1]
|
||||
// => Km = [ x , x , c0, d1]
|
||||
// kernelHeight and kernelHeight:
|
||||
// Y = (((X + 2*p0 - k0)/s0 + 1) + 2*p1 - k1)/s1 + 1
|
||||
// = ((X + 2*p0 - k0 + s0 + 2*p1*s0 - k1*s0)/s0)/s1 + 1
|
||||
// = (X + 2*p0 - k0 + s0 + 2*p1*s0 - k1*s0) / (s0*s1) + 1
|
||||
// = (X + 2*(p0+p1*s0) - (k0 + k1*s0 - s0)) / (s0*s1) + 1
|
||||
// => pad = p0 + p1*s0
|
||||
// kernel = k0 + s0*(k1 - 1)
|
||||
// stride = s0*s1
|
||||
TensorShape kernelShape = new TensorShape(kernel0.kernelHeight + (kernel1.kernelHeight - 1) * strides0[0],
|
||||
kernel0.kernelWidth + (kernel1.kernelWidth - 1) * strides0[1],
|
||||
kernel0.kernelDepth, kernel1.kernelCount);
|
||||
|
||||
var pad = new int[4] { pad0[0] + pad1[0] * strides0[0], pad0[1] + pad1[1] * strides0[1],
|
||||
pad0[2] + pad1[2] * strides0[0], pad0[3] + pad1[3] * strides0[1] };
|
||||
var strides = new int[2] { strides0[0] * strides1[0], strides0[1] * strides1[1] };
|
||||
|
||||
TensorShape biasShape = bias1.shape;
|
||||
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l1.type);
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.stride = strides;
|
||||
lmerged.pad = pad;
|
||||
lmerged.datasets = new Layer.DataSet[2];
|
||||
lmerged.datasets[0].name = kernel0.name;
|
||||
lmerged.datasets[0].shape = kernelShape;
|
||||
lmerged.datasets[0].itemSizeInBytes = 4;
|
||||
lmerged.datasets[0].length = kernelShape.length;
|
||||
lmerged.datasets[0].offset = 0;
|
||||
|
||||
lmerged.datasets[1].name = bias0.name;
|
||||
lmerged.datasets[1].shape = biasShape;
|
||||
lmerged.datasets[1].itemSizeInBytes = 4;
|
||||
lmerged.datasets[1].length = biasShape.length;
|
||||
lmerged.datasets[1].offset = kernelShape.length;
|
||||
lmerged.weights = new BarracudaArray(kernelShape.length + biasShape.length);
|
||||
|
||||
|
||||
Tensor kernel = new Tensor(kernelShape); // 0-filled by default
|
||||
// |x0 x1 x3 | x4 |y0 y1| y2 |z0| z1
|
||||
// |x5 x6 x7 | x8 * k0 k1 => |y3 y4| y5 * l0 l1 => z2 z3
|
||||
// |x9 x10 x11| x12 k2 k3 y6 y7 y8 l2 l3
|
||||
// x13 x14 x15 x13
|
||||
//
|
||||
// in order to compute z0, we need to do 2 convolutions
|
||||
//
|
||||
// |y0 y1/
|
||||
// | |x0 /x1| x3/ |
|
||||
// | |x5 /x6| x7/ |
|
||||
// | x9 x10 x11 |
|
||||
//
|
||||
// |x0 x1| is convolved with K and then * l0
|
||||
// |x5 x6|
|
||||
// /x1 x3/ is convolved with K and then * l1
|
||||
// /x6 x7/
|
||||
//
|
||||
// by unwrapping the whole process
|
||||
// z0 = [x0 * k0 * l0 + x1 * k1 * l0 + ....] + [x1 * k1 * l1 + ....]
|
||||
// l0 * y0-block l1 * y1-block
|
||||
// resulting conv kernel is the following
|
||||
//
|
||||
// z0 = | x0 x1 x3 | * | [k0*l0] [k1*l0 + k1*l1] [l2*l1] |
|
||||
// | x5 x6 x7 | | [k2*l0 + k2*l2] [k3*l0 + k2*l1 + k1*l2 + k0*l3] [k3*l1 + k3*l3] |
|
||||
// | x9 x10 x11 | | [k2*l2] [k2*l0 + k2*l3 [k3*l3] |
|
||||
Tensor kernel0T = m_Ops.Transpose(kernel0, new[] { 2, 0, 1, 3 });
|
||||
Tensor emptyB = new Tensor(new TensorShape(1, 1, 1, kernel.kernelCount));
|
||||
for (int y1 = 0; y1 < kernel1.kernelHeight; ++y1)
|
||||
for (int x1 = 0; x1 < kernel1.kernelWidth; ++x1)
|
||||
{
|
||||
Tensor kernel1XY = m_Ops.StridedSlice(kernel1, new[] { y1, x1, 0, 0 }, new[] { y1 + 1, x1 + 1, kernel1.kernelDepth, kernel.kernelCount }, new[] { 1, 1, 1, 1 });
|
||||
Tensor kernelk = m_Ops.Conv2D(kernel0T, kernel1XY, emptyB, new[] { 1, 1 }, new[] { 0, 0, 0, 0 }, Layer.FusedActivation.None);
|
||||
|
||||
for (int y0 = 0; y0 < kernel0.kernelHeight; ++y0)
|
||||
for (int x0 = 0; x0 < kernel0.kernelWidth; ++x0)
|
||||
{
|
||||
int ox = x0 + strides0[0] * x1;
|
||||
int oy = y0 + strides0[1] * y1;
|
||||
for (int c = 0; c < kernel.kernelDepth; ++c)
|
||||
for (int k = 0; k < kernel.kernelCount; ++k)
|
||||
{
|
||||
kernel[oy, ox, c, k] += kernelk[c,y0,x0,k];
|
||||
}
|
||||
}
|
||||
kernel1XY.Dispose();
|
||||
kernelk.Dispose();
|
||||
}
|
||||
|
||||
// |y0 y1| * l0 l1 + bl = z0
|
||||
// |y3 y4| l2 l3
|
||||
// y0 = Sum_k() + bk, y1 = Sum_k() + bk
|
||||
// y2 = Sum_k() + bk, y2 = Sum_k() + bk
|
||||
//
|
||||
// moving b from the convolution process leads
|
||||
// z0 = | x0 x1 x3 | * M + bl + l0*bk + l1*bk + l2*bk + l3*bk
|
||||
// | x5 x6 x7 |
|
||||
// | x9 x10 x11 |
|
||||
// N.B: as you can see this breaks if there is some amount of zero-padding to the second conv layer
|
||||
// because some weights of L will be * 0, essentialy masking out bk
|
||||
Tensor bias = new Tensor(biasShape, bias1.ToReadOnlyArray());
|
||||
for (int x1 = 0; x1 < kernel1.kernelWidth; ++x1)
|
||||
for (int y1 = 0; y1 < kernel1.kernelHeight; ++y1)
|
||||
for (int c = 0; c < kernel1.kernelDepth; ++c)
|
||||
{
|
||||
float bias0c = bias0[c];
|
||||
for (var k = 0; k < kernel.kernelCount; ++k)
|
||||
{
|
||||
bias[k] += kernel1[y1, x1, c, k] * bias0c;
|
||||
}
|
||||
}
|
||||
|
||||
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
|
||||
|
||||
kernel0T.Dispose();
|
||||
emptyB.Dispose();
|
||||
kernel.Dispose();
|
||||
bias.Dispose();
|
||||
kernel0.Dispose();
|
||||
bias0.Dispose();
|
||||
kernel1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
}
|
||||
|
||||
public Layer FuseLayers(Layer l0, Layer l1)
|
||||
{
|
||||
var fnFuse = m_LayerFusers[(l0.type, l1.type)];
|
||||
return fnFuse(l0, l1);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: b940ee731fee3c3478e90a161a7a7288
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,259 +0,0 @@
|
||||
using System;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Threading.Tasks;
|
||||
using UnityEngine.Assertions;
|
||||
using UnityEngine.Scripting;
|
||||
|
||||
using Unity.Collections;
|
||||
using Unity.Collections.LowLevel.Unsafe;
|
||||
using Unity.Jobs;
|
||||
|
||||
[assembly: InternalsVisibleTo("Unity.Barracuda.BurstBLAS")]
|
||||
|
||||
namespace Unity.Barracuda
|
||||
{
|
||||
[Preserve]
|
||||
internal class CSharpBLAS : BLASPlugin
|
||||
{
|
||||
public bool IsNative()
|
||||
{
|
||||
return false; // reference implementation
|
||||
}
|
||||
|
||||
public bool IsCurrentPlatformSupported()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
public unsafe void SGEMM(float* Ap, int AM, int AN, float* Bp, int BM, int BN, float* Cp, int CM, int CN, int bs,
|
||||
bool transposeA = false, bool transposeB = false)
|
||||
{
|
||||
MatrixUtils.MultiplyBlockUnrollHx8ParallelWithPadding(Ap, AM, AN, Bp, BM, BN, Cp, CM, CN, bs,
|
||||
transposeA, transposeB);
|
||||
}
|
||||
|
||||
public unsafe JobHandle ScheduleSGEMM(JobHandle dependsOn,
|
||||
float* Ap, int AM, int AN, float* Bp, int BM, int BN, float* Cp, int CM, int CN,
|
||||
int bs,
|
||||
bool transposeA = false, bool transposeB = false)
|
||||
{
|
||||
var job = new SGEMMJob();
|
||||
job.Ap = Ap; job.AM = AM; job.AN = AN;
|
||||
job.Bp = Bp; job.BM = BM; job.BN = BN;
|
||||
job.Cp = Cp; job.CM = CM; job.CN = CN;
|
||||
job.transposeA = transposeA;
|
||||
job.transposeB = transposeB;
|
||||
job.bs = bs;
|
||||
return job.Schedule(dependsOn);
|
||||
}
|
||||
|
||||
unsafe struct SGEMMJob : IJob
|
||||
{
|
||||
[NativeDisableUnsafePtrRestriction][ReadOnly] public unsafe float* Ap;
|
||||
public int AM, AN;
|
||||
[NativeDisableUnsafePtrRestriction][ReadOnly] public unsafe float* Bp;
|
||||
public int BM, BN;
|
||||
[NativeDisableUnsafePtrRestriction] public unsafe float* Cp;
|
||||
public int CM, CN;
|
||||
public int bs;
|
||||
public bool transposeA;
|
||||
public bool transposeB;
|
||||
|
||||
public void Execute()
|
||||
{
|
||||
MatrixUtils.MultiplyBlockUnrollHx8ParallelWithPadding(
|
||||
Ap, AM, AN,
|
||||
Bp, BM, BN,
|
||||
Cp, CM, CN, bs,
|
||||
transposeA, transposeB);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
internal class MatrixUtils
|
||||
{
|
||||
public static unsafe void CopyBlockWithPadding(float* matrixIn, int row, int M, int col, int N, float[] blockOut, int bs, bool transpose = false)
|
||||
{
|
||||
Array.Clear(blockOut, 0, bs * bs);
|
||||
|
||||
var rowFinal = Math.Min(row + bs, M);
|
||||
var count = Math.Min(col + bs, N) - col;
|
||||
|
||||
// @TODO: measure which one is better - sequential access over matrix memory or blockOut cache
|
||||
if (transpose)
|
||||
{
|
||||
// sequential access over blockOut, strided over matrixIn
|
||||
//for (var i = row; i < rowFinal; i++)
|
||||
// for (var j = 0; j < count; ++j)
|
||||
// blockOut[(i - row) * bs + j] = matrixIn[i + (col + j) * N];
|
||||
|
||||
// sequential access over matrixIn, strided over blockOut
|
||||
for (var j = 0; j < count; ++j)
|
||||
for (var i = row; i < rowFinal; i++)
|
||||
blockOut[(i - row) * bs + j] = matrixIn[i + (col + j) * M];
|
||||
}
|
||||
else
|
||||
for (var i = row; i < rowFinal; i++)
|
||||
{
|
||||
//D.Log(string.Format("Copy[{3}] {0} -> {1} {2}", i * M + col, (i - row) * bs, count, i));
|
||||
Marshal.Copy((IntPtr)(matrixIn + i * N + col), blockOut, (i - row) * bs, count);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static unsafe void ClearFloatArray(float* arr, float val, int count)
|
||||
{
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
arr[i] = val;
|
||||
}
|
||||
}
|
||||
|
||||
public static unsafe void CopyFloatArray(float* from, float* to, int count)
|
||||
{
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
to[i] = from[i];
|
||||
}
|
||||
}
|
||||
|
||||
public static unsafe void CopyBlockWithPadding(float* matrixIn, int row, int M, int col, int N, float* blockOut, int bs, bool transpose = false)
|
||||
{
|
||||
ClearFloatArray(blockOut, 0, bs * bs);
|
||||
|
||||
var rowFinal = Math.Min(row + bs, M);
|
||||
var count = Math.Min(col + bs, N) - col;
|
||||
|
||||
// @TODO: measure which one is better - sequential access over matrix memory or blockOut cache
|
||||
if (transpose)
|
||||
{
|
||||
// sequential access over blockOut, strided over matrixIn
|
||||
//for (var i = row; i < rowFinal; i++)
|
||||
// for (var j = 0; j < count; ++j)
|
||||
// blockOut[(i - row) * bs + j] = matrixIn[i + (col + j) * N];
|
||||
|
||||
// sequential access over matrixIn, strided over blockOut
|
||||
for (var j = 0; j < count; ++j)
|
||||
for (var i = row; i < rowFinal; i++)
|
||||
blockOut[(i - row) * bs + j] = matrixIn[i + (col + j) * M];
|
||||
}
|
||||
else
|
||||
for (var i = row; i < rowFinal; i++)
|
||||
{
|
||||
//D.Log(string.Format("Copy[{3}] {0} -> {1} {2}", i * M + col, (i - row) * bs, count, i));
|
||||
CopyFloatArray(matrixIn + i * N + col, blockOut + (i - row) * bs, count);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static unsafe void CopyBlockWithPadding(float[] blockOut, float* matrixIn, int row, int M, int col, int N, int bs)
|
||||
{
|
||||
var rowFinal = Math.Min(row + bs, M);
|
||||
var count = Math.Min(col + bs, N) - col;
|
||||
|
||||
for (var i = row; i < rowFinal; i++)
|
||||
Marshal.Copy(blockOut, (i - row) * bs, (IntPtr)(matrixIn + i * N + col), count);
|
||||
}
|
||||
|
||||
public static unsafe void CopyBlockWithPadding(float* blockOut, float* matrixIn, int row, int M, int col, int N, int bs)
|
||||
{
|
||||
var rowFinal = Math.Min(row + bs, M);
|
||||
var count = Math.Min(col + bs, N) - col;
|
||||
|
||||
for (var i = row; i < rowFinal; i++)
|
||||
CopyFloatArray(blockOut + (i - row) * bs, matrixIn + i * N + col, count);
|
||||
}
|
||||
|
||||
public static unsafe void MultiplyBlockUnrollHx8Padded(float* Ap,
|
||||
float* Bp,
|
||||
float* Cp, int bs)
|
||||
{
|
||||
for (int i = 0; i < bs; i++)
|
||||
{
|
||||
for (int j = 0; j < bs; j += 8)
|
||||
{
|
||||
int baseC = i * bs + j;
|
||||
float sum0 = *(Cp + baseC);
|
||||
float sum1 = *(Cp + baseC + 1);
|
||||
float sum2 = *(Cp + baseC + 2);
|
||||
float sum3 = *(Cp + baseC + 3);
|
||||
float sum4 = *(Cp + baseC + 4);
|
||||
float sum5 = *(Cp + baseC + 5);
|
||||
float sum6 = *(Cp + baseC + 6);
|
||||
float sum7 = *(Cp + baseC + 7);
|
||||
|
||||
for (int l = 0; l < bs; l++)
|
||||
{
|
||||
float A = Ap[i * bs + l];
|
||||
int baseB = l * bs + j;
|
||||
|
||||
sum0 += A * *(Bp + baseB);
|
||||
sum1 += A * *(Bp + baseB + 1);
|
||||
sum2 += A * *(Bp + baseB + 2);
|
||||
sum3 += A * *(Bp + baseB + 3);
|
||||
sum4 += A * *(Bp + baseB + 4);
|
||||
sum5 += A * *(Bp + baseB + 5);
|
||||
sum6 += A * *(Bp + baseB + 6);
|
||||
sum7 += A * *(Bp + baseB + 7);
|
||||
}
|
||||
|
||||
*(Cp + baseC) = sum0;
|
||||
*(Cp + baseC + 1) = sum1;
|
||||
*(Cp + baseC + 2) = sum2;
|
||||
*(Cp + baseC + 3) = sum3;
|
||||
*(Cp + baseC + 4) = sum4;
|
||||
*(Cp + baseC + 5) = sum5;
|
||||
*(Cp + baseC + 6) = sum6;
|
||||
*(Cp + baseC + 7) = sum7;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static unsafe void MultiplyBlockUnrollHx8ParallelWithPadding(float* Ap, int AM, int AN,
|
||||
float* Bp, int BM, int BN,
|
||||
float* Cp, int CM, int CN, int bs,
|
||||
bool transposeA = false, bool transposeB = false)
|
||||
{
|
||||
if (transposeA)
|
||||
{
|
||||
var tmp = AM; AM = AN; AN = tmp;
|
||||
}
|
||||
if (transposeB)
|
||||
{
|
||||
var tmp = BM; BM = BN; BN = tmp;
|
||||
}
|
||||
|
||||
int N = AM;
|
||||
{
|
||||
Assert.IsTrue(bs >= 8, "Matrix Mul block size should be >= 8");
|
||||
|
||||
Parallel.For(0, (BN / bs) + (BN % bs > 0 ? 1 : 0), colB =>
|
||||
{
|
||||
float[] blockA = new float[bs * bs];
|
||||
float[] blockB = new float[bs * bs];
|
||||
float[] blockC = new float[bs * bs];
|
||||
|
||||
for (int rowA = 0; rowA < N; rowA += bs)
|
||||
{
|
||||
for (int l = 0; l < AN; l += bs)
|
||||
{
|
||||
|
||||
CopyBlockWithPadding(Ap, rowA, AM, l, AN, blockA, bs, transposeA);
|
||||
CopyBlockWithPadding(Bp, l, BM, colB * bs, BN, blockB, bs, transposeB);
|
||||
CopyBlockWithPadding(Cp, rowA, CM, colB * bs, CN, blockC, bs);
|
||||
|
||||
fixed (float* blockAp = blockA, blockBp = blockB, blockCp = blockC)
|
||||
{
|
||||
MultiplyBlockUnrollHx8Padded(blockAp, blockBp, blockCp, bs);
|
||||
}
|
||||
|
||||
CopyBlockWithPadding(blockC, Cp, rowA, CM, colB * bs, CN, bs);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: bf04fe6d135714369af8cab2915b2735
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,985 +0,0 @@
|
||||
#if ENABLE_BARRACUDA_STATS
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using UnityEngine.Assertions;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
internal static class MemoryAndExecutionReportHelper
|
||||
{
|
||||
public static void GenerateStringReport(StringBuilder stringBuilder, ModelExecutionReport modelExecutionReport,
|
||||
bool spreadSheetFormat)
|
||||
{
|
||||
stringBuilder.Append($"Number of completed layers : {modelExecutionReport.CompletedLayerExecutionReports.Count}\n");
|
||||
if (modelExecutionReport.CurrentLayerExecutionReport != null)
|
||||
stringBuilder.Append("Warning: last layer was not completed. It will be logged, but it's information might be incomplete or erroneous.\n");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
List<LayerExecutionReport> allLayerReports = new List<LayerExecutionReport>();
|
||||
allLayerReports.AddRange(modelExecutionReport.CompletedLayerExecutionReports);
|
||||
if (modelExecutionReport.CurrentLayerExecutionReport != null)
|
||||
allLayerReports.Add(modelExecutionReport.CurrentLayerExecutionReport);
|
||||
|
||||
var layerExecutionViews = GenerateExecutionViews(allLayerReports, modelExecutionReport.CompletedLayerExecutionReports.Count);
|
||||
GenerateReportForViews(stringBuilder, layerExecutionViews, spreadSheetFormat, "", false);
|
||||
}
|
||||
|
||||
public static MemoryPeakSummary GenerateStringReport(StringBuilder stringBuilder, List<MemorySnapshotReport> memorySnapshots,
|
||||
bool spreadSheetFormat)
|
||||
{
|
||||
CollectAllAsFirstSeen(in memorySnapshots,
|
||||
out var allTensorAsFirstSeen,
|
||||
out var allAllocatorAsFirstSeen,
|
||||
out var allTensorDataAsFirstSeen,
|
||||
out var allTempMemoriesAsFirstSeen);
|
||||
|
||||
var summaryViews = GenerateSummaryViews(memorySnapshots, allTensorAsFirstSeen, allTensorDataAsFirstSeen, allTempMemoriesAsFirstSeen, out var memoryPeakSummary);
|
||||
GenerateHeaderForSummaryViews(stringBuilder, summaryViews, spreadSheetFormat);
|
||||
GenerateReportForViews(stringBuilder, summaryViews, spreadSheetFormat, "Tensors allocation and deallocation (diff from previous snapshot):", isSummaryView:true);
|
||||
stringBuilder.Append("\n");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
var tensorViews = GenerateTensorsViews(memorySnapshots, allTensorAsFirstSeen);
|
||||
GenerateHeaderForTensorViews(stringBuilder, tensorViews, spreadSheetFormat);
|
||||
GenerateReportForViews(stringBuilder, tensorViews, spreadSheetFormat, "All Tensors:", isSummaryView:false);
|
||||
stringBuilder.Append("\n");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
var allocatorViews = GenerateAllocatorViews(memorySnapshots, allAllocatorAsFirstSeen);
|
||||
GenerateHeaderForAllocatorsViews(stringBuilder, allocatorViews, spreadSheetFormat);
|
||||
GenerateReportForViews(stringBuilder, allocatorViews, spreadSheetFormat, "All Allocators:", isSummaryView:false);
|
||||
stringBuilder.Append("\n");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
var tensorDatasViews = GenerateTensorDatasViews(memorySnapshots, allTensorDataAsFirstSeen);
|
||||
GenerateHeaderForTensorDatasViews(stringBuilder, tensorDatasViews, spreadSheetFormat);
|
||||
GenerateReportForViews(stringBuilder, tensorDatasViews, spreadSheetFormat, "All TensorDatas:", isSummaryView:false);
|
||||
stringBuilder.Append("\n");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
var tempMemoriesDatasViews = GenerateTempMemoriesDatasViews(memorySnapshots, allTempMemoriesAsFirstSeen);
|
||||
GenerateHeaderForTempMemoriesViews(stringBuilder, tempMemoriesDatasViews, spreadSheetFormat);
|
||||
GenerateReportForViews(stringBuilder, tempMemoriesDatasViews, spreadSheetFormat, "All worker temporary memories:", isSummaryView:false);
|
||||
stringBuilder.Append("\n");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
return memoryPeakSummary;
|
||||
}
|
||||
|
||||
#region `Internal data format` declaration
|
||||
private class SnapshotFields
|
||||
{
|
||||
public readonly string[] Titles;
|
||||
public readonly Dictionary<string, string> Items;
|
||||
|
||||
public SnapshotFields(string[] titles)
|
||||
{
|
||||
Titles = titles;
|
||||
Items = new Dictionary<string, string>();
|
||||
foreach (var title in titles)
|
||||
{
|
||||
Items[title] = "";
|
||||
}
|
||||
}
|
||||
|
||||
public string this[string title]
|
||||
{
|
||||
set {
|
||||
Assert.IsTrue(Items.ContainsKey(title));
|
||||
Assert.IsTrue(Items[title] == "");
|
||||
Items[title] = value;
|
||||
}
|
||||
get => Items[title];
|
||||
}
|
||||
|
||||
public void AddTitlesToReport(StringBuilder stringBuilder, string separator)
|
||||
{
|
||||
foreach (var title in Titles)
|
||||
{
|
||||
stringBuilder.Append(title);
|
||||
stringBuilder.Append(separator);
|
||||
}
|
||||
}
|
||||
|
||||
public void AddValuesToReport(StringBuilder stringBuilder, string separator)
|
||||
{
|
||||
foreach (var title in Titles)
|
||||
{
|
||||
stringBuilder.Append(Items[title]);
|
||||
stringBuilder.Append(separator);
|
||||
}
|
||||
}
|
||||
|
||||
public void AddAllToReport(StringBuilder stringBuilder, string suffix, string prefix="")
|
||||
{
|
||||
bool first = true;
|
||||
foreach (var title in Titles)
|
||||
{
|
||||
if (!first)
|
||||
stringBuilder.Append(suffix);
|
||||
|
||||
stringBuilder.Append(prefix);
|
||||
stringBuilder.Append(title);
|
||||
stringBuilder.Append(": ");
|
||||
stringBuilder.Append(Items[title]);
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class SnapshotFieldsWithContexts
|
||||
{
|
||||
public readonly string[] FieldTitles;
|
||||
public readonly string[] ContextTitles;
|
||||
public SortedDictionary<int, SnapshotFields> Fields { get; }
|
||||
public SortedDictionary<int, SnapshotFields> Contexts { get; }
|
||||
|
||||
public SnapshotFieldsWithContexts(string[] fieldsTitles, string[] contextTitles)
|
||||
{
|
||||
FieldTitles = fieldsTitles;
|
||||
ContextTitles = contextTitles;
|
||||
Contexts = new SortedDictionary<int, SnapshotFields>();
|
||||
Fields = new SortedDictionary<int, SnapshotFields>();
|
||||
}
|
||||
|
||||
public void AddContext(int uniqueId)
|
||||
{
|
||||
Assert.IsFalse(Contexts.ContainsKey(uniqueId));
|
||||
Contexts[uniqueId] = new SnapshotFields(ContextTitles);
|
||||
Fields[uniqueId] = new SnapshotFields(FieldTitles);
|
||||
}
|
||||
|
||||
public void SetContext(int uniqueId, string title, string value)
|
||||
{
|
||||
Assert.IsTrue(Contexts.ContainsKey(uniqueId));
|
||||
Contexts[uniqueId][title] = value;
|
||||
}
|
||||
|
||||
public string this[int uniqueId, string title]
|
||||
{
|
||||
set
|
||||
{
|
||||
Assert.IsTrue(Fields.ContainsKey(uniqueId));
|
||||
Fields[uniqueId][title] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class SnapshotView
|
||||
{
|
||||
public SnapshotFields context;
|
||||
public SnapshotFields summary;
|
||||
public SnapshotFieldsWithContexts sections;
|
||||
|
||||
public SnapshotView(int snapShotIndex, MemorySnapshotReport report)
|
||||
{
|
||||
context = new SnapshotFields( new [] {"Snapshot index", "Type", "Name"} );
|
||||
context["Snapshot index"] = snapShotIndex.ToString();
|
||||
context["Type"] = report.ContextType;
|
||||
context["Name"] = report.ContextName;
|
||||
}
|
||||
|
||||
public SnapshotView(int snapShotIndex, LayerExecutionReport report)
|
||||
{
|
||||
context = new SnapshotFields( new [] {"Layer index", "Type", "Name"} );
|
||||
context["Layer index"] = snapShotIndex.ToString();
|
||||
context["Type"] = report.LayerType;
|
||||
context["Name"] = report.LayerName;
|
||||
}
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region Helpers to find information in Reports
|
||||
|
||||
private static TempMemoryInfo FindTempMemoryInSnapshot(MemorySnapshotReport memorySnapshot, int tempMemoryId)
|
||||
{
|
||||
return memorySnapshot.TempMemoriesInfo.Find(memoryInfo => memoryInfo.UniqueId == tempMemoryId);
|
||||
}
|
||||
|
||||
private static AllocatorMemoryInfo FindAllocatorInSnapshot(MemorySnapshotReport memorySnapshot, int allocatorId)
|
||||
{
|
||||
return memorySnapshot.AllocatorsMemoryInfo.Find(memoryInfo => memoryInfo.UniqueId == allocatorId);
|
||||
}
|
||||
|
||||
|
||||
private static string FindTensorDataAllocatorInSnapshot(MemorySnapshotReport memorySnapshot, int tensorDataId)
|
||||
{
|
||||
foreach (var allocatorMemoryInfo in memorySnapshot.AllocatorsMemoryInfo)
|
||||
{
|
||||
var foundTensorData = allocatorMemoryInfo.TensorDatasMemoryInfo.Find(memoryInfo => memoryInfo.UniqueId == tensorDataId);
|
||||
if (foundTensorData != null)
|
||||
return $"{allocatorMemoryInfo.Name} / Id: {allocatorMemoryInfo.UniqueId}";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
private static TensorDataMemoryInfo FindTensorDataInSnapshot(MemorySnapshotReport memorySnapshot, int tensorDataId)
|
||||
{
|
||||
bool MatchTensorDataGuidForTensor(TensorMemoryInfo memoryInfo) =>
|
||||
memoryInfo.tensorDataMemoryInfo != null && memoryInfo.tensorDataMemoryInfo.UniqueId == tensorDataId;
|
||||
|
||||
var foundTensor = memorySnapshot.TensorsMemoryInfo.Find(MatchTensorDataGuidForTensor);
|
||||
if (foundTensor != null)
|
||||
return foundTensor.tensorDataMemoryInfo;
|
||||
|
||||
foreach (var allocatorMemoryInfo in memorySnapshot.AllocatorsMemoryInfo)
|
||||
{
|
||||
var foundTensorData = allocatorMemoryInfo.TensorDatasMemoryInfo.Find(memoryInfo => memoryInfo.UniqueId == tensorDataId);
|
||||
if (foundTensorData != null)
|
||||
return foundTensorData;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static IEnumerable<TensorMemoryInfo> FindAllTensorsInSnapshotUsingTensorDataId(MemorySnapshotReport memorySnapshot, int tensorDataId)
|
||||
{
|
||||
SortedSet<TensorMemoryInfo> tensors = new SortedSet<TensorMemoryInfo>( Comparer<TensorMemoryInfo>.Create((a, b) => a.UniqueId.CompareTo(b.UniqueId)));
|
||||
|
||||
var foundTensors = memorySnapshot.TensorsMemoryInfo.FindAll(memoryInfo => memoryInfo.tensorDataMemoryInfo != null && memoryInfo.tensorDataMemoryInfo.UniqueId == tensorDataId);
|
||||
tensors.UnionWith(foundTensors);
|
||||
|
||||
foreach (var allocatorMemoryInfo in memorySnapshot.AllocatorsMemoryInfo)
|
||||
{
|
||||
var allocatorFoundTensor = allocatorMemoryInfo.TensorsMemoryInfo.FindAll(memoryInfo => memoryInfo.tensorDataMemoryInfo != null && memoryInfo.tensorDataMemoryInfo.UniqueId == tensorDataId);
|
||||
tensors.UnionWith(allocatorFoundTensor);
|
||||
}
|
||||
|
||||
return tensors;
|
||||
}
|
||||
|
||||
private static TensorMemoryInfo FindTensorInSnapshot(MemorySnapshotReport memorySnapshot, int tensorId)
|
||||
{
|
||||
var foundTensor = memorySnapshot.TensorsMemoryInfo.Find(memoryInfo => memoryInfo.UniqueId == tensorId);
|
||||
if (foundTensor != null)
|
||||
return foundTensor;
|
||||
|
||||
foreach (var allocatorMemoryInfo in memorySnapshot.AllocatorsMemoryInfo)
|
||||
{
|
||||
foundTensor = allocatorMemoryInfo.TensorsMemoryInfo.Find(memoryInfo => memoryInfo.UniqueId == tensorId);
|
||||
if (foundTensor != null)
|
||||
return foundTensor;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static void CollectAllAsFirstSeen(in List<MemorySnapshotReport> memorySnapshots,
|
||||
out SortedDictionary<int,TensorMemoryInfo> tensors,
|
||||
out SortedDictionary<int,AllocatorMemoryInfo> allocators,
|
||||
out SortedDictionary<int,TensorDataMemoryInfo> tensorDatas,
|
||||
out SortedDictionary<int,TempMemoryInfo> tempMemories)
|
||||
{
|
||||
tensors = new SortedDictionary<int, TensorMemoryInfo>();
|
||||
allocators = new SortedDictionary<int, AllocatorMemoryInfo>();
|
||||
tensorDatas = new SortedDictionary<int, TensorDataMemoryInfo>();
|
||||
tempMemories = new SortedDictionary<int, TempMemoryInfo>();
|
||||
|
||||
//Collect all unique tensors, tensors and allocator
|
||||
foreach (var snapshot in memorySnapshots)
|
||||
{
|
||||
//From Vars
|
||||
foreach (var tensor in snapshot.TensorsMemoryInfo)
|
||||
{
|
||||
tensors[tensor.UniqueId] = tensor;
|
||||
if (tensor.tensorDataMemoryInfo != null)
|
||||
tensorDatas[tensor.tensorDataMemoryInfo.UniqueId] = tensor.tensorDataMemoryInfo;
|
||||
}
|
||||
|
||||
//From allocators
|
||||
foreach (var allocator in snapshot.AllocatorsMemoryInfo)
|
||||
{
|
||||
allocators[allocator.UniqueId] = allocator;
|
||||
foreach (var tensor in allocator.TensorsMemoryInfo)
|
||||
{
|
||||
tensors[tensor.UniqueId] = tensor;
|
||||
if (tensor.tensorDataMemoryInfo != null)
|
||||
tensorDatas[tensor.tensorDataMemoryInfo.UniqueId] = tensor.tensorDataMemoryInfo;
|
||||
}
|
||||
|
||||
foreach (var tensorData in allocator.TensorDatasMemoryInfo)
|
||||
{
|
||||
tensorDatas[tensorData.UniqueId] = tensorData;
|
||||
}
|
||||
}
|
||||
|
||||
//From temp memories
|
||||
foreach (var tempMemoryInfo in snapshot.TempMemoriesInfo)
|
||||
{
|
||||
tempMemories[tempMemoryInfo.UniqueId] = tempMemoryInfo;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region Reports -> internal data format
|
||||
|
||||
private static List<SnapshotView> GenerateTempMemoriesDatasViews(List<MemorySnapshotReport> memorySnapshots,
|
||||
SortedDictionary<int, TempMemoryInfo> allTempMemoryInfosAsFirstSeen)
|
||||
{
|
||||
List<SnapshotView> views = new List<SnapshotView>();
|
||||
for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
|
||||
{
|
||||
long allTotal = 0L;
|
||||
var snapshot = memorySnapshots[memorySnapshotIndex];
|
||||
|
||||
//Titles and contexts
|
||||
SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
|
||||
view.sections = new SnapshotFieldsWithContexts(
|
||||
fieldsTitles: new[]
|
||||
{
|
||||
"Allocated (bytes)",
|
||||
"On GPU"
|
||||
},
|
||||
contextTitles: new[] {"Name", "Id"});
|
||||
foreach (var tempMemoryInfo in allTempMemoryInfosAsFirstSeen)
|
||||
{
|
||||
var id = tempMemoryInfo.Key;
|
||||
view.sections.AddContext(id);
|
||||
view.sections.SetContext(id, "Name", tempMemoryInfo.Value.Name);
|
||||
view.sections.SetContext(id, "Id", id.ToString());
|
||||
}
|
||||
view.summary = new SnapshotFields(new[]
|
||||
{
|
||||
"Memory pressure in bytes (sum of all temp memory capacities)"
|
||||
});
|
||||
|
||||
//Details
|
||||
foreach (var alloc in allTempMemoryInfosAsFirstSeen)
|
||||
{
|
||||
var tempMemory = FindTempMemoryInSnapshot(snapshot, alloc.Key);
|
||||
if (tempMemory != null)
|
||||
{
|
||||
allTotal += tempMemory.TotalBytes;
|
||||
view.sections[tempMemory.UniqueId, "Allocated (bytes)"] = tempMemory.TotalBytes.ToString();
|
||||
view.sections[tempMemory.UniqueId, "On GPU"] = tempMemory.IsGPUMem ? "GPU" : "CPU";
|
||||
}
|
||||
}
|
||||
|
||||
//Summary
|
||||
view.summary["Memory pressure in bytes (sum of all temp memory capacities)"] = allTotal.ToString();
|
||||
views.Add(view);
|
||||
}
|
||||
|
||||
return views;
|
||||
}
|
||||
|
||||
private static List<SnapshotView> GenerateAllocatorViews(List<MemorySnapshotReport> memorySnapshots,
|
||||
SortedDictionary<int, AllocatorMemoryInfo> allAllocatorAsFirstSeen)
|
||||
{
|
||||
List<SnapshotView> views = new List<SnapshotView>();
|
||||
for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
|
||||
{
|
||||
long allTotal = 0L;
|
||||
long allBusy = 0L;
|
||||
long allUsed = 0L;
|
||||
long allFragmented = 0L;
|
||||
long allFree = 0L;
|
||||
var snapshot = memorySnapshots[memorySnapshotIndex];
|
||||
|
||||
//Titles and contexts
|
||||
SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
|
||||
view.sections = new SnapshotFieldsWithContexts(
|
||||
fieldsTitles: new[]
|
||||
{
|
||||
"Memory pressure in bytes (sum of allocated tensorDatas capacities)",
|
||||
"Busy bytes, for all allocators (sum of 'in use' tensorDatas capacities)",
|
||||
"Needed bytes, for all allocators (sum of sizes of the part of the tensorDatas used by Tensors)",
|
||||
"Unusable bytes, for all allocators (sum of the part of tensorData lost because of allocator fragmentation)",
|
||||
"Ready bytes, for all allocators (sum of capacities of tensorData not used but allocated)"
|
||||
},
|
||||
contextTitles: new[] {"Name", "Id"});
|
||||
foreach (var allocatorMemoryInfo in allAllocatorAsFirstSeen)
|
||||
{
|
||||
var id = allocatorMemoryInfo.Key;
|
||||
view.sections.AddContext(id);
|
||||
view.sections.SetContext(id, "Name", allocatorMemoryInfo.Value.Name);
|
||||
view.sections.SetContext(id, "Id", id.ToString());
|
||||
}
|
||||
view.summary = new SnapshotFields(new[]
|
||||
{
|
||||
"Memory pressure in bytes, for all allocators (sum of allocated tensorDatas capacities)",
|
||||
"Busy bytes, for all allocators (sum of 'in use' tensorDatas capacities)",
|
||||
"Needed bytes, for all allocators (sum of sizes of the part of the tensorDatas used by Tensors)",
|
||||
"Unusable bytes, for all allocators (sum of the part of tensorData lost because of allocator fragmentation)",
|
||||
"Ready bytes, for all allocators (sum of capacities of tensorData not used but allocated)"
|
||||
});
|
||||
|
||||
//Details
|
||||
foreach (var alloc in allAllocatorAsFirstSeen)
|
||||
{
|
||||
var allocator = FindAllocatorInSnapshot(snapshot, alloc.Key);
|
||||
if (allocator != null)
|
||||
{
|
||||
allTotal += allocator.TotalBytes;
|
||||
allBusy += allocator.BusyBytes;
|
||||
allUsed += allocator.UsedBytes;
|
||||
allFragmented += allocator.BusyBytes-allocator.UsedBytes;
|
||||
allFree += allocator.FreeBytes;
|
||||
view.sections[allocator.UniqueId, "Memory pressure in bytes (sum of allocated tensorDatas capacities)"] = allocator.TotalBytes.ToString();
|
||||
view.sections[allocator.UniqueId, "Busy bytes, for all allocators (sum of 'in use' tensorDatas capacities)"] = allocator.BusyBytes.ToString();
|
||||
view.sections[allocator.UniqueId, "Needed bytes, for all allocators (sum of sizes of the part of the tensorDatas used by Tensors)"] = allocator.UsedBytes.ToString();
|
||||
view.sections[allocator.UniqueId, "Unusable bytes, for all allocators (sum of the part of tensorData lost because of allocator fragmentation)"] = allocator.BytesLostToFragmentation.ToString();
|
||||
view.sections[allocator.UniqueId, "Ready bytes, for all allocators (sum of capacities of tensorData not used but allocated)"] = allocator.FreeBytes.ToString();
|
||||
}
|
||||
}
|
||||
|
||||
//Summary
|
||||
view.summary["Memory pressure in bytes, for all allocators (sum of allocated tensorDatas capacities)"] = allTotal.ToString();
|
||||
view.summary["Busy bytes, for all allocators (sum of 'in use' tensorDatas capacities)"] = allBusy.ToString();
|
||||
view.summary["Needed bytes, for all allocators (sum of sizes of the part of the tensorDatas used by Tensors)"] = allUsed.ToString();
|
||||
view.summary["Unusable bytes, for all allocators (sum of the part of tensorData lost because of allocator fragmentation)"] = allFragmented.ToString();
|
||||
view.summary["Ready bytes, for all allocators (sum of capacities of tensorData not used but allocated)"] = allFree.ToString();
|
||||
views.Add(view);
|
||||
}
|
||||
|
||||
return views;
|
||||
}
|
||||
|
||||
private static List<SnapshotView> GenerateTensorDatasViews(List<MemorySnapshotReport> memorySnapshots,
|
||||
SortedDictionary<int,TensorDataMemoryInfo> allTensorDataAsFirstSeen)
|
||||
{
|
||||
List<SnapshotView> views = new List<SnapshotView>();
|
||||
for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
|
||||
{
|
||||
long allGPUInBytes = 0L;
|
||||
long allCPUInBytes = 0L;
|
||||
long allUsedGPUInBytes = 0L;
|
||||
long allUsedCPUInBytes = 0L;
|
||||
long allFragmentedMemGPUInBytes = 0L;
|
||||
long allFragmentedMemCPUInBytes = 0L;
|
||||
|
||||
var snapshot = memorySnapshots[memorySnapshotIndex];
|
||||
|
||||
//Titles and contexts
|
||||
SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
|
||||
view.sections = new SnapshotFieldsWithContexts(
|
||||
fieldsTitles: new[]
|
||||
{
|
||||
"In use", "Capacity (bytes)", "On GPU", "Allocator",
|
||||
"Tensor(s) Id(s)", "Tensor(s) max bytes", "Fragmented bytes"
|
||||
},
|
||||
contextTitles: new[] {"Id"});
|
||||
foreach (var tensorData in allTensorDataAsFirstSeen)
|
||||
{
|
||||
var id = tensorData.Key;
|
||||
view.sections.AddContext(id);
|
||||
view.sections.SetContext(id, "Id", id.ToString());
|
||||
}
|
||||
view.summary = new SnapshotFields(new[]
|
||||
{
|
||||
"GPU sum of all allocated tensorData capacities (bytes)",
|
||||
"CPU sum of all allocated tensorData capacities (bytes)",
|
||||
"GPU sum of all 'in use' tensorData (bytes)",
|
||||
"CPU sum of all 'in use' tensorData (bytes)",
|
||||
"GPU sum of all 'fragmented' tensorData mem ('in use' but not by large enough tensors) (bytes)",
|
||||
"CPU sum of all 'fragmented' tensorData mem ('in use' but not by large enough tensors) (bytes)",
|
||||
});
|
||||
|
||||
foreach (var tData in allTensorDataAsFirstSeen)
|
||||
{
|
||||
TensorDataMemoryInfo tensorData = FindTensorDataInSnapshot(snapshot, tData.Key);
|
||||
if (tensorData != null)
|
||||
{
|
||||
var associatedTensors = FindAllTensorsInSnapshotUsingTensorDataId(snapshot, tensorData.UniqueId);
|
||||
string tensorNamesandIds = "";
|
||||
int tensorBytes = 0;
|
||||
bool first = true;
|
||||
foreach (var tensor in associatedTensors)
|
||||
{
|
||||
if (!first)
|
||||
tensorNamesandIds += " / ";
|
||||
tensorNamesandIds += tensor.Name + " Id:" + tensor.UniqueId;
|
||||
first = false;
|
||||
tensorBytes = Math.Max(tensorBytes, tensor.Shape.length * sizeof(float));
|
||||
}
|
||||
int fragmentedTensorDataBytes = (tensorData.InUse) ? tensorData.MaxBytes - tensorBytes : 0;
|
||||
|
||||
if (tensorData.IsGPUMem)
|
||||
{
|
||||
allGPUInBytes += tensorData.MaxBytes;
|
||||
if (tensorData.InUse)
|
||||
{
|
||||
allFragmentedMemGPUInBytes += fragmentedTensorDataBytes;
|
||||
allUsedGPUInBytes += tensorData.MaxBytes;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
allCPUInBytes += tensorData.MaxBytes;
|
||||
if (tensorData.InUse)
|
||||
{
|
||||
allFragmentedMemCPUInBytes += fragmentedTensorDataBytes;
|
||||
allUsedCPUInBytes += tensorData.MaxBytes;
|
||||
}
|
||||
}
|
||||
|
||||
view.sections[tensorData.UniqueId, "In use"] = tensorData.InUse ? "Yes" : "";
|
||||
view.sections[tensorData.UniqueId, "Capacity (bytes)"] = tensorData.MaxBytes.ToString();
|
||||
view.sections[tensorData.UniqueId, "On GPU"] = tensorData.IsGPUMem ? "GPU" : "CPU";
|
||||
view.sections[tensorData.UniqueId, "Allocator"] = FindTensorDataAllocatorInSnapshot(snapshot, tensorData.UniqueId);
|
||||
view.sections[tensorData.UniqueId, "Tensor(s) Id(s)"] = tensorNamesandIds;
|
||||
view.sections[tensorData.UniqueId, "Tensor(s) max bytes"] = tensorBytes.ToString();
|
||||
view.sections[tensorData.UniqueId, "Fragmented bytes"] = fragmentedTensorDataBytes.ToString();
|
||||
}
|
||||
}
|
||||
|
||||
//Summary
|
||||
view.summary["GPU sum of all allocated tensorData capacities (bytes)"] = allGPUInBytes.ToString();
|
||||
view.summary["CPU sum of all allocated tensorData capacities (bytes)"] = allCPUInBytes.ToString();
|
||||
view.summary["GPU sum of all 'in use' tensorData (bytes)"] = allUsedGPUInBytes.ToString();
|
||||
view.summary["CPU sum of all 'in use' tensorData (bytes)"] = allUsedCPUInBytes.ToString();
|
||||
view.summary["GPU sum of all 'fragmented' tensorData mem ('in use' but not by large enough tensors) (bytes)"] = allFragmentedMemGPUInBytes.ToString();
|
||||
view.summary["CPU sum of all 'fragmented' tensorData mem ('in use' but not by large enough tensors) (bytes)"] = allFragmentedMemCPUInBytes.ToString();
|
||||
views.Add(view);
|
||||
}
|
||||
|
||||
return views;
|
||||
}
|
||||
|
||||
private static List<SnapshotView> GenerateTensorsViews(List<MemorySnapshotReport> memorySnapshots,
|
||||
SortedDictionary<int, TensorMemoryInfo> allTensorAsFirstSeen)
|
||||
{
|
||||
List<SnapshotView> views = new List<SnapshotView>();
|
||||
for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
|
||||
{
|
||||
var snapshot = memorySnapshots[memorySnapshotIndex];
|
||||
|
||||
//Titles and contexts
|
||||
SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
|
||||
view.sections = new SnapshotFieldsWithContexts(
|
||||
fieldsTitles: new[] {"Allocated (bytes)", "Name", "Shape", "Cache size (bytes)", "TensorData Id", "TensorData Capacity (bytes)"},
|
||||
contextTitles: new[] {"Id"});
|
||||
foreach (var tensorMemoryInfo in allTensorAsFirstSeen)
|
||||
{
|
||||
var id = tensorMemoryInfo.Key;
|
||||
view.sections.AddContext(id);
|
||||
view.sections.SetContext(id, "Id", id.ToString());
|
||||
}
|
||||
view.summary = new SnapshotFields(new[]
|
||||
{
|
||||
"Tensor memory on GPU (in bytes)",
|
||||
"Tensor memory on CPU (in bytes)",
|
||||
"On CPU tensor cache (in bytes)"
|
||||
});
|
||||
|
||||
//Details
|
||||
long cacheMemInBytes = 0L;
|
||||
long gpuMem = 0L;
|
||||
long cpuMem = 0L;
|
||||
foreach (var tensorFromDict in allTensorAsFirstSeen)
|
||||
{
|
||||
var tensor = FindTensorInSnapshot(snapshot, tensorFromDict.Key);
|
||||
if (tensor != null)
|
||||
{
|
||||
cacheMemInBytes += tensor.CacheBytes;
|
||||
var dataBytes = tensor.Shape.length * sizeof(float);
|
||||
|
||||
string allocatedStr = "Yes";
|
||||
if (tensor.tensorDataMemoryInfo != null)
|
||||
{
|
||||
allocatedStr += $" ({(tensor.Shape.length * sizeof(float)).ToString()})";
|
||||
view.sections[tensor.UniqueId, "TensorData Id"] = tensor.tensorDataMemoryInfo.UniqueId.ToString();
|
||||
view.sections[tensor.UniqueId, "TensorData Capacity (bytes)"] = tensor.tensorDataMemoryInfo.MaxBytes.ToString();
|
||||
if (tensor.tensorDataMemoryInfo.IsGPUMem)
|
||||
gpuMem += dataBytes;
|
||||
else
|
||||
cpuMem += dataBytes;
|
||||
}
|
||||
else
|
||||
{
|
||||
allocatedStr += " (0)";
|
||||
}
|
||||
view.sections[tensor.UniqueId, "Name"] = tensor.Name;
|
||||
view.sections[tensor.UniqueId, "Shape"] = tensor.Shape.ToString();
|
||||
view.sections[tensor.UniqueId, "Cache size (bytes)"] = tensor.CacheBytes.ToString();
|
||||
view.sections[tensor.UniqueId, "Allocated (bytes)"] = allocatedStr;
|
||||
}
|
||||
}
|
||||
|
||||
//Summary
|
||||
view.summary["Tensor memory on GPU (in bytes)"] = gpuMem.ToString();
|
||||
view.summary["Tensor memory on CPU (in bytes)"] = cpuMem.ToString();
|
||||
view.summary["On CPU tensor cache (in bytes)"] = cacheMemInBytes.ToString();
|
||||
views.Add(view);
|
||||
}
|
||||
|
||||
return views;
|
||||
}
|
||||
|
||||
private static List<SnapshotView> GenerateExecutionViews(List<LayerExecutionReport> layerReports, int numCompletedLayer)
|
||||
{
|
||||
List<SnapshotView> views = new List<SnapshotView>();
|
||||
for (var layerIndex = 0; layerIndex < layerReports.Count; layerIndex++)
|
||||
{
|
||||
var report = layerReports[layerIndex];
|
||||
|
||||
//Titles
|
||||
SnapshotView view = new SnapshotView(layerIndex, report);
|
||||
view.sections = new SnapshotFieldsWithContexts(null, null);
|
||||
view.summary = new SnapshotFields(new[]
|
||||
{
|
||||
"Summary",
|
||||
"Compute Kernels(workItems:X,Y,Z)",
|
||||
"Theoretical ALU count",
|
||||
"Theoretical Bandwidth (bytes)",
|
||||
"Note"
|
||||
});
|
||||
|
||||
//Summary
|
||||
view.summary["Summary"] = report.Summary==""?"NA":report.Summary;
|
||||
view.summary["Compute Kernels(workItems:X,Y,Z)"] = report.DispatchInfos;
|
||||
view.summary["Theoretical ALU count"] = report.NumAlu.ToString();
|
||||
view.summary["Theoretical Bandwidth (bytes)"] = report.NumBytes.ToString();
|
||||
if (layerIndex >= numCompletedLayer)
|
||||
view.summary["Note"] = "UNCOMPLETED LAYER";
|
||||
views.Add(view);
|
||||
}
|
||||
|
||||
return views;
|
||||
}
|
||||
|
||||
private static List<SnapshotView> GenerateSummaryViews(List<MemorySnapshotReport> memorySnapshots,
|
||||
SortedDictionary<int, TensorMemoryInfo> allTensorsAsFirstSeen,
|
||||
SortedDictionary<int, TensorDataMemoryInfo> allTensorDatasAsFirstSeen,
|
||||
SortedDictionary<int, TempMemoryInfo> allTempMemoriesAsFirstSeen,
|
||||
out MemoryPeakSummary memoryPeakSummary)
|
||||
{
|
||||
HashSet<int> previousSnapshotTensorIds = new HashSet<int>();
|
||||
List<SnapshotView> views = new List<SnapshotView>();
|
||||
|
||||
long peakMemoryUsageGPU = 0;
|
||||
long peakMemoryUsageCPU = 0;
|
||||
long peakMemoryUsageGPUAndCPU = 0;
|
||||
|
||||
for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
|
||||
{
|
||||
var snapshot = memorySnapshots[memorySnapshotIndex];
|
||||
|
||||
//Titles and contexts
|
||||
SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
|
||||
view.sections = new SnapshotFieldsWithContexts(
|
||||
fieldsTitles: new[] {"Allocated", "Released"},
|
||||
contextTitles: new[] {"Type" });
|
||||
view.sections.AddContext(0);
|
||||
view.sections.SetContext(0, "Type", "Tensor");
|
||||
view.summary = new SnapshotFields(new[]
|
||||
{
|
||||
"Total memory pressure on GPU (in bytes)",
|
||||
"Total memory pressure on CPU (in bytes)",
|
||||
"On CPU tensor cache (in bytes)"
|
||||
});
|
||||
|
||||
//Summary
|
||||
HashSet<int> currentSnapshotTensorIds = new HashSet<int>();
|
||||
long cacheMemInBytes = 0L;
|
||||
foreach (var tensor in snapshot.TensorsMemoryInfo)
|
||||
{
|
||||
cacheMemInBytes += tensor.CacheBytes;
|
||||
currentSnapshotTensorIds.Add(tensor.UniqueId);
|
||||
}
|
||||
long gpuMem = 0L;
|
||||
long cpuMem = 0L;
|
||||
foreach (var tData in allTensorDatasAsFirstSeen)
|
||||
{
|
||||
TensorDataMemoryInfo tensorData = FindTensorDataInSnapshot(snapshot, tData.Key);
|
||||
if (tensorData != null)
|
||||
{
|
||||
if (tensorData.IsGPUMem)
|
||||
gpuMem += tensorData.MaxBytes;
|
||||
else
|
||||
cpuMem += tensorData.MaxBytes;
|
||||
}
|
||||
}
|
||||
foreach (var mData in allTempMemoriesAsFirstSeen)
|
||||
{
|
||||
TempMemoryInfo tempMemoryInfo = FindTempMemoryInSnapshot(snapshot, mData.Key);
|
||||
if (tempMemoryInfo != null)
|
||||
{
|
||||
if (tempMemoryInfo.IsGPUMem)
|
||||
gpuMem += tempMemoryInfo.TotalBytes;
|
||||
else
|
||||
cpuMem += tempMemoryInfo.TotalBytes;
|
||||
}
|
||||
}
|
||||
view.summary["Total memory pressure on GPU (in bytes)"] = gpuMem.ToString();
|
||||
view.summary["Total memory pressure on CPU (in bytes)"] = cpuMem.ToString();
|
||||
view.summary["On CPU tensor cache (in bytes)"] = cacheMemInBytes.ToString();
|
||||
|
||||
peakMemoryUsageGPU = Math.Max(peakMemoryUsageGPU, gpuMem);
|
||||
peakMemoryUsageCPU = Math.Max(peakMemoryUsageCPU, cpuMem);
|
||||
peakMemoryUsageGPUAndCPU = Math.Max(peakMemoryUsageGPUAndCPU, gpuMem+cpuMem);
|
||||
|
||||
if (memorySnapshotIndex != 0)
|
||||
{
|
||||
//Tensor allocated and freed (diff from snapshot to snapshot)
|
||||
var allocatedTensorsId = currentSnapshotTensorIds.Except(previousSnapshotTensorIds);
|
||||
var releasedTensorsId = previousSnapshotTensorIds.Except(currentSnapshotTensorIds);
|
||||
StringBuilder tensorDiff = new StringBuilder();
|
||||
bool first = true;
|
||||
foreach (var tensorId in allocatedTensorsId)
|
||||
{
|
||||
var tensor = FindTensorInSnapshot(snapshot, tensorId);
|
||||
string tensorDataInfo = "none";
|
||||
if (tensor.tensorDataMemoryInfo != null)
|
||||
{
|
||||
var data = tensor.tensorDataMemoryInfo;
|
||||
var memType = data.IsGPUMem ? "GPU" : "CPU";
|
||||
tensorDataInfo = $"id:{data.UniqueId} bytes:{data.MaxBytes} on:{memType}";
|
||||
}
|
||||
if (!first) tensorDiff.Append(" / ");
|
||||
first = false;
|
||||
tensorDiff.Append($"{tensor.Name} {tensor.Shape} id:{tensor.UniqueId} tensorData:[{tensorDataInfo}]");
|
||||
|
||||
}
|
||||
view.sections[0, "Allocated"] = tensorDiff.ToString();
|
||||
tensorDiff.Clear();
|
||||
|
||||
first = true;
|
||||
foreach (var tensorId in releasedTensorsId)
|
||||
{
|
||||
var tensor = allTensorsAsFirstSeen[tensorId];
|
||||
if (!first) tensorDiff.Append(" / ");
|
||||
first = false;
|
||||
tensorDiff.Append($"{tensor.Name} {tensor.Shape} id:{tensor.UniqueId}");
|
||||
}
|
||||
view.sections[0, "Released"] = tensorDiff.ToString();
|
||||
}
|
||||
|
||||
views.Add(view);
|
||||
previousSnapshotTensorIds = currentSnapshotTensorIds;
|
||||
}
|
||||
|
||||
memoryPeakSummary = new MemoryPeakSummary(peakMemoryUsageGPU, peakMemoryUsageCPU, peakMemoryUsageGPUAndCPU);
|
||||
return views;
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Internal data format -> text
|
||||
|
||||
private static void Append(this StringBuilder sb, string str, int repeatCount)
|
||||
{
|
||||
for (int i = 0; i < repeatCount; ++i)
|
||||
sb.Append(str);
|
||||
}
|
||||
|
||||
private static void Append(this StringBuilder sb, string str, string separator)
|
||||
{
|
||||
sb.Append(str);
|
||||
sb.Append(separator);
|
||||
}
|
||||
|
||||
private static void GenerateReportForViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat, string sectionTitle, bool isSummaryView)
|
||||
{
|
||||
if (spreadSheetFormat)
|
||||
{
|
||||
//Columns Titles
|
||||
views[0].context.AddTitlesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
views[0].summary.AddTitlesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
foreach (var tensorFields in views[0].sections.Fields)
|
||||
{
|
||||
tensorFields.Value.AddTitlesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
}
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
//All snapshots
|
||||
foreach (var view in views)
|
||||
{
|
||||
view.context.AddValuesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
view.summary.AddValuesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
foreach (var tensorFields in view.sections.Fields)
|
||||
{
|
||||
tensorFields.Value.AddValuesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
}
|
||||
stringBuilder.Append("\n");
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
string doubleIndentation = ModelExecutionsReporter.TextIndentation + ModelExecutionsReporter.TextIndentation;
|
||||
|
||||
foreach (var view in views)
|
||||
{
|
||||
view.context.AddAllToReport(stringBuilder, ModelExecutionsReporter.TextFormatFieldSeparator);
|
||||
stringBuilder.Append("\n");
|
||||
view.summary.AddAllToReport(stringBuilder, suffix:"\n", prefix: ModelExecutionsReporter.TextIndentation);
|
||||
stringBuilder.Append("\n"+ModelExecutionsReporter.TextIndentation + sectionTitle +"\n");
|
||||
|
||||
foreach (var context in view.sections.Contexts)
|
||||
{
|
||||
stringBuilder.Append(doubleIndentation);
|
||||
if (isSummaryView)
|
||||
{
|
||||
view.sections.Fields[context.Key].AddAllToReport(stringBuilder, "\n"+doubleIndentation);
|
||||
}
|
||||
else
|
||||
{
|
||||
context.Value.AddAllToReport(stringBuilder, ModelExecutionsReporter.TextFormatFieldSeparator);
|
||||
stringBuilder.Append("\n"+doubleIndentation +"=> ");
|
||||
view.sections.Fields[context.Key].AddAllToReport(stringBuilder, ModelExecutionsReporter.TextFormatFieldSeparator);
|
||||
stringBuilder.Append("\n");
|
||||
}
|
||||
}
|
||||
stringBuilder.Append("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void GenerateHeaderForSummaryViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat)
|
||||
{
|
||||
if (views.Count == 0)
|
||||
{
|
||||
stringBuilder.Append("<******** Summary info ********> NONE!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!spreadSheetFormat)
|
||||
{
|
||||
stringBuilder.Append("<******** Summary info ********>\n");
|
||||
return;
|
||||
}
|
||||
|
||||
//Columns names
|
||||
int ctxFieldCount = views[0].context.Titles.Length + views[0].summary.Titles.Length;
|
||||
int sectionFieldCount = views[0].sections.FieldTitles.Length;
|
||||
|
||||
stringBuilder.Append("<******** Summary info ********>");
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
foreach (var context in views[0].sections.Contexts)
|
||||
{
|
||||
stringBuilder.Append(context.Value["Type"], ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, sectionFieldCount-1);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
}
|
||||
stringBuilder.Append("\n");
|
||||
}
|
||||
|
||||
private static void GenerateHeaderForTensorViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat)
|
||||
{
|
||||
GenerateHeaderForViewsByID(stringBuilder, views, spreadSheetFormat, "Tensors");
|
||||
}
|
||||
|
||||
private static void GenerateHeaderForTensorDatasViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat)
|
||||
{
|
||||
GenerateHeaderForViewsByID(stringBuilder, views, spreadSheetFormat, "TensorDatas");
|
||||
}
|
||||
|
||||
private static void GenerateHeaderForViewsByID(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat, string dataType)
|
||||
{
|
||||
if (views.Count == 0)
|
||||
{
|
||||
stringBuilder.Append($"<******** {dataType} info ********> NONE!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!spreadSheetFormat)
|
||||
{
|
||||
stringBuilder.Append($"<******** {dataType} info ********>\n");
|
||||
return;
|
||||
}
|
||||
|
||||
//Columns names
|
||||
int ctxFieldCount = views[0].context.Titles.Length + views[0].summary.Titles.Length;
|
||||
int sectionFieldCount = views[0].sections.FieldTitles.Length;
|
||||
|
||||
stringBuilder.Append($"<******** {dataType} info ********>");
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
foreach (var context in views[0].sections.Contexts)
|
||||
{
|
||||
stringBuilder.Append("Id: ");
|
||||
stringBuilder.Append(context.Value["Id"], ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, sectionFieldCount-1);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
}
|
||||
stringBuilder.Append("\n");
|
||||
}
|
||||
|
||||
private static void GenerateHeaderForTempMemoriesViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat)
|
||||
{
|
||||
if (views.Count == 0)
|
||||
{
|
||||
stringBuilder.Append("<******** Worker temporary memories info ********> NONE!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!spreadSheetFormat)
|
||||
{
|
||||
stringBuilder.Append("<******** Worker temporary memories info ********>\n");
|
||||
return;
|
||||
}
|
||||
|
||||
//Columns names
|
||||
int ctxFieldCount = views[0].context.Titles.Length + views[0].summary.Titles.Length;
|
||||
int sectionFieldCount = views[0].sections.FieldTitles.Length;
|
||||
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append("Temp memories names and ids:");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
stringBuilder.Append("<******** Worker temporary memories info ********>");
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
foreach (var context in views[0].sections.Contexts)
|
||||
{
|
||||
stringBuilder.Append(context.Value["Name"], " / Id: ");
|
||||
stringBuilder.Append(context.Value["Id"], ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, sectionFieldCount-1);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
}
|
||||
stringBuilder.Append("\n");
|
||||
}
|
||||
|
||||
private static void GenerateHeaderForAllocatorsViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat)
|
||||
{
|
||||
if (views.Count == 0)
|
||||
{
|
||||
stringBuilder.Append("<******** Allocators info ********> NONE!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!spreadSheetFormat)
|
||||
{
|
||||
stringBuilder.Append("<******** Allocators info ********>\n");
|
||||
return;
|
||||
}
|
||||
|
||||
//Columns names
|
||||
int ctxFieldCount = views[0].context.Titles.Length + views[0].summary.Titles.Length;
|
||||
int sectionFieldCount = views[0].sections.FieldTitles.Length;
|
||||
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append("Allocators names and shapes:");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
stringBuilder.Append("<******** Allocators info ********>");
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
foreach (var context in views[0].sections.Contexts)
|
||||
{
|
||||
stringBuilder.Append(context.Value["Name"], " / Id: ");
|
||||
stringBuilder.Append(context.Value["Id"], ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, sectionFieldCount-1);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
}
|
||||
stringBuilder.Append("\n");
|
||||
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
|
||||
#endif //ENABLE_BARRACUDA_STATS
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 5b125a79bdbfb1b41adba78ef255dd80
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,196 +0,0 @@
|
||||
#if ENABLE_BARRACUDA_STATS
|
||||
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
public class TensorDataMemoryInfo
|
||||
{
|
||||
public int UniqueId { get; }
|
||||
public int MaxBytes { get; }
|
||||
public bool InUse { get; }
|
||||
public bool IsGPUMem { get; }
|
||||
|
||||
internal TensorDataMemoryInfo(ITensorDataStatistics tensorDataStatistics)
|
||||
{
|
||||
UniqueId = tensorDataStatistics.uniqueId;
|
||||
MaxBytes = tensorDataStatistics.maxCapacity * sizeof(float);
|
||||
InUse = tensorDataStatistics.inUse;
|
||||
IsGPUMem = tensorDataStatistics.isGPUMem;
|
||||
}
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
return $"TensorData of maxBytes {MaxBytes}, inUse:{InUse}, onGPU:{IsGPUMem}, uniqueId:{UniqueId}";
|
||||
}
|
||||
}
|
||||
|
||||
public class TempMemoryInfo
|
||||
{
|
||||
public int UniqueId { get; }
|
||||
public string Name { get; }
|
||||
public long TotalBytes { get; }
|
||||
public bool IsGPUMem { get; }
|
||||
|
||||
internal TempMemoryInfo(TempMemoryStatistics tempMemoryStatistics)
|
||||
{
|
||||
UniqueId = tempMemoryStatistics.uniqueId;
|
||||
Name = tempMemoryStatistics.name;
|
||||
TotalBytes = tempMemoryStatistics.size;
|
||||
IsGPUMem = tempMemoryStatistics.isGPUMem;
|
||||
}
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
return $"Temp memory '{Name}' of totalBytes {TotalBytes}";
|
||||
}
|
||||
}
|
||||
|
||||
public class AllocatorMemoryInfo
|
||||
{
|
||||
public int UniqueId { get; }
|
||||
public string Name { get; }
|
||||
public long UsedBytes { get; }
|
||||
public long BusyBytes { get; }
|
||||
public long FreeBytes { get; }
|
||||
public long TotalBytes { get; }
|
||||
public List<TensorDataMemoryInfo> TensorDatasMemoryInfo { get; }
|
||||
public List<TensorMemoryInfo> TensorsMemoryInfo { get; }
|
||||
public long BytesLostToFragmentation => BusyBytes - UsedBytes;
|
||||
|
||||
internal AllocatorMemoryInfo(IAllocatorStatistics allocatorStatistics)
|
||||
{
|
||||
UniqueId = allocatorStatistics.uniqueId;
|
||||
Name = allocatorStatistics.name;
|
||||
UsedBytes = allocatorStatistics.usedBytes;
|
||||
BusyBytes = allocatorStatistics.busyBytes;
|
||||
FreeBytes = allocatorStatistics.freeBytes;
|
||||
TotalBytes = allocatorStatistics.totalBytes;
|
||||
TensorDatasMemoryInfo = new List<TensorDataMemoryInfo>();
|
||||
foreach (var tensorDataStatistics in allocatorStatistics.GetTensorDatasStatistics())
|
||||
{
|
||||
TensorDatasMemoryInfo.Add(new TensorDataMemoryInfo(tensorDataStatistics));
|
||||
}
|
||||
TensorsMemoryInfo = new List<TensorMemoryInfo>();
|
||||
foreach (var tensorStatistics in allocatorStatistics.GetTensorsStatistics())
|
||||
{
|
||||
TensorsMemoryInfo.Add(new TensorMemoryInfo(tensorStatistics));
|
||||
}
|
||||
}
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
return $"Allocator '{Name}' of totalBytes {TotalBytes}, usedBytes:{UsedBytes}, lostToFragmentation:{BytesLostToFragmentation}, free:{FreeBytes}";
|
||||
}
|
||||
}
|
||||
|
||||
public class TensorMemoryInfo
|
||||
{
|
||||
public int UniqueId { get; }
|
||||
public string Name { get; }
|
||||
public TensorShape Shape { get; }
|
||||
public int CacheBytes { get; }
|
||||
public TensorDataMemoryInfo tensorDataMemoryInfo { get; }
|
||||
|
||||
internal TensorMemoryInfo(ITensorStatistics tensorStatistics)
|
||||
{
|
||||
UniqueId = tensorStatistics.uniqueId;
|
||||
Name = tensorStatistics.name;
|
||||
Shape = tensorStatistics.shape;
|
||||
CacheBytes = tensorStatistics.cacheBytes;
|
||||
var tensorDataStats = tensorStatistics.GetTensorDataStatistics();
|
||||
if (tensorDataStats != null)
|
||||
tensorDataMemoryInfo = new TensorDataMemoryInfo(tensorDataStats);
|
||||
}
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
var tensorDataStr = (tensorDataMemoryInfo != null) ? tensorDataMemoryInfo.ToString() : "";
|
||||
return $"Tensor: {Name} of shape {Shape.ToString()}, cacheBytes: {CacheBytes} (data: {tensorDataStr})";
|
||||
}
|
||||
}
|
||||
|
||||
public class MemorySnapshotReport
|
||||
{
|
||||
public string ContextType { get; }
|
||||
public string ContextName { get; }
|
||||
public List<TensorMemoryInfo> TensorsMemoryInfo { get; }
|
||||
public List<AllocatorMemoryInfo> AllocatorsMemoryInfo { get; }
|
||||
public List<TempMemoryInfo> TempMemoriesInfo { get; }
|
||||
|
||||
internal MemorySnapshotReport(IOps ops, IVarsStatistics vars, string context, Layer layer)
|
||||
{
|
||||
ContextType = context;
|
||||
ContextName = "";
|
||||
if (layer != null)
|
||||
{
|
||||
ContextType += ": " + layer.type + ((layer.type == Layer.Type.Activation) ? ("." + layer.activation) : "");
|
||||
ContextName += layer.name;
|
||||
}
|
||||
|
||||
TensorsMemoryInfo = new List<TensorMemoryInfo>();
|
||||
AllocatorsMemoryInfo = new List<AllocatorMemoryInfo>();
|
||||
TempMemoriesInfo = new List<TempMemoryInfo>();
|
||||
|
||||
foreach (var allocatorsStatistic in vars.GetAllocatorsStatistics())
|
||||
{
|
||||
AllocatorsMemoryInfo.Add(new AllocatorMemoryInfo(allocatorsStatistic));
|
||||
}
|
||||
|
||||
foreach (var tensorStatistic in vars.GetTensorsStatistics())
|
||||
{
|
||||
TensorsMemoryInfo.Add(new TensorMemoryInfo(tensorStatistic));
|
||||
}
|
||||
|
||||
foreach (var tempMemoryStatistic in ops.GetTempMemoryStatistics())
|
||||
{
|
||||
TempMemoriesInfo.Add(new TempMemoryInfo(tempMemoryStatistic));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public class MemorySnapshotsReport
|
||||
{
|
||||
public List<MemorySnapshotReport> MemorySnapshotsReports { get; private set; }
|
||||
|
||||
public MemorySnapshotsReport()
|
||||
{
|
||||
Reset();
|
||||
}
|
||||
|
||||
public void Reset()
|
||||
{
|
||||
MemorySnapshotsReports = new List<MemorySnapshotReport>();
|
||||
}
|
||||
|
||||
public void TakeMemorySnapshot(IOps ops, IVars vars, string context, Layer layer)
|
||||
{
|
||||
var varsWithStatistics = vars as IVarsStatistics;
|
||||
if (varsWithStatistics == null)
|
||||
return;
|
||||
|
||||
MemorySnapshotsReports.Add(new MemorySnapshotReport(ops, varsWithStatistics, context, layer));
|
||||
}
|
||||
|
||||
public MemoryPeakSummary GenerateStringReport(StringBuilder stringBuilder, bool spreadSheetFormat)
|
||||
{
|
||||
stringBuilder.Append("**************** MEMORY SNAPSHOTS REPORTS - START ****************\n");
|
||||
stringBuilder.Append($"Number of snapshots : {MemorySnapshotsReports.Count}\n\n");
|
||||
|
||||
var memoryPeakSummary = MemoryAndExecutionReportHelper.GenerateStringReport(stringBuilder, MemorySnapshotsReports, spreadSheetFormat);
|
||||
stringBuilder.Append("**************** MEMORY SNAPSHOTS REPORTS - STOP ****************\n");
|
||||
return memoryPeakSummary;
|
||||
}
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
var stringBuilder = new StringBuilder(10000);
|
||||
GenerateStringReport(stringBuilder, spreadSheetFormat:false);
|
||||
return stringBuilder.ToString();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
|
||||
#endif //ENABLE_BARRACUDA_STATS
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 0e26059fb46b5a345a0a59a9fe3eafae
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,922 +0,0 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
using UnityEngine;
|
||||
using UnityEngine.Assertions;
|
||||
using UnityEngine.Profiling;
|
||||
|
||||
[assembly: InternalsVisibleTo("Unity.Barracuda.ONNX")]
|
||||
[assembly: InternalsVisibleTo("Unity.Barracuda.Editor")]
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
|
||||
internal class ModelAnalyzer
|
||||
{
|
||||
public static string GetDefaultInputName(Model model)
|
||||
{
|
||||
bool modelHasOnlyOneInput = model.inputs.Count == 1;
|
||||
if (modelHasOnlyOneInput)
|
||||
return model.inputs[0].name;
|
||||
|
||||
var memories = new HashSet<string>();
|
||||
foreach (var m in model.memories)
|
||||
memories.Add(m.input);
|
||||
|
||||
// find the first unconnected input as a default model input
|
||||
var previousLayerNames = new HashSet<string>();
|
||||
foreach (var l in model.layers)
|
||||
{
|
||||
previousLayerNames.Add(l.name);
|
||||
|
||||
bool layerDoesNotNeedInput = (l.type == Layer.Type.Load);
|
||||
|
||||
if (layerDoesNotNeedInput)
|
||||
continue;
|
||||
|
||||
foreach (var inputName in l.inputs)
|
||||
{
|
||||
bool inputIsUnconnected = !previousLayerNames.Contains(inputName);
|
||||
bool inputIsNotPartOfMemory = !memories.Contains(inputName);
|
||||
|
||||
if (inputIsUnconnected && inputIsNotPartOfMemory)
|
||||
return inputName;
|
||||
}
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
static public string GetDefaultOutputName(Model model)
|
||||
{
|
||||
if (model.outputs.Count == 1)
|
||||
return model.outputs[0];
|
||||
|
||||
if (model.layers.Count > 0)
|
||||
{
|
||||
var lastLayer = model.layers[model.layers.Count - 1];
|
||||
return lastLayer.name;
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
public static TensorShape?[] ListTemporaryTensorShapes(Model model, IDictionary<string, TensorShape> inputShapes)
|
||||
{
|
||||
IDictionary<string, TensorShape?> shapesByName;
|
||||
return ListTemporaryTensorShapes(model, inputShapes, out shapesByName);
|
||||
}
|
||||
|
||||
public static TensorShape?[] ListTemporaryTensorShapes(Model model, IDictionary<string, TensorShape> inputShapes,
|
||||
out IDictionary<string, TensorShape?> shapesByName)
|
||||
{
|
||||
Profiler.BeginSample ("Barracuda.ListTemporaryTensorShapes");
|
||||
var shapes = new List<TensorShape?>();
|
||||
shapesByName = new Dictionary<string, TensorShape?>();
|
||||
foreach (var entry in inputShapes)
|
||||
shapesByName.Add(entry.Key, entry.Value);
|
||||
|
||||
TensorShape? Xn;
|
||||
shapesByName.TryGetValue(GetDefaultInputName(model), out Xn); // default input
|
||||
TensorShape? O = Xn;
|
||||
|
||||
foreach (var l in model.layers)
|
||||
{
|
||||
if (l.inputs.Length > 0 && shapesByName.TryGetValue(l.inputs[0], out TensorShape? xShape))
|
||||
Xn = xShape;
|
||||
else
|
||||
Xn = O; // previous output is used, if-and-only-if layer has no explicit inputs
|
||||
|
||||
if (Xn == null)
|
||||
{
|
||||
shapes.Add(Xn);
|
||||
shapesByName.Add(l.name, Xn);
|
||||
continue;
|
||||
}
|
||||
|
||||
TensorShape X = Xn.Value;
|
||||
|
||||
if (l.type == Layer.Type.Dense)
|
||||
{
|
||||
Assert.IsNotNull(l.datasets);
|
||||
var W = l.datasets[0].shape;
|
||||
O = new TensorShape(X.flatHeight, W.flatWidth);
|
||||
}
|
||||
else if (l.type == Layer.Type.Dense3)
|
||||
{
|
||||
Assert.IsNotNull(l.datasets);
|
||||
var W = l.datasets[0].shape;
|
||||
O = new TensorShape(X.batch, 1, W.channels, X.channels);
|
||||
}
|
||||
else if (l.type == Layer.Type.MatMul)
|
||||
{
|
||||
if (!shapesByName.ContainsKey(l.inputs[1]) || shapesByName[l.inputs[1]] == null)
|
||||
{
|
||||
O = null;
|
||||
break;
|
||||
}
|
||||
|
||||
var Y = shapesByName[l.inputs[1]].Value;
|
||||
|
||||
int rankX;
|
||||
int rankY;
|
||||
List<int> onnxXshape;
|
||||
List<int> onnxYshape;
|
||||
|
||||
if (l.pool == null || l.pool.Length == 0)
|
||||
{
|
||||
LegacyGetXYRanks(X, Y, out rankX, out rankY);
|
||||
}
|
||||
else
|
||||
{
|
||||
rankX = l.pool[0];
|
||||
rankY = l.pool[1];
|
||||
}
|
||||
|
||||
onnxXshape = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaShapeToOnnxLayout(X, rankX);
|
||||
onnxYshape = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaShapeToOnnxLayout(Y, rankY);
|
||||
|
||||
int rankO = Math.Max(rankX, rankY);
|
||||
|
||||
// pad 1 on front of shape to both be rankO shape
|
||||
for (int i = 0; i < (rankX - rankY); i++)
|
||||
onnxYshape.Insert(0, 1);
|
||||
|
||||
for (int i = 0; i < (rankY - rankX); i++)
|
||||
onnxXshape.Insert(0, 1);
|
||||
|
||||
if (rankO == 2)
|
||||
O = new TensorShape(onnxXshape[0], 1, 1, onnxYshape[1]);
|
||||
else if (rankO == 3)
|
||||
O = new TensorShape(Math.Max(onnxXshape[0], onnxYshape[0]), 1, onnxYshape[2], onnxXshape[1]);
|
||||
else
|
||||
O = new TensorShape(Math.Max(onnxXshape[0], onnxYshape[0]), onnxXshape[2], onnxYshape[3], Math.Max(onnxXshape[1], onnxYshape[1]));
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Conv2D ||
|
||||
l.type == Layer.Type.Conv3D ||
|
||||
l.type == Layer.Type.DepthwiseConv2D)
|
||||
{
|
||||
var K = l.datasets[0].shape;
|
||||
|
||||
Assert.IsNotNull(l.stride);
|
||||
Assert.IsNotNull(l.pad);
|
||||
var pad = X.AdjustPadToKernel(K, l.stride, l.pad);
|
||||
|
||||
O = X.ApplyKernel(K, l.stride, pad);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Conv2DTrans)
|
||||
{
|
||||
var K = l.datasets[0].shape;
|
||||
Assert.IsNotNull(l.stride);
|
||||
Assert.IsNotNull(l.pad);
|
||||
// pool size is treated as output_adjustment aka output_padding here
|
||||
var outputAdjustment = l.pool;
|
||||
var pad = X.AdjustPadToKernel(K, l.stride, l.pad);
|
||||
O = X.ApplyKernelInverse(K, l.stride, pad, outputAdjustment);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Upsample2D)
|
||||
{
|
||||
if(l.pool.Length != 2)
|
||||
{
|
||||
O = null;
|
||||
}
|
||||
else
|
||||
{
|
||||
// pool size is treated as upsample coefficient here
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.AreEqual(l.pool.Length, 2);
|
||||
O = new TensorShape(X.batch, X.height * l.pool[1], X.width * l.pool[0], X.channels);
|
||||
}
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Upsample3D)
|
||||
{
|
||||
if(l.pool.Length != 2)
|
||||
{
|
||||
O = null;
|
||||
}
|
||||
else
|
||||
{
|
||||
// pool size is treated as upsample coefficient here
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.AreEqual(l.pool.Length, 3);
|
||||
O = new TensorShape(1,1,X.batch, 1, X.depth * l.pool[2], X.height * l.pool[1], X.width * l.pool[0], X.channels);
|
||||
}
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Resample2D)
|
||||
{
|
||||
if(l.pool.Length != 2)
|
||||
{
|
||||
O = null;
|
||||
}
|
||||
else
|
||||
{
|
||||
// pool is treated as resample size here
|
||||
var size = l.pool;
|
||||
Assert.IsNotNull(size);
|
||||
Assert.AreEqual(size.Length, 2);
|
||||
O = new TensorShape(X.batch, size[1], size[0], X.channels);
|
||||
}
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.DepthToSpace)
|
||||
{
|
||||
// pool size is treated as blocksize here
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.AreEqual(l.pool.Length, 2);
|
||||
Assert.AreEqual(X.channels % (l.pool[0] * l.pool[1]), 0);
|
||||
O = new TensorShape(X.batch, X.height * l.pool[1], X.width * l.pool[0], X.channels / (l.pool[0] * l.pool[1]));
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.SpaceToDepth)
|
||||
{
|
||||
// pool size is treated as blocksize here
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.AreEqual(l.pool.Length, 2);
|
||||
O = new TensorShape(X.batch, X.height / l.pool[1], X.width / l.pool[0], X.channels * (l.pool[0] * l.pool[1]));
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.MaxPool2D ||
|
||||
l.type == Layer.Type.AvgPool2D)
|
||||
{
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.IsNotNull(l.stride);
|
||||
Assert.IsNotNull(l.pad);
|
||||
var pad = X.AdjustPadToPool(l.pool, l.stride, l.pad);
|
||||
O = X.ApplyPool(l.pool, l.stride, pad);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.GlobalMaxPool2D ||
|
||||
l.type == Layer.Type.GlobalAvgPool2D)
|
||||
{
|
||||
O = new TensorShape(X.batch, 1, 1, X.channels);
|
||||
}
|
||||
else if (l.type == Layer.Type.Border3D)
|
||||
{
|
||||
Assert.IsNotNull(l.pad);
|
||||
// legacy support
|
||||
if (l.pad.Length == 6)
|
||||
X = X.ApplyBorder(new[] { l.pad[0], l.pad[1], l.pad[2], 0, l.pad[3], l.pad[4], l.pad[5], 0 });
|
||||
else
|
||||
O = X.ApplyBorder(l.pad);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Border2D ||
|
||||
l.type == Layer.Type.Pad2DReflect ||
|
||||
l.type == Layer.Type.Pad2DSymmetric ||
|
||||
l.type == Layer.Type.Pad2DEdge)
|
||||
{
|
||||
Assert.IsNotNull(l.pad);
|
||||
// legacy support
|
||||
if (l.pad.Length == 4)
|
||||
X = X.ApplyBorder(new[] { l.pad[0], l.pad[1], 0, l.pad[2], l.pad[3], 0 });
|
||||
else
|
||||
O = X.ApplyBorder(l.pad);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Conv3D ||
|
||||
l.type == Layer.Type.Conv3DTrans ||
|
||||
l.type == Layer.Type.Upsample3D ||
|
||||
l.type == Layer.Type.MaxPool3D ||
|
||||
l.type == Layer.Type.AvgPool3D ||
|
||||
l.type == Layer.Type.GlobalMaxPool3D ||
|
||||
l.type == Layer.Type.GlobalAvgPool3D ||
|
||||
l.type == Layer.Type.Border3D)
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.RandomNormal ||
|
||||
l.type == Layer.Type.RandomUniform)
|
||||
{
|
||||
Assert.IsNotNull(l.pool);
|
||||
// pool size is treated as shape constant, if not empty
|
||||
// otherwise shape of the previous tensor is used
|
||||
if (l.pool.Length > 0)
|
||||
O = new TensorShape(l.pool);
|
||||
else
|
||||
O = X;
|
||||
}
|
||||
else if (l.type == Layer.Type.ConstantOfShape)
|
||||
{
|
||||
if(l.axis != 1)
|
||||
O = null;
|
||||
else
|
||||
O = X;
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Multinomial)
|
||||
{
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.AreEqual(l.pool.Length, 1);
|
||||
O = new TensorShape(X.batch, l.pool[0]);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.OneHot)
|
||||
{
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.AreEqual(l.pool.Length, 1);
|
||||
int depth = l.pool[0];
|
||||
int inputRank = l.axis;
|
||||
inputRank = inputRank < 0 ? X.dimensions : inputRank;
|
||||
|
||||
if (inputRank == 1)
|
||||
O = new TensorShape(X.flatHeight, depth);
|
||||
else if (inputRank == 2)
|
||||
O = new TensorShape(X.flatHeight, 1, depth, X.flatWidth);
|
||||
else
|
||||
O = new TensorShape(X.batch, X.height, depth, X.channels);
|
||||
}
|
||||
else if (l.type == Layer.Type.RoiAlign)
|
||||
{
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.AreEqual(l.pool.Length, 2);
|
||||
|
||||
if (shapesByName.TryGetValue(l.inputs[1], out TensorShape? shape) && shape != null)
|
||||
{
|
||||
int batches = shape.Value.flatHeight;
|
||||
O = new TensorShape(batches, l.pool[0], l.pool[1], X.channels);
|
||||
}
|
||||
else
|
||||
O = null;
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Add ||
|
||||
l.type == Layer.Type.Sub ||
|
||||
l.type == Layer.Type.Mul ||
|
||||
l.type == Layer.Type.Div ||
|
||||
l.type == Layer.Type.Pow ||
|
||||
l.type == Layer.Type.Min ||
|
||||
l.type == Layer.Type.Max ||
|
||||
l.type == Layer.Type.Mean||
|
||||
l.type == Layer.Type.Greater ||
|
||||
l.type == Layer.Type.GreaterEqual ||
|
||||
l.type == Layer.Type.Less ||
|
||||
l.type == Layer.Type.LessEqual ||
|
||||
l.type == Layer.Type.Equal ||
|
||||
l.type == Layer.Type.LogicalOr ||
|
||||
l.type == Layer.Type.LogicalAnd ||
|
||||
l.type == Layer.Type.LogicalXor ||
|
||||
l.type == Layer.Type.Where)
|
||||
{
|
||||
// gather shapes by names
|
||||
var list = new List<TensorShape>(l.inputs.Length);
|
||||
bool allShapesKnown = true;
|
||||
foreach (var i in l.inputs)
|
||||
{
|
||||
if (shapesByName.TryGetValue(i, out TensorShape? shape) && shape != null)
|
||||
list.Add(shape.Value);
|
||||
else
|
||||
allShapesKnown = false;
|
||||
}
|
||||
|
||||
O = allShapesKnown ? TensorExtensions.Max(list.ToArray()) : default(TensorShape?);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.ReduceL1 ||
|
||||
l.type == Layer.Type.ReduceL2 ||
|
||||
l.type == Layer.Type.ReduceLogSum ||
|
||||
l.type == Layer.Type.ReduceLogSumExp ||
|
||||
l.type == Layer.Type.ReduceMax ||
|
||||
l.type == Layer.Type.ReduceMean ||
|
||||
l.type == Layer.Type.ReduceMin ||
|
||||
l.type == Layer.Type.ReduceProd ||
|
||||
l.type == Layer.Type.ReduceSum ||
|
||||
l.type == Layer.Type.ReduceSumSquare ||
|
||||
l.type == Layer.Type.ArgMax ||
|
||||
l.type == Layer.Type.ArgMin)
|
||||
{
|
||||
O = X.Reduce(l.axis);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Flatten)
|
||||
{
|
||||
O = X.Flatten();
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Reshape)
|
||||
{
|
||||
// pool size is treated as the shape, if not empty
|
||||
var size = l.pool;
|
||||
|
||||
Assert.IsNotNull(size);
|
||||
|
||||
if (size.Length == 0 && l.inputs.Length > 1)
|
||||
{
|
||||
switch (l.axis)
|
||||
{
|
||||
// Legacy - use the shape of the input tensor as the shape
|
||||
case -1:
|
||||
if (shapesByName.TryGetValue(l.inputs[1], out TensorShape? shape))
|
||||
size = shape.Value.ToArray();
|
||||
break;
|
||||
|
||||
// Use the tensor values as the shape; Calculated at runtime
|
||||
case 1:
|
||||
O = null;
|
||||
break;
|
||||
}
|
||||
|
||||
if (O == null)
|
||||
break;
|
||||
}
|
||||
|
||||
Assert.IsTrue( (size.Length == 4) || (size.Length == 8));
|
||||
O = X.Reshape(size);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Expand)
|
||||
{
|
||||
// pool size is treated as new shape
|
||||
var newShape = l.pool;
|
||||
|
||||
Assert.IsNotNull(newShape);
|
||||
Assert.IsTrue(newShape.Length == 8 || newShape.Length == 4);
|
||||
|
||||
O = new TensorShape(newShape);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Transpose)
|
||||
{
|
||||
var permutations = l.pool;
|
||||
if (permutations == null)
|
||||
O = new TensorShape(X.flatWidth, X.flatHeight);
|
||||
else
|
||||
{
|
||||
Assert.IsTrue(permutations.Length == 8 || permutations.Length == 4);
|
||||
O = X.Permute(permutations);
|
||||
}
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Gather)
|
||||
{
|
||||
if (!shapesByName.TryGetValue(l.inputs[0], out TensorShape? input0Shape) || input0Shape == null
|
||||
|| !shapesByName.TryGetValue(l.inputs[1], out TensorShape? input1Shape) || input1Shape == null)
|
||||
{
|
||||
O = null;
|
||||
break;
|
||||
}
|
||||
|
||||
int[] shape = input0Shape.Value.ToArray();
|
||||
shape[l.axis] = input1Shape.Value.length;
|
||||
|
||||
O = new TensorShape(shape);
|
||||
|
||||
if (l.pool != null && l.pool.Length == 2 && l.pool[1] > 1)
|
||||
{
|
||||
int xRank = l.pool[0];
|
||||
int indicesRank = l.pool[1];
|
||||
var oShape = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaShapeToList(O.Value, xRank);
|
||||
var indicesShape = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaShapeToList(input1Shape.Value, indicesRank);
|
||||
|
||||
int axis = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaAxisToTensor(l.axis, xRank);
|
||||
oShape.InsertRange(axis, indicesShape);
|
||||
oShape.RemoveAt(axis + indicesShape.Count);
|
||||
|
||||
O = (O.Value).Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaLayoutToTensorShapeLayout(oShape.ToArray()));
|
||||
|
||||
// rank 2 -> 3
|
||||
if (xRank == 2 && oShape.Count == 3)
|
||||
O = (O.Value).Permute(new int[] { 0, 1, 3, 2 });
|
||||
}
|
||||
|
||||
}
|
||||
else if (l.type == Layer.Type.ScatterND)
|
||||
{
|
||||
O = X;
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Squeeze ||
|
||||
l.type == Layer.Type.Unsqueeze)
|
||||
{
|
||||
O = X;
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Concat)
|
||||
{
|
||||
// gather shapes by names
|
||||
var list = new List<TensorShape>(l.inputs.Length);
|
||||
bool allShapesKnown = true;
|
||||
foreach (var i in l.inputs)
|
||||
{
|
||||
if (!shapesByName.TryGetValue(i, out var shape) || shape == null)
|
||||
{
|
||||
allShapesKnown = false;
|
||||
continue;
|
||||
}
|
||||
list.Add(shape.Value);
|
||||
}
|
||||
|
||||
O = allShapesKnown ? TensorExtensions.Concat(list.ToArray(), l.axis) : default(TensorShape?);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.StridedSlice)
|
||||
{
|
||||
Assert.IsNotNull(l.pad);
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.IsNotNull(l.stride);
|
||||
O = X.ApplyStridedSlice(l.pad, l.pool, l.stride);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Tile)
|
||||
{
|
||||
// pool size is treated as tiling coefficient here
|
||||
Assert.IsNotNull(l.pool);
|
||||
var scale = l.pool;
|
||||
O = X.Scale(scale);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Load)
|
||||
{
|
||||
O = l.datasets[0].shape;
|
||||
}
|
||||
else if (// elementwise operations
|
||||
l.type == Layer.Type.Nop ||
|
||||
l.type == Layer.Type.Activation ||
|
||||
l.type == Layer.Type.ScaleBias ||
|
||||
l.type == Layer.Type.Normalization ||
|
||||
l.type == Layer.Type.LRN ||
|
||||
l.type == Layer.Type.Dropout ||
|
||||
l.type == Layer.Type.LogicalNot ||
|
||||
l.type == Layer.Type.Sign)
|
||||
{
|
||||
// works in place, keeps the same shape size
|
||||
O = X;
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.TopKIndices ||
|
||||
l.type == Layer.Type.TopKValues ||
|
||||
l.type == Layer.Type.NonMaxSuppression ||
|
||||
l.type == Layer.Type.LSTM ||
|
||||
l.type == Layer.Type.NonZero)
|
||||
{
|
||||
// Calculated at runtime
|
||||
O = null;
|
||||
}
|
||||
else if (l.type == Layer.Type.Shape)
|
||||
{
|
||||
int shapeRank = l.axis > 0 ? 1 : X.length;
|
||||
O = new TensorShape(shapeRank, 1, 1, 1);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Conv3D ||
|
||||
l.type == Layer.Type.Conv3DTrans ||
|
||||
l.type == Layer.Type.Upsample3D ||
|
||||
l.type == Layer.Type.MaxPool3D ||
|
||||
l.type == Layer.Type.AvgPool3D ||
|
||||
l.type == Layer.Type.GlobalMaxPool3D ||
|
||||
l.type == Layer.Type.GlobalAvgPool3D ||
|
||||
l.type == Layer.Type.Border3D)
|
||||
{
|
||||
throw new NotImplementedException("3D operations are not implemented yet!");
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new NotImplementedException($"Layer type {l.type} needs to be explicitly handled");
|
||||
}
|
||||
|
||||
shapes.Add(O);
|
||||
shapesByName.Add(l.name, O);
|
||||
}
|
||||
|
||||
Profiler.EndSample();
|
||||
return shapes.ToArray();
|
||||
}
|
||||
|
||||
// TODO: Remove when the legacy importer / code path is no longer needed (i.e. when pool is always set)
|
||||
public static void LegacyGetXYRanks(TensorShape X, TensorShape Y, out int rankX, out int rankY)
|
||||
{
|
||||
// ONNX rank 2 : N,C => N,1,1,C
|
||||
// rank 3 : one must be N C W, (batches = N) => N, 1, W, C
|
||||
// rank 4 : one must be N C H W, (batches = N * C) => N H W C
|
||||
// X and Y can be different ranks
|
||||
var onnxXshape = new List<int> { X.batch, X.channels, X.height, X.width };
|
||||
if (X.height == 1) onnxXshape = new List<int> { X.batch, X.channels, X.width, 1 };
|
||||
var onnxYshape = new List<int> { Y.batch, Y.channels, Y.height, Y.width };
|
||||
if (Y.height == 1) onnxYshape = new List<int> { Y.batch, Y.channels, Y.width, 1 };
|
||||
|
||||
rankX = 0;
|
||||
for (int i = 3; i >= 0; i--)
|
||||
{
|
||||
if (onnxXshape[i] != 1)
|
||||
{
|
||||
rankX = i + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
rankY = 0;
|
||||
for (int i = 3; i >= 0; i--)
|
||||
{
|
||||
if (onnxYshape[i] != 1)
|
||||
{
|
||||
rankY = i + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static bool TryGetOutputTensorShape(Model model, IDictionary<string, TensorShape> inputShapes, string output, out TensorShape shape)
|
||||
{
|
||||
shape = new TensorShape();
|
||||
IDictionary<string, TensorShape?> shapesByName;
|
||||
ListTemporaryTensorShapes(model, inputShapes, out shapesByName);
|
||||
|
||||
TensorShape? dynamicShape;
|
||||
bool found = shapesByName.TryGetValue(output, out dynamicShape) && dynamicShape != null;
|
||||
if (found)
|
||||
shape = dynamicShape.Value;
|
||||
return found;
|
||||
}
|
||||
|
||||
public static bool TryGetOutputTensorShape(Model model, string output, out TensorShape shape)
|
||||
{
|
||||
var inputShapes = new Dictionary<string, TensorShape>();
|
||||
foreach (var i in model.inputs)
|
||||
inputShapes.Add(i.name, new TensorShape(i.shape));
|
||||
return TryGetOutputTensorShape(model, inputShapes, output, out shape);
|
||||
}
|
||||
|
||||
public static bool FindLayerByName(Model model, string name, out Layer layer)
|
||||
{
|
||||
layer = new Layer("",Layer.Type.Nop);
|
||||
foreach (var l in model.layers)
|
||||
{
|
||||
if (l.name == name)
|
||||
{
|
||||
layer = l;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public static HashSet<Layer> FindLayersThatRequireStorage(Model model)
|
||||
{
|
||||
var allInputsExceptFromPreviousLayer = new HashSet<string>();
|
||||
Layer prevLayer = null;
|
||||
foreach (var layer in model.layers)
|
||||
{
|
||||
foreach (var input in layer.inputs)
|
||||
if (prevLayer != null && input != prevLayer.name)
|
||||
allInputsExceptFromPreviousLayer.Add(input);
|
||||
prevLayer = layer;
|
||||
}
|
||||
|
||||
var allOutputs = new HashSet<string>();
|
||||
foreach (var output in model.outputs)
|
||||
allOutputs.Add(output);
|
||||
foreach (var memory in model.memories)
|
||||
allOutputs.Add(memory.output);
|
||||
allOutputs.Add(GetDefaultOutputName(model));
|
||||
|
||||
var requireStorage = new HashSet<Layer>();
|
||||
foreach (var layer in model.layers)
|
||||
{
|
||||
// loading constant tensor requires storage
|
||||
if (layer.type == Layer.Type.Load)
|
||||
requireStorage.Add(layer);
|
||||
|
||||
// @TBD: implement safety check that ensures Nop never has input
|
||||
// otherwise it has to be treated as Load operation
|
||||
if (layer.type == Layer.Type.Nop)
|
||||
requireStorage.Add(layer);
|
||||
|
||||
if (allInputsExceptFromPreviousLayer.Contains(layer.name) ||
|
||||
allOutputs.Contains(layer.name))
|
||||
requireStorage.Add(layer);
|
||||
}
|
||||
|
||||
return requireStorage;
|
||||
}
|
||||
|
||||
public static HashSet<Layer> FindUpstreamLayers(Model model, string[] outputs)
|
||||
{
|
||||
// TODO: replace with var layersByName = model.layers.ToDictionary(i => i.name, i => i);
|
||||
var layersByName = new Dictionary<string, Layer>();
|
||||
foreach (var l in model.layers)
|
||||
layersByName.Add(l.name, l);
|
||||
|
||||
var connected = new HashSet<Layer>();
|
||||
var layersToVisit = new HashSet<Layer>();
|
||||
foreach (var o in outputs)
|
||||
if (layersByName.ContainsKey(o))
|
||||
{
|
||||
layersToVisit.Add(layersByName[o]);
|
||||
connected.Add(layersByName[o]);
|
||||
}
|
||||
|
||||
while (layersToVisit.Count > 0)
|
||||
{
|
||||
var visitNext = new HashSet<Layer>();
|
||||
foreach (var l in layersToVisit)
|
||||
foreach (var i in l.inputs)
|
||||
if (layersByName.ContainsKey(i))
|
||||
{
|
||||
visitNext.Add(layersByName[i]);
|
||||
connected.Add(layersByName[i]);
|
||||
}
|
||||
|
||||
layersToVisit = visitNext;
|
||||
}
|
||||
return connected;
|
||||
}
|
||||
|
||||
public static TensorShape FindLargestNecessaryTensorShape(Model model, IDictionary<string, TensorShape> inputShapes)
|
||||
{
|
||||
Profiler.BeginSample ("Barracuda.FindLargestNecessaryTensorShape");
|
||||
|
||||
var shapes = ListTemporaryTensorShapes(model, inputShapes);
|
||||
|
||||
var maxTensorShape = new TensorShape(1,1,1,1);
|
||||
foreach (var X in shapes)
|
||||
if (X?.length > maxTensorShape.length)
|
||||
maxTensorShape = X.Value;
|
||||
|
||||
Profiler.EndSample ();
|
||||
|
||||
return maxTensorShape;
|
||||
}
|
||||
|
||||
public static TensorShape FindLargestArgumentTensorShape(Model model)
|
||||
{
|
||||
TensorShape maxTensorShape = new TensorShape(1,1,1,1);
|
||||
foreach (var layer in model.layers)
|
||||
foreach (var arg in layer.datasets)
|
||||
if (arg.shape.length > maxTensorShape.length)
|
||||
maxTensorShape = arg.shape;
|
||||
|
||||
return maxTensorShape;
|
||||
}
|
||||
|
||||
public static string[] FindUnusedLayers(Model model)
|
||||
{
|
||||
var layerUsageByName = model.layers.ToDictionary(i => i.name, i => false);
|
||||
foreach (var layer in model.layers)
|
||||
{
|
||||
if (layer.flags.HasFlag(Layer.Flags.Preserve))
|
||||
layerUsageByName[layer.name] = true;
|
||||
|
||||
foreach (var i in layer.inputs)
|
||||
{
|
||||
layerUsageByName[i] = true;
|
||||
}
|
||||
}
|
||||
|
||||
foreach (var o in model.outputs)
|
||||
{
|
||||
layerUsageByName[o] = true;
|
||||
}
|
||||
|
||||
foreach (var mem in model.memories)
|
||||
{
|
||||
layerUsageByName[mem.output] = true;
|
||||
}
|
||||
|
||||
return layerUsageByName.Where(keyValue => !keyValue.Value).Select(keyValue => keyValue.Key).ToArray();
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns the subset of <paramref name="links"/> that does not resolve to any
/// layer name, global input or memory input of the model.
/// </summary>
/// <param name="model">Model providing the set of resolvable names.</param>
/// <param name="links">Candidate link names to check.</param>
/// <returns>The link names that point at nothing in the model.</returns>
private static string[] FindBrokenLinks(Model model, HashSet<string> links)
{
    // Every name a link may legitimately point at: layer outputs, global inputs,
    // and memory inputs.
    var resolvable = new HashSet<string>(model.layers.Select(l => l.name));
    resolvable.UnionWith(model.inputs.Select(i => i.name));
    resolvable.UnionWith(model.memories.Select(m => m.input));

    // BUG FIX: the original aliased `links` and called ExceptWith on it directly,
    // silently mutating the caller's set as a side effect. Work on a copy instead.
    var broken = new HashSet<string>(links);
    broken.ExceptWith(resolvable);
    return broken.ToArray();
}
|
||||
|
||||
/// <summary>
/// Convenience overload: checks an array of link names for broken links.
/// </summary>
private static string[] FindBrokenLinks(Model model, string[] links) =>
    FindBrokenLinks(model, new HashSet<string>(links));
|
||||
|
||||
/// <summary>
/// Returns every name referenced anywhere in the model (global outputs and all
/// layer inputs) that does not resolve to an actual layer, input or memory.
/// </summary>
/// <param name="model">Model to validate.</param>
/// <returns>Names of dangling references.</returns>
public static string[] FindBrokenLinks(Model model)
{
    // Collect every referenced name: global outputs first...
    var referenced = new HashSet<string>(model.outputs);

    // ...then the inputs of every layer.
    foreach (var layer in model.layers)
        referenced.UnionWith(layer.inputs);

    return FindBrokenLinks(model, referenced);
}
|
||||
|
||||
/// <summary>
/// Returns the model inputs that nothing consumes: not referenced by any layer
/// nor exposed directly as a global output.
/// </summary>
/// <param name="model">Model to inspect.</param>
/// <returns>Names of inputs with no consumers.</returns>
public static string[] FindUnconnectedInputs(Model model)
{
    // ToDictionary deliberately throws on duplicate input names, matching the
    // strictness of the rest of the analyzer.
    var unconnected = model.inputs.ToDictionary(i => i.name, i => true);

    // An input exposed directly as a global output is connected.
    foreach (var outputName in model.outputs)
        unconnected.Remove(outputName);

    // An input consumed by any layer is connected.
    foreach (var layer in model.layers)
    {
        foreach (var inputName in layer.inputs)
            unconnected.Remove(inputName);
    }

    return unconnected.Keys.ToArray();
}
|
||||
|
||||
/// <summary>
/// Returns everything that consumes the output of <paramref name="layerName"/>:
/// the layers taking it as an input, plus the global model outputs it feeds.
/// </summary>
/// <param name="model">Model to inspect.</param>
/// <param name="layerName">Name of the producing layer.</param>
/// <returns>Names of consumers of the layer's output (deduplicated).</returns>
public static string[] FindLayerOutputs(Model model, string layerName)
{
    // Layers that consume `layerName` as one of their inputs.
    var consumers = model.layers
        .Where(l => l.inputs.Contains(layerName))
        .Select(l => l.name);

    // Global model outputs fed directly by `layerName`.
    var globalOutputs = model.outputs.Where(o => o == layerName);

    // BUG FIX: Enumerable.Union returns a new sequence; the original discarded
    // its result (`allVariables.Union(globalOutputs);`), so global outputs were
    // never part of the returned array. Use the union result.
    return consumers.Union(globalOutputs).ToArray();
}
|
||||
|
||||
/// <summary>
/// Returns the global outputs that do not resolve to any layer, input or memory.
/// </summary>
public static string[] FindUnconnectedOutputs(Model model) =>
    FindBrokenLinks(model, model.outputs.ToArray());
|
||||
|
||||
/// <summary>
/// Whether the layer is an element-wise/concat operation that supports
/// shape broadcasting of its inputs.
/// </summary>
/// <remarks>
/// NOTE: the method name carries a long-standing typo ("Broacastable");
/// it is kept unchanged because external callers may reference it.
/// </remarks>
public static bool IsLayerBroacastable(Layer layer)
{
    switch (layer.type)
    {
        case Layer.Type.Add:
        case Layer.Type.Sub:
        case Layer.Type.Mul:
        case Layer.Type.Div:
        case Layer.Type.Pow:
        case Layer.Type.Min:
        case Layer.Type.Max:
        case Layer.Type.Mean:
        case Layer.Type.Greater:
        case Layer.Type.GreaterEqual:
        case Layer.Type.Less:
        case Layer.Type.LessEqual:
        case Layer.Type.Equal:
        case Layer.Type.LogicalOr:
        case Layer.Type.LogicalAnd:
        case Layer.Type.LogicalXor:
        case Layer.Type.Where:
        case Layer.Type.Concat:
            return true;
        default:
            return false;
    }
}
|
||||
/// <summary>
/// Whether broadcasting handling can be skipped for this layer.
/// Only ConstantOfShape with a dynamic shape (axis != 1) qualifies.
/// </summary>
public static bool IsLayerBroadcastSkippable(Layer layer)
{
    // Equivalent to the original if/else ladder: true only for dynamic-shape
    // ConstantOfShape layers; false for everything else.
    return layer.type == Layer.Type.ConstantOfShape && layer.axis != 1;
}
|
||||
|
||||
// Allow some unknown input dimension for shape inference pass
|
||||
// for now batch does not yield problematic shape inference, so allow for unkown batch
|
||||
/// <summary>
/// Whether an input's shape is known well enough for the shape inference pass.
/// Batch (N) may remain unknown; every other dimension must be a known,
/// positive size.
/// </summary>
public static bool IsInputShapeAcceptablyKnowForShapeInference(Model.Input input) // acceptable unknown shape : N
{
    for (int d = 0; d < input.shape.Length; d++)
    {
        bool dimensionUnknown = input.shape[d] <= 0;
        if (dimensionUnknown && d != TensorShape.DataBatch)
            return false;
    }

    return true;
}
|
||||
|
||||
/// <summary>
/// Compares the ordering of "active" dimensions (size != 1) before and after
/// applying <paramref name="permutations"/> to <paramref name="shape"/>.
/// </summary>
/// <remarks>
/// NOTE(review): this returns true when the active-dimension layouts are EQUAL,
/// which reads as the opposite of what the method name suggests — confirm the
/// intended polarity against callers before relying on the name.
/// </remarks>
public static bool DoesTransposeChangeTensorLayout(TensorShape shape, int[] permutations)
{
    // Axis indices of the active (non-unit) dimensions before the transpose.
    var layoutBefore = new List<int>();
    for (int d = 0; d < 8; d++)
    {
        if (shape[d] != 1)
            layoutBefore.Add(d);
    }

    // Normalize 4D (NHWC) permutations into the internal 8D representation.
    if (permutations.Length == 4)
        permutations = TensorExtensions.Get8DPermutationsForNHWCPermutationsAndShape(shape, permutations);

    // For each post-transpose position, which original axis landed there.
    var axisMapping = TensorExtensions.Permute(new[] { 0, 1, 2, 3, 4, 5, 6, 7 }, permutations);
    var shapeAfter = shape.Permute(permutations);

    // Original axis index of each active dimension after the transpose.
    var layoutAfter = new List<int>();
    for (int d = 0; d < 8; d++)
    {
        if (shapeAfter[d] != 1)
            layoutAfter.Add(axisMapping[d]);
    }

    return layoutBefore.SequenceEqual(layoutAfter);
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 58838262534854657974303d5782ea38
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,253 +0,0 @@
|
||||
#if ENABLE_BARRACUDA_STATS
|
||||
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using UnityEngine;
|
||||
using UnityEngine.Assertions;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
/// <summary>
/// Immutable description of a single compute dispatch: which backend and kernel
/// ran, and the work-item counts along each axis.
/// </summary>
public readonly struct DispatchInfo
{
    public readonly string backend;
    public readonly string kernel;
    public readonly int workItemsX;
    public readonly int workItemsY;
    public readonly int workItemsZ;

    /// <summary>Creates a dispatch record for the given backend/kernel and work size.</summary>
    public DispatchInfo(string backend, string kernel, int workItemsX, int workItemsY, int workItemsZ)
    {
        this.backend = backend;
        this.kernel = kernel;
        this.workItemsX = workItemsX;
        this.workItemsY = workItemsY;
        this.workItemsZ = workItemsZ;
    }

    /// <summary>Formats as "backend:kernel(x,y,z)" for reports.</summary>
    public override string ToString() => $"{backend}:{kernel}({workItemsX},{workItemsY},{workItemsZ})";

    /// <summary>Builds a DispatchInfo from a compute function and its dispatch size.</summary>
    internal static DispatchInfo CreateFromComputeFunc(ComputeFunc computeFunc, int x, int y, int z)
    {
        // Reference-context shaders are tagged "REF", optimized ones "OPT".
        var backendTag = computeFunc.computeShaderContext == ComputeShaderContext.Reference ? "REF" : "OPT";
        return new DispatchInfo(backendTag, computeFunc.kernelName, x, y, z);
    }
}
|
||||
|
||||
/// <summary>
/// Accumulated execution statistics for a single layer: type, name, a summary
/// line, ALU/memory counters, and a textual list of the dispatches it issued.
/// </summary>
public class LayerExecutionReport
{
    public string LayerType { get; }
    public string LayerName { get; }
    public string DispatchInfos { get; private set; }
    public string Summary { get; private set; }
    public long NumAlu { get; private set; }
    public long NumBytes { get; private set; }

    internal LayerExecutionReport(Layer l)
    {
        // Activation layers get the activation kind appended so the report can
        // tell them apart (e.g. "Activation.Relu").
        LayerType = l.type + ((l.type == Layer.Type.Activation) ? ("." + l.activation) : "");
        LayerName = l.name;
        Summary = "";
        DispatchInfos = "";
        NumAlu = 0;
        NumBytes = 0;
    }

    /// <summary>Replaces the one-line summary for this layer.</summary>
    internal void SetSummary(string message) => Summary = message;

    /// <summary>Records the ALU-operation and byte-traffic counters.</summary>
    internal void SetALUAndMemStats(long alu, long bytes)
    {
        NumAlu = alu;
        NumBytes = bytes;
    }

    /// <summary>Appends a dispatch to the " / "-separated dispatch list.</summary>
    internal void AddDispatch(DispatchInfo dispatchInfo)
    {
        if (DispatchInfos.Length != 0)
            DispatchInfos += " / ";
        DispatchInfos += dispatchInfo;
    }
}
|
||||
|
||||
/// <summary>
/// Per-execution report for one model run: a list of completed layer reports
/// plus the report of the layer currently executing (if any).
/// </summary>
public class ModelExecutionReport
{
    public List<LayerExecutionReport> CompletedLayerExecutionReports { get; }
    public LayerExecutionReport CurrentLayerExecutionReport { get; private set; }

    internal ModelExecutionReport()
    {
        CompletedLayerExecutionReports = new List<LayerExecutionReport>();
        CurrentLayerExecutionReport = null;
    }

    /// <summary>Opens a report for <paramref name="layer"/>; only one may be open at a time.</summary>
    internal void LayerExecutionStarted(Layer layer)
    {
        Assert.IsNull(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport = new LayerExecutionReport(layer);
    }

    /// <summary>Moves the open layer report into the completed list.</summary>
    internal void LayerExecutionCompleted()
    {
        CompletedLayerExecutionReports.Add(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport = null;
    }

    /// <summary>Sets the summary on the currently open layer report.</summary>
    internal void SetLayerSummary(string message)
    {
        Assert.IsNotNull(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport.SetSummary(message);
    }

    /// <summary>Sets ALU/memory counters on the currently open layer report.</summary>
    internal void SetLayerALUAndMemStats(long alu, long bytes)
    {
        Assert.IsNotNull(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport.SetALUAndMemStats(alu, bytes);
    }

    /// <summary>Adds a dispatch record to the currently open layer report.</summary>
    internal void AddLayerDispatch(DispatchInfo dispatchInfo)
    {
        Assert.IsNotNull(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport.AddDispatch(dispatchInfo);
    }
}
|
||||
|
||||
/// <summary>
/// Collects execution reports across multiple model runs and renders them
/// (plus memory snapshots) as a text or spreadsheet-friendly report.
/// </summary>
public class ModelExecutionsReporter : IModelExecutionsReporter
{
    //Tabs separator make importing into spreadsheet software easy.
    public static readonly string SpreadSheetFieldSeparator = "\t";
    public static readonly string TextFormatFieldSeparator = " / ";
    public static readonly string TextIndentation = " ";

    public List<ModelExecutionReport> CompletedModelExecutionReports { get; private set; }
    public ModelExecutionReport CurrentModelExecutionReport { get; private set; }
    public MemorySnapshotsReport MemorySnapshotsReport { get; private set; }

    public ModelExecutionsReporter()
    {
        Reset();
    }

    /// <summary>Discards all collected reports and snapshots.</summary>
    public void Reset()
    {
        CompletedModelExecutionReports = new List<ModelExecutionReport>();
        CurrentModelExecutionReport = null;
        MemorySnapshotsReport = new MemorySnapshotsReport();
    }

    /// <summary>Records a memory snapshot for the given context/layer.</summary>
    public void TakeMemorySnapshot(IOps ops, IVars vars, string context, Layer layer)
        => MemorySnapshotsReport.TakeMemorySnapshot(ops, vars, context, layer);

    /// <summary>Opens a new model-execution report; only one may be open at a time.</summary>
    public void ModelExecutionStarted()
    {
        Assert.IsNull(CurrentModelExecutionReport);
        CurrentModelExecutionReport = new ModelExecutionReport();
    }

    /// <summary>Moves the open model-execution report into the completed list.</summary>
    public void ModelExecutionCompleted()
    {
        CompletedModelExecutionReports.Add(CurrentModelExecutionReport);
        CurrentModelExecutionReport = null;
    }

    /// <summary>Forwards layer start to the open model-execution report.</summary>
    public void LayerExecutionStarted(Layer layer)
    {
        Assert.IsNotNull(CurrentModelExecutionReport);
        CurrentModelExecutionReport.LayerExecutionStarted(layer);
    }

    /// <summary>Forwards layer completion to the open model-execution report.</summary>
    public void LayerExecutionCompleted()
    {
        Assert.IsNotNull(CurrentModelExecutionReport);
        CurrentModelExecutionReport.LayerExecutionCompleted();
    }

    /// <summary>Forwards a layer summary to the open model-execution report.</summary>
    public void SetLayerSummary(string message)
    {
        Assert.IsNotNull(CurrentModelExecutionReport);
        CurrentModelExecutionReport.SetLayerSummary(message);
    }

    /// <summary>Forwards ALU/memory counters to the open model-execution report.</summary>
    public void SetLayerALUAndMemStats(long alu, long bytes)
    {
        Assert.IsNotNull(CurrentModelExecutionReport);
        CurrentModelExecutionReport.SetLayerALUAndMemStats(alu, bytes);
    }

    /// <summary>Forwards a dispatch record to the open model-execution report.</summary>
    public void AddLayerDispatch(DispatchInfo dispatchInfo)
    {
        Assert.IsNotNull(CurrentModelExecutionReport);
        CurrentModelExecutionReport.AddLayerDispatch(dispatchInfo);
    }

    /// <inheritdoc/>
    public override string ToString() => GenerateStringReport(out _, false);

    /// <summary>
    /// Renders all executions and memory snapshots into one string.
    /// </summary>
    /// <param name="memoryPeakSummary">Peak-memory summary produced by the snapshot report.</param>
    /// <param name="spreadsheetFormat">True for tab-separated output, false for plain text.</param>
    public string GenerateStringReport(out MemoryPeakSummary memoryPeakSummary, bool spreadsheetFormat)
    {
        var sb = new StringBuilder(1000);

        //**************** MODEL EXECUTIONS REPORT - START ****************
        sb.Append($"**************** MODEL EXECUTIONS REPORT - START ****************\n");
        sb.Append($"Number of completed executions : {CompletedModelExecutionReports.Count}\n");
        if (CurrentModelExecutionReport != null)
            sb.Append("Warning: last model execution was not completed. It will be logged, but information might be incomplete.\n");
        sb.Append("\n");

        for (int i = 0; i < CompletedModelExecutionReports.Count; ++i)
        {
            sb.Append($"--------- Execution index : {i} - START ---------\n");
            MemoryAndExecutionReportHelper.GenerateStringReport(sb, CompletedModelExecutionReports[i], spreadsheetFormat);
            sb.Append($"--------- Execution index : {i} - STOP ---------\n");
            sb.Append("\n");
        }

        // The in-flight execution (if any) is reported too, marked as uncompleted.
        if (CurrentModelExecutionReport != null)
        {
            sb.Append($"--------- Uncompleted execution - START ---------\n");
            MemoryAndExecutionReportHelper.GenerateStringReport(sb, CurrentModelExecutionReport, spreadsheetFormat);
            sb.Append($"--------- Uncompleted execution - STOP ---------\n");
            sb.Append("\n");
        }
        sb.Append($"**************** MODEL EXECUTION REPORT - STOP ****************\n");
        sb.Append("\n");
        //**************** MODEL EXECUTIONS REPORT - STOP ****************

        //**************** MEMORY SNAPSHOTS REPORTS - START ****************
        memoryPeakSummary = MemorySnapshotsReport.GenerateStringReport(sb, spreadsheetFormat);
        //**************** MEMORY SNAPSHOTS REPORTS - STOP ****************

        return sb.ToString();
    }

#if UNITY_EDITOR
    /// <summary>
    /// Writes the report to a text file in the temporary cache path and returns
    /// the full path written to.
    /// </summary>
    public static string ToTextFile(IModelExecutionsReporter report, bool spreadsheetFormat, out MemoryPeakSummary memoryPeakSummary, string filename = null)
    {
        string stringToSave = report.GenerateStringReport(out memoryPeakSummary, spreadsheetFormat);

        string fullPath = Application.temporaryCachePath;
        if (filename == null)
        {
            // Default file name: ModelExecutionReport.txt.
            fullPath = Path.Combine(fullPath, "ModelExecutionReport");
            fullPath = Path.ChangeExtension(fullPath, "txt");
        }
        else
        {
            fullPath = Path.Combine(fullPath, filename);
        }
        File.WriteAllText(fullPath, stringToSave);
        return fullPath;
    }
#endif
}
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
|
||||
#endif //ENABLE_BARRACUDA_STATS
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user