Resolve WES-100 "Natml integration"

This commit is contained in:
Jelle De Geest
2023-04-03 14:14:49 +00:00
parent edf1805a92
commit f95e34c6fe
425 changed files with 525 additions and 96655 deletions

View File

@@ -2,7 +2,8 @@
"name": "InterfacesScripts",
"rootNamespace": "",
"references": [
"GUID:5c2b5ba89f9e74e418232e154bc5cc7a"
"GUID:5c2b5ba89f9e74e418232e154bc5cc7a",
"GUID:d23f64cfd3b314bb4a18a8284c99bf5e"
],
"includePlatforms": [],
"excludePlatforms": [],

View File

@@ -1,7 +1,6 @@
fileFormatVersion: 2
guid: 7f2d0ee6dd21e1d4eb25b71b7a749d25
folderAsset: yes
DefaultImporter:
AssemblyDefinitionImporter:
externalObjects: {}
userData:
assetBundleName:

View File

@@ -6,6 +6,6 @@ using UnityEngine;
/// </summary>
public enum ModelIndex
{
FINGERSPELLING,
NONE
NONE,
FINGERSPELLING
}

View File

@@ -1,8 +1,7 @@
using NatML;
using System;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.Barracuda;
/// <summary>
/// This scriptable will hold tuples of course indices and models
/// </summary>
@@ -22,28 +21,49 @@ public class ModelList : ScriptableObject
/// <summary>
/// The model itself
/// </summary>
public NNModel model;
public MLModelData modelWINDOWS;
/// <summary>
/// The model itself
/// </summary>
public MLModelData modelMAC;
}
/// <summary>
/// Index of the currently active model
/// </summary>
public int currentModelIndex = 0;
/// <summary>
/// A list of all the models
/// </summary>
public List<ModelTuple> models = new List<ModelTuple>();
/// <summary>
/// Index of the currently active model
/// </summary>
public int currentModelIndex = 0;
/// <summary>
/// Get a model by modelindex
/// </summary>
/// <param name="modelIndex">ModelIndex of the model</param>
/// <returns>Model associated with this index, null if no model was found</returns>
public NNModel GetCurrentModel()
public MLModelData GetCurrentModel()
{
return models.Find(x => x.model == models[currentModelIndex].model)?.model;
// Select Model based on OS
#if (UNITY_STANDALONE_WIN || UNITY_EDITOR_WIN)
return models.Find(x => x.modelWINDOWS == models[currentModelIndex].modelWINDOWS)?.modelWINDOWS;
#elif (UNITY_STANDALONE_OSX || UNITY_EDITOR_OSX)
return models.Find(x => x.modelMAC == models[currentModelIndex].modelMAC)?.modelMAC;
#endif
return null;
}
/// <summary>
/// Function to check if the modelIndex has been set
/// </summary>
/// <returns>True if the currently selected model's index is not ModelIndex.NONE</returns>
public bool HasValidModel()
{
return models[currentModelIndex].index != (int)ModelIndex.NONE;
}
/// <summary>

View File

@@ -27,6 +27,7 @@ public class Theme : ScriptableObject
/// </summary>
public ModelIndex modelIndex;
/// <summary>
/// List of all learnable words/letters
/// </summary>

View File

@@ -7,10 +7,4 @@ ScriptedImporter:
userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 683b6cb6d0a474744822c888b46772c9, type: 3}
optimizeModel: 1
forceArbitraryBatchSize: 1
treatErrorsAsWarnings: 0
importMode: 1
weightsTypeMode: 0
activationTypeMode: 0
script: {fileID: 11500000, guid: 8264490bef67c46f2982e6dd3f5e46cd, type: 3}

Binary file not shown.

View File

@@ -0,0 +1,10 @@
fileFormatVersion: 2
guid: fdbf401e965a6bf4a87637cd519f2715
ScriptedImporter:
internalIDToNameTable: []
externalObjects: {}
serializedVersion: 2
userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 8264490bef67c46f2982e6dd3f5e46cd, type: 3}

View File

@@ -15,7 +15,7 @@ MonoBehaviour:
title: Handalfabet
description: Van A tot Z
index: 0
model: {fileID: 5022602860645237092, guid: e6d85df707405ad4f97c23b07227ee99, type: 3}
modelIndex: 1
learnables:
- name: A
image: {fileID: 21300000, guid: 4eb4ef55f866f114dafb722f4bd05c76, type: 3}

View File

@@ -6,8 +6,8 @@
"UnityEditor.TestRunner",
"CommonScripts",
"InterfacesScripts",
"Unity.Barracuda",
"SignPredictor"
"SignPredictor",
"NatML.ML"
],
"includePlatforms": [
"Editor"

View File

@@ -1,5 +1,5 @@
using NatML;
using NUnit.Framework;
using Unity.Barracuda;
using UnityEngine;
/// <summary>
/// Test the ModelList class
@@ -45,7 +45,11 @@ public class ModelListTest
ModelIndex value = (ModelIndex)random.Next(modelList.models.Count);
modelList.SetCurrentModel(value);
Assert.AreEqual(modelList.models[modelList.currentModelIndex].model, modelList.GetCurrentModel());
#if (UNITY_STANDALONE_WIN || UNITY_EDITOR_WIN)
Assert.AreEqual(modelList.models[modelList.currentModelIndex].modelWINDOWS, modelList.GetCurrentModel());
#elif (UNITY_STANDALONE_OSX || UNITY_EDITOR_OSX)
Assert.AreEqual(modelList.models[modelList.currentModelIndex].modelMAC, modelList.GetCurrentModel());
#endif
// Check if empty model fails gracefully (returns null)
Assert.IsNull(ScriptableObject.CreateInstance<ModelList>().GetCurrentModel());
@@ -69,7 +73,11 @@ public class ModelListTest
ModelList.ModelTuple m = modelList.models[modelList.currentModelIndex];
Assert.AreEqual(m.index, value);
Assert.IsTrue(m.model is NNModel || m.model is null);
#if (UNITY_STANDALONE_WIN || UNITY_EDITOR_WIN)
Assert.IsTrue(m.modelWINDOWS is MLModelData || m.modelWINDOWS is null);
#elif (UNITY_STANDALONE_OSX || UNITY_EDITOR_OSX)
Assert.IsTrue(m.modelMAC is MLModelData || m.modelMAC is null);
#endif
}
}
ModelList emptyList = ScriptableObject.CreateInstance<ModelList>();

View File

@@ -5,8 +5,9 @@
"Unity.TextMeshPro",
"AccountsScripts",
"InterfacesScripts",
"Tween",
"SignPredictor"
"SignPredictor",
"NatML.ML",
"Tween"
],
"includePlatforms": [],
"excludePlatforms": [],

View File

@@ -152,8 +152,7 @@ public class CoursesController : AbstractFeedback
void Start()
{
StartCourseController();
signPredictor.SetModel(course.theme.modelIndex);
signPredictor.ChangeModel(course.theme.modelIndex);
AddSelfAsListener();
}
/// <summary>

View File

@@ -38,7 +38,7 @@ RenderSettings:
m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.37311918, g: 0.3807398, b: 0.35872716, a: 1}
m_IndirectSpecularColor: {r: 0.37311953, g: 0.38074014, b: 0.3587274, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:
@@ -6416,472 +6416,3 @@ CanvasRenderer:
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 2039368310}
m_CullTransparentMesh: 1
--- !u!114 &5233312447201393291
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5233312447201393293}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 67db9e8f0e2ae9c40bc1e2b64352a6b4, type: 3}
m_Name:
m_EditorClassIdentifier:
m_Navigation:
m_Mode: 3
m_WrapAround: 0
m_SelectOnUp: {fileID: 0}
m_SelectOnDown: {fileID: 0}
m_SelectOnLeft: {fileID: 0}
m_SelectOnRight: {fileID: 0}
m_Transition: 1
m_Colors:
m_NormalColor: {r: 1, g: 1, b: 1, a: 1}
m_HighlightedColor: {r: 0.9607843, g: 0.9607843, b: 0.9607843, a: 1}
m_PressedColor: {r: 0.78431374, g: 0.78431374, b: 0.78431374, a: 1}
m_SelectedColor: {r: 0.9607843, g: 0.9607843, b: 0.9607843, a: 1}
m_DisabledColor: {r: 0.78431374, g: 0.78431374, b: 0.78431374, a: 0.5019608}
m_ColorMultiplier: 1
m_FadeDuration: 0.1
m_SpriteState:
m_HighlightedSprite: {fileID: 0}
m_PressedSprite: {fileID: 0}
m_SelectedSprite: {fileID: 0}
m_DisabledSprite: {fileID: 0}
m_AnimationTriggers:
m_NormalTrigger: Normal
m_HighlightedTrigger: Highlighted
m_PressedTrigger: Pressed
m_SelectedTrigger: Selected
m_DisabledTrigger: Disabled
m_Interactable: 1
m_TargetGraphic: {fileID: 0}
m_FillRect: {fileID: 5233312447919013132}
m_HandleRect: {fileID: 0}
m_Direction: 0
m_MinValue: 0
m_MaxValue: 1
m_WholeNumbers: 0
m_Value: 0
m_OnValueChanged:
m_PersistentCalls:
m_Calls: []
--- !u!224 &5233312447201393292
RectTransform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5233312447201393293}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children:
- {fileID: 5233312448534255807}
- {fileID: 5233312448785575104}
m_Father: {fileID: 5233312447513285389}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 0}
m_AnchorMax: {x: 1, y: 0}
m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 0, y: 50}
m_Pivot: {x: 0.5, y: 0}
--- !u!1 &5233312447201393293
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 5233312447201393292}
- component: {fileID: 5233312447201393291}
m_Layer: 5
m_Name: Progress
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!114 &5233312447513285388
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5233312447513285390}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 44e682a32ee15cc489bf50f3a06f717b, type: 3}
m_Name:
m_EditorClassIdentifier:
feedbackText: {fileID: 0}
feedbackProgress: {fileID: 0}
feedbackProgressImage: {fileID: 0}
signPredictor: {fileID: 1991376311}
--- !u!224 &5233312447513285389
RectTransform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5233312447513285390}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children:
- {fileID: 5233312448025626847}
- {fileID: 5233312447201393292}
m_Father: {fileID: 0}
m_RootOrder: 5
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0.5, y: 0}
m_AnchorMax: {x: 0.5, y: 0}
m_AnchoredPosition: {x: 960, y: 200}
m_SizeDelta: {x: 500, y: 150}
m_Pivot: {x: 0.5, y: 0}
--- !u!1 &5233312447513285390
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 5233312447513285389}
- component: {fileID: 5233312447513285388}
m_Layer: 5
m_Name: Feedback
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!224 &5233312447919013132
RectTransform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5233312447919013135}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children: []
m_Father: {fileID: 5233312448785575104}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 0}
m_AnchorMax: {x: 0, y: 0}
m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 10, y: 0}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!222 &5233312447919013133
CanvasRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5233312447919013135}
m_CullTransparentMesh: 1
--- !u!114 &5233312447919013134
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5233312447919013135}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: fe87c0e1cc204ed48ad3b37840f39efc, type: 3}
m_Name:
m_EditorClassIdentifier:
m_Material: {fileID: 0}
m_Color: {r: 1, g: 0, b: 0, a: 1}
m_RaycastTarget: 1
m_RaycastPadding: {x: 0, y: 0, z: 0, w: 0}
m_Maskable: 1
m_OnCullStateChanged:
m_PersistentCalls:
m_Calls: []
m_Sprite: {fileID: 10905, guid: 0000000000000000f000000000000000, type: 0}
m_Type: 1
m_PreserveAspect: 0
m_FillCenter: 1
m_FillMethod: 4
m_FillAmount: 1
m_FillClockwise: 1
m_FillOrigin: 0
m_UseSpriteMesh: 0
m_PixelsPerUnitMultiplier: 1
--- !u!1 &5233312447919013135
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 5233312447919013132}
- component: {fileID: 5233312447919013133}
- component: {fileID: 5233312447919013134}
m_Layer: 5
m_Name: Fill
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!222 &5233312448025626832
CanvasRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5233312448025626834}
m_CullTransparentMesh: 1
--- !u!114 &5233312448025626833
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5233312448025626834}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: f4688fdb7df04437aeb418b961361dc5, type: 3}
m_Name:
m_EditorClassIdentifier:
m_Material: {fileID: 0}
m_Color: {r: 1, g: 1, b: 1, a: 1}
m_RaycastTarget: 1
m_RaycastPadding: {x: 0, y: 0, z: 0, w: 0}
m_Maskable: 1
m_OnCullStateChanged:
m_PersistentCalls:
m_Calls: []
m_text: Detecteren ...
m_isRightToLeft: 0
m_fontAsset: {fileID: 11400000, guid: 8f586378b4e144a9851e7b34d9b748ee, type: 2}
m_sharedMaterial: {fileID: 2180264, guid: 8f586378b4e144a9851e7b34d9b748ee, type: 2}
m_fontSharedMaterials: []
m_fontMaterial: {fileID: 0}
m_fontMaterials: []
m_fontColor32:
serializedVersion: 2
rgba: 4282188031
m_fontColor: {r: 0.5803922, g: 0.58431375, b: 0.6, a: 1}
m_enableVertexGradient: 0
m_colorMode: 3
m_fontColorGradient:
topLeft: {r: 1, g: 1, b: 1, a: 1}
topRight: {r: 1, g: 1, b: 1, a: 1}
bottomLeft: {r: 1, g: 1, b: 1, a: 1}
bottomRight: {r: 1, g: 1, b: 1, a: 1}
m_fontColorGradientPreset: {fileID: 0}
m_spriteAsset: {fileID: 0}
m_tintAllSprites: 0
m_StyleSheet: {fileID: 0}
m_TextStyleHashCode: -1183493901
m_overrideHtmlColors: 0
m_faceColor:
serializedVersion: 2
rgba: 4294967295
m_fontSize: 48
m_fontSizeBase: 48
m_fontWeight: 400
m_enableAutoSizing: 0
m_fontSizeMin: 18
m_fontSizeMax: 72
m_fontStyle: 1
m_HorizontalAlignment: 2
m_VerticalAlignment: 512
m_textAlignment: 65535
m_characterSpacing: 0
m_wordSpacing: 0
m_lineSpacing: 0
m_lineSpacingMax: 0
m_paragraphSpacing: 0
m_charWidthMaxAdj: 0
m_enableWordWrapping: 1
m_wordWrappingRatios: 0.4
m_overflowMode: 0
m_linkedTextComponent: {fileID: 0}
parentLinkedComponent: {fileID: 0}
m_enableKerning: 1
m_enableExtraPadding: 0
checkPaddingRequired: 0
m_isRichText: 1
m_parseCtrlCharacters: 1
m_isOrthographic: 1
m_isCullingEnabled: 0
m_horizontalMapping: 0
m_verticalMapping: 0
m_uvLineOffset: 0
m_geometrySortingOrder: 0
m_IsTextObjectScaleStatic: 0
m_VertexBufferAutoSizeReduction: 0
m_useMaxVisibleDescender: 1
m_pageToDisplay: 1
m_margin: {x: 0, y: 0, z: 0, w: 0}
m_isUsingLegacyAnimationComponent: 0
m_isVolumetricText: 0
m_hasFontAssetChanged: 0
m_baseMaterial: {fileID: 0}
m_maskOffset: {x: 0, y: 0, z: 0, w: 0}
--- !u!1 &5233312448025626834
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 5233312448025626847}
- component: {fileID: 5233312448025626832}
- component: {fileID: 5233312448025626833}
m_Layer: 5
m_Name: Text
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!224 &5233312448025626847
RectTransform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5233312448025626834}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children: []
m_Father: {fileID: 5233312447513285389}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0.5, y: 1}
m_AnchorMax: {x: 0.5, y: 1}
m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 500, y: 100}
m_Pivot: {x: 0.5, y: 1}
--- !u!1 &5233312448534255792
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 5233312448534255807}
- component: {fileID: 5233312448534255805}
- component: {fileID: 5233312448534255806}
m_Layer: 5
m_Name: Background
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!222 &5233312448534255805
CanvasRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5233312448534255792}
m_CullTransparentMesh: 1
--- !u!114 &5233312448534255806
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5233312448534255792}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: fe87c0e1cc204ed48ad3b37840f39efc, type: 3}
m_Name:
m_EditorClassIdentifier:
m_Material: {fileID: 0}
m_Color: {r: 1, g: 1, b: 1, a: 1}
m_RaycastTarget: 1
m_RaycastPadding: {x: 0, y: 0, z: 0, w: 0}
m_Maskable: 1
m_OnCullStateChanged:
m_PersistentCalls:
m_Calls: []
m_Sprite: {fileID: 10907, guid: 0000000000000000f000000000000000, type: 0}
m_Type: 1
m_PreserveAspect: 0
m_FillCenter: 1
m_FillMethod: 4
m_FillAmount: 1
m_FillClockwise: 1
m_FillOrigin: 0
m_UseSpriteMesh: 0
m_PixelsPerUnitMultiplier: 1
--- !u!224 &5233312448534255807
RectTransform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5233312448534255792}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children: []
m_Father: {fileID: 5233312447201393292}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 0}
m_AnchorMax: {x: 1, y: 1}
m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 0, y: 0}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!224 &5233312448785575104
RectTransform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5233312448785575105}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children:
- {fileID: 5233312447919013132}
m_Father: {fileID: 5233312447201393292}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 0}
m_AnchorMax: {x: 1, y: 1}
m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 0, y: 0}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!1 &5233312448785575105
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 5233312448785575104}
m_Layer: 5
m_Name: Fill Area
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1

View File

@@ -244,7 +244,7 @@ public class HangmanController : AbstractFeedback
{
StartController();
signPredictor.SetModel(ModelIndex.FINGERSPELLING);
signPredictor.ChangeModel(ModelIndex.FINGERSPELLING);
AddSelfAsListener();
}
/// <summary>

View File

@@ -74,12 +74,15 @@ public class KeypointManager
}
if (width > height){
delta_x = ((float)0.1)*width;
delta_y = delta_x + ((width - height)/2);
}else{
delta_y = ((float)0.1)*height;
delta_x = delta_y + ((height - width)/2);
if (width > height)
{
delta_x = ((float)0.1) * width;
delta_y = delta_x + ((width - height) / 2);
}
else
{
delta_y = ((float)0.1) * height;
delta_x = delta_y + ((height - width) / 2);
}
float starting_x = min_x - delta_x;
@@ -124,10 +127,10 @@ public class KeypointManager
float eye_left_x = pose_x[1];
float eye_left_y = pose_y[1];
float starting_x = shoulder_center_x - (bbox_size/2) * shoulder_distance;
float starting_y = eye_left_y - shoulder_distance/2;
float starting_x = shoulder_center_x - (bbox_size / 2) * shoulder_distance;
float starting_y = eye_left_y - shoulder_distance / 2;
float ending_x = shoulder_center_x + (bbox_size/2) * shoulder_distance;
float ending_x = shoulder_center_x + (bbox_size / 2) * shoulder_distance;
float ending_y = starting_y + (bbox_size - ((float)0.5)) * shoulder_distance;
float bbox_center_x = (starting_x + ending_x) / 2;

View File

@@ -15,6 +15,8 @@ MonoBehaviour:
currentModelIndex: 0
models:
- index: 0
model: {fileID: 5022602860645237092, guid: e6d85df707405ad4f97c23b07227ee99, type: 3}
modelWINDOWS: {fileID: 0}
modelMAC: {fileID: 0}
- index: 1
model: {fileID: 0}
modelWINDOWS: {fileID: 8538825877217656561, guid: fdbf401e965a6bf4a87637cd519f2715, type: 3}
modelMAC: {fileID: 0}

View File

@@ -1,11 +1,5 @@
using DigitalRuby.Tween;
using Mediapipe.Unity.Tutorial;
using System;
using System.Collections;
using TMPro;
using UnityEngine;
using UnityEngine.Events;
using UnityEngine.UI;
/// <summary>
/// Class to display feedback during a course

View File

@@ -3,12 +3,12 @@
"rootNamespace": "",
"references": [
"GUID:6055be8ebefd69e48b49212b09b47b2f",
"GUID:5c2b5ba89f9e74e418232e154bc5cc7a",
"GUID:04c4d86a70aa56c55a78c61f1ab1a56d",
"GUID:edc93f477bb73a743a97d6882ed330b3",
"GUID:58e104b97fb3752438ada2902a36dcbf",
"GUID:7f2d0ee6dd21e1d4eb25b71b7a749d25",
"GUID:f55a02e98b01bc849b30d9650ccd8f15"
"GUID:f55a02e98b01bc849b30d9650ccd8f15",
"GUID:d23f64cfd3b314bb4a18a8284c99bf5e"
],
"includePlatforms": [],
"excludePlatforms": [],

View File

@@ -1,334 +1,362 @@
// Copyright (c) 2021 homuler
//
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.
// ATTENTION!: This code is for a tutorial.
using Mediapipe;
using Mediapipe.Unity;
using NatML;
using NatML.Features;
using NatML.Internal;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using Unity.Barracuda;
using System.Threading.Tasks;
using UnityEngine;
using UnityEngine.UI;
namespace Mediapipe.Unity.Tutorial
/// <summary>
/// MonoBehaviour that captures webcam frames, runs them through the MediaPipe graph and predicts signs using a NatML model
/// </summary>
public class SignPredictor : MonoBehaviour
{
public class SignPredictor : MonoBehaviour
/// <summary>
/// Predictor class which is used to predict the sign using an MLEdgeModel
/// </summary>
public class NatMLSignPredictor : IMLPredictor<List<float>>
{
/// <summary>
/// ModelList, used to change model using ModelIndex
/// The MLEdgeModel used for predictions
/// </summary>
public ModelList modelList;
private readonly MLEdgeModel edgeModel;
/// <summary>
/// Reference to the model info file
/// The type used to create features which are input for the model
/// </summary>
public TextAsset modelInfoFile;
private MLFeatureType featureType;
/// <summary>
/// Config file to set up the graph
/// Creation of a NatMLSignPredictor instance
/// </summary>
[SerializeField]
private TextAsset configAsset;
/// <summary>
/// Index to indicate which camera is being used
/// </summary>
private int camdex = 0;
/// <summary>
/// The screen object on which the video is displayed
/// </summary>
[SerializeField]
private RawImage screen;
/// <summary>
/// A secondary optional screen object on which the video is displayed
/// </summary>
[SerializeField]
private RawImage screen2;
/// <summary>
/// MediaPipe graph
/// </summary>
private CalculatorGraph graph;
/// <summary>
/// Resource manager for graph resources
/// </summary>
private ResourceManager resourceManager;
/// <summary>
/// Webcam texture
/// </summary>
private WebCamTexture webcamTexture;
/// <summary>
/// Input texture
/// </summary>
private Texture2D inputTexture;
/// <summary>
/// Screen pixel data
/// </summary>
private Color32[] pixelData;
/// <summary>
/// Stopwatch to give a timestamp to video frames
/// </summary>
private Stopwatch stopwatch;
/// <summary>
/// The mediapipe stream which contains the pose landmarks
/// </summary>
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> posestream;
/// <summary>
/// The mediapipe stream which contains the left hand landmarks
/// </summary>
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> leftstream;
/// <summary>
/// The mediapipe stream which contains the right hand landmarks
/// </summary>
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> rightstream;
/// <summary>
/// Create presence stream
/// </summary>
public OutputStream<DetectionVectorPacket, List<Detection>> presenceStream;
/// <summary>
/// A keypointmanager which does normalization stuff, keeps track of the landmarks
/// </summary>
private KeypointManager keypointManager;
/// <summary>
/// The worker on which we schedule the signpredictor model execution
/// </summary>
private IWorker worker;
/// <summary>
/// Width of the webcam
/// </summary>
private int width;
/// <summary>
/// Height of the webcam
/// </summary>
private int height;
/// <summary>
/// The enumerator of the worker which executes the sign predictor model
/// </summary>
private IEnumerator enumerator;
/// <summary>
/// The prediction of the sign predictor model
/// </summary>
public Dictionary<string, float> learnableProbabilities;
/// <summary>
/// Bool indicating whether or not the resource manager has already been initialized
/// </summary>
private static bool resourceManagerIsInitialized = false;
/// <summary>
/// an inputTensor for the sign predictor
/// </summary>
private Tensor inputTensor;
public List<Listener> listeners = new List<Listener>();
/// <summary>
/// Google Mediapipe setup & run
/// </summary>
/// <returns>IEnumerator</returns>
/// <exception cref="System.Exception"></exception>
private IEnumerator Start()
/// <param name="edgeModel"></param>
public NatMLSignPredictor(MLEdgeModel edgeModel)
{
// Webcam setup
if (WebCamTexture.devices.Length == 0)
{
throw new System.Exception("Web Camera devices are not found");
}
// Start the webcam
WebCamDevice webCamDevice = WebCamTexture.devices[0];
webcamTexture = new WebCamTexture(webCamDevice.name);
webcamTexture.Play();
yield return new WaitUntil(() => webcamTexture.width > 16);
// Set webcam aspect ratio
width = webcamTexture.width;
height = webcamTexture.height;
float webcamAspect = (float)webcamTexture.width / (float)webcamTexture.height;
screen.rectTransform.sizeDelta = new Vector2(screen.rectTransform.sizeDelta.y * webcamAspect, (screen.rectTransform.sizeDelta.y));
screen.texture = webcamTexture;
if (screen2 != null)
{
screen2.rectTransform.sizeDelta = new Vector2(screen2.rectTransform.sizeDelta.y * webcamAspect, (screen2.rectTransform.sizeDelta.y));
}
if (modelList.GetCurrentModel() != null)
{
// TODO this method is kinda meh you should use
inputTexture = new Texture2D(width, height, TextureFormat.RGBA32, false);
pixelData = new Color32[width * height];
if (!resourceManagerIsInitialized)
{
resourceManager = new StreamingAssetsResourceManager();
yield return resourceManager.PrepareAssetAsync("pose_detection.bytes");
yield return resourceManager.PrepareAssetAsync("pose_landmark_full.bytes");
yield return resourceManager.PrepareAssetAsync("face_landmark.bytes");
yield return resourceManager.PrepareAssetAsync("hand_landmark_full.bytes");
yield return resourceManager.PrepareAssetAsync("face_detection_short_range.bytes");
yield return resourceManager.PrepareAssetAsync("hand_recrop.bytes");
yield return resourceManager.PrepareAssetAsync("handedness.txt");
resourceManagerIsInitialized = true;
}
stopwatch = new Stopwatch();
// Setting up the graph
graph = new CalculatorGraph(configAsset.text);
posestream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "pose_landmarks", "pose_landmarks_presence");
leftstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "left_hand_landmarks", "left_hand_landmarks_presence");
rightstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "right_hand_landmarks", "right_hand_landmarks_presence");
posestream.StartPolling().AssertOk();
leftstream.StartPolling().AssertOk();
rightstream.StartPolling().AssertOk();
graph.StartRun().AssertOk();
stopwatch.Start();
keypointManager = new KeypointManager(modelInfoFile);
// check if model exists at path
//var model = ModelLoader.Load(Resources.Load<NNModel>("Models/Fingerspelling/model_A-L"));
worker = modelList.GetCurrentModel().CreateWorker();
StartCoroutine(SignRecognitionCoroutine());
StartCoroutine(MediapipeCoroutine());
}
}
/// <summary>
/// Called at the start of a course/minigame; will set the model before Start of SignPredictor is called.
/// </summary>
/// <param name="index">The index of the model to be used</param>
public void SetModel(ModelIndex index)
{
this.modelList.SetCurrentModel(index);
this.edgeModel = edgeModel;
featureType = edgeModel.inputs[0];
}
/// <summary>
/// Coroutine which executes the mediapipe pipeline
/// Predicts the sign using the MLEdgeModel
/// </summary>
/// <param name="inputs"></param>
/// <returns></returns>
private IEnumerator MediapipeCoroutine()
public List<float> Predict(params MLFeature[] inputs)
{
while (true)
List<float> predictions = null;
IMLEdgeFeature iedgeFeature = (IMLEdgeFeature)inputs[0];
MLEdgeFeature edgeFeature = iedgeFeature.Create(featureType);
MLFeatureCollection<MLEdgeFeature> result = edgeModel.Predict(edgeFeature);
if (0 < result.Count)
{
inputTexture.SetPixels32(webcamTexture.GetPixels32(pixelData));
var imageFrame = new ImageFrame(ImageFormat.Types.Format.Srgba, width, height, width * 4, inputTexture.GetRawTextureData<byte>());
var currentTimestamp = stopwatch.ElapsedTicks / (System.TimeSpan.TicksPerMillisecond / 1000);
graph.AddPacketToInputStream("input_video", new ImageFramePacket(imageFrame, new Timestamp(currentTimestamp))).AssertOk();
//Debug.Log(Time.timeAsDouble + " Added new packet to mediapipe graph");
yield return new WaitForEndOfFrame();
NormalizedLandmarkList _poseLandmarks = null;
NormalizedLandmarkList _leftHandLandmarks = null;
NormalizedLandmarkList _rightHandLandmarks = null;
//Debug.Log("Extracting keypoints");
yield return new WaitUntil(() => { posestream.TryGetNext(out _poseLandmarks, false); return true; });
yield return new WaitUntil(() => { leftstream.TryGetNext(out _leftHandLandmarks, false); return true; });
yield return new WaitUntil(() => { rightstream.TryGetNext(out _rightHandLandmarks, false); return true; });
//Debug.Log(Time.timeAsDouble + " Retrieved landmarks ");
keypointManager.AddLandmarks(_poseLandmarks, _leftHandLandmarks, _rightHandLandmarks);
predictions = new MLArrayFeature<float>(result[0]).Flatten().ToArray().ToList();
predictions = predictions.ConvertAll((c) => Mathf.Exp(c));
float sum = predictions.Sum();
predictions = predictions.ConvertAll((c) => c / sum);
}
edgeFeature.Dispose();
result.Dispose();
return predictions;
}
/// <summary>
/// Coroutine which calls the sign predictor model
/// Disposing the MLEdgeModel
/// </summary>
/// <returns></returns>
private IEnumerator SignRecognitionCoroutine()
public void Dispose()
{
while (true)
edgeModel.Dispose();
}
}
public List<Listener> listeners = new List<Listener>();
/// <summary>
/// Predictor which is used to create the asyncPredictor (should not be used if asyncPredictor exists)
/// </summary>
private NatMLSignPredictor predictor;
/// <summary>
/// The asynchronous predictor which is used to predict the sign using an MLEdgemodel
/// </summary>
private MLAsyncPredictor<List<float>> asyncPredictor;
/// <summary>
/// Reference to the model used in the SignPredictor
/// </summary>
private MLEdgeModel model;
/// <summary>
/// ModelList, used to change model using ModelIndex
/// </summary>
public ModelList modelList;
/// <summary>
/// Chosen model data based on the operating system
/// </summary>
private MLModelData modelData;
/// <summary>
/// Reference to the model info file
/// </summary>
public TextAsset modelInfoFile;
/// <summary>
/// Config file to set up the graph
/// </summary>
[SerializeField]
private TextAsset configAsset;
/// <summary>
/// Index to indicate which camera is being used
/// </summary>
private int camdex = 0;
/// <summary>
/// The screen object on which the video is displayed
/// </summary>
[SerializeField]
private RawImage screen;
/// <summary>
/// A secondary optional screen object on which the video is displayed
/// </summary>
[SerializeField]
private RawImage screen2;
/// <summary>
/// MediaPipe graph
/// </summary>
private CalculatorGraph graph;
/// <summary>
/// Resource manager for graph resources
/// </summary>
private ResourceManager resourceManager;
/// <summary>
/// Webcam texture
/// </summary>
private WebCamTexture webcamTexture;
/// <summary>
/// Input texture
/// </summary>
private Texture2D inputTexture;
/// <summary>
/// Screen pixel data
/// </summary>
private Color32[] pixelData;
/// <summary>
/// Stopwatch to give a timestamp to video frames
/// </summary>
private Stopwatch stopwatch;
/// <summary>
/// The mediapipe stream which contains the pose landmarks
/// </summary>
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> posestream;
/// <summary>
/// The mediapipe stream which contains the left hand landmarks
/// </summary>
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> leftstream;
/// <summary>
/// The mediapipe stream which contains the right hand landmarks
/// </summary>
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> rightstream;
/// <summary>
/// Creates the presence stream.
/// </summary>
public OutputStream<DetectionVectorPacket, List<Detection>> presenceStream;
/// <summary>
/// A keypoint manager which normalizes the landmarks and keeps track of them
/// </summary>
private KeypointManager keypointManager;
/// <summary>
/// Width of the webcam
/// </summary>
private int width;
/// <summary>
/// Height of the webcam
/// </summary>
private int height;
/// <summary>
/// The prediction of the sign predictor model
/// </summary>
public Dictionary<string, float> learnableProbabilities;
/// <summary>
/// Bool indicating whether or not the resource manager has already been initialized
/// </summary>
private static bool resourceManagerIsInitialized = false;
/// <summary>
/// Google Mediapipe setup & run
/// </summary>
/// <returns>IEnumerator</returns>
/// <exception cref="System.Exception"></exception>
private IEnumerator Start()
{
// Webcam setup
if (WebCamTexture.devices.Length == 0)
{
throw new System.Exception("Web Camera devices are not found");
}
// Start the webcam — always device 0 here; SwapCam() can cycle devices later.
WebCamDevice webCamDevice = WebCamTexture.devices[0];
webcamTexture = new WebCamTexture(webCamDevice.name);
webcamTexture.Play();
// WebCamTexture reports a small placeholder size until the first real frame arrives.
yield return new WaitUntil(() => webcamTexture.width > 16);
// Set webcam aspect ratio
width = webcamTexture.width;
height = webcamTexture.height;
float webcamAspect = (float)webcamTexture.width / (float)webcamTexture.height;
screen.rectTransform.sizeDelta = new Vector2(screen.rectTransform.sizeDelta.y * webcamAspect, (screen.rectTransform.sizeDelta.y));
screen.texture = webcamTexture;
if (screen2 != null)
{
screen2.rectTransform.sizeDelta = new Vector2(screen2.rectTransform.sizeDelta.y * webcamAspect, (screen2.rectTransform.sizeDelta.y));
}
// TODO: per-frame SetPixels32 copying is slow; consider Graphics.CopyTexture or AsyncGPUReadback.
inputTexture = new Texture2D(width, height, TextureFormat.RGBA32, false);
pixelData = new Color32[width * height];
// Prepare MediaPipe model assets once per process; the static flag prevents
// re-preparing them on scene reload.
if (!resourceManagerIsInitialized)
{
resourceManager = new StreamingAssetsResourceManager();
yield return resourceManager.PrepareAssetAsync("pose_detection.bytes");
yield return resourceManager.PrepareAssetAsync("pose_landmark_full.bytes");
yield return resourceManager.PrepareAssetAsync("face_landmark.bytes");
yield return resourceManager.PrepareAssetAsync("hand_landmark_full.bytes");
yield return resourceManager.PrepareAssetAsync("face_detection_short_range.bytes");
yield return resourceManager.PrepareAssetAsync("hand_recrop.bytes");
yield return resourceManager.PrepareAssetAsync("handedness.txt");
resourceManagerIsInitialized = true;
}
stopwatch = new Stopwatch();
// Setting up the graph and its three landmark output streams.
graph = new CalculatorGraph(configAsset.text);
posestream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "pose_landmarks", "pose_landmarks_presence");
leftstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "left_hand_landmarks", "left_hand_landmarks_presence");
rightstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "right_hand_landmarks", "right_hand_landmarks_presence");
posestream.StartPolling().AssertOk();
leftstream.StartPolling().AssertOk();
rightstream.StartPolling().AssertOk();
graph.StartRun().AssertOk();
stopwatch.Start();
// Creating a KeypointManager
keypointManager = new KeypointManager(modelInfoFile);
// Check if a model is ready to load
yield return new WaitUntil(() => modelList.HasValidModel());
// Create Model — offloaded to a worker thread so the main thread is not blocked,
// then polled for completion from the coroutine.
Task<MLEdgeModel> t = Task.Run(() => MLEdgeModel.Create(modelList.GetCurrentModel()));
yield return new WaitUntil(() => t.IsCompleted);
model = t.Result;
predictor = new NatMLSignPredictor(model);
asyncPredictor = predictor.ToAsync();
// Start the recognition and capture loops.
StartCoroutine(SignRecognitionCoroutine());
StartCoroutine(MediapipeCoroutine());
}
/// <summary>
/// Coroutine which executes the mediapipe pipeline
/// </summary>
/// <returns></returns>
private IEnumerator MediapipeCoroutine()
{
while (true)
{
// Copy the current webcam frame into the CPU-side texture.
inputTexture.SetPixels32(webcamTexture.GetPixels32(pixelData));
var imageFrame = new ImageFrame(ImageFormat.Types.Format.Srgba, width, height, width * 4, inputTexture.GetRawTextureData<byte>());
// Timestamp in microseconds: ticks / (ticks-per-millisecond / 1000).
var currentTimestamp = stopwatch.ElapsedTicks / (System.TimeSpan.TicksPerMillisecond / 1000);
graph.AddPacketToInputStream("input_video", new ImageFramePacket(imageFrame, new Timestamp(currentTimestamp))).AssertOk();
yield return new WaitForEndOfFrame();
NormalizedLandmarkList _poseLandmarks = null;
NormalizedLandmarkList _leftHandLandmarks = null;
NormalizedLandmarkList _rightHandLandmarks = null;
// TryGetNext is polled once per frame; the lambdas always return true, so a
// landmark list stays null on frames where the stream produced no output.
yield return new WaitUntil(() => { posestream.TryGetNext(out _poseLandmarks); return true; });
yield return new WaitUntil(() => { leftstream.TryGetNext(out _leftHandLandmarks); return true; });
yield return new WaitUntil(() => { rightstream.TryGetNext(out _rightHandLandmarks); return true; });
keypointManager.AddLandmarks(_poseLandmarks, _leftHandLandmarks, _rightHandLandmarks);
}
}
/// <summary>
/// Coroutine which calls the sign predictor model
/// </summary>
/// <returns></returns>
private IEnumerator SignRecognitionCoroutine()
{
while (true)
{
List<List<float>> inputData = keypointManager.GetKeypoints();
if (inputData != null && asyncPredictor.readyForPrediction)
{
List<List<float>> input = keypointManager.GetKeypoints();
if (input != null)
// Getting the size of the input data
int framecount = inputData.Count;
int keypointsPerFrame = inputData[0].Count;
// Creating ArrayFeature
int[] shape = { framecount, keypointsPerFrame };
float[] input = new float[framecount * keypointsPerFrame];
int i = 0;
inputData.ForEach((e) => e.ForEach((f) => input[i++] = f));
MLArrayFeature<float> feature = new MLArrayFeature<float>(input, shape);
// Predicting
Task<List<float>> task = Task.Run(async () => await asyncPredictor.Predict(feature));
yield return new WaitUntil(() => task.IsCompleted);
List<float> result = task.Result;
if (0 < result.Count)
{
//UnityEngine.Debug.Log("input: " + input.Count);
int frameCount = input.Count;
int keypoints_per_frame = input[0].Count;
// Create a tensor with the input
inputTensor = new Tensor(frameCount, keypoints_per_frame);
// Fill the tensor with the input
for (int i = 0; i < frameCount; i++)
{
for (int j = 0; j < keypoints_per_frame; j++)
{
inputTensor[i, j] = input[i][j];
}
}
int stepsPerFrame = 190;
enumerator = worker.StartManualSchedule(inputTensor);
int step = 0;
while (enumerator.MoveNext())
{
if (++step % stepsPerFrame == 0)
{
//Debug.Log(Time.timeAsDouble + " : " + step);
yield return null;
}
}
var output = worker.PeekOutput();
inputTensor.Dispose();
// Get the output as an array
float[] outputArray = output.ToReadOnlyArray();
//Debug.Log($"out = [{outputArray.Aggregate(" ", (t, f) => $"{t}{f} ")}]");
// Calculate the softmax of the output
float max = outputArray.Max();
float[] softmaxedOutput = outputArray.Select(x => Mathf.Exp(x - max)).ToArray();
float sum = softmaxedOutput.Sum();
float[] softmaxedOutput2 = softmaxedOutput.Select(x => x / sum).ToArray();
// Get the index of the highest probability
int maxIndex = softmaxedOutput2.ToList().IndexOf(softmaxedOutput2.Max());
// Get the letter from the index
char letter = (char)(maxIndex + 65);
float accuracy = (Mathf.RoundToInt(softmaxedOutput2[maxIndex] * 100));
// Set the letterProbabilities, currently used by Courses
learnableProbabilities = new Dictionary<string, float>();
for (int i = 0; i < softmaxedOutput2.Length; i++)
// Temporary fix
List<string> signs = new List<string>()
{
learnableProbabilities.Add(((char)(i + 65)).ToString(), softmaxedOutput2[i]);
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"
};
for (int j = 0; j < result.Count; j++)
{
learnableProbabilities.Add(signs[j].ToUpper(), result[j]);
}
//Debug.Log($"prob = [{learnableProbabilities.Aggregate(" ", (t, kv) => $"{t}{kv.Key}:{kv.Value} ")}]");
foreach(Listener listener in listeners)
foreach (Listener listener in listeners)
{
yield return listener.ProcessIncomingCall();
}
@@ -339,77 +367,85 @@ namespace Mediapipe.Unity.Tutorial
yield return null;
}
}
}
/// <summary>
/// Proper destruction of the MediaPipe graph
/// </summary>
private void OnDestroy()
{
if (webcamTexture != null)
{
webcamTexture.Stop();
}
if (graph != null)
{
try
{
// Close the input and drain pending packets before disposing.
graph.CloseInputStream("input_video").AssertOk();
graph.WaitUntilDone().AssertOk();
}
finally
{
// Always release the native graph, even if the drain above throws.
graph.Dispose();
}
}
// inputTensor must still be disposed, if it exists
inputTensor?.Dispose();
worker?.Dispose();
}
/// <summary>
/// So long as there are cameras to use, you swap the camera you are using to another in the list.
/// </summary>
/// <summary>
/// Cycles to the next available webcam device, stopping the current one first.
/// Does nothing when no camera devices are present.
/// </summary>
public void SwapCam()
{
// Nothing to swap to when no cameras are available.
if (WebCamTexture.devices.Length == 0)
{
return;
}
// Release the currently playing camera (if any) before switching.
// If no camera was playing, the screen texture was never assigned, so there
// is nothing to reset.
if (webcamTexture.isPlaying)
{
screen.texture = null;
webcamTexture.Stop();
webcamTexture = null;
}
// Advance to the next device, wrapping around the device list.
camdex = (camdex + 1) % WebCamTexture.devices.Length;
// Spin up the newly selected camera and show it on the primary screen.
WebCamDevice nextDevice = WebCamTexture.devices[camdex];
webcamTexture = new WebCamTexture(nextDevice.name);
screen.texture = webcamTexture;
webcamTexture.Play();
}
/// <summary>
/// Swaps the display screens
/// </summary>
public void SwapScreen()
{
if(screen2.texture == null && screen.texture != null)
{
screen2.texture = webcamTexture;
screen.texture = null;
}
else if (screen2.texture != null && screen.texture == null)
{
screen.texture = webcamTexture;
screen2.texture = null;
}
yield return null;
}
}
/// <summary>
/// Proper destruction of the MediaPipe graph
/// </summary>
private void OnDestroy()
{
if (webcamTexture != null)
{
webcamTexture.Stop();
}
if (graph != null)
{
try
{
// Close the input and drain pending packets before disposing.
graph.CloseInputStream("input_video").AssertOk();
graph.WaitUntilDone().AssertOk();
}
finally
{
// Always release the native graph, even if the drain above throws.
graph.Dispose();
}
}
// NOTE(review): assumed to release the wrapped predictor and its MLEdgeModel
// (NatMLSignPredictor.Dispose disposes the edge model) — confirm.
if (asyncPredictor != null)
{
asyncPredictor.Dispose();
}
}
/// <summary>
/// So long as there are cameras to use, you swap the camera you are using to another in the list.
/// </summary>
public void SwapCam()
{
if (WebCamTexture.devices.Length > 0)
{
// Stop the old camera
// If there was no camera playing before, then you dont have to reset the texture, as it wasn't assigned in the first place.
if (webcamTexture.isPlaying)
{
screen.texture = null;
webcamTexture.Stop();
webcamTexture = null;
}
// Find the new camera, wrapping around to the first device after the last.
camdex += 1;
camdex %= WebCamTexture.devices.Length;
// Start the new camera
// NOTE(review): width/height/inputTexture are not refreshed here; this assumes
// every device shares the first camera's resolution — confirm.
WebCamDevice device = WebCamTexture.devices[camdex];
webcamTexture = new WebCamTexture(device.name);
screen.texture = webcamTexture;
webcamTexture.Play();
}
}
/// <summary>
/// Swaps the display screens
/// </summary>
/// <summary>
/// Moves the webcam feed between the primary and secondary display screens.
/// Only one screen shows the feed at a time; the other is cleared.
/// </summary>
public void SwapScreen()
{
bool primaryShowing = screen.texture != null;
bool secondaryShowing = screen2.texture != null;
if (primaryShowing && !secondaryShowing)
{
// Hand the feed over to the secondary screen.
screen2.texture = webcamTexture;
screen.texture = null;
}
else if (secondaryShowing && !primaryShowing)
{
// Hand the feed back to the primary screen.
screen.texture = webcamTexture;
screen2.texture = null;
}
}
/// <summary>
/// Switches the active sign-recognition model in the model list.
/// </summary>
/// <param name="index">Index of the model to activate.</param>
public void ChangeModel(ModelIndex index) => modelList.SetCurrentModel(index);
}

View File

@@ -179,7 +179,7 @@ public partial class SpellingBeeController : AbstractFeedback
{
StartController();
signPredictor.SetModel(currentTheme.modelIndex);
signPredictor.ChangeModel(ModelIndex.FINGERSPELLING);
AddSelfAsListener();
}
/// <summary>

View File

@@ -1,8 +0,0 @@
fileFormatVersion: 2
guid: f6ebab52a13ea425ba87006839f1d776
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,148 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Onnx;
using UnityEditor;
using UnityEngine.Analytics;
namespace Unity.Barracuda.Editor
{
internal class BarracudaAnalytics
{
static bool s_EventRegistered = false;
const int k_MaxEventsPerHour = 1000;
const int k_MaxNumberOfElements = 1000;
const string k_VendorKey = "unity.barracuda";
const string k_ImportEventName = "uBarracudaImport";
static bool EnableAnalytics()
{
AnalyticsResult result = EditorAnalytics.RegisterEventWithLimit(k_ImportEventName, k_MaxEventsPerHour, k_MaxNumberOfElements, k_VendorKey);
if (result == AnalyticsResult.Ok)
s_EventRegistered = true;
return s_EventRegistered;
}
struct BarracudaImportAnalyticsData
{
public string model_type;
public string original_layers;
public string imported_layers;
public string import_warnings;
}
public static void SendBarracudaImportEvent(object originalModel, Model importedModel)
{
//The event shouldn't be able to report if this is disabled but if we know we're not going to report
//Lets early out and not waste time gathering all the data
if (!EditorAnalytics.enabled)
return;
if (!EnableAnalytics())
return;
var data = new BarracudaImportAnalyticsData();
try
{
data.original_layers = AnalyzeONNXModel(originalModel);
data.imported_layers = AnalyzeNNModel(importedModel);
data.model_type = string.IsNullOrEmpty(data.original_layers) ? "NN" : "ONNX";
data.import_warnings = AnalyzeWarnings(importedModel);
}
catch (Exception e)
{
D.LogError($"Failed collecting Barracuda analytics: {e}");
}
EditorAnalytics.SendEventWithLimit(k_ImportEventName, data);
}
static string AnalyzeONNXModel(object originalModel)
{
if (!(originalModel is ModelProto))
return "";
var layers = new Dictionary<string, int>();
var onnxModel = originalModel as ModelProto;
foreach (var node in onnxModel.Graph.Node)
{
var layerDescription = node.OpType;
if (!layers.ContainsKey(layerDescription))
layers[layerDescription] = 1;
else
layers[layerDescription] += 1;
}
return DictionaryToJson(layers);
}
static string AnalyzeNNModel(Model importedModel)
{
var layers = new Dictionary<string, int>();
foreach (Layer layer in importedModel.layers)
{
var layerDescription = LayerToString(layer);
if (!layers.ContainsKey(layerDescription))
layers[layerDescription] = 1;
else
layers[layerDescription] += 1;
}
return DictionaryToJson(layers);
}
static string LayerToString(Layer layer)
{
var layerDescription = layer.type.ToString();
if (layer.type == Layer.Type.Conv2D || layer.type == Layer.Type.Conv2DTrans ||
layer.type == Layer.Type.Conv3D || layer.type == Layer.Type.Conv3DTrans ||
layer.type == Layer.Type.DepthwiseConv2D)
{
layerDescription += "_" + ConvShapeToString(layer);
}
if (layer.activation != Layer.Activation.None)
layerDescription += "_" + layer.activation.ToString();
return layerDescription;
}
static string ConvShapeToString(Layer layer)
{
if (layer.type == Layer.Type.Conv2D ||
layer.type == Layer.Type.DepthwiseConv2D ||
layer.type == Layer.Type.Conv2DTrans)
return string.Join("_",
layer.datasets.Where(d => d.name.EndsWith("/K")).Select(it =>
$"{it.shape.kernelHeight}x{it.shape.kernelWidth}x{it.shape.kernelDepth}x{it.shape.kernelCount}"));
if (layer.type == Layer.Type.Conv3D ||
layer.type == Layer.Type.Conv3DTrans)
return string.Join("_",
layer.datasets.Where(d => d.name.EndsWith("/K")).Select(it =>
$"{it.shape.kernelSpatialDepth}x{it.shape.kernelHeight}x{it.shape.kernelWidth}x{it.shape.kernelDepth}x{it.shape.kernelCount}"));
return "";
}
static string AnalyzeWarnings(Model importedModel)
{
return "[" + string.Join(",",importedModel.Warnings.Select(item => $"'{item.LayerName}:{item.Message}'")) + "]";
}
static string DictionaryToJson(Dictionary<string, int> dict)
{
var entries = dict.Select(d => $"\"{d.Key}\":{string.Join(",", d.Value)}");
return "{" + string.Join(",", entries) + "}";
}
}
}

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 92cb0e57f8c0c4255a2d2d93f844424d
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.3 KiB

View File

@@ -1,106 +0,0 @@
fileFormatVersion: 2
guid: 8682ff569c4c7457a8a8e3a527aad537
TextureImporter:
fileIDToRecycleName: {}
externalObjects: {}
serializedVersion: 4
mipmaps:
mipMapMode: 0
enableMipMap: 0
sRGBTexture: 0
linearTexture: 0
fadeOut: 0
borderMipMap: 0
mipMapsPreserveCoverage: 0
alphaTestReferenceValue: 0.5
mipMapFadeDistanceStart: 1
mipMapFadeDistanceEnd: 3
bumpmap:
convertToNormalMap: 0
externalNormalMap: 0
heightScale: 0.25
normalMapFilter: 0
isReadable: 0
grayScaleToAlpha: 0
generateCubemap: 6
cubemapConvolution: 0
seamlessCubemap: 0
textureFormat: 1
maxTextureSize: 2048
textureSettings:
serializedVersion: 2
filterMode: -1
aniso: 1
mipBias: -1
wrapU: 1
wrapV: 1
wrapW: -1
nPOTScale: 0
lightmap: 0
compressionQuality: 50
spriteMode: 0
spriteExtrude: 1
spriteMeshType: 1
alignment: 0
spritePivot: {x: 0.5, y: 0.5}
spritePixelsToUnits: 100
spriteBorder: {x: 0, y: 0, z: 0, w: 0}
spriteGenerateFallbackPhysicsShape: 1
alphaUsage: 1
alphaIsTransparency: 1
spriteTessellationDetail: -1
textureType: 2
textureShape: 1
maxTextureSizeSet: 0
compressionQualitySet: 0
textureFormatSet: 0
platformSettings:
- buildTarget: DefaultTexturePlatform
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 1
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
- buildTarget: Standalone
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 1
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
- buildTarget: iPhone
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 1
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
- buildTarget: Android
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 1
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
spriteSheet:
serializedVersion: 2
sprites: []
outline: []
physicsShape: []
spritePackingTag:
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,63 +0,0 @@
using System.IO;
using Unity.Barracuda.Editor;
using UnityEditor;
using UnityEngine;
#if UNITY_2020_2_OR_NEWER
using UnityEditor.AssetImporters;
using UnityEditor.Experimental.AssetImporters;
#else
using UnityEditor.Experimental.AssetImporters;
#endif
namespace Unity.Barracuda
{
/// <summary>
/// Asset Importer of barracuda models.
/// </summary>
[ScriptedImporter(3, new[] {"nn"})]
public class NNModelImporter : ScriptedImporter {
// Name of the editor icon asset looked up for imported .nn models.
private const string iconName = "NNModelIcon";
// Lazily-loaded icon texture, cached after the first AssetDatabase lookup.
private Texture2D iconTexture;
/// <summary>
/// Scripted importer callback: wraps the raw .nn bytes in an NNModel asset
/// with a hidden NNModelData sub-asset.
/// </summary>
/// <param name="ctx">Asset import context</param>
public override void OnImportAsset(AssetImportContext ctx)
{
// Raw serialized model bytes; stored verbatim on the data sub-asset.
var model = File.ReadAllBytes(ctx.assetPath);
// Analyze model and send analytics if enabled.
// skipWeights avoids loading the (potentially large) weight data just for analytics.
var nnModel = ModelLoader.Load(ctx.assetPath, skipWeights:true);
BarracudaAnalytics.SendBarracudaImportEvent(null, nnModel);
var assetData = ScriptableObject.CreateInstance<NNModelData>();
assetData.Value = model;
assetData.name = "Data";
// Hide the data sub-asset in the hierarchy; only the NNModel is user-facing.
assetData.hideFlags = HideFlags.HideInHierarchy;
var asset = ScriptableObject.CreateInstance<NNModel>();
asset.modelData = assetData;
ctx.AddObjectToAsset("main obj", asset, LoadIconTexture());
ctx.AddObjectToAsset("model data", assetData);
ctx.SetMainObject(asset);
}
// Finds and caches the importer icon via the asset database; may return null
// if the icon asset is missing from the project.
private Texture2D LoadIconTexture()
{
if (iconTexture == null)
{
string[] allCandidates = AssetDatabase.FindAssets(iconName);
if (allCandidates.Length > 0)
{
iconTexture = AssetDatabase.LoadAssetAtPath(AssetDatabase.GUIDToAssetPath(allCandidates[0]), typeof(Texture2D)) as Texture2D;
}
}
return iconTexture;
}
}
}

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 19ed1486aa27d4903b34839f37b8f69f
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.6 KiB

View File

@@ -1,165 +0,0 @@
fileFormatVersion: 2
guid: 44179f4142e33e24ca4feb8dfe55e56c
TextureImporter:
fileIDToRecycleName: {}
externalObjects: {}
serializedVersion: 9
mipmaps:
mipMapMode: 0
enableMipMap: 0
sRGBTexture: 1
linearTexture: 0
fadeOut: 0
borderMipMap: 0
mipMapsPreserveCoverage: 0
alphaTestReferenceValue: 0.5
mipMapFadeDistanceStart: 1
mipMapFadeDistanceEnd: 3
bumpmap:
convertToNormalMap: 0
externalNormalMap: 0
heightScale: 0.25
normalMapFilter: 0
isReadable: 0
streamingMipmaps: 0
streamingMipmapsPriority: 0
grayScaleToAlpha: 0
generateCubemap: 6
cubemapConvolution: 0
seamlessCubemap: 0
textureFormat: 1
maxTextureSize: 2048
textureSettings:
serializedVersion: 2
filterMode: -1
aniso: -1
mipBias: -100
wrapU: -1
wrapV: -1
wrapW: -1
nPOTScale: 1
lightmap: 0
compressionQuality: 50
spriteMode: 0
spriteExtrude: 1
spriteMeshType: 1
alignment: 0
spritePivot: {x: 0.5, y: 0.5}
spritePixelsToUnits: 100
spriteBorder: {x: 0, y: 0, z: 0, w: 0}
spriteGenerateFallbackPhysicsShape: 1
alphaUsage: 1
alphaIsTransparency: 0
spriteTessellationDetail: -1
textureType: 0
textureShape: 1
singleChannelComponent: 0
maxTextureSizeSet: 0
compressionQualitySet: 0
textureFormatSet: 0
platformSettings:
- serializedVersion: 2
buildTarget: DefaultTexturePlatform
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 0
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
- serializedVersion: 2
buildTarget: Standalone
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 0
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
- serializedVersion: 2
buildTarget: iPhone
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 0
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
- serializedVersion: 2
buildTarget: tvOS
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 0
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
- serializedVersion: 2
buildTarget: Android
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 0
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
- serializedVersion: 2
buildTarget: PS4
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 0
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
- serializedVersion: 2
buildTarget: Windows Store Apps
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 0
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
- serializedVersion: 2
buildTarget: WebGL
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 0
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
spriteSheet:
serializedVersion: 2
sprites: []
outline: []
physicsShape: []
bones: []
spriteID:
vertices: []
indices:
edges: []
weights: []
spritePackingTag:
pSDRemoveMatte: 0
pSDShowRemoveMatteOption: 0
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,106 +0,0 @@
using UnityEngine;
using UnityEditor;
#if UNITY_2020_2_OR_NEWER
using UnityEditor.AssetImporters;
using UnityEditor.Experimental.AssetImporters;
#else
using UnityEditor.Experimental.AssetImporters;
#endif
using System;
using System.IO;
using System.Runtime.CompilerServices;
using Unity.Barracuda.Editor;
using Unity.Barracuda.ONNX;
[assembly: InternalsVisibleToAttribute("Barracuda.EditorTests")]
[assembly: InternalsVisibleToAttribute("Unity.Barracuda.Tests")]
namespace Unity.Barracuda
{
/// <summary>
/// Asset Importer for Open Neural Network Exchange (ONNX) files.
/// For more information about ONNX file format see: https://github.com/onnx/onnx
/// </summary>
[ScriptedImporter(34, new[] { "onnx" })]
public class ONNXModelImporter : ScriptedImporter
{
// Configuration
/// <summary>
/// Enable ONNX model optimization during import. Set via importer UI
/// </summary>
public bool optimizeModel = true;
/// <summary>
/// Fix batch size for ONNX models. Set via importer UI
/// </summary>
public bool forceArbitraryBatchSize = true;
/// <summary>
/// Treat errors as warnings. Set via importer UI
/// </summary>
public bool treatErrorsAsWarnings = false;
[SerializeField, HideInInspector]
internal ONNXModelConverter.ImportMode importMode = ONNXModelConverter.ImportMode.Standard;
[SerializeField, HideInInspector]
internal ONNXModelConverter.DataTypeMode weightsTypeMode = ONNXModelConverter.DataTypeMode.Default;
[SerializeField, HideInInspector]
internal ONNXModelConverter.DataTypeMode activationTypeMode = ONNXModelConverter.DataTypeMode.Default;
internal const string iconName = "ONNXModelIcon";
private Texture2D m_IconTexture;
/// <summary>
/// Scripted importer callback
/// </summary>
/// <param name="ctx">Asset import context</param>
public override void OnImportAsset(AssetImportContext ctx)
{
ONNXModelConverter.ModelImported += BarracudaAnalytics.SendBarracudaImportEvent;
var converter = new ONNXModelConverter(optimizeModel, treatErrorsAsWarnings, forceArbitraryBatchSize, importMode);
var model = converter.Convert(ctx.assetPath);
if (weightsTypeMode == ONNXModelConverter.DataTypeMode.ForceHalf)
model.ConvertWeights(DataType.Half);
else if (weightsTypeMode == ONNXModelConverter.DataTypeMode.ForceFloat)
model.ConvertWeights(DataType.Float);
NNModelData assetData = ScriptableObject.CreateInstance<NNModelData>();
using (var memoryStream = new MemoryStream())
using (var writer = new BinaryWriter(memoryStream))
{
ModelWriter.Save(writer, model);
assetData.Value = memoryStream.ToArray();
}
assetData.name = "Data";
assetData.hideFlags = HideFlags.HideInHierarchy;
NNModel asset = ScriptableObject.CreateInstance<NNModel>();
asset.modelData = assetData;
ctx.AddObjectToAsset("main obj", asset, LoadIconTexture());
ctx.AddObjectToAsset("model data", assetData);
ctx.SetMainObject(asset);
}
// Icon helper
private Texture2D LoadIconTexture()
{
if (m_IconTexture == null)
{
string[] allCandidates = AssetDatabase.FindAssets(iconName);
if (allCandidates.Length > 0)
{
m_IconTexture = AssetDatabase.LoadAssetAtPath(AssetDatabase.GUIDToAssetPath(allCandidates[0]), typeof(Texture2D)) as Texture2D;
}
}
return m_IconTexture;
}
}
}

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 683b6cb6d0a474744822c888b46772c9
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,461 +0,0 @@
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using UnityEditor;
#if UNITY_2020_2_OR_NEWER
using UnityEditor.AssetImporters;
using UnityEditor.Experimental.AssetImporters;
#else
using UnityEditor.Experimental.AssetImporters;
#endif
using UnityEngine;
using System;
using System.IO;
using System.Reflection;
using Unity.Barracuda.ONNX;
using ImportMode=Unity.Barracuda.ONNX.ONNXModelConverter.ImportMode;
using DataTypeMode=Unity.Barracuda.ONNX.ONNXModelConverter.DataTypeMode;
namespace Unity.Barracuda.Editor
{
/// <summary>
/// Asset Importer Editor of ONNX models
/// </summary>
[CustomEditor(typeof(ONNXModelImporter))]
[CanEditMultipleObjects]
public class ONNXModelImporterEditor : ScriptedImporterEditor
{
static PropertyInfo s_InspectorModeInfo;
static ONNXModelImporterEditor()
{
s_InspectorModeInfo = typeof(SerializedObject).GetProperty("inspectorMode", BindingFlags.NonPublic | BindingFlags.Instance);
}
/// <summary>
/// Scripted importer editor UI callback
/// </summary>
public override void OnInspectorGUI()
{
var onnxModelImporter = target as ONNXModelImporter;
if (onnxModelImporter == null)
return;
InspectorMode inspectorMode = InspectorMode.Normal;
if (s_InspectorModeInfo != null)
inspectorMode = (InspectorMode)s_InspectorModeInfo.GetValue(assetSerializedObject);
serializedObject.Update();
bool debugView = inspectorMode != InspectorMode.Normal;
SerializedProperty iterator = serializedObject.GetIterator();
for (bool enterChildren = true; iterator.NextVisible(enterChildren); enterChildren = false)
{
if (iterator.propertyPath != "m_Script")
EditorGUILayout.PropertyField(iterator, true);
}
// Additional options exposed from ImportMode
SerializedProperty importModeProperty = serializedObject.FindProperty(nameof(onnxModelImporter.importMode));
bool skipMetadataImport = ((ImportMode)importModeProperty.intValue).HasFlag(ImportMode.SkipMetadataImport);
if (EditorGUILayout.Toggle("Skip Metadata Import", skipMetadataImport) != skipMetadataImport)
{
importModeProperty.intValue ^= (int)ImportMode.SkipMetadataImport;
}
if (debugView)
{
importModeProperty.intValue = (int)(ImportMode)EditorGUILayout.EnumFlagsField("Import Mode", (ImportMode)importModeProperty.intValue);
SerializedProperty weightsTypeMode = serializedObject.FindProperty(nameof(onnxModelImporter.weightsTypeMode));
SerializedProperty activationTypeMode = serializedObject.FindProperty(nameof(onnxModelImporter.activationTypeMode));
weightsTypeMode.intValue = (int)(DataTypeMode)EditorGUILayout.EnumPopup("Weights type", (DataTypeMode)weightsTypeMode.intValue);
activationTypeMode.intValue = (int)(DataTypeMode)EditorGUILayout.EnumPopup("Activation type", (DataTypeMode)activationTypeMode.intValue);
}
else
{
if (onnxModelImporter.optimizeModel)
EditorGUILayout.HelpBox("Model optimizations are on\nRemove and re-import model if you observe incorrect behavior", MessageType.Info);
if (onnxModelImporter.importMode == ImportMode.Legacy)
EditorGUILayout.HelpBox("Legacy importer is in use", MessageType.Warning);
}
serializedObject.ApplyModifiedProperties();
ApplyRevertGUI();
}
}
/// <summary>
/// Asset Importer Editor of NNModel (the serialized file generated by ONNXModelImporter).
/// Deserializes the model once in OnEnable, pre-formats all inspector strings, and renders
/// read-only foldout sections (inputs, outputs, memories, layers, constants, warnings).
/// </summary>
[CustomEditor(typeof(NNModel))]
public class NNModelEditor : UnityEditor.Editor
{
    // Use a static store for the foldouts, so it applies to all inspectors
    static Dictionary<string, bool> s_UIHelperFoldouts = new Dictionary<string, bool>();
    // Deserialized model plus pre-formatted display strings; rebuilt in OnEnable so
    // OnInspectorGUI stays cheap per repaint.
    private Model m_Model;
    private List<string> m_Inputs = new List<string>();
    private List<string> m_InputsDesc = new List<string>();
    private List<string> m_Outputs = new List<string>();
    private List<string> m_OutputsDesc = new List<string>();
    private List<string> m_Memories = new List<string>();
    private List<string> m_MemoriesDesc = new List<string>();
    private List<string> m_Layers = new List<string>();
    private List<string> m_LayersDesc = new List<string>();
    private List<string> m_Constants = new List<string>();
    private List<string> m_ConstantsDesc = new List<string>();
    Dictionary<string, string> m_Metadata = new Dictionary<string, string>();
    Vector2 m_MetadataScrollPosition = Vector2.zero;
    // warnings, bucketed by severity (layer name -> message)
    private Dictionary<string, string> m_WarningsNeutral = new Dictionary<string, string>();
    private Dictionary<string, string> m_WarningsInfo = new Dictionary<string, string>();
    private Dictionary<string, string> m_WarningsWarning = new Dictionary<string, string>();
    private Dictionary<string, string> m_WarningsError = new Dictionary<string, string>();
    private Vector2 m_WarningsNeutralScrollPosition = Vector2.zero;
    private Vector2 m_WarningsInfoScrollPosition = Vector2.zero;
    private Vector2 m_WarningsWarningScrollPosition = Vector2.zero;
    private Vector2 m_WarningsErrorScrollPosition = Vector2.zero;
    private long m_NumEmbeddedWeights;
    private long m_NumConstantWeights;
    private long m_TotalWeightsSizeInBytes;
    private Vector2 m_InputsScrollPosition = Vector2.zero;
    private Vector2 m_OutputsScrollPosition = Vector2.zero;
    private Vector2 m_MemoriesScrollPosition = Vector2.zero;
    private Vector2 m_LayerScrollPosition = Vector2.zero;
    private Vector2 m_ConstantScrollPosition = Vector2.zero;
    private const float k_Space = 5f;
    private Texture2D m_IconTexture;
    // Lazily loads and caches the importer icon via the asset database.
    private Texture2D LoadIconTexture()
    {
        if (m_IconTexture != null)
            return m_IconTexture;
        string[] allCandidates = AssetDatabase.FindAssets(ONNXModelImporter.iconName);
        if (allCandidates.Length > 0)
            m_IconTexture = AssetDatabase.LoadAssetAtPath(AssetDatabase.GUIDToAssetPath(allCandidates[0]), typeof(Texture2D)) as Texture2D;
        return m_IconTexture;
    }
    /// <summary>
    /// Editor static preview rendering callback
    /// </summary>
    /// <param name="assetPath">Asset path</param>
    /// <param name="subAssets">Child assets</param>
    /// <param name="width">width</param>
    /// <param name="height">height</param>
    /// <returns>A copy of the importer icon sized to the requested preview, or null if the icon asset was not found</returns>
    public override Texture2D RenderStaticPreview(string assetPath, UnityEngine.Object[] subAssets, int width, int height)
    {
        Texture2D icon = LoadIconTexture();
        if (icon == null)
            return null;
        Texture2D tex = new Texture2D(width, height);
        EditorUtility.CopySerialized(icon, tex);
        return tex;
    }
    // Appends "name:value" (or "name:*" for unknown, i.e. non-positive, dimensions) to the builder.
    private void AddDimension(StringBuilder stringBuilder, string name, int value, bool lastDim = false)
    {
        string strValue = (value >= 1) ? value.ToString() : "*";
        stringBuilder.AppendFormat("{0}:{1}", name, strValue);
        if (!lastDim)
            stringBuilder.Append(", ");
    }
    // Formats a 4D (NHWC) or 8D (SRNTDHWC) shape as a human-readable "shape: (...)" string.
    // For 8D shapes the extra axes are only printed when any of them is larger than 1.
    private string GetUIStringFromShape(int[] shape)
    {
        StringBuilder stringBuilder = new StringBuilder("shape: (", 50);
        if (shape.Length == 8)
        {
            bool is8D = (shape[0] > 1 || shape[1] > 1 || shape[3] > 1 || shape[4] > 1);
            if (is8D) AddDimension(stringBuilder, "s", shape[0]);
            if (is8D) AddDimension(stringBuilder, "r", shape[1]);
            AddDimension(stringBuilder, "n", shape[2]);
            if (is8D) AddDimension(stringBuilder, "t", shape[3]);
            if (is8D) AddDimension(stringBuilder, "d", shape[4]);
            AddDimension(stringBuilder, "h", shape[5]);
            AddDimension(stringBuilder, "w", shape[6]);
            AddDimension(stringBuilder, "c", shape[7], true);
        }
        else
        {
            UnityEngine.Debug.Assert(shape.Length == 4);
            AddDimension(stringBuilder, "n", shape[0]);
            AddDimension(stringBuilder, "h", shape[1]);
            AddDimension(stringBuilder, "w", shape[2]);
            AddDimension(stringBuilder, "c", shape[3], true);
        }
        stringBuilder.Append(")");
        return stringBuilder.ToString();
    }
    // Deserializes the model and pre-computes every string the inspector shows.
    void OnEnable()
    {
        var nnModel = target as NNModel;
        if (nnModel == null)
            return;
        if (nnModel.modelData == null)
            return;
        m_Model = nnModel.GetDeserializedModel();
        if (m_Model == null)
            return;
        m_Inputs = m_Model.inputs.Select(i => i.name).ToList();
        m_InputsDesc = m_Model.inputs.Select(i => GetUIStringFromShape(i.shape)).ToList();
        m_Outputs = m_Model.outputs.ToList();
        bool allKnownInputShapes = true;
        var inputShapes = new Dictionary<string, TensorShape>();
        foreach (var i in m_Model.inputs)
        {
            allKnownInputShapes = allKnownInputShapes && ModelAnalyzer.IsInputShapeAcceptablyKnowForShapeInference(i);
            if (!allKnownInputShapes)
                break;
            inputShapes.Add(i.name, new TensorShape(i.shape));
        }
        if (allKnownInputShapes)
        {
            // Try to infer concrete output shapes; fall back to fully-unknown on failure.
            m_OutputsDesc = m_Model.outputs.Select(i => {
                string output = "shape: (n:*, h:*, w:*, c:*)";
                try
                {
                    TensorShape shape;
                    if (ModelAnalyzer.TryGetOutputTensorShape(m_Model, inputShapes, i, out shape))
                        output = GetUIStringFromShape(shape.ToArray());
                }
                catch (Exception e)
                {
                    Debug.LogError($"Unexpected error while evaluating model output {i}. {e}");
                }
                return output; }).ToList();
        }
        else
        {
            m_OutputsDesc = m_Model.outputs.Select(i => "shape: (n:*, h:*, w:*, c:*)").ToList();
        }
        m_Memories = m_Model.memories.Select(i => i.input).ToList();
        m_MemoriesDesc = m_Model.memories.Select(i => $"shape:{i.shape.ToString()} output:{i.output}").ToList();
        // Materialize once: these queries are enumerated several times below.
        var layers = m_Model.layers.Where(i => i.type != Layer.Type.Load).ToList();
        var constants = m_Model.layers.Where(i => i.type == Layer.Type.Load).ToList();
        m_Layers = layers.Select(i => i.type.ToString()).ToList();
        m_LayersDesc = layers.Select(i => i.ToString()).ToList();
        m_Constants = constants.Select(i => i.type.ToString()).ToList();
        m_ConstantsDesc = constants.Select(i => i.ToString()).ToList();
        m_NumEmbeddedWeights = layers.Sum(l => (long)l.datasets.Sum(ds => (long)ds.length));
        m_NumConstantWeights = constants.Sum(l => (long)l.datasets.Sum(ds => (long)ds.length));
        // weights are not loaded for UI, recompute size
        m_TotalWeightsSizeInBytes = 0;
        for (var l = 0; l < m_Model.layers.Count; ++l)
            for (var d = 0; d < m_Model.layers[l].datasets.Length; ++d)
                m_TotalWeightsSizeInBytes += m_Model.layers[l].datasets[d].length * m_Model.layers[l].datasets[d].itemSizeInBytes;
        m_Metadata = new Dictionary<string, string>(m_Model.Metadata);
        for (int i = 0; i < m_Model.Warnings.Count; i++)
        {
            var warning = m_Model.Warnings[i].LayerName;
            var warningDesc = m_Model.Warnings[i].Message;
            MessageType messageType = MessageType.Warning;
            // NOTE(review): messages may encode their severity as a "MessageType" prefix with the
            // numeric MessageType value at index 12 and the payload from index 13 on — TODO confirm
            // against the importer's warning-encoding code.
            if (warningDesc.StartsWith("MessageType"))
            {
                messageType = (MessageType)(warningDesc[12] - '0');
                warningDesc = warningDesc.Substring(13);
            }
            switch (messageType)
            {
                case MessageType.None:
                    m_WarningsNeutral[warning] = warningDesc;
                    break;
                case MessageType.Info:
                    m_WarningsInfo[warning] = warningDesc;
                    break;
                case MessageType.Warning:
                    m_WarningsWarning[warning] = warningDesc;
                    break;
                case MessageType.Error:
                    m_WarningsError[warning] = warningDesc;
                    break;
            }
        }
    }
    // Draws a button that dumps the serialized .nn bytes to a temp file and opens it
    // with the OS default handler.
    private void OpenNNModelAsTempFileButton(NNModel nnModel)
    {
        if (nnModel == null)
            return;
        if (nnModel.modelData == null)
            return;
        if (GUILayout.Button("Open imported NN model as temp file"))
        {
            string tempPath = Application.temporaryCachePath;
            string filePath = Path.Combine(tempPath, nnModel.name);
            string filePathWithExtension = Path.ChangeExtension(filePath, "nn");
            File.WriteAllBytes(filePathWithExtension, nnModel.modelData.Value);
            System.Diagnostics.Process.Start(filePathWithExtension);
        }
    }
    /// <summary>
    /// Editor UI rendering callback
    /// </summary>
    public override void OnInspectorGUI()
    {
        if (m_Model == null)
            return;
        // HACK: When inspector settings are applied and the file is re-imported there doesn't seem to be a clean way to
        // get a notification from Unity, so we detect this change
        var nnModel = target as NNModel;
        if (nnModel && m_Model != nnModel.GetDeserializedModel())
            OnEnable(); // Model data changed underneath while inspector was active, so reload
        GUI.enabled = true;
        OpenNNModelAsTempFileButton(nnModel);
        GUILayout.Label($"Source: {m_Model.IrSource}");
        GUILayout.Label($"Version: {m_Model.IrVersion}");
        GUILayout.Label($"Producer Name: {m_Model.ProducerName}");
        if (m_Metadata.Any())
        {
            ListUIHelper($"Metadata {m_Metadata.Count}",
                m_Metadata.Keys.ToList(), m_Metadata.Values.ToList(), ref m_MetadataScrollPosition);
        }
        if (m_WarningsError.Any())
        {
            ListUIHelper($"Errors {m_WarningsError.Count.ToString()}", m_WarningsError.Keys.ToList(), m_WarningsError.Values.ToList(), ref m_WarningsErrorScrollPosition);
            EditorGUILayout.HelpBox("Model contains errors. Behavior might be incorrect", MessageType.Error, true);
        }
        if (m_WarningsWarning.Any())
        {
            ListUIHelper($"Warnings {m_WarningsWarning.Count.ToString()}", m_WarningsWarning.Keys.ToList(), m_WarningsWarning.Values.ToList(), ref m_WarningsWarningScrollPosition);
            EditorGUILayout.HelpBox("Model contains warnings. Behavior might be incorrect", MessageType.Warning, true);
        }
        if (m_WarningsInfo.Any())
        {
            ListUIHelper($"Information: ", m_WarningsInfo.Keys.ToList(), m_WarningsInfo.Values.ToList(), ref m_WarningsInfoScrollPosition);
            EditorGUILayout.HelpBox("Model contains import information.", MessageType.Info, true);
        }
        if (m_WarningsNeutral.Any())
        {
            ListUIHelper($"Comments: ", m_WarningsNeutral.Keys.ToList(), m_WarningsNeutral.Values.ToList(), ref m_WarningsNeutralScrollPosition);
        }
        var constantWeightInfo = m_Constants.Count > 0 ? $" using {m_NumConstantWeights:n0} weights" : "";
        ListUIHelper($"Inputs ({m_Inputs.Count})", m_Inputs, m_InputsDesc, ref m_InputsScrollPosition);
        ListUIHelper($"Outputs ({m_Outputs.Count})", m_Outputs, m_OutputsDesc, ref m_OutputsScrollPosition);
        ListUIHelper($"Memories ({m_Memories.Count})", m_Memories, m_MemoriesDesc, ref m_MemoriesScrollPosition);
        ListUIHelper($"Layers ({m_Layers.Count} using {m_NumEmbeddedWeights:n0} embedded weights)", m_Layers, m_LayersDesc, ref m_LayerScrollPosition, m_Constants.Count == 0 ? 1.5f : 1f);
        ListUIHelper($"Constants ({m_Constants.Count}{constantWeightInfo})", m_Constants, m_ConstantsDesc, ref m_ConstantScrollPosition);
        GUILayout.Label($"Total weight size: {m_TotalWeightsSizeInBytes:n0} bytes");
    }
    // Renders one foldout section as a scrollable two-column (name, description) list
    // with a per-line/per-section copy-to-clipboard context menu.
    private static void ListUIHelper(string sectionTitle, IReadOnlyList<string> names, IReadOnlyList<string> descriptions, ref Vector2 scrollPosition, float maxHeightMultiplier = 1f)
    {
        int n = names.Count; // Count property, not LINQ Count()
        UnityEngine.Debug.Assert(descriptions.Count == n);
        if (descriptions.Count < n)
            return;
        GUILayout.Space(k_Space);
        if (!s_UIHelperFoldouts.TryGetValue(sectionTitle, out bool foldout))
            foldout = true;
        foldout = EditorGUILayout.Foldout(foldout, sectionTitle, true, EditorStyles.foldoutHeader);
        s_UIHelperFoldouts[sectionTitle] = foldout;
        if (foldout)
        {
            float height = Mathf.Min(n * 20f + 2f, 150f * maxHeightMultiplier);
            if (n == 0)
                return;
            scrollPosition = GUILayout.BeginScrollView(scrollPosition, GUI.skin.box, GUILayout.MinHeight(height));
            Event e = Event.current;
            float lineHeight = 16.0f;
            // Pre-build the whole section's text once for the "Copy section" menu entry.
            StringBuilder fullText = new StringBuilder();
            fullText.Append(sectionTitle);
            fullText.AppendLine();
            for (int i = 0; i < n; ++i)
            {
                string name = names[i];
                string description = descriptions[i];
                fullText.Append($"{name} {description}");
                fullText.AppendLine();
            }
            for (int i = 0; i < n; ++i)
            {
                Rect r = EditorGUILayout.GetControlRect(false, lineHeight);
                string name = names[i];
                string description = descriptions[i];
                // Context menu, "Copy"
                if (e.type == EventType.ContextClick && r.Contains(e.mousePosition))
                {
                    e.Use();
                    var menu = new GenericMenu();
                    // need to copy current value to be used in delegate
                    // (C# closures close over variables, not their values)
                    menu.AddItem(new GUIContent($"Copy current line"), false, delegate
                    {
                        EditorGUIUtility.systemCopyBuffer = $"{name} {description}";
                    });
                    menu.AddItem(new GUIContent($"Copy section"), false, delegate
                    {
                        EditorGUIUtility.systemCopyBuffer = fullText.ToString();
                    });
                    menu.ShowAsContext();
                }
                // Color even line for readability
                if (e.type == EventType.Repaint)
                {
                    GUIStyle st = "CN EntryBackEven";
                    if ((i & 1) == 0)
                        st.Draw(r, false, false, false, false);
                }
                // layer name on the right side
                Rect locRect = r;
                locRect.xMax = locRect.xMin;
                GUIContent gc = new GUIContent(name);
                // calculate size so we can left-align it
                Vector2 size = EditorStyles.miniBoldLabel.CalcSize(gc);
                locRect.xMax += size.x;
                GUI.Label(locRect, gc, EditorStyles.miniBoldLabel);
                locRect.xMax += 2;
                // message
                Rect msgRect = r;
                msgRect.xMin = locRect.xMax;
                GUI.Label(msgRect, new GUIContent(description), EditorStyles.miniLabel);
            }
            GUILayout.EndScrollView();
        }
    }
}
}

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 08ecb3218a86c6741aed5b2a299b203b
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,17 +0,0 @@
{
"name": "Unity.Barracuda.Editor",
"references": [
"Unity.Barracuda",
"Unity.Barracuda.ONNX"
],
"optionalUnityReferences": [],
"includePlatforms": [
"Editor"
],
"excludePlatforms": [],
"allowUnsafeCode": false,
"overrideReferences": false,
"precompiledReferences": [],
"autoReferenced": true,
"defineConstraints": []
}

View File

@@ -1,7 +0,0 @@
fileFormatVersion: 2
guid: 9f1e7d835703842dda0e25142ed6c3c9
AssemblyDefinitionImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,8 +0,0 @@
fileFormatVersion: 2
guid: a03a1fa0e3b784e19a9e9d31b945b252
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,8 +0,0 @@
fileFormatVersion: 2
guid: 5bec48e8f6ff349488387cf35fbae752
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,7 +0,0 @@
using System.Reflection;
// DON'T EDIT
// Will be replaced by Tools/Build/build.py
[assembly: AssemblyVersion("3.0.0.0")]
[assembly: AssemblyFileVersion("3.0.0.0")]

View File

@@ -1,3 +0,0 @@
fileFormatVersion: 2
guid: f7f9574517c146ada866c486dc392731
timeCreated: 1533296387

View File

@@ -1,8 +0,0 @@
fileFormatVersion: 2
guid: 12a6bedd18899cd4189f66d8188f29ff
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 67f00a1befd4144eca5685250d893f09
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,194 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq; // ToList()
using UnityEngine;
using UnityEngine.Assertions;
namespace Unity.Barracuda {
// Internal factory that resolves a requested worker backend type to a concrete one,
// instantiates the matching IOps implementation, and assembles a GenericWorker
// (optionally wrapped with comparison/verbose/stats ops) for a patched, validated model.
internal class BarracudaBackendsFactory
{
    // Resolves Type.Auto to the best backend for the automatically-detected device;
    // any explicit type is returned unchanged.
    public static WorkerFactory.Type ResolveAutoType(WorkerFactory.Type type)
    {
        if (type != WorkerFactory.Type.Auto)
            return type;
        return GetBestTypeForDevice(WorkerFactory.Device.Auto);
    }
    // Maps a device to its preferred backend: compute shaders for GPU/Auto, Burst for everything else.
    internal static WorkerFactory.Type GetBestTypeForDevice(WorkerFactory.Device device)
    {
        switch (device)
        {
            case WorkerFactory.Device.Auto:
            case WorkerFactory.Device.GPU:
                return WorkerFactory.Type.ComputePrecompiled;
            default:
                return WorkerFactory.Type.CSharpBurst;
        }
    }
    // Resolves Auto, then downgrades GPU backends to PixelShader when compute shaders
    // are not supported on this platform.
    internal static WorkerFactory.Type ValidateType(WorkerFactory.Type type)
    {
        type = ResolveAutoType(type);
        Assert.AreNotEqual(type, WorkerFactory.Type.Auto);
        if (WorkerFactory.IsType(type, WorkerFactory.Device.GPU) && !ComputeShaderSingleton.Instance.supported)
        {
            type = WorkerFactory.Type.PixelShader;
        }
        return type;
    }
    // Instantiates the IOps implementation for a concrete (non-Auto) backend type.
    // Note: `verbose` is only forwarded to the compute backends.
    private static IOps CreateOps(WorkerFactory.Type type, ITensorAllocator allocator, bool verbose)
    {
        switch (type)
        {
            case WorkerFactory.Type.ComputePrecompiled:
                return new PrecompiledComputeOps(allocator, verbose);
            case WorkerFactory.Type.Compute:
                return new ComputeOps(allocator, verbose);
            case WorkerFactory.Type.ComputeRef:
                return new ReferenceComputeOps(allocator);
            case WorkerFactory.Type.PixelShader:
                return new PixelShaderOps(allocator);
            case WorkerFactory.Type.CSharpBurst:
                return new BurstCPUOps(allocator);
            case WorkerFactory.Type.CSharp:
                return new UnsafeArrayCPUOps(allocator);
            default:
                return new ReferenceCPUOps(allocator);
        }
    }
    // Builds a worker: resolves backend types, picks vars/allocator storage strategy,
    // creates the ops chain (base -> compare -> verbose -> stats, in that order),
    // patches/validates the model, and wires in the optional executions reporter.
    internal static IWorker CreateWorker(WorkerFactory.Type type, Model model, string[] additionalOutputs, string[] trimOutputs, WorkerFactory.WorkerConfiguration workerConfiguration, IModelExecutionsReporter modelExecutionsReporter = null)
    {
        type = ResolveAutoType(type);
        var compareAgainstType = ResolveAutoType(workerConfiguration.compareAgainstType);
        Assert.AreNotEqual(type, WorkerFactory.Type.Auto);
        Assert.AreNotEqual(compareAgainstType, WorkerFactory.Type.Auto);
        bool compare = type != compareAgainstType;
        // Outside the editor, missing compute-shader support forces the PixelShader fallback.
        if (WorkerFactory.IsType(type, WorkerFactory.Device.GPU) && !SystemInfo.supportsComputeShaders && !Application.isEditor)
        {
            type = WorkerFactory.Type.PixelShader;
        }
        IVars vars;
        // PixelShader worker uses Blit/Textures, cannot re-use vars unless the dispatch mechanism allows rendering to sub part of the texture
        if ((type == WorkerFactory.Type.PixelShader) || (compareAgainstType == WorkerFactory.Type.PixelShader))
            vars = new GenericVarsWithReuse();
        else
        {
            if (WorkerFactory.IsType(type, WorkerFactory.Device.GPU) || WorkerFactory.IsType(compareAgainstType, WorkerFactory.Device.GPU))
                vars = new ComputeVarsWithSharedModel();
            else
                vars = new DefaultVars();
        }
        ITensorAllocator allocator = vars.GetAllocator();
        if ((type == WorkerFactory.Type.PixelShader) || (compareAgainstType == WorkerFactory.Type.PixelShader))
            allocator = new TensorCachingByShapeAllocator();
        if (workerConfiguration.verbose)
            D.Log($"Storage type: {vars.GetType()}. Allocator type: {allocator.GetType()}.");
        IOps ops = CreateOps(type, allocator, workerConfiguration.verbose);
        // When a comparison backend differs from the main one, every op runs on both and results are compared.
        if (compare)
            ops = new CompareOps(ops,
                CreateOps(compareAgainstType, allocator, workerConfiguration.verbose), workerConfiguration.compareLogLevel, workerConfiguration.compareEpsilon);
        if (workerConfiguration.verbose || modelExecutionsReporter != null)
            ops = new VerboseOps(ops, workerConfiguration.verbose);
        if (Application.isEditor || modelExecutionsReporter != null)
            ops = new StatsOps(ops);
        model = ValidateModel(
            PatchModel(model, additionalOutputs, trimOutputs));
        ops.SetModelExecutionsReporter(modelExecutionsReporter);
        return new GenericWorker(model, ops, vars, workerConfiguration.verbose, workerConfiguration.takeoverWeights);
    }
    // Returns a shallow-copied model with `additionalOutputs` appended, outputs restricted
    // to `trimOutputs` (when given), unreachable layers removed, and no-ops stripped.
    // The input model instance is never mutated.
    internal static Model PatchModel(Model model, string[] additionalOutputs, string[] trimOutputs = null)
    {
        bool trimModel = trimOutputs != null;
        if (trimOutputs != null)
        {
            foreach (var o in trimOutputs.Except(model.outputs))
                if (additionalOutputs == null || !additionalOutputs.Contains(o))
                    D.LogWarning($"Output specified in trimOutputs was not found in the model: {o}");
            var newModel = model.ShallowCopy();
            newModel.outputs = trimOutputs.Intersect(model.outputs).ToList();
            model = newModel;
        }
        if (additionalOutputs != null)
        {
            foreach (var o in additionalOutputs.Except(model.layers.Select(l => l.name)))
                D.LogWarning($"Layer specified in additionalOutputs was not found in the model: {o}");
            // 'new' means that output name does not yet exist in model.outputs
            // 'valid' means that output name matches one of the existing model.layer names
            var newAndValidAdditionalOutputs =
                additionalOutputs.Except(model.outputs).Intersect(model.layers.Select(l => l.name));
            var newModel = model.ShallowCopy();
            newModel.outputs.AddRange(newAndValidAdditionalOutputs);
            model = newModel;
        }
        if (trimModel)
        {
            // Drop layers that no remaining output depends on.
            var newModel = model.ShallowCopy();
            var upstream = ModelAnalyzer.FindUpstreamLayers(model, newModel.outputs.ToArray());
            foreach (var l in model.layers)
                if (!upstream.Contains(l))
                    newModel.layers.Remove(l);
            model = newModel;
        }
        model = ModelOptimizer.RemoveNoop(model);
        return model;
    }
    // Logs warnings for structural problems (broken links, duplicate outputs, unconnected
    // outputs) and returns the model unchanged.
    internal static Model ValidateModel(Model model)
    {
        // validate, model contains no broken links
        var brokenLinks = ModelAnalyzer.FindBrokenLinks(model);
        if (brokenLinks.Length > 0)
            D.LogWarning($"Model contains {brokenLinks.Length} broken links: {string.Join(",", brokenLinks)}");
        // validate, all model outputs are unique
        // https://stackoverflow.com/questions/18547354/c-sharp-linq-find-duplicates-in-list
        var duplicateOutputs = model.outputs.GroupBy(x => x)
            .Where(g => g.Count() > 1)
            .Select(y => y.Key);
        foreach (var o in duplicateOutputs)
            D.LogWarning($"Output is specified more than once in the model: {o}");
        // validate, model contains no unconnected layers
        var unconnectedOutputs = ModelAnalyzer.FindUnconnectedOutputs(model);
        foreach (var o in unconnectedOutputs)
            D.LogWarning($"Layer is specified as output, but is missing in the model: {o}");
        return model;
    }
}
} // namespace Unity.Barracuda

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 355dc370391814b1c874848bb843b91c
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,245 +0,0 @@
using System.Threading;
using UnityEngine;
using Unity.Jobs;
namespace Unity.Barracuda {
// BarracudaBurstCPU.Core.cs -- definition of class BurstCPUOps, Pin(), BurstTensorData
// BarracudaBurstCPU.Ops.cs -- impl. IOps, job schedulers
// BarracudaBurstCPU.Jobs.cs -- impl. jobs
/// <summary>
/// Burst specific internal `Tensor` data storage.
/// Extends `UnsafeArrayTensorData` with Unity Job System read/write fences so that
/// scheduled Burst jobs can safely share the underlying buffer.
/// </summary>
public class BurstTensorData : UnsafeArrayTensorData, IDependableTensorData
{
    // Fence to wait on before reading the buffer (last producing job).
    private JobHandle m_ReadFence;
    // Fence to wait on before overwriting the buffer (accumulated consumer jobs).
    private JobHandle m_WriteFence;
    // True only when no scheduled job may still reference this buffer.
    private bool m_SafeToDispose = true;
    /// <inheritdoc/>
    public JobHandle fence { get { return m_ReadFence; } set { m_ReadFence = value; m_WriteFence = value; m_SafeToDispose = false; } }
    /// <inheritdoc/>
    public JobHandle reuse { get { return m_WriteFence; } set { m_WriteFence = BurstCPUOps.Dependencies(value, m_WriteFence); m_SafeToDispose = false; } }
    /// <inheritdoc/>
    public unsafe void* rawPtr => array.RawAddressAt(offset);
    /// <summary>
    /// Creates new array
    /// </summary>
    /// <param name="count">count</param>
    /// <param name="dataType">element data type</param>
    public BurstTensorData(int count, DataType dataType) : base(count, dataType)
    {
    }
    /// <summary>
    /// Creates new array
    /// </summary>
    /// <param name="shape">shape</param>
    /// <param name="dataType">element data type</param>
    public BurstTensorData(TensorShape shape, DataType dataType) : base(shape, dataType)
    {
    }
    /// <summary>
    /// Uses shared array
    /// </summary>
    /// <param name="sharedArray">shared array</param>
    public BurstTensorData(ArrayTensorData sharedArray) : base(sharedArray)
    {
    }
    /// <summary>
    /// Uses shared array
    /// </summary>
    /// <param name="sharedArray">shared array</param>
    public BurstTensorData(SharedArrayTensorData sharedArray) : base(sharedArray)
    {
    }
    /// <summary>
    /// Uses unsafe array
    /// </summary>
    /// <param name="unsafeArray">unsafe array</param>
    public BurstTensorData(UnsafeArrayTensorData unsafeArray) : base(unsafeArray.array, unsafeArray.offset, unsafeArray.count, unsafeArray.m_Readonly)
    {
    }
    /// <summary>
    /// Finalizer. Warns when the buffer is collected while jobs may still be in flight.
    /// </summary>
    ~BurstTensorData()
    {
        if (!m_SafeToDispose)
            D.LogWarning($"Found unreferenced, but undisposed Tensor data that potentially participates in an unfinished job and might lead to hazardous memory overwrites: {ToString()}");
    }
    /// <summary>
    /// Dispose contents
    /// </summary>
    public override void Dispose()
    {
        // It isn't safe to Complete jobs from a finalizer thread, so
        // only wait for pending jobs when disposing from the main thread.
        if (Thread.CurrentThread == BurstCPUOps.MainThread)
            CompleteAllPendingOperations();
        base.Dispose();
    }
    // Blocks until both read and write fences are complete; afterwards the buffer is safe to dispose.
    internal void CompleteAllPendingOperations()
    {
        fence.Complete();
        reuse.Complete();
        m_SafeToDispose = true;
    }
    /// <summary>
    /// Reserve (allocate) storage for `count` elements
    /// </summary>
    /// <param name="count">count</param>
    public override void Reserve(int count)
    {
        if (count > maxCapacity)
        {
            // going to reallocate memory in base.Reserve()
            // thus need to finish current work
            CompleteAllPendingOperations();
        }
        base.Reserve(count);
    }
    /// <summary>
    /// Upload data to internal storage. Completes all pending jobs first.
    /// </summary>
    /// <param name="data">data</param>
    /// <param name="shape">shape</param>
    /// <param name="managedBufferStartIndex">`data` start index</param>
    public override void Upload(float[] data, TensorShape shape, int managedBufferStartIndex = 0)
    {
        CompleteAllPendingOperations();
        base.Upload(data, shape, managedBufferStartIndex);
    }
    /// <summary>
    /// Return data from internal storage
    /// </summary>
    /// <param name="shape">shape</param>
    /// <returns>managed array</returns>
    public override float[] Download(TensorShape shape)
    {
        // Download() as optimization gives direct access to the internal buffer
        // thus need to prepare internal buffer for potential writes
        CompleteAllPendingOperations();
        return base.Download(shape);
    }
    /// <summary>
    /// Return shared array from internal storage
    /// </summary>
    /// <param name="offset">offset of this tensor's data within the returned array</param>
    /// <returns>shared array from internal storage</returns>
    public override BarracudaArray SharedAccess(out int offset)
    {
        // SharedAccess() by design gives direct access to the internal buffer
        // thus need to prepare internal buffer for potential writes
        CompleteAllPendingOperations();
        return base.SharedAccess(out offset);
    }
    /// <summary>
    /// Schedule async internal data download
    /// </summary>
    /// <param name="count">count to download</param>
    /// <returns>`true` if download is completed (i.e. the read fence has completed)</returns>
    public override bool ScheduleAsyncDownload(int count)
    {
        return fence.IsCompleted;
    }
    /// <summary>
    /// Object summary as string
    /// </summary>
    /// <returns>object summary</returns>
    public override string ToString()
    {
        string readyToRead = m_SafeToDispose ? "true": "unknown";
        string readyForReuse = m_SafeToDispose ? "true": "unknown";
        try
        {
            readyToRead = fence.IsCompleted.ToString();
            readyForReuse = reuse.IsCompleted.ToString();
        }
        catch (UnityException) {}
        return string.Format("(CPU burst: {0} length: {1} offset: {2} uploaded: {3} ready-to-read: {4} ready-for-reuse: {5})",
            GetHashCode(), m_Array?.Length, m_Offset, m_Count, readyToRead, readyForReuse);
    }
}
/// <summary>
/// Burst specific implementation of `IOps`
/// </summary>
public partial class BurstCPUOps : UnsafeArrayCPUOps
{
    /// <summary>
    /// Create `BurstCPUOps`
    /// </summary>
    /// <param name="allocator">allocator</param>
    public BurstCPUOps(ITensorAllocator allocator = null)
        : base(allocator)
    {
        // No native BLAS plugin available: fall back to the non-BLAS path.
        bool nativeBlasUnavailable = PreferBLAS == BLAS.Native && !blas.IsNative();
        if (nativeBlasUnavailable)
            PreferBLAS = BLAS.Disabled;
    }
    /// <summary>
    /// Pin `Tensor` to Burst backend device, if `uploadCache` is false, data is not uploaded to device
    /// </summary>
    /// <param name="X">`Tensor`</param>
    /// <param name="uploadCache">upload cached values to the new storage when true</param>
    /// <returns>`BurstTensorData`</returns>
    new public static BurstTensorData Pin(Tensor X, bool uploadCache = true)
    {
        X.FlushCache(uploadCache);
        switch (X.tensorOnDevice)
        {
            case BurstTensorData pinned:
                // Already Burst-backed, nothing to do.
                return pinned;
            // Adopt compatible CPU storages in place (same order of preference as before).
            case UnsafeArrayTensorData unsafeArray:
                X.AttachToDevice(new BurstTensorData(unsafeArray));
                break;
            case SharedArrayTensorData sharedArray:
                X.AttachToDevice(new BurstTensorData(sharedArray));
                break;
            case ArrayTensorData plainArray:
                X.AttachToDevice(new BurstTensorData(plainArray));
                break;
            default:
                // Device is not compatible (or absent): create new Burst storage,
                // uploading the cached values only when requested.
                var freshStorage = new BurstTensorData(X.shape, X.dataType);
                if (uploadCache)
                    X.UploadToDevice(freshStorage);
                else
                    X.AllocateOnDevice(freshStorage);
                break;
        }
        return X.tensorOnDevice as BurstTensorData;
    }
    /// <summary>
    /// Prepare `Tensor` for use with Burst backend
    /// </summary>
    /// <param name="X">`Tensor`</param>
    /// <returns>`Tensor`</returns>
    public override Tensor Prepare(Tensor X)
    {
        Pin(X);
        return X;
    }
    /// <summary>
    /// Prepare `Tensor` for use with Burst backend without uploading cached data
    /// </summary>
    /// <param name="X">`Tensor`</param>
    /// <returns>`Tensor`</returns>
    public override Tensor PrepareNoAlloc(Tensor X)
    {
        Pin(X, uploadCache: false);
        return X;
    }
}
} // namespace Barracuda

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: f44c1c453c1754aaeb1e8608df82452b
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,471 +0,0 @@
using UnityEngine;
using UnityEngine.Assertions;
using System;
using System.Collections.Generic;
using Unity.Collections;
using Unity.Collections.LowLevel.Unsafe;
using Unity.Jobs;
using Unity.Mathematics;
namespace Unity.Barracuda {
//#region Job output context helper
internal static class BurstSchedulingHelper
{
#region Private scheduling helpers with pointer aliasing verification
// Binds the X/S/B inputs and O output pointers onto a copy of the job struct and
// schedules it as a parallel-for after `fenceBeforeJobStart`.
private static unsafe JobHandle ScheduleXSBOInternal<T>(T jobData,
    JobHandle fenceBeforeJobStart,
    void* ptrX,
    void* ptrS,
    void* ptrB,
    void* ptrO,
    int arrayLength, int innerloopBatchCount)
    where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXSBO
{
    // Mutate a local copy so the caller's struct stays untouched.
    var job = jobData;
    job.X = new BurstCPUOps.ReadOnlyMemResource { ptr = ptrX };
    job.S = new BurstCPUOps.ReadOnlyMemResource { ptr = ptrS };
    job.B = new BurstCPUOps.ReadOnlyMemResource { ptr = ptrB };
    job.O = new BurstCPUOps.ReadWriteMemResource { ptr = ptrO };
    return job.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
}
// Binds the X/B inputs and O output pointers onto a copy of the job struct and
// schedules it as a parallel-for after `fenceBeforeJobStart`.
private static unsafe JobHandle ScheduleXBOInternal<T>(T jobData,
    JobHandle fenceBeforeJobStart,
    void* ptrX,
    void* ptrB,
    void* ptrO,
    int arrayLength, int innerloopBatchCount)
    where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXBO
{
    // Mutate a local copy so the caller's struct stays untouched.
    var job = jobData;
    job.X = new BurstCPUOps.ReadOnlyMemResource { ptr = ptrX };
    job.B = new BurstCPUOps.ReadOnlyMemResource { ptr = ptrB };
    job.O = new BurstCPUOps.ReadWriteMemResource { ptr = ptrO };
    return job.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
}
// Binds the X input and O output pointers onto a copy of the job struct and
// schedules it as a parallel-for after `fenceBeforeJobStart`.
private static unsafe JobHandle ScheduleXOInternal<T>(T jobData,
    JobHandle fenceBeforeJobStart,
    void* ptrX,
    void* ptrO,
    int arrayLength, int innerloopBatchCount)
    where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXO
{
    // Mutate a local copy so the caller's struct stays untouched.
    var job = jobData;
    job.X = new BurstCPUOps.ReadOnlyMemResource { ptr = ptrX };
    job.O = new BurstCPUOps.ReadWriteMemResource { ptr = ptrO };
    return job.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
}
// Single-job (IJob) variant of the X->O scheduler. Unlike the parallel-for
// overloads, this one asserts that input and output pointers do not alias.
private static unsafe JobHandle ScheduleXOInternal<T>(T jobData,
    JobHandle fenceBeforeJobStart,
    void* ptrX,
    void* ptrO)
    where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationXO
{
    Assert.IsTrue(ptrO != ptrX);
    // Mutate a local copy so the caller's struct stays untouched.
    var job = jobData;
    job.X = new BurstCPUOps.ReadOnlyMemResource { ptr = ptrX };
    job.O = new BurstCPUOps.ReadWriteMemResource { ptr = ptrO };
    return job.Schedule(fenceBeforeJobStart);
}
// Binds only the O output pointer onto a copy of the job struct and schedules
// it as a single job after `fenceBeforeJobStart`.
private static unsafe JobHandle ScheduleOInternal<T>(T jobData,
    JobHandle fenceBeforeJobStart,
    void* ptrO)
    where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationO
{
    var job = jobData;
    job.O = new BurstCPUOps.ReadWriteMemResource { ptr = ptrO };
    return job.Schedule(fenceBeforeJobStart);
}
// Binds only the O output pointer onto a copy of the job struct and schedules
// it as a parallel-for after `fenceBeforeJobStart`.
private static unsafe JobHandle ScheduleOInternal<T>(T jobData,
    JobHandle fenceBeforeJobStart,
    void* ptrO,
    int arrayLength, int innerloopBatchCount)
    where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationO
{
    var job = jobData;
    job.O = new BurstCPUOps.ReadWriteMemResource { ptr = ptrO };
    return job.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
}
#endregion
#region Private fencing helper for readability
// Combines the inputs' `fence` handles with the output's `reuse` handle into the
// single dependency the new job must wait on.
private static JobHandle GetFenceBeforeJobStartXSBO(
    IDependableMemoryResource pinX,
    IDependableMemoryResource pinS,
    IDependableMemoryResource pinB,
    IDependableMemoryResource pinO) =>
    BurstCPUOps.Dependencies(pinX.fence, pinS.fence, pinB.fence, pinO.reuse);
// Combines the inputs' `fence` handles with the output's `reuse` handle into the
// single dependency the new job must wait on.
private static JobHandle GetFenceBeforeJobStartXBO(
    IDependableMemoryResource pinX,
    IDependableMemoryResource pinB,
    IDependableMemoryResource pinO) =>
    BurstCPUOps.Dependencies(pinX.fence, pinB.fence, pinO.reuse);
// Combines the input's `fence` handle with the output's `reuse` handle into the
// single dependency the new job must wait on.
private static JobHandle GetFenceBeforeJobStartXO(
    IDependableMemoryResource pinX,
    IDependableMemoryResource pinO) =>
    BurstCPUOps.Dependencies(pinX.fence, pinO.reuse);
// After scheduling: inputs may be reused once `jobFence` completes; the output
// becomes readable at that point.
private static void SetXSBOFences(this JobHandle jobFence,
    IDependableMemoryResource pinX,
    IDependableMemoryResource pinS,
    IDependableMemoryResource pinB,
    IDependableMemoryResource pinO)
{
    pinX.reuse = pinS.reuse = pinB.reuse = jobFence;
    pinO.fence = jobFence;
}
// After scheduling: inputs may be reused once `jobFence` completes; the output
// becomes readable at that point.
private static void SetXBOFences(this JobHandle jobFence,
    IDependableMemoryResource pinX,
    IDependableMemoryResource pinB,
    IDependableMemoryResource pinO)
{
    pinX.reuse = pinB.reuse = jobFence;
    pinO.fence = jobFence;
}
/// <summary>
/// Records a freshly scheduled job on its resources: input X may be
/// reused only after the job, and output O may be read only after it.
/// </summary>
private static void SetXOFences(this JobHandle jobFence,
    IDependableMemoryResource pinX,
    IDependableMemoryResource pinO)
{
    pinX.reuse = jobFence;  // block future writers of X
    pinO.fence = jobFence;  // block future readers of O
}
#endregion
#region Immediate scheduling helper
/// <summary>
/// Controls whether the scheduling helpers write the read/write fences back to
/// the resources immediately after scheduling, or leave that to the caller.
/// </summary>
internal enum FencingHelperMode
{
    // Helper assigns resource fences itself right after scheduling (default).
    UpdateResourcesFencesOnScheduling,
    // Caller commits the fences later (used by ParallelJobsContext, which
    // combines fences of several jobs before assigning them).
    CustomResourcesFencesHandling,
}
/// <summary>
/// Schedules a parallel-for job reading X/S/B and writing O, honoring the
/// resources' existing fences. Optionally updates those fences on scheduling.
/// </summary>
internal static unsafe JobHandle ScheduleXSBO<T>(this T jobData,
    IDependableMemoryResource rX,
    IDependableMemoryResource rS,
    IDependableMemoryResource rB,
    IDependableMemoryResource rO,
    int arrayLength, int innerloopBatchCount,
    FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXSBO
{
    // Wait for all writes to the inputs and all outstanding reads of the output.
    JobHandle inputFence = GetFenceBeforeJobStartXSBO(rX, rS, rB, rO);
    JobHandle scheduled = ScheduleXSBOInternal(
        jobData, inputFence,
        rX.rawPtr, rS.rawPtr, rB.rawPtr, rO.rawPtr,
        arrayLength, innerloopBatchCount);
    if (fencingMode == FencingHelperMode.UpdateResourcesFencesOnScheduling)
        scheduled.SetXSBOFences(rX, rS, rB, rO);
    return scheduled;
}
/// <summary>
/// Schedules a parallel-for job reading X/B and writing O, honoring the
/// resources' existing fences. Optionally updates those fences on scheduling.
/// </summary>
internal static unsafe JobHandle ScheduleXBO<T>(this T jobData,
    IDependableMemoryResource X,
    IDependableMemoryResource B,
    IDependableMemoryResource O,
    int arrayLength, int innerloopBatchCount,
    FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXBO
{
    // Wait for writes to the inputs and outstanding reads of the output.
    JobHandle inputFence = GetFenceBeforeJobStartXBO(X, B, O);
    JobHandle scheduled = ScheduleXBOInternal(
        jobData, inputFence, X.rawPtr, B.rawPtr, O.rawPtr,
        arrayLength, innerloopBatchCount);
    if (fencingMode == FencingHelperMode.UpdateResourcesFencesOnScheduling)
        scheduled.SetXBOFences(X, B, O);
    return scheduled;
}
/// <summary>
/// Schedules a single IJob that only writes O (no inputs), waiting for any
/// outstanding readers of O. Optionally updates O's fence on scheduling.
/// </summary>
internal static unsafe JobHandle ScheduleO<T>(this T jobData,
    IDependableMemoryResource O,
    FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationO
{
    // No inputs: only wait until previous readers of O are done.
    JobHandle scheduled = ScheduleOInternal(jobData, O.reuse, O.rawPtr);
    if (fencingMode == FencingHelperMode.UpdateResourcesFencesOnScheduling)
        O.fence = scheduled;  // future readers of O wait on this job
    return scheduled;
}
/// <summary>
/// Schedules a parallel-for job reading X and writing O, honoring the
/// resources' existing fences. Optionally updates those fences on scheduling.
/// </summary>
internal static unsafe JobHandle ScheduleXO<T>(this T jobData,
    IDependableMemoryResource X,
    IDependableMemoryResource O,
    int arrayLength, int innerloopBatchCount,
    FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXO
{
    // Wait for writes to X and outstanding reads of O.
    JobHandle inputFence = GetFenceBeforeJobStartXO(X, O);
    JobHandle scheduled = ScheduleXOInternal(
        jobData, inputFence, X.rawPtr, O.rawPtr, arrayLength, innerloopBatchCount);
    if (fencingMode == FencingHelperMode.UpdateResourcesFencesOnScheduling)
        scheduled.SetXOFences(X, O);
    return scheduled;
}
/// <summary>
/// Schedules a parallel-for job writing into tensor data at an element offset
/// (no inputs), waiting for any outstanding readers of O.
/// </summary>
internal static unsafe JobHandle ScheduleO<T>(this T jobData,
    BurstTensorData pinO,
    int offsetO,
    int arrayLength, int innerloopBatchCount,
    FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationO
{
    // Resolve the raw destination address inside the pinned tensor storage.
    void* ptrO = pinO.array.RawAddressAt(pinO.offset + offsetO);
    JobHandle scheduled = ScheduleOInternal(jobData, pinO.reuse, ptrO, arrayLength, innerloopBatchCount);
    if (fencingMode == FencingHelperMode.UpdateResourcesFencesOnScheduling)
        pinO.fence = scheduled;  // future readers of O wait on this job
    return scheduled;
}
/// <summary>
/// Schedules a single IJob reading from and writing to tensor data at element
/// offsets, honoring the resources' existing fences.
/// </summary>
internal static unsafe JobHandle ScheduleXO<T>(this T jobData,
    BurstTensorData pinX,
    int offsetX,
    BurstTensorData pinO,
    int offsetO,
    FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationXO
{
    // Wait for writes to X and outstanding reads of O.
    JobHandle inputFence = GetFenceBeforeJobStartXO(pinX, pinO);
    // Resolve raw addresses inside the pinned tensor storage.
    void* ptrX = pinX.array.RawAddressAt(pinX.offset + offsetX);
    void* ptrO = pinO.array.RawAddressAt(pinO.offset + offsetO);
    JobHandle scheduled = ScheduleXOInternal(jobData, inputFence, ptrX, ptrO);
    if (fencingMode == FencingHelperMode.UpdateResourcesFencesOnScheduling)
        scheduled.SetXOFences(pinX, pinO);
    return scheduled;
}
/// <summary>
/// Schedules a single IJob reading X and writing O, honoring the resources'
/// existing fences. Optionally updates those fences on scheduling.
/// </summary>
internal static unsafe JobHandle ScheduleXO<T>(this T jobData,
    IDependableMemoryResource X,
    IDependableMemoryResource O,
    FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationXO
{
    // Wait for writes to X and outstanding reads of O.
    JobHandle inputFence = GetFenceBeforeJobStartXO(X, O);
    JobHandle scheduled = ScheduleXOInternal(jobData, inputFence, X.rawPtr, O.rawPtr);
    if (fencingMode == FencingHelperMode.UpdateResourcesFencesOnScheduling)
        scheduled.SetXOFences(X, O);
    return scheduled;
}
#endregion
}
#region Schedulling helper for parrallel jobs
/// <summary>
/// Scoped helper for scheduling several jobs that all write into ONE output
/// resource. Jobs are scheduled with CustomResourcesFencesHandling; the input
/// read-fences and the combined output write-fence are only committed back to
/// the resources in Dispose(). Not thread-safe: uses a shared static tracker,
/// so only one context may be alive at a time.
/// </summary>
internal struct ParallelJobsContext : IDisposable
{
    // Shared scratch map: input resource -> combined fence of all jobs reading it
    // inside the current context. Static (pre-sized to 100) to avoid per-context
    // allocations; must be empty between contexts (asserted in the constructor).
    internal static Dictionary<IDependableMemoryResource, JobHandle> s_ReadDependencyTracker =
        new Dictionary<IDependableMemoryResource, JobHandle>(100);
    // The single resource every job scheduled through this context writes to.
    private readonly IDependableMemoryResource outputResource;
    // Combined fence of all jobs writing to outputResource.
    private JobHandle combinedJobFence;
    /// <summary>Opens a context for jobs writing to <paramref name="output"/>.</summary>
    public ParallelJobsContext(IDependableMemoryResource output)
    {
        outputResource = output;
        combinedJobFence = new JobHandle();
        Assert.AreEqual(0, s_ReadDependencyTracker.Count,
            "s_ReadDependencyTracker should be empty meaning ParrallelJobs was not disposed properly.");
    }
    //For now only CopyStrideJobHelper and tests need ParallelJobsContext. If this code need to be duplicated for more case in the future:
    //- Maybe add generic version by having CopyStrideJobHelper and other helper struct implement an interface (but beware of GC).
    //- Or make ParallelJobsContext partial and code generated by jobs template.
    /// <summary>Schedules a strided-copy job into the output; fencing is deferred to Dispose().</summary>
    public JobHandle ScheduleXO(
        BurstCPUOps.CopyStrideJobHelper jobData,//See comment above.
        BurstTensorData pinX, int offsetX,
        BurstTensorData pinO, int offsetO)
    {
        Assert.IsTrue(pinO == outputResource);
        var jobFence = jobData.ScheduleXO(pinX, offsetX, pinO, offsetO, BurstSchedulingHelper.FencingHelperMode.CustomResourcesFencesHandling);
        TrackJobReadDependencies(pinX, jobFence);
        AddJobDependencyToOutputFence(jobFence);
        return jobFence;
    }
    /// <summary>Schedules a parallel-for X->O job into the output; fencing is deferred to Dispose().</summary>
    public JobHandle ScheduleXO<T>(
        T jobData,
        BurstTensorData pinX,
        BurstTensorData pinO,
        int arrayLength, int innerloopBatchCount)
        where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXO
    {
        Assert.IsTrue(pinO == outputResource);
        var jobFence = jobData.ScheduleXO(pinX, pinO, arrayLength, innerloopBatchCount, BurstSchedulingHelper.FencingHelperMode.CustomResourcesFencesHandling);
        TrackJobReadDependencies(pinX, jobFence);
        AddJobDependencyToOutputFence(jobFence);
        return jobFence;
    }
    /// <summary>Schedules a parallel-for X,B->O job into the output; fencing is deferred to Dispose().</summary>
    public JobHandle ScheduleXBO<T>(
        T jobData,
        BurstTensorData pinX,
        BurstTensorData pinB,
        BurstTensorData pinO,
        int arrayLength, int innerloopBatchCount)
        where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXBO
    {
        Assert.IsTrue(pinO == outputResource);
        var jobFence = jobData.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerloopBatchCount, BurstSchedulingHelper.FencingHelperMode.CustomResourcesFencesHandling);
        TrackJobReadDependencies(pinX, jobFence);
        TrackJobReadDependencies(pinB, jobFence);
        AddJobDependencyToOutputFence(jobFence);
        return jobFence;
    }
    internal void AddJobDependencyToOutputFence(JobHandle jobFence)
    {
        //Once all jobs writing to O will be done, further jobs will be able to read from O.
        //We combine job fences from all job writing to O here and assign to O.fence in Dispose().
        combinedJobFence = JobHandle.CombineDependencies(combinedJobFence, jobFence);
    }
    internal void TrackJobReadDependencies(IDependableMemoryResource T, JobHandle jobFence)
    {
        //Once all jobs reading from T will be done, further jobs will be able to write to T.
        //We combine job fences from all jobs reading from T here and assign to T.reuse in Dispose().
        if (T != null)
        {
            if (s_ReadDependencyTracker.ContainsKey(T))
                s_ReadDependencyTracker[T] = JobHandle.CombineDependencies(s_ReadDependencyTracker[T], jobFence);
            else
                s_ReadDependencyTracker[T] = jobFence;
        }
    }
    /// <summary>
    /// Commits the accumulated fences: each input's reuse fence and the output's
    /// read fence, then resets the shared tracker for the next context.
    /// </summary>
    public void Dispose()
    {
        foreach (var key in s_ReadDependencyTracker.Keys)
        {
            key.reuse = s_ReadDependencyTracker[key];
        }
        outputResource.fence = combinedJobFence;
        s_ReadDependencyTracker.Clear();
    }
}
#endregion
#region Memory allocation wrapper usable by job fencing helpers
/// <summary>
/// Unmanaged memory buffer that carries job read/write fences, so it can be used
/// directly with the BurstSchedulingHelper fencing helpers.
/// Call Allocate() to acquire memory; the owner is responsible for freeing the
/// underlying memory and calling ClearState() afterwards (asserted in Allocate).
/// </summary>
internal unsafe class FencedMemoryAlloc : IDependableMemoryResource
{
    private JobHandle m_ReadFence;   // jobs reading this buffer must wait on writers
    private JobHandle m_WriteFence;  // jobs writing this buffer must wait on readers
    private void* data;
    public void* rawPtr => data;
    // Typed accessors assert that the buffer was allocated with the matching DataType.
    public half* halfdata { get { Assert.AreEqual(DataType.Half, type); return (half*) data; } }
    public float* floatdata { get { Assert.AreEqual(DataType.Float, type);return (float*) data; } }
    public DataType type;
    public int elementCount;
    public int elementSize;
    /// <inheritdoc/>
    // Note: setting 'fence' also resets 'reuse' to the same handle.
    public JobHandle fence { get { return m_ReadFence; } set { m_ReadFence = value; m_WriteFence = value; } }
    /// <inheritdoc/>
    public JobHandle reuse { get { return m_WriteFence; } set { m_WriteFence = value; } }
    /// <summary>
    /// Allocates numElement items of dataType with the given alignment/allocator
    /// and resets both fences. Asserts if a previous allocation was not cleared.
    /// </summary>
    public void Allocate(int numElement, DataType dataType, int alignment, Allocator allocator)
    {
        m_ReadFence = new JobHandle();
        m_WriteFence = new JobHandle();
        elementCount = numElement;
        elementSize = BarracudaArray.DataItemSize(dataType);
        type = dataType;
        Assert.IsTrue(data == null, "Please call ClearState() when freeing underlying memory.");
        Assert.IsTrue(alignment % elementSize == 0);
        data = UnsafeUtility.Malloc(elementCount * elementSize, alignment, allocator);
        Assert.IsTrue(data != null);
    }
    /// <summary>
    /// Resets fences and metadata and forgets the data pointer.
    /// Does NOT free the memory — the owner must free it before calling this.
    /// </summary>
    public void ClearState()
    {
        m_ReadFence = new JobHandle();
        m_WriteFence = new JobHandle();
        elementCount = 0;
        elementSize = 0;
        type = DataType.Float;
        data = null;
    }
    public FencedMemoryAlloc()
    {
        ClearState();
    }
}
#endregion
} // namespace Barracuda

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 5071bbeadb81d034f827f20e95c52ee6
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 5211ff135b3b87f42be25a8505a28df7
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: d05274a6ecc82404abe715a573ea8e74
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,864 +0,0 @@
// This is auto-generated -- do not modify directly
using UnityEngine;
using System;
using Unity.Burst;
using Unity.Burst.Intrinsics;
using Unity.Collections;
using Unity.Jobs;
using Unity.Mathematics;
using static Unity.Burst.Intrinsics.X86.Avx;
using static Unity.Burst.Intrinsics.X86.Fma;
using Unity.Collections.LowLevel.Unsafe;
using Unity.Jobs.LowLevel.Unsafe;
using FencingHelperMode = Unity.Barracuda.BurstSchedulingHelper.FencingHelperMode;
namespace Unity.Barracuda {
public partial class BurstCPUOps
{
#region Dense/Conv jobs declaration for mode: _Full_Float
/// <summary>
/// Entry point for the depthwise-Conv2D Burst jobs (auto-generated file):
/// dispatches to the job variant matching the precision (float/half) of the
/// activations (X/O) and the weights (S/B).
/// </summary>
internal partial struct DepthwiseConv2DJobHelper
{
    /// <summary>Pins the tensors to Burst-accessible memory, then schedules the job.</summary>
    public JobHandle ScheduleXSBO(Tensor X, Tensor S, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var pinX = Pin(X);
        var pinS = Pin(S);
        var pinB = Pin(B);
        // O is the destination: pinned with uploadCache disabled.
        var pinO = Pin(O, uploadCache: false);
        return ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
    }
    /// <summary>
    /// Picks the precision-specialized job. Supported combinations: full float,
    /// full half, and float activations with half weights. Half activations with
    /// float weights is rejected via assert.
    /// </summary>
    public JobHandle ScheduleXSBO(BurstTensorData pinX, BurstTensorData pinS, BurstTensorData pinB, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool AHalf = pinX.array.Type == DataType.Half;
        bool WHalf = pinS.array.Type == DataType.Half;
        bool BHalf = pinB.array.Type == DataType.Half;
        bool OHalf = pinO.array.Type == DataType.Half;
        // Activations and output must match; weights and bias must match.
        UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
        UnityEngine.Assertions.Assert.AreEqual(WHalf, BHalf);
        if (AHalf && WHalf)
        {
            var job = new DepthwiseConv2DJob_Full_Half();
            job.data = this;
            return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else if (!AHalf && WHalf)
        {
            var job = new DepthwiseConv2DJob_ActAsFloat_WeightAsHalf();
            job.data = this;
            return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else if (!AHalf && !WHalf)
        {
            var job = new DepthwiseConv2DJob_Full_Float();
            job.data = this;
            return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else //if (AHalf && !WHalf)
        {
            UnityEngine.Assertions.Assert.IsTrue(false, "DepthwiseConv2DJob does not support activation as half while weights are floats.");
            return new JobHandle();
        }
    }
}
// Depthwise 2D convolution, float activations / float weights.
// Parallelized over output rows: Execute(y) produces one output row across
// all batches and columns, accumulating per-channel products into a TempJob
// scratch buffer before adding the bias.
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct DepthwiseConv2DJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource S { get; set; } float* Sptr => S.ptrfloat;
    public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public DepthwiseConv2DJobHelper data;
    const int unrollSize = 16;
    public void Execute(int y)
    {
        // One accumulator per kernel (channel), reused for every output position in this row.
        int accumulatorMemSize = data.kernelCount * sizeof(float);
        float* outputAccumulators = (float*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
        for (int n = 0; n < data.outBatch; ++n)
        for (int x = 0; x < data.outWidth; ++x)
        {
            // reset accumulators to 0
            UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
            // gather X * K results in accumulators
            for (int dy = 0; dy < data.kernelHeight; ++dy)
            {
                int readY = y * data.strideY + dy - data.padY;
                if (readY < 0) continue;
                if (readY >= data.inHeight) continue;
                for (int dx = 0; dx < data.kernelWidth; ++dx)
                {
                    // NOTE(review): the horizontal offset subtracts data.padY, while the
                    // vertical offset above also uses padY — this looks like it should be
                    // a padX field. Confirm against the job generator template.
                    int readX = x * data.strideX + dx - data.padY;
                    if (readX < 0) continue;
                    if (readX >= data.inWidth) continue;
                    float* dst = outputAccumulators;
                    float* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
                    float* kernel = Sptr + dy * data.kernelStrideH + dx * data.kernelStrideW;
                    int k = 0;
                    for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
                    for (int q = 0; q < unrollSize; q++, src++, dst++, kernel++)
                        *dst += (float)((*src) * (*kernel));
                    for (; k < data.kernelCount; k++, src++, dst++, kernel++) // remainder of kernelCount loop
                        *dst += (float)((*src) * (*kernel));
                }
            }
            { // write accumulators to memory and add bias
                int k = 0;
                float* src = outputAccumulators;
                float* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
                float* bias = Bptr;
                for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
                for (int q = 0; q < unrollSize; q++, src++, dst++, bias++)
                    *dst = (float)((*src) + (*bias));
                for (; k < data.kernelCount; k++, src++, dst++, bias++) // remainder of kernelCount loop
                    *dst = (float)((*src) + (*bias));
            }
        }
        UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
    }
}
/// <summary>
/// Entry point for the Dense (rank-3 matmul + bias) Burst jobs (auto-generated
/// file): dispatches to the job variant matching the precision (float/half) of
/// the activations (X/O) and the weights (S/B).
/// </summary>
internal partial struct Dense3JobHelper
{
    /// <summary>Pins the tensors to Burst-accessible memory, then schedules the job.</summary>
    public JobHandle ScheduleXSBO(Tensor X, Tensor S, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var pinX = Pin(X);
        var pinS = Pin(S);
        var pinB = Pin(B);
        // O is the destination: pinned with uploadCache disabled.
        var pinO = Pin(O, uploadCache: false);
        return ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
    }
    /// <summary>
    /// Picks the precision-specialized job. Supported combinations: full float,
    /// full half, and float activations with half weights. Half activations with
    /// float weights is rejected via assert.
    /// </summary>
    public JobHandle ScheduleXSBO(BurstTensorData pinX, BurstTensorData pinS, BurstTensorData pinB, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool AHalf = pinX.array.Type == DataType.Half;
        bool WHalf = pinS.array.Type == DataType.Half;
        bool BHalf = pinB.array.Type == DataType.Half;
        bool OHalf = pinO.array.Type == DataType.Half;
        // Activations and output must match; weights and bias must match.
        UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
        UnityEngine.Assertions.Assert.AreEqual(WHalf, BHalf);
        if (AHalf && WHalf)
        {
            var job = new Dense3Job_Full_Half();
            job.data = this;
            return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else if (!AHalf && WHalf)
        {
            var job = new Dense3Job_ActAsFloat_WeightAsHalf();
            job.data = this;
            return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else if (!AHalf && !WHalf)
        {
            var job = new Dense3Job_Full_Float();
            job.data = this;
            return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else //if (AHalf && !WHalf)
        {
            UnityEngine.Assertions.Assert.IsTrue(false, "Dense3Job does not support activation as half while weights are floats.");
            return new JobHandle();
        }
    }
}
// Batched dense layer, float activations / float weights: S = A·B + C with the
// bias C broadcast per output column. Computed as a blocked (16x16) matrix
// multiply; each Execute(threadID) produces one 16x16 output tile of one batch.
// Layout (from the index expressions below): A and S are addressed as
// [row + stride*col] (stride AM/SM), B as [row*BN + col] (stride BN).
// Edge tiles are staged through zero-padded scratch blocks (AllocBlock/FreeBlock).
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct Dense3Job_Full_Float : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource S { get; set; } float* Sptr => S.ptrfloat;
    public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public Dense3JobHelper data;
    public const int blockSize = 16;
    public void Execute(int threadID)
    {
        // Local aliases; note these shadow the resource properties:
        // A = input activations, B = weights, C = bias, S = output.
        float* A = this.Xptr;
        float* B = this.Sptr;
        float* C = this.Bptr;
        float* S = this.Optr;
        int AM = data.AM;
        int BM = data.BM;
        int SM = data.SM;
        int AN = data.AN;
        int BN = data.BN;
        int SN = data.SN;
        // Decompose threadID into (batch, tile-row i, tile-col j).
        int dispatchThreadXY = data.dispatchThreadX * data.dispatchThreadY;
        int batch = (threadID / dispatchThreadXY);
        int i = (threadID % dispatchThreadXY) % data.dispatchThreadX;
        int j = (threadID % dispatchThreadXY) / data.dispatchThreadX;
        int batchOffSetA = (batch * AM * AN);
        int batchOffSetS = (batch * SM * SN);
        int rowA = i * blockSize;
        int colB = j * blockSize;
        unsafe
        {
            float* blockTempA = null;
            float* blockTempB = null;
            float* blockTempS = null;
            float* blockS = S + rowA + SM * colB + batchOffSetS;
            int strideS = SM;
            if (rowA + blockSize > SM || colB + blockSize > SN) // copy remainder of C into zero-padded block
            {
                blockTempS = AllocBlock(blockSize, blockSize);
                strideS = blockSize;
                blockS = blockTempS;
            }
            // Seed the output tile with the bias (broadcast along rows).
            for (int y = 0; y < blockSize; y++)
            for (int x = 0; x < blockSize; x++)
                blockS[x + strideS * y] = (float)((colB + y) < BN ? C[colB + y] : 0.0f);
            for (int l = 0; l < AN; l += blockSize) // inner-loop
            {
                float* blockA = A + rowA + AM * l + batchOffSetA;
                float* blockB = B + l * BN + colB;
                int strideA = AM;
                int strideB = BN;
                if (rowA + blockSize > AM || l + blockSize > AN) // copy remainder of A into zero-padded block
                {
                    if (blockTempA == null)
                        blockTempA = AllocBlock(blockSize, blockSize);
                    strideA = blockSize;
                    for (int y = 0; y < blockSize; y++)
                    for (int x = 0; x < blockSize; x++)
                        blockTempA[x + blockSize * y] = (float)(((rowA + x) < AM && (l + y < AN)) ? blockA[x + AM * y] : 0.0f);
                    blockA = blockTempA;
                }
                if (colB + blockSize > BN || l + blockSize > BM) // copy remainder of B into zero-padded block
                {
                    if (blockTempB == null)
                        blockTempB = AllocBlock(blockSize, blockSize);
                    strideB = blockSize;
                    for (int y = 0; y < blockSize; y++)
                    for (int x = 0; x < blockSize; x++)
                        blockTempB[x + blockSize * y] = (float)(((colB + x) < BN && (l + y < BM)) ? blockB[x + BN * y] : 0.0f);
                    blockB = blockTempB;
                }
                MultiplyBlockUnrollHx16(blockA, strideA, blockB, strideB, blockS, strideS);
            }
            if (blockS == blockTempS) // copy back
            {
                for (int y = 0; y < blockSize; y++)
                for (int x = 0; x < blockSize; x++)
                {
                    if (((rowA + x) < SM) && ((colB + y) < SN))
                        S[(rowA + x) + SM * (colB + y) + batchOffSetS] = blockTempS[x + blockSize * y];
                }
            }
            FreeBlock(blockTempA);
            FreeBlock(blockTempB);
            FreeBlock(blockTempS);
        }
    }
    // Accumulates one 16-wide strip: Sp[i, 0..15] += sum over l of Ap[i, l] * Bp[l, 0..15],
    // with the 16 column accumulators kept in registers.
    static void MultiplyBlockUnrollHx16(float* Ap, int Astride, float* Bp, int Bstride, float* Sp, int Sstride)
    {
        for (int i = 0; i < blockSize; i++)
        {
            float sum0 = *(Sp + i + Sstride * 0);
            float sum1 = *(Sp + i + Sstride * 1);
            float sum2 = *(Sp + i + Sstride * 2);
            float sum3 = *(Sp + i + Sstride * 3);
            float sum4 = *(Sp + i + Sstride * 4);
            float sum5 = *(Sp + i + Sstride * 5);
            float sum6 = *(Sp + i + Sstride * 6);
            float sum7 = *(Sp + i + Sstride * 7);
            float sum8 = *(Sp + i + Sstride * 8);
            float sum9 = *(Sp + i + Sstride * 9);
            float sumA = *(Sp + i + Sstride * 10);
            float sumB = *(Sp + i + Sstride * 11);
            float sumC = *(Sp + i + Sstride * 12);
            float sumD = *(Sp + i + Sstride * 13);
            float sumE = *(Sp + i + Sstride * 14);
            float sumF = *(Sp + i + Sstride * 15);
            for (int l = 0; l < blockSize; l++)
            {
                float A = *(Ap + i + Astride * l);
                float B0 = *(Bp + l * Bstride + 0);
                float B1 = *(Bp + l * Bstride + 1);
                float B2 = *(Bp + l * Bstride + 2);
                float B3 = *(Bp + l * Bstride + 3);
                float B4 = *(Bp + l * Bstride + 4);
                float B5 = *(Bp + l * Bstride + 5);
                float B6 = *(Bp + l * Bstride + 6);
                float B7 = *(Bp + l * Bstride + 7);
                float B8 = *(Bp + l * Bstride + 8);
                float B9 = *(Bp + l * Bstride + 9);
                float BA = *(Bp + l * Bstride + 10);
                float BB = *(Bp + l * Bstride + 11);
                float BC = *(Bp + l * Bstride + 12);
                float BD = *(Bp + l * Bstride + 13);
                float BE = *(Bp + l * Bstride + 14);
                float BF = *(Bp + l * Bstride + 15);
                sum0 += A * B0;
                sum1 += A * B1;
                sum2 += A * B2;
                sum3 += A * B3;
                sum4 += A * B4;
                sum5 += A * B5;
                sum6 += A * B6;
                sum7 += A * B7;
                sum8 += A * B8;
                sum9 += A * B9;
                sumA += A * BA;
                sumB += A * BB;
                sumC += A * BC;
                sumD += A * BD;
                sumE += A * BE;
                sumF += A * BF;
            }
            *(Sp + i + Sstride * 0 ) = (float)(sum0);
            *(Sp + i + Sstride * 1 ) = (float)(sum1);
            *(Sp + i + Sstride * 2 ) = (float)(sum2);
            *(Sp + i + Sstride * 3 ) = (float)(sum3);
            *(Sp + i + Sstride * 4 ) = (float)(sum4);
            *(Sp + i + Sstride * 5 ) = (float)(sum5);
            *(Sp + i + Sstride * 6 ) = (float)(sum6);
            *(Sp + i + Sstride * 7 ) = (float)(sum7);
            *(Sp + i + Sstride * 8 ) = (float)(sum8);
            *(Sp + i + Sstride * 9 ) = (float)(sum9);
            *(Sp + i + Sstride * 10) = (float)(sumA);
            *(Sp + i + Sstride * 11) = (float)(sumB);
            *(Sp + i + Sstride * 12) = (float)(sumC);
            *(Sp + i + Sstride * 13) = (float)(sumD);
            *(Sp + i + Sstride * 14) = (float)(sumE);
            *(Sp + i + Sstride * 15) = (float)(sumF);
        }
    }
}
#endregion
#region Dense/Conv jobs declaration for mode: _ActAsFloat_WeightAsHalf
// Depthwise 2D convolution, float activations / half weights+bias.
// Same algorithm as DepthwiseConv2DJob_Full_Float: parallelized over output
// rows, per-channel accumulation in a TempJob scratch buffer, then bias add.
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct DepthwiseConv2DJob_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public DepthwiseConv2DJobHelper data;
    const int unrollSize = 16;
    public void Execute(int y)
    {
        // One accumulator per kernel (channel), reused for every output position in this row.
        int accumulatorMemSize = data.kernelCount * sizeof(float);
        float* outputAccumulators = (float*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
        for (int n = 0; n < data.outBatch; ++n)
        for (int x = 0; x < data.outWidth; ++x)
        {
            // reset accumulators to 0
            UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
            // gather X * K results in accumulators
            for (int dy = 0; dy < data.kernelHeight; ++dy)
            {
                int readY = y * data.strideY + dy - data.padY;
                if (readY < 0) continue;
                if (readY >= data.inHeight) continue;
                for (int dx = 0; dx < data.kernelWidth; ++dx)
                {
                    // NOTE(review): the horizontal offset subtracts data.padY (same field as
                    // the vertical axis) — looks like it should be a padX field. Confirm
                    // against the job generator template.
                    int readX = x * data.strideX + dx - data.padY;
                    if (readX < 0) continue;
                    if (readX >= data.inWidth) continue;
                    float* dst = outputAccumulators;
                    float* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
                    half* kernel = Sptr + dy * data.kernelStrideH + dx * data.kernelStrideW;
                    int k = 0;
                    for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
                    for (int q = 0; q < unrollSize; q++, src++, dst++, kernel++)
                        *dst += (float)((*src) * (*kernel));
                    for (; k < data.kernelCount; k++, src++, dst++, kernel++) // remainder of kernelCount loop
                        *dst += (float)((*src) * (*kernel));
                }
            }
            { // write accumulators to memory and add bias
                int k = 0;
                float* src = outputAccumulators;
                float* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
                half* bias = Bptr;
                for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
                for (int q = 0; q < unrollSize; q++, src++, dst++, bias++)
                    *dst = (float)((*src) + (*bias));
                for (; k < data.kernelCount; k++, src++, dst++, bias++) // remainder of kernelCount loop
                    *dst = (float)((*src) + (*bias));
            }
        }
        UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
    }
}
// Batched dense layer, float activations / half weights+bias: S = A·B + C.
// Same 16x16 blocked algorithm as Dense3Job_Full_Float; the weight (B) and
// bias (C) pointers are half precision and products are accumulated in float.
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct Dense3Job_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public Dense3JobHelper data;
    public const int blockSize = 16;
    public void Execute(int threadID)
    {
        // Local aliases; note these shadow the resource properties:
        // A = input activations, B = weights (half), C = bias (half), S = output.
        float* A = this.Xptr;
        half* B = this.Sptr;
        half* C = this.Bptr;
        float* S = this.Optr;
        int AM = data.AM;
        int BM = data.BM;
        int SM = data.SM;
        int AN = data.AN;
        int BN = data.BN;
        int SN = data.SN;
        // Decompose threadID into (batch, tile-row i, tile-col j).
        int dispatchThreadXY = data.dispatchThreadX * data.dispatchThreadY;
        int batch = (threadID / dispatchThreadXY);
        int i = (threadID % dispatchThreadXY) % data.dispatchThreadX;
        int j = (threadID % dispatchThreadXY) / data.dispatchThreadX;
        int batchOffSetA = (batch * AM * AN);
        int batchOffSetS = (batch * SM * SN);
        int rowA = i * blockSize;
        int colB = j * blockSize;
        unsafe
        {
            float* blockTempA = null;
            half* blockTempB = null;
            float* blockTempS = null;
            float* blockS = S + rowA + SM * colB + batchOffSetS;
            int strideS = SM;
            if (rowA + blockSize > SM || colB + blockSize > SN) // copy remainder of C into zero-padded block
            {
                blockTempS = AllocBlock(blockSize, blockSize);
                strideS = blockSize;
                blockS = blockTempS;
            }
            // Seed the output tile with the bias (broadcast along rows).
            for (int y = 0; y < blockSize; y++)
            for (int x = 0; x < blockSize; x++)
                blockS[x + strideS * y] = (float)((colB + y) < BN ? C[colB + y] : 0.0f);
            for (int l = 0; l < AN; l += blockSize) // inner-loop
            {
                float* blockA = A + rowA + AM * l + batchOffSetA;
                half* blockB = B + l * BN + colB;
                int strideA = AM;
                int strideB = BN;
                if (rowA + blockSize > AM || l + blockSize > AN) // copy remainder of A into zero-padded block
                {
                    if (blockTempA == null)
                        blockTempA = AllocBlock(blockSize, blockSize);
                    strideA = blockSize;
                    for (int y = 0; y < blockSize; y++)
                    for (int x = 0; x < blockSize; x++)
                        blockTempA[x + blockSize * y] = (float)(((rowA + x) < AM && (l + y < AN)) ? blockA[x + AM * y] : 0.0f);
                    blockA = blockTempA;
                }
                if (colB + blockSize > BN || l + blockSize > BM) // copy remainder of B into zero-padded block
                {
                    if (blockTempB == null)
                        blockTempB = AllocBlockHalf(blockSize, blockSize);
                    strideB = blockSize;
                    for (int y = 0; y < blockSize; y++)
                    for (int x = 0; x < blockSize; x++)
                        blockTempB[x + blockSize * y] = (half)(((colB + x) < BN && (l + y < BM)) ? blockB[x + BN * y] : 0.0f);
                    blockB = blockTempB;
                }
                MultiplyBlockUnrollHx16(blockA, strideA, blockB, strideB, blockS, strideS);
            }
            if (blockS == blockTempS) // copy back
            {
                for (int y = 0; y < blockSize; y++)
                for (int x = 0; x < blockSize; x++)
                {
                    if (((rowA + x) < SM) && ((colB + y) < SN))
                        S[(rowA + x) + SM * (colB + y) + batchOffSetS] = blockTempS[x + blockSize * y];
                }
            }
            FreeBlock(blockTempA);
            FreeBlock(blockTempB);
            FreeBlock(blockTempS);
        }
    }
    // Accumulates one 16-wide strip: Sp[i, 0..15] += sum over l of Ap[i, l] * Bp[l, 0..15].
    // B values are half and widen to float in the products.
    static void MultiplyBlockUnrollHx16(float* Ap, int Astride, half* Bp, int Bstride, float* Sp, int Sstride)
    {
        for (int i = 0; i < blockSize; i++)
        {
            float sum0 = *(Sp + i + Sstride * 0);
            float sum1 = *(Sp + i + Sstride * 1);
            float sum2 = *(Sp + i + Sstride * 2);
            float sum3 = *(Sp + i + Sstride * 3);
            float sum4 = *(Sp + i + Sstride * 4);
            float sum5 = *(Sp + i + Sstride * 5);
            float sum6 = *(Sp + i + Sstride * 6);
            float sum7 = *(Sp + i + Sstride * 7);
            float sum8 = *(Sp + i + Sstride * 8);
            float sum9 = *(Sp + i + Sstride * 9);
            float sumA = *(Sp + i + Sstride * 10);
            float sumB = *(Sp + i + Sstride * 11);
            float sumC = *(Sp + i + Sstride * 12);
            float sumD = *(Sp + i + Sstride * 13);
            float sumE = *(Sp + i + Sstride * 14);
            float sumF = *(Sp + i + Sstride * 15);
            for (int l = 0; l < blockSize; l++)
            {
                float A = *(Ap + i + Astride * l);
                float B0 = *(Bp + l * Bstride + 0);
                float B1 = *(Bp + l * Bstride + 1);
                float B2 = *(Bp + l * Bstride + 2);
                float B3 = *(Bp + l * Bstride + 3);
                float B4 = *(Bp + l * Bstride + 4);
                float B5 = *(Bp + l * Bstride + 5);
                float B6 = *(Bp + l * Bstride + 6);
                float B7 = *(Bp + l * Bstride + 7);
                float B8 = *(Bp + l * Bstride + 8);
                float B9 = *(Bp + l * Bstride + 9);
                float BA = *(Bp + l * Bstride + 10);
                float BB = *(Bp + l * Bstride + 11);
                float BC = *(Bp + l * Bstride + 12);
                float BD = *(Bp + l * Bstride + 13);
                float BE = *(Bp + l * Bstride + 14);
                float BF = *(Bp + l * Bstride + 15);
                sum0 += A * B0;
                sum1 += A * B1;
                sum2 += A * B2;
                sum3 += A * B3;
                sum4 += A * B4;
                sum5 += A * B5;
                sum6 += A * B6;
                sum7 += A * B7;
                sum8 += A * B8;
                sum9 += A * B9;
                sumA += A * BA;
                sumB += A * BB;
                sumC += A * BC;
                sumD += A * BD;
                sumE += A * BE;
                sumF += A * BF;
            }
            *(Sp + i + Sstride * 0 ) = (float)(sum0);
            *(Sp + i + Sstride * 1 ) = (float)(sum1);
            *(Sp + i + Sstride * 2 ) = (float)(sum2);
            *(Sp + i + Sstride * 3 ) = (float)(sum3);
            *(Sp + i + Sstride * 4 ) = (float)(sum4);
            *(Sp + i + Sstride * 5 ) = (float)(sum5);
            *(Sp + i + Sstride * 6 ) = (float)(sum6);
            *(Sp + i + Sstride * 7 ) = (float)(sum7);
            *(Sp + i + Sstride * 8 ) = (float)(sum8);
            *(Sp + i + Sstride * 9 ) = (float)(sum9);
            *(Sp + i + Sstride * 10) = (float)(sumA);
            *(Sp + i + Sstride * 11) = (float)(sumB);
            *(Sp + i + Sstride * 12) = (float)(sumC);
            *(Sp + i + Sstride * 13) = (float)(sumD);
            *(Sp + i + Sstride * 14) = (float)(sumE);
            *(Sp + i + Sstride * 15) = (float)(sumF);
        }
    }
}
#endregion
#region Dense/Conv jobs declaration for mode: _Full_Half
// Depthwise 2D convolution, half activations / half weights.
// Same algorithm as DepthwiseConv2DJob_Full_Float, but all data and the
// per-channel accumulators are half precision (accumulation in half).
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct DepthwiseConv2DJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
    public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
    public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
    public DepthwiseConv2DJobHelper data;
    const int unrollSize = 16;
    public void Execute(int y)
    {
        // One accumulator per kernel (channel), reused for every output position in this row.
        int accumulatorMemSize = data.kernelCount * sizeof(half);
        half* outputAccumulators = (half*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
        for (int n = 0; n < data.outBatch; ++n)
        for (int x = 0; x < data.outWidth; ++x)
        {
            // reset accumulators to 0
            UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
            // gather X * K results in accumulators
            for (int dy = 0; dy < data.kernelHeight; ++dy)
            {
                int readY = y * data.strideY + dy - data.padY;
                if (readY < 0) continue;
                if (readY >= data.inHeight) continue;
                for (int dx = 0; dx < data.kernelWidth; ++dx)
                {
                    // NOTE(review): the horizontal offset subtracts data.padY (same field as
                    // the vertical axis) — looks like it should be a padX field. Confirm
                    // against the job generator template.
                    int readX = x * data.strideX + dx - data.padY;
                    if (readX < 0) continue;
                    if (readX >= data.inWidth) continue;
                    half* dst = outputAccumulators;
                    half* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
                    half* kernel = Sptr + dy * data.kernelStrideH + dx * data.kernelStrideW;
                    int k = 0;
                    for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
                    for (int q = 0; q < unrollSize; q++, src++, dst++, kernel++)
                        *dst += (half)((*src) * (*kernel));
                    for (; k < data.kernelCount; k++, src++, dst++, kernel++) // remainder of kernelCount loop
                        *dst += (half)((*src) * (*kernel));
                }
            }
            { // write accumulators to memory and add bias
                int k = 0;
                half* src = outputAccumulators;
                half* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
                half* bias = Bptr;
                for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
                for (int q = 0; q < unrollSize; q++, src++, dst++, bias++)
                    *dst = (half)((*src) + (*bias));
                for (; k < data.kernelCount; k++, src++, dst++, bias++) // remainder of kernelCount loop
                    *dst = (half)((*src) + (*bias));
            }
        }
        UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
    }
}
/// <summary>
/// Burst-compiled blocked matrix multiply for Dense layers, half-precision storage:
/// S = A * B (+ C broadcast per output column), computed in 16x16 tiles.
/// Each thread owns one (row-tile, column-tile, batch) combination; edge tiles that
/// overhang the matrix bounds are copied into zero-padded scratch blocks first.
/// Inner products accumulate in float (see MultiplyBlockUnrollHx16) and are stored as half.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct Dense3Job_Full_Half : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;   // A: left operand
    public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;   // B: right operand (weights)
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;   // C: bias, broadcast per column
    public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;  // S: output
    public Dense3JobHelper data; // matrix sizes and thread-dispatch geometry (declared elsewhere)
    public const int blockSize = 16; // tile edge length
    /// <summary>
    /// Computes one 16x16 output tile. threadID encodes (batch, tile row i, tile column j)
    /// via the dispatchThreadX/Y grid from <see cref="data"/>.
    /// </summary>
    public void Execute(int threadID)
    {
        // Local aliases matching the math notation: S = A * B + C.
        half* A = this.Xptr;
        half* B = this.Sptr;
        half* C = this.Bptr;
        half* S = this.Optr;
        int AM = data.AM;
        int BM = data.BM;
        int SM = data.SM;
        int AN = data.AN;
        int BN = data.BN;
        int SN = data.SN;
        // Decode this thread's (batch, tile-row, tile-column) from the flat thread index.
        int dispatchThreadXY = data.dispatchThreadX * data.dispatchThreadY;
        int batch = (threadID / dispatchThreadXY);
        int i = (threadID % dispatchThreadXY) % data.dispatchThreadX;
        int j = (threadID % dispatchThreadXY) / data.dispatchThreadX;
        int batchOffSetA = (batch * AM * AN);
        int batchOffSetS = (batch * SM * SN);
        int rowA = i * blockSize;
        int colB = j * blockSize;
        unsafe
        {
            // Scratch tiles, allocated lazily only when a tile overhangs a matrix edge.
            half* blockTempA = null;
            half* blockTempB = null;
            half* blockTempS = null;
            half* blockS = S + rowA + SM * colB + batchOffSetS;
            int strideS = SM;
            if (rowA + blockSize > SM || colB + blockSize > SN) // copy remainder of C into zero-padded block
            {
                blockTempS = AllocBlockHalf(blockSize, blockSize);
                strideS = blockSize;
                blockS = blockTempS;
            }
            // Seed the output tile with the bias (broadcast down each column; 0 past the edge).
            for (int y = 0; y < blockSize; y++)
                for (int x = 0; x < blockSize; x++)
                    blockS[x + strideS * y] = (half)((colB + y) < BN ? C[colB + y] : 0.0f);
            for (int l = 0; l < AN; l += blockSize) // inner-loop
            {
                half* blockA = A + rowA + AM * l + batchOffSetA;
                half* blockB = B + l * BN + colB;
                int strideA = AM;
                int strideB = BN;
                if (rowA + blockSize > AM || l + blockSize > AN) // copy remainder of A into zero-padded block
                {
                    if (blockTempA == null)
                        blockTempA = AllocBlockHalf(blockSize, blockSize);
                    strideA = blockSize;
                    for (int y = 0; y < blockSize; y++)
                        for (int x = 0; x < blockSize; x++)
                            blockTempA[x + blockSize * y] = (half)(((rowA + x) < AM && (l + y < AN)) ? blockA[x + AM * y] : 0.0f);
                    blockA = blockTempA;
                }
                if (colB + blockSize > BN || l + blockSize > BM) // copy remainder of B into zero-padded block
                {
                    if (blockTempB == null)
                        blockTempB = AllocBlockHalf(blockSize, blockSize);
                    strideB = blockSize;
                    for (int y = 0; y < blockSize; y++)
                        for (int x = 0; x < blockSize; x++)
                            blockTempB[x + blockSize * y] = (half)(((colB + x) < BN && (l + y < BM)) ? blockB[x + BN * y] : 0.0f);
                    blockB = blockTempB;
                }
                MultiplyBlockUnrollHx16(blockA, strideA, blockB, strideB, blockS, strideS);
            }
            if (blockS == blockTempS) // copy back
            {
                // Only the in-bounds portion of the padded scratch tile is written to S.
                for (int y = 0; y < blockSize; y++)
                    for (int x = 0; x < blockSize; x++)
                    {
                        if (((rowA + x) < SM) && ((colB + y) < SN))
                            S[(rowA + x) + SM * (colB + y) + batchOffSetS] = blockTempS[x + blockSize * y];
                    }
            }
            // FreeBlock tolerates the still-null pointers (tiles that were never needed).
            FreeBlock(blockTempA);
            FreeBlock(blockTempB);
            FreeBlock(blockTempS);
        }
    }
    /// <summary>
    /// Multiplies a 16-column slice: Sp += Ap * Bp for one 16x16 tile.
    /// Loads the 16 partial sums of each row into float registers, accumulates
    /// the whole inner dimension in float, then rounds back to half on store.
    /// </summary>
    static void MultiplyBlockUnrollHx16(half* Ap, int Astride, half* Bp, int Bstride, half* Sp, int Sstride)
    {
        for (int i = 0; i < blockSize; i++)
        {
            float sum0 = *(Sp + i + Sstride * 0);
            float sum1 = *(Sp + i + Sstride * 1);
            float sum2 = *(Sp + i + Sstride * 2);
            float sum3 = *(Sp + i + Sstride * 3);
            float sum4 = *(Sp + i + Sstride * 4);
            float sum5 = *(Sp + i + Sstride * 5);
            float sum6 = *(Sp + i + Sstride * 6);
            float sum7 = *(Sp + i + Sstride * 7);
            float sum8 = *(Sp + i + Sstride * 8);
            float sum9 = *(Sp + i + Sstride * 9);
            float sumA = *(Sp + i + Sstride * 10);
            float sumB = *(Sp + i + Sstride * 11);
            float sumC = *(Sp + i + Sstride * 12);
            float sumD = *(Sp + i + Sstride * 13);
            float sumE = *(Sp + i + Sstride * 14);
            float sumF = *(Sp + i + Sstride * 15);
            for (int l = 0; l < blockSize; l++)
            {
                float A = *(Ap + i + Astride * l);
                float B0 = *(Bp + l * Bstride + 0);
                float B1 = *(Bp + l * Bstride + 1);
                float B2 = *(Bp + l * Bstride + 2);
                float B3 = *(Bp + l * Bstride + 3);
                float B4 = *(Bp + l * Bstride + 4);
                float B5 = *(Bp + l * Bstride + 5);
                float B6 = *(Bp + l * Bstride + 6);
                float B7 = *(Bp + l * Bstride + 7);
                float B8 = *(Bp + l * Bstride + 8);
                float B9 = *(Bp + l * Bstride + 9);
                float BA = *(Bp + l * Bstride + 10);
                float BB = *(Bp + l * Bstride + 11);
                float BC = *(Bp + l * Bstride + 12);
                float BD = *(Bp + l * Bstride + 13);
                float BE = *(Bp + l * Bstride + 14);
                float BF = *(Bp + l * Bstride + 15);
                sum0 += A * B0;
                sum1 += A * B1;
                sum2 += A * B2;
                sum3 += A * B3;
                sum4 += A * B4;
                sum5 += A * B5;
                sum6 += A * B6;
                sum7 += A * B7;
                sum8 += A * B8;
                sum9 += A * B9;
                sumA += A * BA;
                sumB += A * BB;
                sumC += A * BC;
                sumD += A * BD;
                sumE += A * BE;
                sumF += A * BF;
            }
            *(Sp + i + Sstride * 0 ) = (half)(sum0);
            *(Sp + i + Sstride * 1 ) = (half)(sum1);
            *(Sp + i + Sstride * 2 ) = (half)(sum2);
            *(Sp + i + Sstride * 3 ) = (half)(sum3);
            *(Sp + i + Sstride * 4 ) = (half)(sum4);
            *(Sp + i + Sstride * 5 ) = (half)(sum5);
            *(Sp + i + Sstride * 6 ) = (half)(sum6);
            *(Sp + i + Sstride * 7 ) = (half)(sum7);
            *(Sp + i + Sstride * 8 ) = (half)(sum8);
            *(Sp + i + Sstride * 9 ) = (half)(sum9);
            *(Sp + i + Sstride * 10) = (half)(sumA);
            *(Sp + i + Sstride * 11) = (half)(sumB);
            *(Sp + i + Sstride * 12) = (half)(sumC);
            *(Sp + i + Sstride * 13) = (half)(sumD);
            *(Sp + i + Sstride * 14) = (half)(sumE);
            *(Sp + i + Sstride * 15) = (half)(sumF);
        }
    }
}
#endregion
}
}

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 417ca864422a2384ab3013114bf9f845
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 30d1de61c64693a4895a66fecf45a004
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,890 +0,0 @@
// This is auto-generated -- do not modify directly
using UnityEngine;
using System;
using Unity.Burst;
using Unity.Burst.Intrinsics;
using Unity.Collections;
using Unity.Jobs;
using Unity.Mathematics;
using static Unity.Burst.Intrinsics.X86.Avx;
using static Unity.Burst.Intrinsics.X86.Fma;
using Unity.Collections.LowLevel.Unsafe;
using Unity.Jobs.LowLevel.Unsafe;
using FencingHelperMode = Unity.Barracuda.BurstSchedulingHelper.FencingHelperMode;
namespace Unity.Barracuda {
public partial class BurstCPUOps
{
#region Reduce jobs declaration for mode: _Full_Float
/// <summary>
/// Scheduling helper for ReduceMax (FencedMemoryAlloc output overload):
/// dispatches to the Half or Float job variant based on the resources' element type.
/// </summary>
internal partial struct ReduceMaxJobHelper
{
    /// <summary>
    /// Schedules a ReduceMax job reading <paramref name="pinX"/> and writing <paramref name="pinO"/>.
    /// <paramref name="arrayLength"/> and <paramref name="innerBatchCount"/> are forwarded
    /// to the underlying parallel-for scheduler.
    /// </summary>
    public JobHandle ScheduleXO(BurstTensorData pinX, FencedMemoryAlloc pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool AHalf = pinX.array.Type == DataType.Half;
        bool OHalf = pinO.type == DataType.Half;
        // Mixed input/output precision is not supported by this job family.
        UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
        if (AHalf)
        {
            var job = new ReduceMaxJob_Full_Half();
            job.data = this;
            return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else
        {
            var job = new ReduceMaxJob_Full_Float();
            job.data = this;
            return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
    }
}
/// <summary>
/// Scheduling helper for ReduceMax (Tensor / BurstTensorData overloads):
/// pins the tensors and dispatches to the Half or Float job variant.
/// </summary>
internal partial struct ReduceMaxJobHelper
{
    /// <summary>Convenience overload: pins X (with upload) and O (no cache upload), then schedules.</summary>
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var pinX = Pin(X);
        var pinO = Pin(O, uploadCache: false);
        return ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
    /// <summary>Schedules the precision-matched ReduceMax job over pinned tensor data.</summary>
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool AHalf = pinX.array.Type == DataType.Half;
        bool OHalf = pinO.array.Type == DataType.Half;
        // Mixed input/output precision is not supported by this job family.
        UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
        if (AHalf)
        {
            var job = new ReduceMaxJob_Full_Half();
            job.data = this;
            return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else
        {
            var job = new ReduceMaxJob_Full_Float();
            job.data = this;
            return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
    }
}
/// <summary>
/// Burst job computing a max-reduction along one axis, float in / float out.
/// One invocation produces one output element.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ReduceMaxJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;  // input
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat; // output
    public ReduceMaxJobHelper data; // offsetReduce / reduceDim geometry
    /// <summary>
    /// Decomposes i into inner offset x and outer index y, then takes the max of
    /// reduceDim input values spaced offsetReduce apart.
    /// </summary>
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = i / data.offsetReduce;
        float maxV = float.MinValue;
        for (int z = 0; z < data.reduceDim; ++z)
        {
            float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
            maxV = math.max(maxV, v);
        }
        Optr[y * data.offsetReduce + x] = (float)maxV;
    }
}
/// <summary>
/// Scheduling helper for ReduceSum: pins the tensors and dispatches to the
/// Half or Float job variant based on the element type.
/// </summary>
internal partial struct ReduceSumJobHelper
{
    /// <summary>Convenience overload: pins X (with upload) and O (no cache upload), then schedules.</summary>
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var pinX = Pin(X);
        var pinO = Pin(O, uploadCache: false);
        return ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
    /// <summary>Schedules the precision-matched ReduceSum job over pinned tensor data.</summary>
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool AHalf = pinX.array.Type == DataType.Half;
        bool OHalf = pinO.array.Type == DataType.Half;
        // Mixed input/output precision is not supported by this job family.
        UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
        if (AHalf)
        {
            var job = new ReduceSumJob_Full_Half();
            job.data = this;
            return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else
        {
            var job = new ReduceSumJob_Full_Float();
            job.data = this;
            return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
    }
}
/// <summary>
/// Burst job computing a sum-reduction along one axis, float in / float out.
/// One invocation produces one output element.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ReduceSumJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;  // input
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat; // output
    public ReduceSumJobHelper data; // offsetReduce / reduceDim geometry
    /// <summary>
    /// Decomposes i into inner offset x and outer index y, then sums reduceDim
    /// input values spaced offsetReduce apart.
    /// </summary>
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = i / data.offsetReduce;
        float sumV = 0;
        for (int z = 0; z < data.reduceDim; ++z)
        {
            float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
            sumV += v;
        }
        Optr[y * data.offsetReduce + x] = (float)(sumV);
    }
}
/// <summary>
/// Scheduling helper for ReduceMean: pins the tensors and dispatches to the
/// Half or Float job variant based on the element type.
/// </summary>
internal partial struct ReduceMeanJobHelper
{
    /// <summary>Convenience overload: pins X (with upload) and O (no cache upload), then schedules.</summary>
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var pinX = Pin(X);
        var pinO = Pin(O, uploadCache: false);
        return ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
    /// <summary>Schedules the precision-matched ReduceMean job over pinned tensor data.</summary>
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool AHalf = pinX.array.Type == DataType.Half;
        bool OHalf = pinO.array.Type == DataType.Half;
        // Mixed input/output precision is not supported by this job family.
        UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
        if (AHalf)
        {
            var job = new ReduceMeanJob_Full_Half();
            job.data = this;
            return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else
        {
            var job = new ReduceMeanJob_Full_Float();
            job.data = this;
            return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
    }
}
/// <summary>
/// Burst job computing a mean-reduction along one axis, float in / float out.
/// One invocation produces one output element (sum / reduceDim).
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ReduceMeanJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;  // input
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat; // output
    public ReduceMeanJobHelper data; // offsetReduce / reduceDim geometry
    /// <summary>
    /// Decomposes i into inner offset x and outer index y, sums reduceDim input
    /// values spaced offsetReduce apart, and writes the average.
    /// </summary>
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = i / data.offsetReduce;
        float sumV = 0;
        for (int z = 0; z < data.reduceDim; ++z)
        {
            float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
            sumV += v;
        }
        Optr[y * data.offsetReduce + x] = (float)(sumV / (float)data.reduceDim);
    }
}
/// <summary>
/// Scheduling helper for ExpBiasReduce (sum of exp(x - bias), the softmax denominator pass).
/// Dispatches on activation precision (X/O) and weight/bias precision (B) independently.
/// The combination "half activations with float weights" is not supported.
/// </summary>
internal partial struct ExpBiasReduceJobHelper
{
    /// <summary>Schedules the precision-matched ExpBiasReduce job.</summary>
    public JobHandle ScheduleXBO(BurstTensorData pinX, FencedMemoryAlloc pinB, FencedMemoryAlloc pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool AHalf = pinX.array.Type == DataType.Half;
        bool WHalf = pinB.type == DataType.Half;
        bool OHalf = pinO.type == DataType.Half;
        // Activation input and output must share the same precision.
        UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
        if (AHalf && WHalf)
        {
            var job = new ExpBiasReduceJob_Full_Half();
            job.data = this;
            return job.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else if (!AHalf && WHalf)
        {
            var job = new ExpBiasReduceJob_ActAsFloat_WeightAsHalf();
            job.data = this;
            return job.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else if (!AHalf && !WHalf)
        {
            var job = new ExpBiasReduceJob_Full_Float();
            job.data = this;
            return job.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else //if (AHalf && !WHalf)
        {
            // Unsupported combination: fail the assert and return a default (no-op) handle.
            UnityEngine.Assertions.Assert.IsTrue(false, "ExpBiasReduceJob does not support activation as half while weights are floats.");
            return new JobHandle();
        }
    }
}
/// <summary>
/// Burst job computing sum over the reduce axis of exp(x - b), float variant.
/// B is a per-(outer, inner) bias subtracted before exponentiation — presumably the
/// running max from a preceding ReduceMax pass for numerical stability (typical
/// softmax pipeline); confirm against the caller.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ExpBiasReduceJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;  // input
    public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;  // bias subtracted per element
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat; // output (the exp-sum)
    public ExpBiasReduceJobHelper data; // offsetReduce / reduceDim geometry
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = i / data.offsetReduce;
        float accum = 0.0f;
        for (int z = 0; z < data.reduceDim; ++z)
        {
            float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
            // Bias index does not depend on z: one bias per reduced slice.
            float b = Bptr[y * data.offsetReduce + x];
            accum += math.exp(v - b);
        }
        Optr[y * data.offsetReduce + x] = (float)accum;
    }
}
/// <summary>
/// Scheduling helper for the final softmax pass (exp(x - b) / s). Dispatches on
/// activation precision (X/O) and weight precision (S/B) independently; the
/// combination "half activations with float weights" is not supported.
/// </summary>
internal partial struct SoftmaxEndJobHelper
{
    /// <summary>Schedules the precision-matched SoftmaxEnd job.</summary>
    public JobHandle ScheduleXSBO(BurstTensorData pinX, FencedMemoryAlloc pinS, FencedMemoryAlloc pinB, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool AHalf = pinX.array.Type == DataType.Half;
        bool WHalf = pinS.type == DataType.Half;
        bool BHalf = pinB.type == DataType.Half;
        bool OHalf = pinO.array.Type == DataType.Half;
        // Activations in/out must match; the two auxiliary buffers (S and B) must match each other.
        UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
        UnityEngine.Assertions.Assert.AreEqual(WHalf, BHalf);
        if (AHalf && WHalf)
        {
            var job = new SoftmaxEndJob_Full_Half();
            job.data = this;
            return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else if (!AHalf && WHalf)
        {
            var job = new SoftmaxEndJob_ActAsFloat_WeightAsHalf();
            job.data = this;
            return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else if (!AHalf && !WHalf)
        {
            var job = new SoftmaxEndJob_Full_Float();
            job.data = this;
            return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else //if (AHalf && !WHalf)
        {
            // Unsupported combination: fail the assert and return a default (no-op) handle.
            UnityEngine.Assertions.Assert.IsTrue(false, "SoftmaxEndJob does not support activation as half while weights are floats.");
            return new JobHandle();
        }
    }
}
/// <summary>
/// Final softmax pass, float variant: O[i] = exp(X[i] - B) / S, where B and S are
/// indexed per reduced slice (B presumably the max, S the exp-sum from the earlier passes).
/// One invocation writes one element.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct SoftmaxEndJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;  // input logits
    public ReadOnlyMemResource S { get; set; } float* Sptr => S.ptrfloat;  // per-slice divisor
    public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;  // per-slice bias
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat; // output
    public SoftmaxEndJobHelper data; // offsetReduce / reduceDim geometry
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = ((i / data.offsetReduce) % data.reduceDim); // index along the reduced axis; unused below
        int z = ((i / data.offsetReduce) / data.reduceDim);
        Optr[i] = (float)(math.exp(Xptr[i] - Bptr[z * data.offsetReduce + x]) / Sptr[z * data.offsetReduce + x]);
    }
}
/// <summary>
/// Scheduling helper for the final log-softmax pass ((x - b) - log(s)). Dispatches on
/// activation precision (X/O) and weight precision (S/B) independently; the
/// combination "half activations with float weights" is not supported.
/// </summary>
internal partial struct LogSoftmaxEndJobHelper
{
    /// <summary>Schedules the precision-matched LogSoftmaxEnd job.</summary>
    public JobHandle ScheduleXSBO(BurstTensorData pinX, FencedMemoryAlloc pinS, FencedMemoryAlloc pinB, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool AHalf = pinX.array.Type == DataType.Half;
        bool WHalf = pinS.type == DataType.Half;
        bool BHalf = pinB.type == DataType.Half;
        bool OHalf = pinO.array.Type == DataType.Half;
        // Activations in/out must match; the two auxiliary buffers (S and B) must match each other.
        UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
        UnityEngine.Assertions.Assert.AreEqual(WHalf, BHalf);
        if (AHalf && WHalf)
        {
            var job = new LogSoftmaxEndJob_Full_Half();
            job.data = this;
            return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else if (!AHalf && WHalf)
        {
            var job = new LogSoftmaxEndJob_ActAsFloat_WeightAsHalf();
            job.data = this;
            return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else if (!AHalf && !WHalf)
        {
            var job = new LogSoftmaxEndJob_Full_Float();
            job.data = this;
            return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else //if (AHalf && !WHalf)
        {
            // Unsupported combination: fail the assert and return a default (no-op) handle.
            UnityEngine.Assertions.Assert.IsTrue(false, "LogSoftmaxEndJob does not support activation as half while weights are floats.");
            return new JobHandle();
        }
    }
}
/// <summary>
/// Final log-softmax pass, float variant: O[i] = (X[i] - B) - log(S), with B and S
/// indexed per reduced slice. One invocation writes one element.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct LogSoftmaxEndJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;  // input logits
    public ReadOnlyMemResource S { get; set; } float* Sptr => S.ptrfloat;  // per-slice divisor (log-subtracted)
    public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;  // per-slice bias
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat; // output
    public LogSoftmaxEndJobHelper data; // offsetReduce / reduceDim geometry
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = ((i / data.offsetReduce) % data.reduceDim); // index along the reduced axis; unused below
        int z = ((i / data.offsetReduce) / data.reduceDim);
        Optr[i] = (float)((Xptr[i] - Bptr[z * data.offsetReduce + x]) - math.log(Sptr[z * data.offsetReduce + x]));
    }
}
/// <summary>
/// Scheduling helper for MaxPool2D: pins the tensors and dispatches to the
/// Half or Float job variant based on the element type.
/// </summary>
internal partial struct MaxPool2DJobHelper
{
    /// <summary>Convenience overload: pins X (with upload) and O (no cache upload), then schedules.</summary>
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var pinX = Pin(X);
        var pinO = Pin(O, uploadCache: false);
        return ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
    /// <summary>Schedules the precision-matched MaxPool2D job over pinned tensor data.</summary>
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool AHalf = pinX.array.Type == DataType.Half;
        bool OHalf = pinO.array.Type == DataType.Half;
        // Mixed input/output precision is not supported by this job family.
        UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
        if (AHalf)
        {
            var job = new MaxPool2DJob_Full_Half();
            job.data = this;
            return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else
        {
            var job = new MaxPool2DJob_Full_Float();
            job.data = this;
            return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
    }
}
/// <summary>
/// Burst-compiled 2D max pooling, float variant. Parallelized over output rows;
/// per (batch, column) the channel-wise maximum over the kernel window is gathered
/// into a scratch accumulator, then written out.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct MaxPool2DJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;  // input tensor
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat; // output tensor
    public MaxPool2DJobHelper data; // strides, padding, kernel geometry (declared elsewhere)
    const int unrollSize = 16; // manual unroll factor for the channel loops
    /// <summary>Computes one full output row <paramref name="y"/>.</summary>
    public void Execute(int y)
    {
        // Per-channel max accumulators, reused across every (n, x) of this row.
        int accumulatorMemSize = data.inChannels * sizeof(float);
        float* outputAccumulators = (float*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
        for (int n = 0; n < data.outBatch; ++n)
        for (int x = 0; x < data.outWidth; ++x)
        {
            // Tracks whether any in-bounds pixel has seeded the accumulators yet;
            // the first one is written through instead of max'ed against garbage.
            bool firstNotRejectedPixelInKernel = true;
            // gather max results in accumulators
            for (int dy = 0; dy < data.kernelHeight; ++dy)
            {
                int readY = y * data.strideY + dy - data.padY;
                if (readY < 0) continue;
                if (readY >= data.inHeight) continue;
                for (int dx = 0; dx < data.kernelWidth; ++dx)
                {
                    // NOTE(review): horizontal offset subtracts data.padY, not a padX —
                    // same pattern as the other conv/pool jobs here; confirm against the
                    // helper struct/generator whether padX should be used.
                    int readX = x * data.strideX + dx - data.padY;
                    if (readX < 0) continue;
                    if (readX >= data.inWidth) continue;
                    float* dst = outputAccumulators;
                    float* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
                    int k = 0;
                    if (firstNotRejectedPixelInKernel) // first pass, write-through
                    {
                        for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                            for (int q = 0; q < unrollSize; q++, src++, dst++)
                                *dst = *src;
                        for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                            *dst = *src;
                    }
                    else
                    {
                        for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                            for (int q = 0; q < unrollSize; q++, src++, dst++)
                                *dst = (*dst) > (*src) ? (*dst) : (*src);
                        for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                            *dst = (*dst) > (*src) ? (*dst) : (*src);
                    }
                    firstNotRejectedPixelInKernel = false;
                }
            }
            // safety net, if kernel was completely outside of X
            // fill with padding_value (0) to avoid uninitialized memory
            if (firstNotRejectedPixelInKernel)
                UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
            { // write accumulators to memory
                int k = 0;
                float* src = outputAccumulators;
                float* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
                for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                    for (int q = 0; q < unrollSize; q++, src++, dst++)
                        *dst = *src;
                for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                    *dst = *src;
            }
        }
        UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
    }
}
/// <summary>
/// Scheduling helper for AvgPool2D: pins the tensors and dispatches to the
/// Half or Float job variant based on the element type.
/// </summary>
internal partial struct AvgPool2DJobHelper
{
    /// <summary>Convenience overload: pins X (with upload) and O (no cache upload), then schedules.</summary>
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var pinX = Pin(X);
        var pinO = Pin(O, uploadCache: false);
        return ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
    /// <summary>Schedules the precision-matched AvgPool2D job over pinned tensor data.</summary>
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool AHalf = pinX.array.Type == DataType.Half;
        bool OHalf = pinO.array.Type == DataType.Half;
        // Mixed input/output precision is not supported by this job family.
        UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
        if (AHalf)
        {
            var job = new AvgPool2DJob_Full_Half();
            job.data = this;
            return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        else
        {
            var job = new AvgPool2DJob_Full_Float();
            job.data = this;
            return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
    }
}
/// <summary>
/// Burst-compiled 2D average pooling, float variant. Parallelized over output rows;
/// per (batch, column) the channel-wise sums over the in-bounds kernel window are
/// gathered, then divided by the count of contributing pixels on write-out
/// (i.e. padding pixels are excluded from the average).
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct AvgPool2DJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;  // input tensor
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat; // output tensor
    public AvgPool2DJobHelper data; // strides, padding, kernel geometry (declared elsewhere)
    const int unrollSize = 16; // manual unroll factor for the channel loops
    /// <summary>Computes one full output row <paramref name="y"/>.</summary>
    public void Execute(int y)
    {
        // Per-channel sum accumulators, reused across every (n, x) of this row.
        int accumulatorMemSize = data.inChannels * sizeof(float);
        float* outputAccumulators = (float*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
        for (int n = 0; n < data.outBatch; ++n)
        for (int x = 0; x < data.outWidth; ++x)
        {
            // reset accumulators & counter
            int counter = 0;
            UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
            // gather sums in accumulators
            for (int dy = 0; dy < data.kernelHeight; ++dy)
            {
                int readY = y * data.strideY + dy - data.padY;
                if (readY < 0) continue;
                if (readY >= data.inHeight) continue;
                for (int dx = 0; dx < data.kernelWidth; ++dx)
                {
                    // NOTE(review): horizontal offset subtracts data.padY, not a padX —
                    // same pattern as the other conv/pool jobs here; confirm against the
                    // helper struct/generator whether padX should be used.
                    int readX = x * data.strideX + dx - data.padY;
                    if (readX < 0) continue;
                    if (readX >= data.inWidth) continue;
                    float* dst = outputAccumulators;
                    float* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
                    int k = 0;
                    for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                        for (int q = 0; q < unrollSize; q++, src++, dst++)
                            *dst += *src;
                    for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                        *dst += *src;
                    counter++; // one more pixel contributed to the average
                }
            }
            // safety net, if kernel was completely outside of X
            counter = math.max(1, counter);
            { // write accumulators to memory
                int k = 0;
                float invCounter = 1f / counter; // divide once, multiply per channel
                float* src = outputAccumulators;
                float* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
                for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                    for (int q = 0; q < unrollSize; q++, src++, dst++)
                        *dst = (float)(*src * invCounter);
                for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                    *dst = (float)(*src * invCounter);
            }
        }
        UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
    }
}
#endregion
#region Reduce jobs declaration for mode: _ActAsFloat_WeightAsHalf
/// <summary>
/// ExpBiasReduce variant with float activations (X/O) and half bias (B):
/// sums exp(x - b) over the reduce axis. See the Full_Float variant for the same logic.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ExpBiasReduceJob_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat; // input (float)
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;   // bias (half)
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat; // output (float)
    public ExpBiasReduceJobHelper data; // offsetReduce / reduceDim geometry
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = i / data.offsetReduce;
        float accum = 0.0f;
        for (int z = 0; z < data.reduceDim; ++z)
        {
            float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
            // Bias index does not depend on z: one bias per reduced slice (widened half -> float).
            float b = Bptr[y * data.offsetReduce + x];
            accum += math.exp(v - b);
        }
        Optr[y * data.offsetReduce + x] = (float)accum;
    }
}
/// <summary>
/// Final softmax pass with float activations (X/O) and half auxiliaries (S, B):
/// O[i] = exp(X[i] - B) / S, indexed per reduced slice.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct SoftmaxEndJob_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat; // input logits (float)
    public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;   // per-slice divisor (half)
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;   // per-slice bias (half)
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat; // output (float)
    public SoftmaxEndJobHelper data; // offsetReduce / reduceDim geometry
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = ((i / data.offsetReduce) % data.reduceDim); // index along the reduced axis; unused below
        int z = ((i / data.offsetReduce) / data.reduceDim);
        Optr[i] = (float)(math.exp(Xptr[i] - Bptr[z * data.offsetReduce + x]) / Sptr[z * data.offsetReduce + x]);
    }
}
/// <summary>
/// Final log-softmax pass with float activations (X/O) and half auxiliaries (S, B):
/// O[i] = (X[i] - B) - log(S), indexed per reduced slice.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct LogSoftmaxEndJob_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat; // input logits (float)
    public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;   // per-slice divisor (half)
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;   // per-slice bias (half)
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat; // output (float)
    public LogSoftmaxEndJobHelper data; // offsetReduce / reduceDim geometry
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = ((i / data.offsetReduce) % data.reduceDim); // index along the reduced axis; unused below
        int z = ((i / data.offsetReduce) / data.reduceDim);
        Optr[i] = (float)((Xptr[i] - Bptr[z * data.offsetReduce + x]) - math.log(Sptr[z * data.offsetReduce + x]));
    }
}
#endregion
#region Reduce jobs declaration for mode: _Full_Half
/// <summary>
/// Half-precision variant of ReduceMax: compares in float, stores the result as half.
/// One invocation produces one output element.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ReduceMaxJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;  // input
    public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf; // output
    public ReduceMaxJobHelper data; // offsetReduce / reduceDim geometry
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = i / data.offsetReduce;
        float maxV = float.MinValue;
        for (int z = 0; z < data.reduceDim; ++z)
        {
            float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
            maxV = math.max(maxV, v);
        }
        Optr[y * data.offsetReduce + x] = (half)maxV;
    }
}
/// <summary>
/// Half-precision variant of ReduceSum: accumulates in float, stores the result as half.
/// One invocation produces one output element.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ReduceSumJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;  // input
    public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf; // output
    public ReduceSumJobHelper data; // offsetReduce / reduceDim geometry
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = i / data.offsetReduce;
        float sumV = 0;
        for (int z = 0; z < data.reduceDim; ++z)
        {
            float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
            sumV += v;
        }
        Optr[y * data.offsetReduce + x] = (half)(sumV);
    }
}
/// <summary>
/// Half-precision variant of ReduceMean: accumulates in float, divides by reduceDim,
/// stores the result as half. One invocation produces one output element.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ReduceMeanJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;  // input
    public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf; // output
    public ReduceMeanJobHelper data; // offsetReduce / reduceDim geometry
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = i / data.offsetReduce;
        float sumV = 0;
        for (int z = 0; z < data.reduceDim; ++z)
        {
            float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
            sumV += v;
        }
        Optr[y * data.offsetReduce + x] = (half)(sumV / (float)data.reduceDim);
    }
}
/// <summary>
/// Half-precision variant of ExpBiasReduce: sums exp(x - b) over the reduce axis
/// in float, stores the result as half.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ExpBiasReduceJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;  // input
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;  // bias subtracted per element
    public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf; // output (the exp-sum)
    public ExpBiasReduceJobHelper data; // offsetReduce / reduceDim geometry
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = i / data.offsetReduce;
        float accum = 0.0f;
        for (int z = 0; z < data.reduceDim; ++z)
        {
            float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
            // Bias index does not depend on z: one bias per reduced slice.
            float b = Bptr[y * data.offsetReduce + x];
            accum += math.exp(v - b);
        }
        Optr[y * data.offsetReduce + x] = (half)accum;
    }
}
/// <summary>
/// Final softmax pass, half variant: O[i] = exp(X[i] - B) / S, indexed per reduced
/// slice, computed in float and stored as half.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct SoftmaxEndJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;  // input logits
    public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;  // per-slice divisor
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;  // per-slice bias
    public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf; // output
    public SoftmaxEndJobHelper data; // offsetReduce / reduceDim geometry
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = ((i / data.offsetReduce) % data.reduceDim); // index along the reduced axis; unused below
        int z = ((i / data.offsetReduce) / data.reduceDim);
        Optr[i] = (half)(math.exp(Xptr[i] - Bptr[z * data.offsetReduce + x]) / Sptr[z * data.offsetReduce + x]);
    }
}
/// <summary>
/// Final log-softmax pass, half variant: O[i] = (X[i] - B) - log(S), indexed per
/// reduced slice, computed in float and stored as half.
/// </summary>
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct LogSoftmaxEndJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;  // input logits
    public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;  // per-slice divisor (log-subtracted)
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;  // per-slice bias
    public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf; // output
    public LogSoftmaxEndJobHelper data; // offsetReduce / reduceDim geometry
    public void Execute(int i)
    {
        int x = i % data.offsetReduce;
        int y = ((i / data.offsetReduce) % data.reduceDim); // index along the reduced axis; unused below
        int z = ((i / data.offsetReduce) / data.reduceDim);
        Optr[i] = (half)((Xptr[i] - Bptr[z * data.offsetReduce + x]) - math.log(Sptr[z * data.offsetReduce + x]));
    }
}
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct MaxPool2DJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
    public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
    public MaxPool2DJobHelper data;
    const int unrollSize = 16;

    /// <summary>
    /// Max-pools one output row `y`: for each (batch, outX) the kernel window is
    /// scanned and a per-channel running maximum is kept in a temporary accumulator,
    /// which is then written out. Window taps falling outside the input are skipped.
    /// </summary>
    public void Execute(int y)
    {
        int accumulatorMemSize = data.inChannels * sizeof(half);
        half* outputAccumulators = (half*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
        for (int n = 0; n < data.outBatch; ++n)
        for (int x = 0; x < data.outWidth; ++x)
        {
            bool firstNotRejectedPixelInKernel = true;
            // gather max results in accumulators
            for (int dy = 0; dy < data.kernelHeight; ++dy)
            {
                int readY = y * data.strideY + dy - data.padY;
                if (readY < 0) continue;
                if (readY >= data.inHeight) continue;
                for (int dx = 0; dx < data.kernelWidth; ++dx)
                {
                    // BUGFIX: the horizontal offset must use the X padding (was data.padY),
                    // otherwise asymmetric padding reads the wrong columns.
                    int readX = x * data.strideX + dx - data.padX;
                    if (readX < 0) continue;
                    if (readX >= data.inWidth) continue;
                    half* dst = outputAccumulators;
                    half* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
                    int k = 0;
                    if (firstNotRejectedPixelInKernel) // first pass, write-through
                    {
                        for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                            for (int q = 0; q < unrollSize; q++, src++, dst++)
                                *dst = *src;
                        for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                            *dst = *src;
                    }
                    else
                    {
                        for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                            for (int q = 0; q < unrollSize; q++, src++, dst++)
                                *dst = (*dst) > (*src) ? (*dst) : (*src);
                        for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                            *dst = (*dst) > (*src) ? (*dst) : (*src);
                    }
                    firstNotRejectedPixelInKernel = false;
                }
            }
            // safety net, if kernel was completely outside of X
            // fill with padding_value (0) to avoid uninitialized memory
            if (firstNotRejectedPixelInKernel)
                UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
            { // write accumulators to memory
                int k = 0;
                half* src = outputAccumulators;
                half* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
                for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                    for (int q = 0; q < unrollSize; q++, src++, dst++)
                        *dst = *src;
                for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                    *dst = *src;
            }
        }
        UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
    }
}
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct AvgPool2DJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
    public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
    public AvgPool2DJobHelper data;
    const int unrollSize = 16;

    /// <summary>
    /// Average-pools one output row `y`: per-channel sums are gathered over the kernel
    /// window into a temporary accumulator, then divided by the number of valid taps.
    /// Window taps falling outside the input are skipped (counted-out, not zero-padded).
    /// </summary>
    public void Execute(int y)
    {
        int accumulatorMemSize = data.inChannels * sizeof(half);
        half* outputAccumulators = (half*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
        for (int n = 0; n < data.outBatch; ++n)
        for (int x = 0; x < data.outWidth; ++x)
        {
            // reset accumulators & counter
            int counter = 0;
            UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
            // gather sums in accumulators
            for (int dy = 0; dy < data.kernelHeight; ++dy)
            {
                int readY = y * data.strideY + dy - data.padY;
                if (readY < 0) continue;
                if (readY >= data.inHeight) continue;
                for (int dx = 0; dx < data.kernelWidth; ++dx)
                {
                    // BUGFIX: the horizontal offset must use the X padding (was data.padY),
                    // otherwise asymmetric padding reads the wrong columns.
                    int readX = x * data.strideX + dx - data.padX;
                    if (readX < 0) continue;
                    if (readX >= data.inWidth) continue;
                    half* dst = outputAccumulators;
                    half* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
                    int k = 0;
                    for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                        for (int q = 0; q < unrollSize; q++, src++, dst++)
                            *dst += *src;
                    for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                        *dst += *src;
                    counter++;
                }
            }
            // safety net, if kernel was completely outside of X
            counter = math.max(1, counter);
            { // write accumulators to memory
                int k = 0;
                float invCounter = 1f / counter;
                half* src = outputAccumulators;
                half* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
                for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                    for (int q = 0; q < unrollSize; q++, src++, dst++)
                        *dst = (half)(*src * invCounter);
                for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                    *dst = (half)(*src * invCounter);
            }
        }
        UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
    }
}
#endregion
}
}

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: f555ca3db5aa9674f9cdba4d5b715e79
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 1f9c24a13966b425fa5bfd1a4007c3f4
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: dd2cfd0651655b44ca226eb4f0b952aa
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 6bc05bfa1b9544e8a813df0c3eaab6b0
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: badd0d6a0383049eab2cb58e1d0d6fa9
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,143 +0,0 @@
using System.Diagnostics;
using UnityEngine;
using System.Runtime.InteropServices;
namespace Unity.Barracuda {
internal class ComputeDebugUtils
{
    /// <summary>
    /// DEBUG ONLY: `debugKernels` allow to track out of bound read/write and assertion in kernels.
    /// When set to true be sure to define KERNEL_ASSERTS or FORCE_DEBUG in the particular kernel(s)
    /// you want to debug (see in DebugUtils.cginc).
    /// Production code should not set this to 'true' as this will significantly degrade performances.
    /// </summary>
    public static bool debugKernels = false;
    /// <summary>
    /// DEBUG ONLY: if ComputeDebugUtils.debugKernels is true and debugger is attached, debugger will break when a kernel assertion is catch.
    /// </summary>
    public static bool breakOnAssertion = false;
    //Keep in sync with DebugUtils.cginc KERNEL_ASSERT_CONTEXT defines
    private enum KernelAssertContext
    {
        ReadOnlyTensor_Read = 0,
        ReadWriteTensor_Read = 1,
        ReadWriteTensor_Write = 2,
        SharedTensor_Read = 3,
        Assertion = 4,
        AssertionWithValue = 5
    }
    // Static ctor: lets kernel debugging be enabled from the command line
    // ("-barracuda-debug-gpu-kernels") without a code change.
    static ComputeDebugUtils()
    {
        string[] args = System.Environment.GetCommandLineArgs ();
        for (int i = 0; i < args.Length; i++) {
            if (args [i] == "-barracuda-debug-gpu-kernels")
            {
                debugKernels = true;
            }
        }
    }
    // Managed mirror of the GPU-side assert record; Pack = 1 keeps the layout
    // byte-compatible with the 8-uint buffer written by the kernels.
    [StructLayout(LayoutKind.Sequential, Pack = 1)]
    public struct KernelAssertInfo
    {
        // Decodes one record read back from the GPU buffer (expects exactly 8 uints).
        public KernelAssertInfo(uint[] data)
        {
            UnityEngine.Debug.Assert(numUintInKernelAssertInfo == data.Length);
            UnityEngine.Debug.Assert(numUintInKernelAssertInfo == 8,
                "Please change KernelAssertInfo constructor if altering the struct.");
            lockValue = data[0];
            lineNumber = data[1];
            context = data[2];
            index = data[3];
            bufferSize = data[4];
            debugValue = data[5];
            padding1 = data[6];
            padding2 = data[7];
        }
        public readonly uint lockValue;   // non-zero when an assertion fired on the GPU
        public readonly uint lineNumber;  // shader source line of the assertion
        public readonly uint context;     // one of KernelAssertContext
        public readonly uint index;       // offending buffer index (for OOB contexts)
        public readonly uint bufferSize;  // length of the accessed buffer (for OOB contexts)
        public readonly uint debugValue;  // extra value for AssertionWithValue
        public readonly uint padding1;
        public readonly uint padding2;
    }
    private static readonly int numUintInKernelAssertInfo = Marshal.SizeOf(typeof(KernelAssertInfo))/sizeof(uint);
    // Lazily-created 1-element buffer shared by all dispatches (see PrepareDispatch).
    private static ComputeBuffer kernelDebugInfo = null;
    // Logs a human-readable message for an assert record; a lockValue of 0 means
    // no assertion fired and nothing is logged.
    private static void LogAssertion(KernelAssertInfo info, string kernelName)
    {
        if (info.lockValue != 0)
        {
            string source;
            switch (info.context)
            {
                case (int) KernelAssertContext.ReadOnlyTensor_Read:
                    source = $"Out of bound while Reading a ReadonlyTensor of length {info.bufferSize} at index {info.index} (at Tensor.cginc line {info.lineNumber})";
                    break;
                case (int) KernelAssertContext.ReadWriteTensor_Read:
                    source = $"Out of bound while Reading a ReadWriteTensor of length {info.bufferSize} at index {info.index} (at Tensor.cginc line {info.lineNumber})";
                    break;
                case (int) KernelAssertContext.ReadWriteTensor_Write:
                    source = $"Out of bound while Writing to a ReadWriteTensor of length {info.bufferSize} at index {info.index} (at Tensor.cginc line {info.lineNumber})";
                    break;
                case (int) KernelAssertContext.SharedTensor_Read:
                    source = $"Out of bound while Reading a SharedTensor of length {info.bufferSize} at index {info.index} (at Tensor.cginc line {info.lineNumber})";
                    break;
                case (int) KernelAssertContext.Assertion:
                    source = $"Assertion at line {info.lineNumber}";
                    break;
                case (int) KernelAssertContext.AssertionWithValue:
                    source = $"Assertion at line {info.lineNumber}, debug value is {info.debugValue}";
                    break;
                default:
                    source = "Unknown error";
                    break;
            }
            string message = $"{source} in kernel {kernelName}.";
            D.LogError(message);
            if (breakOnAssertion)
            {
                Debugger.Break();
            }
        }
    }
    // Binds the assert buffer globally and zeroes it before a dispatch.
    // No-op unless debugKernels is set.
    public static void PrepareDispatch()
    {
        //Lazy alloc, will be released by GC.
        if (debugKernels && kernelDebugInfo == null)
        {
            kernelDebugInfo = new ComputeBuffer(1, numUintInKernelAssertInfo*sizeof(uint));
        }
        if (debugKernels)
        {
            Shader.SetGlobalBuffer("KernelAssertInfoBuffer", kernelDebugInfo);
            kernelDebugInfo.SetData(new uint[numUintInKernelAssertInfo]); //TODO use a kernel to zero out the buffer to avoid a extra sync.
        }
    }
    // Reads the assert buffer back after a dispatch (GPU sync point) and logs any
    // assertion that fired. No-op unless debugKernels is set.
    public static void VerifyDispatch(string kernelName)
    {
        if (debugKernels)
        {
            UnityEngine.Debug.Assert(kernelDebugInfo != null);
            var data = new uint[numUintInKernelAssertInfo];
            kernelDebugInfo.GetData(data, 0, 0, numUintInKernelAssertInfo);
            LogAssertion(new KernelAssertInfo(data), kernelName);
        }
    }
}
} // namespace Unity.Barracuda

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 72797c6856a1f9642a53f0b22d65e5dc
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 1126b6ab4d825624a9135b0501f4d793
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 5fea18c74a3be4c7680b4ee28cbe1a86
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,12 +0,0 @@
fileFormatVersion: 2
guid: e7398940fb81d45ee8e648e0b0f467f2
timeCreated: 1503433373
licenseType: Pro
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 3e48b2167ab1b453bb10a8fdac9dc531
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: c077f9591cc6d4804bc89b66a2a67c0d
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,12 +0,0 @@
fileFormatVersion: 2
guid: 3d3848101f7774555899e75a86641621
timeCreated: 1506427659
licenseType: Pro
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,93 +0,0 @@
namespace Unity.Barracuda {
/// <summary>
/// `CompareOps` utilities
/// </summary>
public class CompareOpsUtils
{
    /// <summary>
    /// `CompareOps` log level enum
    /// </summary>
    public enum LogLevel
    {
        /// <summary>
        /// Warning
        /// </summary>
        Warning,
        /// <summary>
        /// Error
        /// </summary>
        Error
    }

    // Convenience overload: forwards with the layer type's name as the op name.
    static internal void CheckSame(Tensor X, Tensor Y, Layer.Type type, LogLevel logLevel, float epsilon=0.0001f, params Tensor[] inputs)
    {
        CheckSame(X, Y, type.ToString(), logLevel, epsilon, inputs);
    }

    // Compares X against Y and logs a mismatch; at Warning level also dumps the
    // shapes, a slice of both tensors and the op inputs. Disposes Y when it lives
    // on a different device object than X.
    static internal void CheckSame(Tensor X, Tensor Y, string opName, LogLevel logLevel, float epsilon=0.0001f, params Tensor[] inputs)
    {
        bool mismatch = !X.Approximately(Y, epsilon);
        if (mismatch)
        {
            if (logLevel != LogLevel.Error)
            {
                D.LogWarning($"Tensors not equal after {opName} max error: {X.MaxDifference(Y)}");
                D.Log("First: " + X.shape);
                D.Log("Second:" + Y.shape);
                X.PrintDataPart(X.channels * X.width * 2);
                Y.PrintDataPart(Y.channels * Y.width * 2);
                for (var idx = 0; idx < inputs.Length; idx++)
                {
                    inputs[idx].PrintDataPart(32, "input_" + idx);
                }
            }
            else
            {
                D.LogError($"Tensors not equal after {opName}, epsilon {epsilon}");
            }
        }
        if (X.tensorOnDevice != Y.tensorOnDevice)
            Y.Dispose();
    }

    // Convenience overload: forwards with the layer type's name as the op name.
    static internal bool CheckApproximately(Tensor X, Tensor Y, int count, float epsilon, Layer.Type type, LogLevel logLevel)
    {
        return CheckApproximately(X, Y, count, epsilon, type.ToString(), logLevel);
    }

    // Compares the first `count` elements of X against Y (count < 0 means a
    // default-sized slice). Returns false and dumps diagnostics on mismatch;
    // on success disposes Y when it lives on a different device object than X.
    static internal bool CheckApproximately(Tensor X, Tensor Y, int count, float epsilon, string opName, LogLevel logLevel)
    {
        bool equal = X.Approximately(Y, epsilon, count);
        if (!equal)
        {
            var message = $"Tensors not equal after {opName}";
            if (logLevel == LogLevel.Error)
                D.LogError(message);
            else
                D.LogWarning(message);
            D.Log("First: " + X.shape);
            D.Log("Second:" + Y.shape);
            var printCount = count < 0 ? X.channels * X.width * 2 : count;
            X.PrintDataPart(printCount);
            Y.PrintDataPart(printCount);
            return false;
        }
        if (X.tensorOnDevice != Y.tensorOnDevice)
            Y.Dispose();
        return true;
    }
}
} // namespace Unity.Barracuda

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 5e3e5424b979b5c43997409257895b6b
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,132 +0,0 @@
using UnityEngine;
using UnityEngine.Rendering;
namespace Unity.Barracuda
{
/// <summary>
/// GPU compute info
/// </summary>
public class ComputeInfo
{
    /// <summary>
    /// Channel order enum
    /// </summary>
    public enum ChannelsOrder
    {
        /// <summary>
        /// Channels last
        /// </summary>
        NHWC,
        /// <summary>
        /// Channels first
        /// </summary>
        NCHW
    }
    /// <summary>
    /// GPU supports shared memory
    /// </summary>
    public static bool supportsComputeSharedMemory = true;
    /// <summary>
    /// GPU supports Dense 32x32 kernels
    /// </summary>
    public static bool supportsDense32x32 = true;
    /// <summary>
    /// GPU supports Dense 64x64 kernels
    /// </summary>
    public static bool supportsDense64x64 = true;
    /// <summary>
    /// GPU supports compute
    /// </summary>
    public static bool supportsCompute = true;
    /// <summary>
    /// Max compute work group size supported by GPU
    /// </summary>
    public static uint maxComputeWorkGroupSize = 1024;
    /// <summary>
    /// GPU vendor
    /// </summary>
    public static string graphicsDeviceVendor = "";
    /// <summary>
    /// Helper for hardware selection
    /// </summary>
    public static bool IsMobileGPU() { return
        (Application.platform == RuntimePlatform.Android) ||
        (Application.platform == RuntimePlatform.IPhonePlayer) ||
        graphicsDeviceVendor.Contains("Intel");
    }
    /// <summary>
    /// True when running on an iPhone player (Apple GPU).
    /// </summary>
    public static bool IsiPhoneGPU() { return
        (Application.platform == RuntimePlatform.IPhonePlayer);
    }
    /// <summary>
    /// True when running on Android with a Qualcomm (Adreno) GPU.
    /// </summary>
    public static bool IsQualcommGPU() { return
        (Application.platform == RuntimePlatform.Android) && graphicsDeviceVendor.Contains("Qualcomm");
    }
    /// <summary>
    /// True when running on Android with an ARM (Mali) GPU.
    /// </summary>
    public static bool IsARMGPU() { return
        (Application.platform == RuntimePlatform.Android) && graphicsDeviceVendor.Contains("ARM");
    }
    /// <summary>
    /// EXPERIMENTAL: Select Channel order of the compute backends.
    /// Production code should stick to default (NHWC) for now.
    /// </summary>
    public static ChannelsOrder channelsOrder = ChannelsOrder.NHWC;
    /// <summary>
    /// Static constructor, initializes and caches data
    /// </summary>
    static ComputeInfo()
    {
        // Allow forcing channels-first layout from the command line.
        string[] args = System.Environment.GetCommandLineArgs ();
        for (int i = 0; i < args.Length; i++) {
            if (args [i] == "-barracuda-compute-use-nchw")
            {
                channelsOrder = ChannelsOrder.NCHW;
            }
        }
        supportsCompute = SystemInfo.supportsComputeShaders;
        graphicsDeviceVendor = SystemInfo.graphicsDeviceVendor;
        // TODO switch to SystemInfo.maxComputeWorkGroupSize when we bump min spec to 2019.3
        if (Application.platform == RuntimePlatform.Android)
        {
            maxComputeWorkGroupSize = (SystemInfo.graphicsDeviceType == GraphicsDeviceType.Vulkan) ? 256u : 128u;
            var gpuName = SystemInfo.graphicsDeviceName ?? "";
            var osName = SystemInfo.operatingSystem ?? "";
            // Known issue with Adreno Vulkan drivers on Android 8.x
            if (gpuName.Contains("Adreno") && osName.StartsWith("Android OS 8") &&
                SystemInfo.graphicsDeviceType == GraphicsDeviceType.Vulkan)
                maxComputeWorkGroupSize = 128u;
        }
        else if (Application.platform == RuntimePlatform.IPhonePlayer || Application.platform == RuntimePlatform.tvOS)
        {
            var gpuName = SystemInfo.graphicsDeviceName;
            if (gpuName != null && gpuName.StartsWith("Apple A"))
            {
                // Parse the chip generation number out of e.g. "Apple A12 GPU".
                int gpuNumber = 0, idx = "Apple A".Length;
                while (idx < gpuName.Length && '0' <= gpuName[idx] && gpuName[idx] <= '9')
                {
                    gpuNumber = gpuNumber * 10 + gpuName[idx++] - '0';
                }
                // TODO check on lower end iOS devices
                maxComputeWorkGroupSize = (gpuNumber <= 10) ? 224u : 256u;
            }
            else
            {
                maxComputeWorkGroupSize = 256u;
            }
        }
    }
}
}

View File

@@ -1,3 +0,0 @@
fileFormatVersion: 2
guid: 96aee99fc4154e2a991ac0edd6056c2b
timeCreated: 1558541124

View File

@@ -1,404 +0,0 @@
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using UnityEngine;
using UnityEngine.Profiling;
namespace Unity.Barracuda
{
// Selects which kernel family a lookup targets:
// Reference = the slow-but-correct fallback implementation,
// Optimized = the hand-tuned per-shader kernels.
internal enum ComputeShaderContext
{
    Reference,
    Optimized
}
/// <summary>
/// Stores compute kernel cache for GPU compute backends
/// </summary>
public sealed class ComputeShaderSingleton
{
/// <summary>
/// Enable kernel usage tracking
/// </summary>
public bool EnableDebug = false;
private static readonly ComputeShaderSingleton instance = new ComputeShaderSingleton ();
// Maps kernel name -> shader name
private Dictionary<string, string> mKernelToShaderName = new Dictionary<string, string>();
// Maps shader name -> ComputeShader
private Dictionary<string, ComputeShader> mShaderNameToComputeShader = new Dictionary<string, ComputeShader>();
private HashSet<string> mUsedOptimizedKernels = new HashSet<string>();
private HashSet<string> mUsedReferenceKernels = new HashSet<string>();
private ComputeShaderSingleton()
{
RegisterKernels("Barracuda/TextureUtils",
new[] {"TextureToTensor", "TensorToTextureNoLUT", "TensorToTexture3DLUT"});
RegisterKernels("Barracuda/ActivationA",
new[]
{
"Relu_Flat", "Relu_FlatStrict", "Relu_Loop", "Relu6_Flat", "Relu6_FlatStrict", "Relu6_Loop",
"Tanh_Flat", "Tanh_FlatStrict", "Tanh_Loop", "Swish_Flat", "Swish_FlatStrict", "Swish_Loop",
"Sigmoid_Flat", "Sigmoid_FlatStrict", "Sigmoid_Loop", "LeakyRelu_Flat", "LeakyRelu_FlatStrict",
"LeakyRelu_Loop", "Clip_Flat", "Clip_FlatStrict", "Clip_Loop", "PRelu_Flat", "PRelu_Loop"
});
RegisterKernels("Barracuda/ActivationB",
new[]
{
"Reciprocal_Flat", "Reciprocal_FlatStrict", "Reciprocal_Loop", "Sqrt_Flat", "Sqrt_FlatStrict",
"Sqrt_Loop", "HardSigmoid_Flat", "HardSigmoid_FlatStrict", "HardSigmoid_Loop"
});
RegisterKernels("Barracuda/ActivationBase",
new string[]
{
"Abs_Flat", "Abs_FlatStrict", "Abs_Loop", "Neg_Flat", "Neg_FlatStrict", "Neg_Loop", "Ceil_Flat",
"Ceil_FlatStrict", "Ceil_Loop", "Floor_Flat", "Floor_FlatStrict", "Floor_Loop",
"Round_Flat", "Round_FlatStrict", "Round_Loop", "Selu_Flat",
"Selu_FlatStrict", "Selu_Loop", "Softplus_Flat", "Softplus_FlatStrict", "Softplus_Loop", "Elu_Flat",
"Elu_FlatStrict", "Elu_Loop", "Exp_Flat", "Exp_FlatStrict", "Exp_Loop", "Log_Flat",
"Log_FlatStrict", "Log_Loop", "Pow_Flat", "Pow_FlatStrict", "Pow_Loop", "LogicalNot_Flat",
"LogicalNot_FlatStrict", "LogicalNot_Loop", "Sign_Flat", "Sign_FlatStrict", "Sign_Loop",
"Acos_Flat", "Acos_FlatStrict", "Acos_Loop",
"Acosh_Flat", "Acosh_FlatStrict", "Acosh_Loop", "Asin_Flat", "Asin_FlatStrict", "Asin_Loop",
"Asinh_Flat", "Asinh_FlatStrict", "Asinh_Loop", "Atan_Flat", "Atan_FlatStrict", "Atan_Loop",
"Atanh_Flat", "Atanh_FlatStrict", "Atanh_Loop", "Cos_Flat", "Cos_FlatStrict", "Cos_Loop",
"Cosh_Flat", "Cosh_FlatStrict", "Cosh_Loop", "Sin_Flat", "Sin_FlatStrict", "Sin_Loop", "Sinh_Flat",
"Sinh_FlatStrict", "Sinh_Loop", "Tan_Flat", "Tan_FlatStrict", "Tan_Loop", "Erf_Flat", "Erf_FlatStrict", "Erf_Loop",
"Relu_NHWC", "Relu_NCHW", "Relu_CNyx_NHWC", "Relu_Nyxc_NHWC", "Relu6_NHWC", "Relu6_NCHW", "Relu6_CNyx_NHWC",
"Relu6_Nyxc_NHWC", "PRelu_NHWC", "PRelu_NCHW", "PRelu_CNyx2_NHWC", "Selu_NHWC", "Selu_NCHW",
"Selu_CNyx_NHWC", "Selu_Nyxc_NHWC", "Tanh_NHWC", "Tanh_NCHW", "Tanh_CNyx_NHWC", "Tanh_Nyxc_NHWC",
"Swish_NHWC", "Swish_NCHW", "Swish_CNyx_NHWC", "Swish_Nyxc_NHWC", "Softplus_NHWC", "Softplus_NCHW",
"Softplus_CNyx_NHWC", "Softplus_Nyxc_NHWC", "Sigmoid_NHWC", "Sigmoid_NCHW", "Sigmoid_CNyx_NHWC",
"Sigmoid_Nyxc_NHWC", "HardSigmoid_NHWC", "HardSigmoid_NCHW", "HardSigmoid_CNyx_NHWC", "HardSigmoid_Nyxc_NHWC",
"Elu_NHWC", "Elu_NCHW", "Elu_CNyx_NHWC", "Elu_Nyxc_NHWC", "LeakyRelu_NHWC",
"LeakyRelu_NCHW", "LeakyRelu_CNyx_NHWC", "LeakyRelu_Nyxc_NHWC", "Exp_NHWC", "Exp_NCHW",
"Exp_CNyx_NHWC", "Exp_Nyxc_NHWC", "Log_NHWC", "Log_NCHW", "Log_CNyx_NHWC", "Log_Nyxc_NHWC",
"Sqrt_NHWC", "Sqrt_NCHW", "Sqrt_CNyx_NHWC", "Sqrt_Nyxc_NHWC", "Pow_NHWC", "Pow_NCHW",
"Pow_CNyx_NHWC", "Pow_Nyxc_NHWC",
"Clip_NHWC", "Clip_NCHW", "Clip_CNyx_NHWC", "Clip_Nyxc_NHWC", "Acos_NHWC",
"Acos_NCHW", "Acos_CNyx_NHWC", "Acos_Nyxc_NHWC", "Acosh_NHWC", "Acosh_NCHW", "Acosh_CNyx_NHWC",
"Acosh_Nyxc_NHWC", "Asin_NHWC", "Asin_NCHW", "Asin_CNyx_NHWC", "Asin_Nyxc_NHWC", "Asinh_NHWC",
"Asinh_NCHW", "Asinh_CNyx_NHWC", "Asinh_Nyxc_NHWC", "Atan_NHWC", "Atan_NCHW", "Atan_CNyx_NHWC",
"Atan_Nyxc_NHWC", "Atanh_NHWC", "Atanh_NCHW", "Atanh_CNyx_NHWC", "Atanh_Nyxc_NHWC", "Cos_NHWC",
"Cos_NCHW", "Cos_CNyx_NHWC", "Cos_Nyxc_NHWC", "Cosh_NHWC", "Cosh_NCHW", "Cosh_CNyx_NHWC",
"Cosh_Nyxc_NHWC", "Sin_NHWC", "Sin_NCHW", "Sin_CNyx_NHWC", "Sin_Nyxc_NHWC", "Sinh_NHWC",
"Sinh_NCHW", "Sinh_CNyx_NHWC", "Sinh_Nyxc_NHWC", "Tan_NHWC", "Tan_NCHW", "Tan_CNyx_NHWC",
"Tan_Nyxc_NHWC", "Erf_NHWC", "Erf_NCHW", "Erf_CNyx_NHWC", "Erf_Nyxc_NHWC"
});
RegisterKernels("Barracuda/Broadcast_NHWC",
new[]
{
"BroadcastAdd_NHWC", "BroadcastSub_NHWC", "BroadcastMul_NHWC", "BroadcastDiv_NHWC",
"BroadcastPow_NHWC", "BroadcastMin_NHWC", "BroadcastMax_NHWC", "BroadcastMean_NHWC",
"BroadcastGreater_NHWC", "BroadcastGreaterEqual_NHWC", "BroadcastLess_NHWC",
"BroadcastLessEqual_NHWC", "BroadcastEqual_NHWC", "BroadcastLogicalOr_NHWC",
"BroadcastLogicalAnd_NHWC", "BroadcastLogicalXor_NHWC", "BroadcastWhere_NHWC",
"BroadcastDivExpSub_NHWC", "LogSoftmaxEnd_NHWC"
});
RegisterKernels("Barracuda/Broadcast_NCHW",
new[]
{
"BroadcastAdd_NCHW", "BroadcastSub_NCHW", "BroadcastMul_NCHW", "BroadcastDiv_NCHW",
"BroadcastPow_NCHW", "BroadcastMin_NCHW", "BroadcastMax_NCHW", "BroadcastMean_NCHW",
"BroadcastGreater_NCHW", "BroadcastGreaterEqual_NCHW", "BroadcastLess_NCHW",
"BroadcastLessEqual_NCHW", "BroadcastEqual_NCHW", "BroadcastLogicalOr_NCHW",
"BroadcastLogicalAnd_NCHW", "BroadcastLogicalXor_NCHW", "BroadcastWhere_NCHW",
"BroadcastDivExpSub_NCHW", "LogSoftmaxEnd_NCHW"
});
RegisterKernels("Barracuda/Conv2dA_NHWC",
new[]
{
"Conv2D_NHWC", "Conv2D_RegisterBlock4x2_NHWC", "DepthwiseConv2D_NHWC",
"Conv2DKernelKxK_StrictC16K64_T16x16_R4x4_NHWC", "Conv2DKernelKxK_T16x16_R4x4_NHWC",
"Conv2DKernel1x1_StrictC16K64_T16x16_R4x4_NHWC"
});
RegisterKernels("Barracuda/Conv2dA_NCHW",
new[]
{
"Conv2D_NCHW", "Conv2D_RegisterBlock4x2_NCHW", "DepthwiseConv2D_NCHW",
"Conv2DKernelKxK_StrictC16K64_T16x16_R4x4_NCHW", "Conv2DKernelKxK_T16x16_R4x4_NCHW",
"Conv2DKernel1x1_StrictC16K64_T16x16_R4x4_NCHW"
});
RegisterKernels("Barracuda/Conv2dBase",
new[]
{
"Conv2DKernelKxK_StrictC16StrictK64_T8x8_R8x8_NHWC",
"Conv2DKernelKxK_StrictC16StrictK64_T8x8_R8x8_NCHW",
"Conv2DKernelKxK_StrictC16LaxK64_T8x8_R8x8_NHWC", "Conv2DKernelKxK_StrictC16LaxK64_T8x8_R8x8_NCHW",
"Conv2DKernelKxK_StrictC4StrictK16_T2x32_R8x8_NHWC",
"Conv2DKernelKxK_StrictC4StrictK16_T2x32_R8x8_NCHW",
"Conv2DKernelKxK_LaxC4StrictK16_T2x32_R8x8_NHWC", "Conv2DKernelKxK_LaxC4StrictK16_T2x32_R8x8_NCHW",
"Conv2DKernelKxK_StrictC4LaxK16_T2x32_R8x8_NHWC", "Conv2DKernelKxK_StrictC4LaxK16_T2x32_R8x8_NCHW",
"Conv2DTrans_NHWC", "Conv2DTrans_NCHW", "Conv2DTrans_KernelCached_K5x5_T16x16_NHWC",
"Conv2DTrans_KernelCached_K5x5_T16x16_NCHW", "Conv2DTransFlipKernel", "Conv2DTransPadFill_NHWC",
"Conv2DTransPadFill_NCHW", "KernelWinograd_3x3",
"Conv2DWinograd_2x2_Kernel3x3_StrictC8StrictK16_T16x16_R4x4_NCHW",
"Conv2DWinograd_2x2_Kernel3x3_StrictC8LaxK16_T16x16_R4x4_NCHW"
});
RegisterKernels("Barracuda/Conv2dMobile",
new[]
{
//"Conv2D_Default_T8x8_R4x4_NHWC",
//"Conv2D_Default_T8x8_R4x4_NHWC",
"Conv2D_Winograd_2x2_Kernel3x3_LDS_NHWC",
"Conv2D_Winograd_2x2_Kernel3x3_LDS_NHWC",
//"Conv2D_Winograd_2x2_Kernel3x3_NHWC",
//"Conv2D_Winograd_2x2_Kernel3x3_NHWC",
//"Conv2D_Kernel1x1_1x4x4_NHWC",
//"Conv2D_Kernel1x1_1x4x4_NCHW",
"Conv2D_KernelKxK_T16x16_R4x4_NHWC",
"Conv2D_KernelKxK_T16x16_R4x4_NCHW",
"Conv2D_Kernel1x1_T16x16_R4x4_NHWC",
"Conv2D_Kernel1x1_T16x16_R4x4_NCHW",
"Conv2D_KernelKxK_T8x8_R4x4_NHWC",
"Conv2D_KernelKxK_T8x8_R4x4_NCHW",
"Conv2D_Kernel1x1_T8x8_R4x4_NHWC",
"Conv2D_Kernel1x1_T8x8_R4x4_NCHW",
"DepthwiseConv2D_Default_NHWC",
"DepthwiseConv2D_Default_NCHW",
"DepthwiseConv2D_Winograd_2x2_Kernel3x3_NHWC",
"DepthwiseConv2D_Winograd_2x2_Kernel3x3_NCHW",
//"DepthwiseConv2D_Winograd_2x2_Kernel5x5_NHWC",
//"DepthwiseConv2D_Winograd_2x2_Kernel5x5_NCHW",
//"KernelWinograd_5x5"
});
RegisterKernels("Barracuda/Conv3d",
new[]
{
"Conv3D_NHWC", "Conv3D_NCHW", "Conv3DKernelKxK_LaxC8LaxK32_T8x16_R4x4_NHWC",
"Conv3DKernelKxK_LaxC8LaxK32_T8x16_R4x4_NCHW", "Conv3DKernelKxK_StrictC8LaxK32_T8x16_R4x4_NHWC",
"Conv3DKernelKxK_StrictC8LaxK32_T8x16_R4x4_NCHW",
"Conv3DKernelKxK_StrictC8StrictK32_T8x16_R4x4_NHWC",
"Conv3DKernelKxK_StrictC8StrictK32_T8x16_R4x4_NCHW"
});
RegisterKernels("Barracuda/Dense",
new[]
{
"Dense_L1Cached64", "DenseTiled16x16", "DenseTiled32x32", "DenseTiled64x64", "Dense_T8x8_R4x4",
"Dense_T16x16_R4x4", "Dense_Tilled2x2_Cached", "Dense_Tilled4x4_Cached", "MatMulPackB0Bias",
"Dense_V_L1Cached64"
});
RegisterKernels("Barracuda/MatMul",
new[]
{
"MultidimMatMul_T16x16_R4x4_AR3_BR2_NHWC", "MultidimMatMul_T16x16_R4x4_AR3_BR2_NCHW",
"MultidimMatMul_T8x8_R8x8_AR3_BR2_NHWC", "MultidimMatMul_T8x8_R8x8_AR3_BR2_NCHW",
"MultidimMatMul_L1Cached64_AR3_BR2_NHWC", "MultidimMatMul_L1Cached64_AR3_BR2_NCHW"
});
RegisterKernels("Barracuda/Dense3",
new[]
{
"Dense3_T8x8_R8x8_NHWC", "Dense3_T8x8_R8x8_NCHW",
"Dense3_T8x16_R4x4_NHWC", "Dense3_T8x16_R4x4_NCHW",
"Dense3_L1Cached64_NHWC", "Dense3_L1Cached64_NCHW"
});
RegisterKernels("Barracuda/Generic",
new[]
{
"ScaleBias_NHWC", "ScaleBias_NCHW", "ScaleBias_CNyx_NHWC", "ScaleBias_CNyx2_NHWC",
"ScaleBias_Flat_NHWC", "ScaleBias_Flat_NCHW", "ScaleBias_Loop_NHWC", "ScaleBias_Loop_NCHW",
"InstanceNormTail_CNyx2_NHWC", "InstanceNormTail_Flat_NHWC", "InstanceNormTail_Flat_NCHW",
"InstanceNormTail_Loop_NHWC", "InstanceNormTail_Loop_NCHW", "Upsample2D_NHWC", "Upsample2D_NCHW",
"UpsampleBilinear2D_NHWC", "UpsampleBilinear2D_NCHW", "UpsampleBilinear2D_2x2_NHWC",
"UpsampleBilinear2D_2x2_NCHW", "Copy_NHWC", "Copy_NCHW", "ReshapeFromNHWCModel_Flat_NCHW",
"ReshapeFromNHWCModel_Loop_NCHW", "TransposeToChannelFirst"
});
RegisterKernels("Barracuda/Pad",
new[]
{
"Border2D_NHWC", "Border2D_NCHW", "Pad2DEdge_NHWC", "Pad2DEdge_NCHW", "Pad2DReflect_NHWC",
"Pad2DReflect_NCHW", "Pad2DSymmetric_NHWC", "Pad2DSymmetric_NCHW"
});
RegisterKernels("Barracuda/Transpose",
new[]
{
"Transpose2D_NHWC","Transpose2D_NCHW","Transpose_NHWC","Transpose_NCHW","Transpose8D"
});
RegisterKernels("Barracuda/Pool_NHWC",
new[]
{
"AvgPool2D_NHWC", "MaxPool2D_NHWC", "AvgPool2DReduce_NHWC", "MaxPool2DReduce_NHWC",
"GlobalAvgPool2D_NHWC", "GlobalMaxPool2D_NHWC", "AvgVariancePool2DReduce_NHWC",
"GlobalAvgVariancePool2D_NHWC"
});
RegisterKernels("Barracuda/Pool_NCHW",
new[]
{
"AvgPool2D_NCHW", "MaxPool2D_NCHW", "AvgPool2DReduce_NCHW", "MaxPool2DReduce_NCHW",
"GlobalAvgPool2D_NCHW", "GlobalMaxPool2D_NCHW", "AvgVariancePool2DReduce_NCHW",
"GlobalAvgVariancePool2D_NCHW"
});
RegisterKernels("Barracuda/Reduce",
new[]
{
"PartialReduceMin", "PartialReduceMin_Loop",
"GlobalReduceMin", "GlobalReduceMin_Loop",
"PartialReduceMax", "PartialReduceMax_Loop",
"GlobalReduceMax", "GlobalReduceMax_Loop",
"PartialReduceSum", "PartialReduceSum_Loop",
"GlobalReduceSum", "GlobalReduceSum_Loop",
"PartialReduceMean", "PartialReduceMean_Loop",
"GlobalReduceMean", "GlobalReduceMean_Loop",
"PartialReduceProd", "PartialReduceProd_Loop",
"GlobalReduceProd", "GlobalReduceProd_Loop",
"PartialReduceExpBias", "PartialReduceExpBias_Loop",
"GlobalReduceExpBias", "GlobalReduceExpBias_Loop"
});
RegisterKernels("Barracuda/ReduceSlow",
new[]
{
"ArgMax_NHWC", "ArgMax_NCHW", "ArgMin_NHWC", "ArgMin_NCHW"
});
}
private void RegisterKernels(string shaderName, string[] kernels)
{
foreach (var kernel in kernels)
{
mKernelToShaderName[kernel] = shaderName;
}
}
internal ComputeShader FindComputeShader(ComputeShaderContext ctx, string kernelName)
{
if (ctx == ComputeShaderContext.Optimized)
return FindOptimizedComputeShader(kernelName);
return FindReferenceComputeShader(kernelName);
}
private ComputeShader FindReferenceComputeShader(string kernelName)
{
if (EnableDebug) mUsedReferenceKernels.Add(kernelName);
return FindComputeShader("Barracuda/BarracudaReferenceImpl");
}
private ComputeShader FindOptimizedComputeShader(string kernelName)
{
string shaderName = null;
mKernelToShaderName.TryGetValue(kernelName, out shaderName);
// Kernel not found
if (shaderName == null)
return null;
if (EnableDebug) mUsedOptimizedKernels.Add(kernelName);
return FindComputeShader(shaderName);
}
private ComputeShader FindComputeShader(string shaderName)
{
if (!mShaderNameToComputeShader.ContainsKey(shaderName))
{
Profiler.BeginSample(shaderName);
mShaderNameToComputeShader[shaderName] = Resources.Load<ComputeShader>(shaderName);
Profiler.EndSample();
}
return mShaderNameToComputeShader[shaderName];
}
/// <summary>
/// Warmup reference kernels
/// </summary>
/// <param name="kernels">list of kernels to warm up</param>
/// <returns>IEnumerator</returns>
public IEnumerator WarmupReferenceKernels(List<string> kernels)
{
if (kernels?.Count > 0)
FindComputeShader("Barracuda/BarracudaReferenceImpl");
yield break;
}
/// <summary>
/// Warmup optimized kernels
/// </summary>
/// <param name="kernels">list of kernels to warm up</param>
/// <returns>IEnumerator</returns>
public IEnumerator WarmupOptimizedKernels(List<string> kernels)
{
foreach (var kernel in kernels)
{
var shader = mKernelToShaderName[kernel];
if (!mShaderNameToComputeShader.ContainsKey(shader))
{
FindComputeShader(shader);
yield return null;
}
}
yield break;
}
/// <summary>
/// Get used reference kernels list
/// </summary>
/// <returns>snapshot list of kernels, or null when debug tracking is disabled</returns>
public List<string> GetUsedReferenceKernels()
{
    if (EnableDebug)
        return mUsedReferenceKernels.ToList();

    D.LogWarning("List of used kernels was requested while ComputeShaderSingleton.EnableDebug == false");
    return null;
}
/// <summary>
/// Get used optimized kernels list
/// </summary>
/// <returns>snapshot list of kernels, or null when debug tracking is disabled</returns>
public List<string> GetUsedOptimizedKernels()
{
    if (EnableDebug)
        return mUsedOptimizedKernels.ToList();

    D.LogWarning("List of used kernels was requested while ComputeShaderSingleton.EnableDebug == false");
    return null;
}
/// <summary>
/// Singleton accessor.
/// </summary>
public static ComputeShaderSingleton Instance => instance;
/// <summary>
/// Check if GPU compute is supported
/// </summary>
public bool supported => SystemInfo.supportsComputeShaders;
}
}

View File

@@ -1,12 +0,0 @@
fileFormatVersion: 2
guid: 815b6432da283415d87dabe9ef715cd9
timeCreated: 1495620775
licenseType: Pro
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,12 +0,0 @@
fileFormatVersion: 2
guid: f7473266805a8439287433d3dac88945
timeCreated: 1506427659
licenseType: Pro
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,758 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq; // ToArray(), ToDictionary()
namespace Unity.Barracuda
{
internal class LinearLayerFusing
{
public static bool IsLayerLinear(Layer layer, Dictionary<string, Layer> constantLayers)
{
var constInputs = layer.inputs.Count(x => constantLayers.ContainsKey(x));
bool allConstInputsButOne = (layer.inputs.Length - constInputs) == 1;
return layer.type == Layer.Type.Dense ||
layer.type == Layer.Type.Conv2D || //TODO Conv3D
layer.type == Layer.Type.DepthwiseConv2D ||
layer.type == Layer.Type.ScaleBias ||
IsLayerLinearMathOp(layer) && allConstInputsButOne;
}
public static bool IsLayerLinearMathOp(Layer layer)
{
return layer.type == Layer.Type.Add ||
layer.type == Layer.Type.Mul;
}
public bool AreLayersFusable(Layer l0, Layer l1)
{
bool conditions = true;
if ((l0.type == Layer.Type.DepthwiseConv2D) || (l0.type == Layer.Type.Conv2D) || (l0.type == Layer.Type.ScaleBias) &&
(l1.type == Layer.Type.Conv2D) || (l1.type == Layer.Type.DepthwiseConv2D))
conditions = conditions && !l1.pad.Any(x => x != 0); // padding breaks bias merging for non-zero bias
if (IsLayerLinearMathOp(l0) && (l1.type == Layer.Type.Conv2D))
{
if (l0.datasets == null || l0.datasets.Length != 1)
return false;
conditions = conditions && (l0.datasets[0].shape.length == 1) ||
(l0.datasets[0].shape.batch == 1 && l0.datasets[0].shape.height == 1 && l0.datasets[0].shape.width == 1 && l0.datasets[0].shape.channels == l1.datasets[0].shape.kernelCount);
}
if ((l0.type == Layer.Type.Conv2D) && IsLayerLinearMathOp(l1))
{
if (l1.datasets == null || l1.datasets.Length != 1)
return false;
conditions = conditions && (l1.datasets[0].shape.length == 1) ||
(l1.datasets[0].shape.batch == 1 && l1.datasets[0].shape.height == 1 && l1.datasets[0].shape.width == 1 && l1.datasets[0].shape.channels == l0.datasets[0].shape.kernelCount);
}
return m_LayerFusers.ContainsKey((l0.type, l1.type)) && conditions;
}
private readonly BurstCPUOps m_Ops = new BurstCPUOps();
private readonly Dictionary<(Layer.Type, Layer.Type), Func<Layer, Layer, Layer>> m_LayerFusers =
new Dictionary<(Layer.Type, Layer.Type), Func<Layer, Layer, Layer>>();
private void Add((Layer.Type, Layer.Type) layersType, Func<Layer, Layer, Layer> opFuseAction)
{
m_LayerFusers.Add(layersType, opFuseAction);
}
public LinearLayerFusing()
{
Add((Layer.Type.Add, Layer.Type.Add), (l0, l1) =>
{
Tensor bias0 = l0.DataSetToTensor(0);
Tensor bias1 = l1.DataSetToTensor(0);
int rankO = Math.Max(bias0.dimensions, bias1.dimensions);
if (l0.axis >= 0 && l1.axis >= 0) // legacy tests don't store constant rank in axis
{
// broadcast rule
int rank0 = l0.axis;
List<int> shape0 = Compiler.IRShapeInferenceHelper.ShapeInference.ShapeToOnnxLayout(bias0.shape, rank0);
rank0 = Math.Max(rank0, 1);
int rank1 = l1.axis;
List<int> shape1 = Compiler.IRShapeInferenceHelper.ShapeInference.ShapeToOnnxLayout(bias1.shape, rank1);
rank1 = Math.Max(rank1, 1);
rankO = Math.Max(rank0, rank1);
for (int k = 0; k < rankO - rank0; k++)
shape0.Insert(0, 1);
for (int k = 0; k < rankO - rank1; k++)
shape1.Insert(0, 1);
bias0 = bias0.Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.OnnxLayoutToTensorShape(shape0.ToArray()));
bias1 = bias1.Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.OnnxLayoutToTensorShape(shape1.ToArray()));
}
TensorShape biasShape = TensorExtensions.MaxShape(new [] { bias0, bias1 });
Layer lmerged = new Layer(l0.name, l0.type);
lmerged.inputs = l0.inputs;
lmerged.datasets = new Layer.DataSet[1];
lmerged.datasets[0].name = l0.datasets[0].name;
lmerged.datasets[0].shape = biasShape;
lmerged.datasets[0].itemSizeInBytes = 4;
lmerged.datasets[0].length = biasShape.length;
lmerged.datasets[0].offset = 0;
lmerged.weights = new BarracudaArray(biasShape.length);
lmerged.axis = rankO;
Tensor bias = m_Ops.Add(new [] { bias0, bias1 });
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, 0, bias.length);
bias.Dispose();
bias0.Dispose();
bias1.Dispose();
return lmerged;
});
Add((Layer.Type.Mul, Layer.Type.Mul), (l0, l1) =>
{
Tensor scale0 = l0.DataSetToTensor(0);
Tensor scale1 = l1.DataSetToTensor(0);
int rankO = Math.Max(scale0.dimensions, scale1.dimensions);
if (l0.axis >= 0 && l1.axis >= 0) // legacy tests don't store constant rank in axis
{
// broadcast rule
int rank0 = l0.axis;
List<int> shape0 = Compiler.IRShapeInferenceHelper.ShapeInference.ShapeToOnnxLayout(scale0.shape, rank0);
rank0 = Math.Max(rank0, 1);
int rank1 = l1.axis;
List<int> shape1 = Compiler.IRShapeInferenceHelper.ShapeInference.ShapeToOnnxLayout(scale1.shape, rank1);
rank1 = Math.Max(rank1, 1);
rankO = Math.Max(rank0, rank1);
for (int k = 0; k < rankO - rank0; k++)
shape0.Insert(0, 1);
for (int k = 0; k < rankO - rank1; k++)
shape1.Insert(0, 1);
scale0 = scale0.Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.OnnxLayoutToTensorShape(shape0.ToArray()));
scale1 = scale1.Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.OnnxLayoutToTensorShape(shape1.ToArray()));
}
TensorShape biasShape = TensorExtensions.MaxShape(new[] { scale0, scale1 });
Layer lmerged = new Layer(l0.name, l0.type);
lmerged.inputs = l0.inputs;
lmerged.datasets = new Layer.DataSet[1];
lmerged.datasets[0].name = l0.datasets[0].name;
lmerged.datasets[0].shape = biasShape;
lmerged.datasets[0].itemSizeInBytes = 4;
lmerged.datasets[0].length = biasShape.length;
lmerged.datasets[0].offset = 0;
lmerged.weights = new BarracudaArray(biasShape.length);
lmerged.axis = rankO;
Tensor bias = m_Ops.Mul(new[] { scale0, scale1 });
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, 0, bias.length);
bias.Dispose();
scale0.Dispose();
scale1.Dispose();
return lmerged;
});
Add((Layer.Type.ScaleBias, Layer.Type.ScaleBias), (l0, l1) =>
{
Tensor scale0 = l0.DataSetToTensor(0);
Tensor bias0 = l0.DataSetToTensor(1);
Tensor scale1 = l1.DataSetToTensor(0);
Tensor bias1 = l1.DataSetToTensor(1);
Layer lmerged = new Layer(l0.name, l0.type);
lmerged.inputs = l0.inputs;
lmerged.datasets = l0.datasets;
lmerged.weights = new BarracudaArray(l0.weights.Length);
// s1*(s0*x + b0)+b1 = s1*s0*x + s1*b0+b1
Tensor scale = m_Ops.Mul(new [] { scale1, scale0});
Tensor bias = m_Ops.ScaleBias(bias0, scale1, bias1);
BarracudaArray.Copy(scale.ToReadOnlyArray(), 0, lmerged.weights, 0, scale.length);
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, scale.length, bias.length);
scale.Dispose();
bias.Dispose();
scale0.Dispose();
bias0.Dispose();
scale1.Dispose();
bias1.Dispose();
return lmerged;
});
Add((Layer.Type.ScaleBias, Layer.Type.Dense), (l0, l1) =>
{
Tensor scale0 = l0.DataSetToTensor(0);
Tensor bias0 = l0.DataSetToTensor(1);
Tensor weights1 = l1.DataSetToTensor(0);
Tensor bias1 = l1.DataSetToTensor(1);
Layer lmerged = new Layer(l0.name, l1.type);
lmerged.inputs = l0.inputs;
lmerged.datasets = l1.datasets;
lmerged.weights = new BarracudaArray(l1.weights.Length);
// b = W1 x b0 + b1
Tensor bias = m_Ops.Dense(bias0, weights1, bias1, Layer.FusedActivation.None);
// W = W1 x s
Tensor weights = new Tensor(weights1.shape);
for (int x = 0; x < weights1.flatWidth; ++x)
for (int i = 0; i < weights1.flatHeight; ++i)
{
int c = i % bias0.length;
float gamma = scale0[c];
float w = weights1[i, x];
weights[i, x] = w * gamma;
}
BarracudaArray.Copy(weights.ToReadOnlyArray(), 0, lmerged.weights, 0, weights.length);
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, weights.length, bias.length);
bias.Dispose();
weights.Dispose();
scale0.Dispose();
bias0.Dispose();
weights1.Dispose();
bias1.Dispose();
return lmerged;
});
Add((Layer.Type.Dense, Layer.Type.ScaleBias), (l0, l1) =>
{
Tensor weights0 = l0.DataSetToTensor(0);
Tensor bias0 = l0.DataSetToTensor(1);
Tensor scale1 = l1.DataSetToTensor(0);
Tensor bias1 = l1.DataSetToTensor(1);
Layer lmerged = new Layer(l0.name, l0.type);
lmerged.inputs = l0.inputs;
lmerged.datasets = l0.datasets;
lmerged.weights = new BarracudaArray(l0.weights.Length);
// w = s1*w0
Tensor weights = m_Ops.Mul(new [] { scale1, weights0 });
// b = s1*b0+b1
Tensor bias = m_Ops.ScaleBias(bias0, scale1, bias1);
BarracudaArray.Copy(weights.ToReadOnlyArray(), 0, lmerged.weights, 0, weights.length);
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, weights.length, bias.length);
weights.Dispose();
bias.Dispose();
weights0.Dispose();
bias0.Dispose();
scale1.Dispose();
bias1.Dispose();
return lmerged;
});
Add((Layer.Type.Mul, Layer.Type.Conv2D), (l0, l1) =>
{
Tensor scale0 = l0.DataSetToTensor(0);
Tensor kernel1 = l1.DataSetToTensor(0);
Tensor bias1 = l1.DataSetToTensor(1);
Layer lmerged = new Layer(l0.name, l1.type);
lmerged.pad = l1.pad;
lmerged.stride = l1.stride;
lmerged.pool = l1.pool;
lmerged.inputs = l0.inputs;
lmerged.datasets = l1.datasets;
lmerged.weights = new BarracudaArray(l1.weights.Length);
// k = k * s
Tensor kernel = new Tensor(kernel1.shape);
for (int y = 0; y < kernel1.kernelHeight; ++y)
for (int x = 0; x < kernel1.kernelWidth; ++x)
for (int c = 0; c < kernel1.kernelDepth; ++c)
{
float gamma = scale0[scale0.IndexWithBroadcast(0, 0, 0, c)];
for (int k = 0; k < kernel1.kernelCount; ++k)
{
float w = kernel1[y, x, c, k];
kernel[y, x, c, k] = gamma * w;
}
}
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
BarracudaArray.Copy(bias1.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias1.length);
kernel.Dispose();
scale0.Dispose();
kernel1.Dispose();
bias1.Dispose();
return lmerged;
});
Add((Layer.Type.Conv2D, Layer.Type.Mul), (l0, l1) =>
{
Tensor kernel0 = l0.DataSetToTensor(0);
Tensor bias0 = l0.DataSetToTensor(1);
Tensor scale1 = l1.DataSetToTensor(0);
Layer lmerged = new Layer(l0.name, l0.type);
lmerged.pad = l0.pad;
lmerged.stride = l0.stride;
lmerged.pool = l0.pool;
lmerged.inputs = l0.inputs;
lmerged.datasets = l0.datasets;
lmerged.weights = new BarracudaArray(l0.weights.Length);
// k = s1*k0
Tensor kernel = m_Ops.Mul(new[] { scale1, kernel0 });
// b = s1*b0
Tensor bias = m_Ops.Mul(new[] { scale1, bias0 });
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
kernel.Dispose();
bias.Dispose();
kernel0.Dispose();
bias0.Dispose();
scale1.Dispose();
return lmerged;
});
Add((Layer.Type.Add, Layer.Type.Conv2D), (l0, l1) =>
{
Tensor bias0 = l0.DataSetToTensor(0);
Tensor kernel1 = l1.DataSetToTensor(0);
Tensor bias1 = l1.DataSetToTensor(1);
Layer lmerged = new Layer(l0.name, l1.type);
lmerged.pad = l1.pad;
lmerged.stride = l1.stride;
lmerged.pool = l1.pool;
lmerged.inputs = l0.inputs;
lmerged.datasets = l1.datasets;
lmerged.weights = new BarracudaArray(l1.weights.Length);
// k = k
// b = Sum_k[wk * beta] + b
Tensor bias = new Tensor(bias1.shape, bias1.ToReadOnlyArray());
for (int y = 0; y < kernel1.kernelHeight; ++y)
for (int x = 0; x < kernel1.kernelWidth; ++x)
for (int c = 0; c < kernel1.kernelDepth; ++c)
{
float beta = bias0[bias0.IndexWithBroadcast(0, 0, 0, c)];
for (int k = 0; k < kernel1.kernelCount; ++k)
{
float w = kernel1[y, x, c, k];
bias[k] += w * beta;
}
}
BarracudaArray.Copy(kernel1.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel1.length);
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel1.length, bias.length);
bias.Dispose();
bias0.Dispose();
kernel1.Dispose();
bias1.Dispose();
return lmerged;
});
Add((Layer.Type.Conv2D, Layer.Type.Add), (l0, l1) =>
{
Tensor kernel0 = l0.DataSetToTensor(0);
Tensor bias0 = l0.DataSetToTensor(1);
Tensor bias1 = l1.DataSetToTensor(0);
Layer lmerged = new Layer(l0.name, l0.type);
lmerged.pad = l0.pad;
lmerged.stride = l0.stride;
lmerged.pool = l0.pool;
lmerged.inputs = l0.inputs;
lmerged.datasets = l0.datasets;
lmerged.weights = new BarracudaArray(l0.weights.Length);
// b = b0+b1
Tensor bias = m_Ops.Add( new [] { bias0, bias1 });
BarracudaArray.Copy(kernel0.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel0.length);
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel0.length, bias.length);
bias.Dispose();
kernel0.Dispose();
bias0.Dispose();
bias1.Dispose();
return lmerged;
});
Add((Layer.Type.Conv2D, Layer.Type.ScaleBias), (l0, l1) =>
{
Tensor kernel0 = l0.DataSetToTensor(0);
Tensor bias0 = l0.DataSetToTensor(1);
Tensor scale1 = l1.DataSetToTensor(0);
Tensor bias1 = l1.DataSetToTensor(1);
Layer lmerged = new Layer(l0.name, l0.type);
lmerged.pad = l0.pad;
lmerged.stride = l0.stride;
lmerged.pool = l0.pool;
lmerged.inputs = l0.inputs;
lmerged.datasets = l0.datasets;
lmerged.weights = new BarracudaArray(l0.weights.Length);
// k = s1*k0
Tensor kernel = m_Ops.Mul(new[] { scale1, kernel0 });
// b = s1*b0+b1
Tensor bias = m_Ops.ScaleBias(bias0, scale1, bias1);
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
kernel.Dispose();
bias.Dispose();
kernel0.Dispose();
bias0.Dispose();
scale1.Dispose();
bias1.Dispose();
return lmerged;
});
Add((Layer.Type.ScaleBias, Layer.Type.Conv2D), (l0, l1) =>
{
Tensor scale0 = l0.DataSetToTensor(0);
Tensor bias0 = l0.DataSetToTensor(1);
Tensor kernel1 = l1.DataSetToTensor(0);
Tensor bias1 = l1.DataSetToTensor(1);
Layer lmerged = new Layer(l0.name, l1.type);
lmerged.pad = l1.pad;
lmerged.stride = l1.stride;
lmerged.pool = l1.pool;
lmerged.inputs = l0.inputs;
lmerged.datasets = l1.datasets;
lmerged.weights = new BarracudaArray(l1.weights.Length);
// k = k * s
Tensor kernel = new Tensor(kernel1.shape);
// b = Sum_k[wk * beta] + b
Tensor bias = new Tensor(bias1.shape, bias1.ToReadOnlyArray());
for (int y = 0; y < kernel1.kernelHeight; ++y)
for (int x = 0; x < kernel1.kernelWidth; ++x)
for (int c = 0; c < kernel1.kernelDepth; ++c)
{
float beta = bias0[0, 0, 0, c];
float gamma = scale0[0, 0, 0, c];
for (int k = 0; k < kernel1.kernelCount; ++k)
{
float w = kernel1[y, x, c, k];
kernel[y, x, c, k] = gamma * w;
bias[k] += w * beta;
}
}
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
kernel.Dispose();
bias.Dispose();
scale0.Dispose();
bias0.Dispose();
kernel1.Dispose();
bias1.Dispose();
return lmerged;
});
Add((Layer.Type.DepthwiseConv2D, Layer.Type.ScaleBias), (l0, l1) =>
{
Tensor kernel0 = l0.DataSetToTensor(0);
Tensor bias0 = l0.DataSetToTensor(1);
Tensor scale1 = l1.DataSetToTensor(0);
Tensor bias1 = l1.DataSetToTensor(1);
Layer lmerged = new Layer(l0.name, l0.type);
lmerged.pad = l0.pad;
lmerged.stride = l0.stride;
lmerged.pool = l0.pool;
lmerged.inputs = l0.inputs;
lmerged.datasets = l0.datasets;
lmerged.weights = new BarracudaArray(l0.weights.Length);
// k = s1*k0
Tensor kernel = m_Ops.Mul(new[] { scale1, kernel0 });
// b = s1*b0+b1
Tensor bias = m_Ops.ScaleBias(bias0, scale1, bias1);
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
kernel.Dispose();
bias.Dispose();
kernel0.Dispose();
bias0.Dispose();
scale1.Dispose();
bias1.Dispose();
return lmerged;
});
Add((Layer.Type.ScaleBias, Layer.Type.DepthwiseConv2D), (l0, l1) =>
{
Tensor scale0 = l0.DataSetToTensor(0);
Tensor bias0 = l0.DataSetToTensor(1);
Tensor kernel1 = l1.DataSetToTensor(0);
Tensor bias1 = l1.DataSetToTensor(1);
Layer lmerged = new Layer(l0.name, l1.type);
lmerged.pad = l1.pad;
lmerged.stride = l1.stride;
lmerged.pool = l1.pool;
lmerged.inputs = l0.inputs;
lmerged.datasets = l1.datasets;
lmerged.weights = new BarracudaArray(l1.weights.Length);
// k = k * s
Tensor kernel = new Tensor(kernel1.shape);
// b = Sum_k[wk * beta] + b
Tensor bias = new Tensor(bias1.shape);
for (int k = 0; k < kernel1.kernelCount; ++k)
{
float b = bias1[k];
float beta = bias0[0, 0, 0, k];
float gamma = scale0[0, 0, 0, k];
for (int y = 0; y < kernel1.kernelHeight; ++y)
for (int x = 0; x < kernel1.kernelWidth; ++x)
{
float w = kernel1[y, x, 0, k];
kernel[y, x, 0, k] = gamma * w;
b += w * beta;
}
bias[k] = b;
}
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
kernel.Dispose();
bias.Dispose();
scale0.Dispose();
bias0.Dispose();
kernel1.Dispose();
bias1.Dispose();
return lmerged;
});
Add((Layer.Type.Dense, Layer.Type.Dense), (l0, l1) =>
{
var weights0 = l0.DataSetToTensor(0);
var bias0 = l0.DataSetToTensor(1);
var weights1 = l1.DataSetToTensor(0);
var bias1 = l1.DataSetToTensor(1);
TensorShape weightsShape = new TensorShape(weights0.shape.flatHeight, weights1.shape.flatWidth);
Layer lmerged = new Layer(l0.name, l1.type);
lmerged.inputs = l0.inputs;
lmerged.datasets = new Layer.DataSet[2];
lmerged.datasets[0].name = weights0.name;
lmerged.datasets[0].shape = weightsShape;
lmerged.datasets[0].itemSizeInBytes = 4;
lmerged.datasets[0].length = weightsShape.length;
lmerged.datasets[0].offset = 0;
lmerged.datasets[1].name = bias0.name;
lmerged.datasets[1].shape = bias1.shape;
lmerged.datasets[1].itemSizeInBytes = 4;
lmerged.datasets[1].length = bias1.length;
lmerged.datasets[1].offset = weightsShape.length;
lmerged.weights = new BarracudaArray(weightsShape.length + bias1.shape.length);
// W = W1 x W0
Tensor weights = m_Ops.MatMul(weights0, false, weights1, false);
// b = W1 x b0 + b1
Tensor bias = m_Ops.Dense(bias0, weights1, bias1, Layer.FusedActivation.None);
BarracudaArray.Copy(weights.ToReadOnlyArray(), 0, lmerged.weights, 0, weights.length);
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, weights.length, bias.length);
weights.Dispose();
bias.Dispose();
weights0.Dispose();
bias0.Dispose();
weights1.Dispose();
bias1.Dispose();
return lmerged;
});
Add((Layer.Type.Conv2D, Layer.Type.Conv2D), (l0, l1) =>
{
Tensor kernel0 = l0.DataSetToTensor(0);
Tensor bias0 = l0.DataSetToTensor(1);
var strides0 = l0.stride;
var pad0 = l0.pad;
Tensor kernel1 = l1.DataSetToTensor(0);
Tensor bias1 = l1.DataSetToTensor(1);
var strides1 = l1.stride;
var pad1 = l1.pad;
// Y = (X * K0 + b0) * K1 + b1
// = (X * K0) * K1 + (b0 * K1 + b1)
// = X * (K0 * k1) + (b0 * K1 + b1)
// = X * K2 + b2
// K2 dimensions:
// kernelDepth and kernelCount:
// X = [n, . , . , c0], K0 = [ . , . , c0, d0] , K1 = [ . , . , c1, d1]
// => Km = [ x , x , c0, d1]
// kernelHeight and kernelHeight:
// Y = (((X + 2*p0 - k0)/s0 + 1) + 2*p1 - k1)/s1 + 1
// = ((X + 2*p0 - k0 + s0 + 2*p1*s0 - k1*s0)/s0)/s1 + 1
// = (X + 2*p0 - k0 + s0 + 2*p1*s0 - k1*s0) / (s0*s1) + 1
// = (X + 2*(p0+p1*s0) - (k0 + k1*s0 - s0)) / (s0*s1) + 1
// => pad = p0 + p1*s0
// kernel = k0 + s0*(k1 - 1)
// stride = s0*s1
TensorShape kernelShape = new TensorShape(kernel0.kernelHeight + (kernel1.kernelHeight - 1) * strides0[0],
kernel0.kernelWidth + (kernel1.kernelWidth - 1) * strides0[1],
kernel0.kernelDepth, kernel1.kernelCount);
var pad = new int[4] { pad0[0] + pad1[0] * strides0[0], pad0[1] + pad1[1] * strides0[1],
pad0[2] + pad1[2] * strides0[0], pad0[3] + pad1[3] * strides0[1] };
var strides = new int[2] { strides0[0] * strides1[0], strides0[1] * strides1[1] };
TensorShape biasShape = bias1.shape;
Layer lmerged = new Layer(l0.name, l1.type);
lmerged.inputs = l0.inputs;
lmerged.stride = strides;
lmerged.pad = pad;
lmerged.datasets = new Layer.DataSet[2];
lmerged.datasets[0].name = kernel0.name;
lmerged.datasets[0].shape = kernelShape;
lmerged.datasets[0].itemSizeInBytes = 4;
lmerged.datasets[0].length = kernelShape.length;
lmerged.datasets[0].offset = 0;
lmerged.datasets[1].name = bias0.name;
lmerged.datasets[1].shape = biasShape;
lmerged.datasets[1].itemSizeInBytes = 4;
lmerged.datasets[1].length = biasShape.length;
lmerged.datasets[1].offset = kernelShape.length;
lmerged.weights = new BarracudaArray(kernelShape.length + biasShape.length);
Tensor kernel = new Tensor(kernelShape); // 0-filled by default
// |x0 x1 x3 | x4 |y0 y1| y2 |z0| z1
// |x5 x6 x7 | x8 * k0 k1 => |y3 y4| y5 * l0 l1 => z2 z3
// |x9 x10 x11| x12 k2 k3 y6 y7 y8 l2 l3
// x13 x14 x15 x13
//
// in order to compute z0, we need to do 2 convolutions
//
// |y0 y1/
// | |x0 /x1| x3/ |
// | |x5 /x6| x7/ |
// | x9 x10 x11 |
//
// |x0 x1| is convolved with K and then * l0
// |x5 x6|
// /x1 x3/ is convolved with K and then * l1
// /x6 x7/
//
// by unwrapping the whole process
// z0 = [x0 * k0 * l0 + x1 * k1 * l0 + ....] + [x1 * k1 * l1 + ....]
// l0 * y0-block l1 * y1-block
// resulting conv kernel is the following
//
// z0 = | x0 x1 x3 | * | [k0*l0] [k1*l0 + k1*l1] [l2*l1] |
// | x5 x6 x7 | | [k2*l0 + k2*l2] [k3*l0 + k2*l1 + k1*l2 + k0*l3] [k3*l1 + k3*l3] |
// | x9 x10 x11 | | [k2*l2] [k2*l0 + k2*l3 [k3*l3] |
Tensor kernel0T = m_Ops.Transpose(kernel0, new[] { 2, 0, 1, 3 });
Tensor emptyB = new Tensor(new TensorShape(1, 1, 1, kernel.kernelCount));
for (int y1 = 0; y1 < kernel1.kernelHeight; ++y1)
for (int x1 = 0; x1 < kernel1.kernelWidth; ++x1)
{
Tensor kernel1XY = m_Ops.StridedSlice(kernel1, new[] { y1, x1, 0, 0 }, new[] { y1 + 1, x1 + 1, kernel1.kernelDepth, kernel.kernelCount }, new[] { 1, 1, 1, 1 });
Tensor kernelk = m_Ops.Conv2D(kernel0T, kernel1XY, emptyB, new[] { 1, 1 }, new[] { 0, 0, 0, 0 }, Layer.FusedActivation.None);
for (int y0 = 0; y0 < kernel0.kernelHeight; ++y0)
for (int x0 = 0; x0 < kernel0.kernelWidth; ++x0)
{
int ox = x0 + strides0[0] * x1;
int oy = y0 + strides0[1] * y1;
for (int c = 0; c < kernel.kernelDepth; ++c)
for (int k = 0; k < kernel.kernelCount; ++k)
{
kernel[oy, ox, c, k] += kernelk[c,y0,x0,k];
}
}
kernel1XY.Dispose();
kernelk.Dispose();
}
// |y0 y1| * l0 l1 + bl = z0
// |y3 y4| l2 l3
// y0 = Sum_k() + bk, y1 = Sum_k() + bk
// y2 = Sum_k() + bk, y2 = Sum_k() + bk
//
// moving b from the convolution process leads
// z0 = | x0 x1 x3 | * M + bl + l0*bk + l1*bk + l2*bk + l3*bk
// | x5 x6 x7 |
// | x9 x10 x11 |
// N.B: as you can see this breaks if there is some amount of zero-padding to the second conv layer
// because some weights of L will be * 0, essentialy masking out bk
Tensor bias = new Tensor(biasShape, bias1.ToReadOnlyArray());
for (int x1 = 0; x1 < kernel1.kernelWidth; ++x1)
for (int y1 = 0; y1 < kernel1.kernelHeight; ++y1)
for (int c = 0; c < kernel1.kernelDepth; ++c)
{
float bias0c = bias0[c];
for (var k = 0; k < kernel.kernelCount; ++k)
{
bias[k] += kernel1[y1, x1, c, k] * bias0c;
}
}
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
kernel0T.Dispose();
emptyB.Dispose();
kernel.Dispose();
bias.Dispose();
kernel0.Dispose();
bias0.Dispose();
kernel1.Dispose();
bias1.Dispose();
return lmerged;
});
}
public Layer FuseLayers(Layer l0, Layer l1)
{
var fnFuse = m_LayerFusers[(l0.type, l1.type)];
return fnFuse(l0, l1);
}
}
} // namespace Unity.Barracuda

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: b940ee731fee3c3478e90a161a7a7288
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,259 +0,0 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading.Tasks;
using UnityEngine.Assertions;
using UnityEngine.Scripting;
using Unity.Collections;
using Unity.Collections.LowLevel.Unsafe;
using Unity.Jobs;
[assembly: InternalsVisibleTo("Unity.Barracuda.BurstBLAS")]
namespace Unity.Barracuda
{
[Preserve]
internal class CSharpBLAS : BLASPlugin
{
public bool IsNative()
{
return false; // reference implementation
}
public bool IsCurrentPlatformSupported()
{
return true;
}
public unsafe void SGEMM(float* Ap, int AM, int AN, float* Bp, int BM, int BN, float* Cp, int CM, int CN, int bs,
bool transposeA = false, bool transposeB = false)
{
MatrixUtils.MultiplyBlockUnrollHx8ParallelWithPadding(Ap, AM, AN, Bp, BM, BN, Cp, CM, CN, bs,
transposeA, transposeB);
}
public unsafe JobHandle ScheduleSGEMM(JobHandle dependsOn,
float* Ap, int AM, int AN, float* Bp, int BM, int BN, float* Cp, int CM, int CN,
int bs,
bool transposeA = false, bool transposeB = false)
{
var job = new SGEMMJob();
job.Ap = Ap; job.AM = AM; job.AN = AN;
job.Bp = Bp; job.BM = BM; job.BN = BN;
job.Cp = Cp; job.CM = CM; job.CN = CN;
job.transposeA = transposeA;
job.transposeB = transposeB;
job.bs = bs;
return job.Schedule(dependsOn);
}
unsafe struct SGEMMJob : IJob
{
[NativeDisableUnsafePtrRestriction][ReadOnly] public unsafe float* Ap;
public int AM, AN;
[NativeDisableUnsafePtrRestriction][ReadOnly] public unsafe float* Bp;
public int BM, BN;
[NativeDisableUnsafePtrRestriction] public unsafe float* Cp;
public int CM, CN;
public int bs;
public bool transposeA;
public bool transposeB;
public void Execute()
{
MatrixUtils.MultiplyBlockUnrollHx8ParallelWithPadding(
Ap, AM, AN,
Bp, BM, BN,
Cp, CM, CN, bs,
transposeA, transposeB);
}
}
}
internal class MatrixUtils
{
public static unsafe void CopyBlockWithPadding(float* matrixIn, int row, int M, int col, int N, float[] blockOut, int bs, bool transpose = false)
{
Array.Clear(blockOut, 0, bs * bs);
var rowFinal = Math.Min(row + bs, M);
var count = Math.Min(col + bs, N) - col;
// @TODO: measure which one is better - sequential access over matrix memory or blockOut cache
if (transpose)
{
// sequential access over blockOut, strided over matrixIn
//for (var i = row; i < rowFinal; i++)
// for (var j = 0; j < count; ++j)
// blockOut[(i - row) * bs + j] = matrixIn[i + (col + j) * N];
// sequential access over matrixIn, strided over blockOut
for (var j = 0; j < count; ++j)
for (var i = row; i < rowFinal; i++)
blockOut[(i - row) * bs + j] = matrixIn[i + (col + j) * M];
}
else
for (var i = row; i < rowFinal; i++)
{
//D.Log(string.Format("Copy[{3}] {0} -> {1} {2}", i * M + col, (i - row) * bs, count, i));
Marshal.Copy((IntPtr)(matrixIn + i * N + col), blockOut, (i - row) * bs, count);
}
}
public static unsafe void ClearFloatArray(float* arr, float val, int count)
{
for (int i = 0; i < count; i++)
{
arr[i] = val;
}
}
public static unsafe void CopyFloatArray(float* from, float* to, int count)
{
for (int i = 0; i < count; i++)
{
to[i] = from[i];
}
}
public static unsafe void CopyBlockWithPadding(float* matrixIn, int row, int M, int col, int N, float* blockOut, int bs, bool transpose = false)
{
ClearFloatArray(blockOut, 0, bs * bs);
var rowFinal = Math.Min(row + bs, M);
var count = Math.Min(col + bs, N) - col;
// @TODO: measure which one is better - sequential access over matrix memory or blockOut cache
if (transpose)
{
// sequential access over blockOut, strided over matrixIn
//for (var i = row; i < rowFinal; i++)
// for (var j = 0; j < count; ++j)
// blockOut[(i - row) * bs + j] = matrixIn[i + (col + j) * N];
// sequential access over matrixIn, strided over blockOut
for (var j = 0; j < count; ++j)
for (var i = row; i < rowFinal; i++)
blockOut[(i - row) * bs + j] = matrixIn[i + (col + j) * M];
}
else
for (var i = row; i < rowFinal; i++)
{
//D.Log(string.Format("Copy[{3}] {0} -> {1} {2}", i * M + col, (i - row) * bs, count, i));
CopyFloatArray(matrixIn + i * N + col, blockOut + (i - row) * bs, count);
}
}
public static unsafe void CopyBlockWithPadding(float[] blockOut, float* matrixIn, int row, int M, int col, int N, int bs)
{
var rowFinal = Math.Min(row + bs, M);
var count = Math.Min(col + bs, N) - col;
for (var i = row; i < rowFinal; i++)
Marshal.Copy(blockOut, (i - row) * bs, (IntPtr)(matrixIn + i * N + col), count);
}
public static unsafe void CopyBlockWithPadding(float* blockOut, float* matrixIn, int row, int M, int col, int N, int bs)
{
var rowFinal = Math.Min(row + bs, M);
var count = Math.Min(col + bs, N) - col;
for (var i = row; i < rowFinal; i++)
CopyFloatArray(blockOut + (i - row) * bs, matrixIn + i * N + col, count);
}
public static unsafe void MultiplyBlockUnrollHx8Padded(float* Ap,
float* Bp,
float* Cp, int bs)
{
for (int i = 0; i < bs; i++)
{
for (int j = 0; j < bs; j += 8)
{
int baseC = i * bs + j;
float sum0 = *(Cp + baseC);
float sum1 = *(Cp + baseC + 1);
float sum2 = *(Cp + baseC + 2);
float sum3 = *(Cp + baseC + 3);
float sum4 = *(Cp + baseC + 4);
float sum5 = *(Cp + baseC + 5);
float sum6 = *(Cp + baseC + 6);
float sum7 = *(Cp + baseC + 7);
for (int l = 0; l < bs; l++)
{
float A = Ap[i * bs + l];
int baseB = l * bs + j;
sum0 += A * *(Bp + baseB);
sum1 += A * *(Bp + baseB + 1);
sum2 += A * *(Bp + baseB + 2);
sum3 += A * *(Bp + baseB + 3);
sum4 += A * *(Bp + baseB + 4);
sum5 += A * *(Bp + baseB + 5);
sum6 += A * *(Bp + baseB + 6);
sum7 += A * *(Bp + baseB + 7);
}
*(Cp + baseC) = sum0;
*(Cp + baseC + 1) = sum1;
*(Cp + baseC + 2) = sum2;
*(Cp + baseC + 3) = sum3;
*(Cp + baseC + 4) = sum4;
*(Cp + baseC + 5) = sum5;
*(Cp + baseC + 6) = sum6;
*(Cp + baseC + 7) = sum7;
}
}
}
public static unsafe void MultiplyBlockUnrollHx8ParallelWithPadding(float* Ap, int AM, int AN,
float* Bp, int BM, int BN,
float* Cp, int CM, int CN, int bs,
bool transposeA = false, bool transposeB = false)
{
if (transposeA)
{
var tmp = AM; AM = AN; AN = tmp;
}
if (transposeB)
{
var tmp = BM; BM = BN; BN = tmp;
}
int N = AM;
{
Assert.IsTrue(bs >= 8, "Matrix Mul block size should be >= 8");
Parallel.For(0, (BN / bs) + (BN % bs > 0 ? 1 : 0), colB =>
{
float[] blockA = new float[bs * bs];
float[] blockB = new float[bs * bs];
float[] blockC = new float[bs * bs];
for (int rowA = 0; rowA < N; rowA += bs)
{
for (int l = 0; l < AN; l += bs)
{
CopyBlockWithPadding(Ap, rowA, AM, l, AN, blockA, bs, transposeA);
CopyBlockWithPadding(Bp, l, BM, colB * bs, BN, blockB, bs, transposeB);
CopyBlockWithPadding(Cp, rowA, CM, colB * bs, CN, blockC, bs);
fixed (float* blockAp = blockA, blockBp = blockB, blockCp = blockC)
{
MultiplyBlockUnrollHx8Padded(blockAp, blockBp, blockCp, bs);
}
CopyBlockWithPadding(blockC, Cp, rowA, CM, colB * bs, CN, bs);
}
}
});
}
}
}
}

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: bf04fe6d135714369af8cab2915b2735
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,985 +0,0 @@
#if ENABLE_BARRACUDA_STATS
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using UnityEngine.Assertions;
namespace Unity.Barracuda {
internal static class MemoryAndExecutionReportHelper
{
/// <summary>
/// Appends a per-layer execution report to <paramref name="stringBuilder"/>.
/// Completed layers are always listed; an in-flight (uncompleted) layer, if any,
/// is appended last with a warning since its data may be partial.
/// </summary>
public static void GenerateStringReport(StringBuilder stringBuilder, ModelExecutionReport modelExecutionReport,
    bool spreadSheetFormat)
{
    var completed = modelExecutionReport.CompletedLayerExecutionReports;
    var inFlight = modelExecutionReport.CurrentLayerExecutionReport;
    stringBuilder.Append($"Number of completed layers : {completed.Count}\n");
    if (inFlight != null)
    {
        stringBuilder.Append("Warning: last layer was not completed. It will be logged, but it's information might be incomplete or erroneous.\n");
    }
    stringBuilder.Append("\n");
    // Completed layers first, then the (optional) uncompleted one at the end.
    var allLayerReports = new List<LayerExecutionReport>(completed);
    if (inFlight != null)
    {
        allLayerReports.Add(inFlight);
    }
    var views = GenerateExecutionViews(allLayerReports, completed.Count);
    GenerateReportForViews(stringBuilder, views, spreadSheetFormat, "", false);
}
/// <summary>
/// Appends a full memory report for a series of snapshots: a per-snapshot summary,
/// then tables for all tensors, allocators, tensorDatas and worker temp memories.
/// </summary>
/// <param name="stringBuilder">Destination the textual report is appended to.</param>
/// <param name="memorySnapshots">Ordered memory snapshots taken during execution.</param>
/// <param name="spreadSheetFormat">True for separator-delimited rows, false for indented text.</param>
/// <returns>Peak GPU/CPU/combined memory pressure observed across all snapshots.</returns>
public static MemoryPeakSummary GenerateStringReport(StringBuilder stringBuilder, List<MemorySnapshotReport> memorySnapshots,
    bool spreadSheetFormat)
{
    // Index every object by the id it was first seen with, so each table shows a
    // stable row set across all snapshots (rows stay aligned snapshot-to-snapshot).
    CollectAllAsFirstSeen(in memorySnapshots,
        out var allTensorAsFirstSeen,
        out var allAllocatorAsFirstSeen,
        out var allTensorDataAsFirstSeen,
        out var allTempMemoriesAsFirstSeen);
    var summaryViews = GenerateSummaryViews(memorySnapshots, allTensorAsFirstSeen, allTensorDataAsFirstSeen, allTempMemoriesAsFirstSeen, out var memoryPeakSummary);
    GenerateHeaderForSummaryViews(stringBuilder, summaryViews, spreadSheetFormat);
    GenerateReportForViews(stringBuilder, summaryViews, spreadSheetFormat, "Tensors allocation and deallocation (diff from previous snapshot):", isSummaryView:true);
    stringBuilder.Append("\n");
    stringBuilder.Append("\n");
    var tensorViews = GenerateTensorsViews(memorySnapshots, allTensorAsFirstSeen);
    GenerateHeaderForTensorViews(stringBuilder, tensorViews, spreadSheetFormat);
    GenerateReportForViews(stringBuilder, tensorViews, spreadSheetFormat, "All Tensors:", isSummaryView:false);
    stringBuilder.Append("\n");
    stringBuilder.Append("\n");
    var allocatorViews = GenerateAllocatorViews(memorySnapshots, allAllocatorAsFirstSeen);
    GenerateHeaderForAllocatorsViews(stringBuilder, allocatorViews, spreadSheetFormat);
    GenerateReportForViews(stringBuilder, allocatorViews, spreadSheetFormat, "All Allocators:", isSummaryView:false);
    stringBuilder.Append("\n");
    stringBuilder.Append("\n");
    var tensorDatasViews = GenerateTensorDatasViews(memorySnapshots, allTensorDataAsFirstSeen);
    GenerateHeaderForTensorDatasViews(stringBuilder, tensorDatasViews, spreadSheetFormat);
    GenerateReportForViews(stringBuilder, tensorDatasViews, spreadSheetFormat, "All TensorDatas:", isSummaryView:false);
    stringBuilder.Append("\n");
    stringBuilder.Append("\n");
    var tempMemoriesDatasViews = GenerateTempMemoriesDatasViews(memorySnapshots, allTempMemoriesAsFirstSeen);
    GenerateHeaderForTempMemoriesViews(stringBuilder, tempMemoriesDatasViews, spreadSheetFormat);
    GenerateReportForViews(stringBuilder, tempMemoriesDatasViews, spreadSheetFormat, "All worker temporary memories:", isSummaryView:false);
    stringBuilder.Append("\n");
    stringBuilder.Append("\n");
    return memoryPeakSummary;
}
#region `Internal data format` declaration
/// <summary>
/// An ordered set of named string cells: a fixed list of column titles plus one
/// value per title. Values start empty and may be assigned at most once
/// (asserted in the setter).
/// </summary>
private class SnapshotFields
{
    public readonly string[] Titles;
    public readonly Dictionary<string, string> Items;

    public SnapshotFields(string[] titles)
    {
        Titles = titles;
        Items = new Dictionary<string, string>();
        foreach (var t in titles)
            Items[t] = "";
    }

    /// <summary>Cell accessor; a cell must exist and may only be written while still empty.</summary>
    public string this[string title]
    {
        get => Items[title];
        set
        {
            Assert.IsTrue(Items.ContainsKey(title));
            Assert.IsTrue(Items[title] == "");
            Items[title] = value;
        }
    }

    /// <summary>Appends every title followed by <paramref name="separator"/>.</summary>
    public void AddTitlesToReport(StringBuilder stringBuilder, string separator)
    {
        foreach (var t in Titles)
        {
            stringBuilder.Append(t).Append(separator);
        }
    }

    /// <summary>Appends every value (in title order) followed by <paramref name="separator"/>.</summary>
    public void AddValuesToReport(StringBuilder stringBuilder, string separator)
    {
        foreach (var t in Titles)
        {
            stringBuilder.Append(Items[t]).Append(separator);
        }
    }

    /// <summary>
    /// Appends "prefix + title: value" for every cell, inserting <paramref name="suffix"/>
    /// between consecutive cells (not after the last one).
    /// </summary>
    public void AddAllToReport(StringBuilder stringBuilder, string suffix, string prefix = "")
    {
        for (int i = 0; i < Titles.Length; ++i)
        {
            if (i > 0)
                stringBuilder.Append(suffix);
            stringBuilder.Append(prefix).Append(Titles[i]).Append(": ").Append(Items[Titles[i]]);
        }
    }
}
/// <summary>
/// A table keyed by unique id: each row has a context record (identity columns)
/// and a data record (value columns), both stored as SnapshotFields and kept in
/// id order via SortedDictionary.
/// </summary>
private class SnapshotFieldsWithContexts
{
    public readonly string[] FieldTitles;
    public readonly string[] ContextTitles;
    public SortedDictionary<int, SnapshotFields> Fields { get; }
    public SortedDictionary<int, SnapshotFields> Contexts { get; }

    public SnapshotFieldsWithContexts(string[] fieldsTitles, string[] contextTitles)
    {
        FieldTitles = fieldsTitles;
        ContextTitles = contextTitles;
        Fields = new SortedDictionary<int, SnapshotFields>();
        Contexts = new SortedDictionary<int, SnapshotFields>();
    }

    /// <summary>Registers a new row; the id must not already exist (asserted).</summary>
    public void AddContext(int uniqueId)
    {
        Assert.IsFalse(Contexts.ContainsKey(uniqueId));
        Contexts[uniqueId] = new SnapshotFields(ContextTitles);
        Fields[uniqueId] = new SnapshotFields(FieldTitles);
    }

    /// <summary>Writes an identity cell for an already-registered row.</summary>
    public void SetContext(int uniqueId, string title, string value)
    {
        Assert.IsTrue(Contexts.ContainsKey(uniqueId));
        Contexts[uniqueId][title] = value;
    }

    /// <summary>Write-only access to a data cell of an already-registered row.</summary>
    public string this[int uniqueId, string title]
    {
        set
        {
            Assert.IsTrue(Fields.ContainsKey(uniqueId));
            Fields[uniqueId][title] = value;
        }
    }
}
/// <summary>
/// One reportable unit: its identity (context), a summary record, and an optional
/// per-object table (sections). Built either from a memory snapshot or from a
/// layer execution report.
/// </summary>
private class SnapshotView
{
    public SnapshotFields context;
    public SnapshotFields summary;
    public SnapshotFieldsWithContexts sections;

    /// <summary>Identity columns for a memory snapshot view.</summary>
    public SnapshotView(int snapShotIndex, MemorySnapshotReport report)
    {
        context = new SnapshotFields(new[] { "Snapshot index", "Type", "Name" })
        {
            ["Snapshot index"] = snapShotIndex.ToString(),
            ["Type"] = report.ContextType,
            ["Name"] = report.ContextName
        };
    }

    /// <summary>Identity columns for a layer execution view.</summary>
    public SnapshotView(int snapShotIndex, LayerExecutionReport report)
    {
        context = new SnapshotFields(new[] { "Layer index", "Type", "Name" })
        {
            ["Layer index"] = snapShotIndex.ToString(),
            ["Type"] = report.LayerType,
            ["Name"] = report.LayerName
        };
    }
}
#endregion
#region Helpers to find information in Reports
/// <summary>Looks up a temp-memory record by id in a snapshot; null if absent.</summary>
private static TempMemoryInfo FindTempMemoryInSnapshot(MemorySnapshotReport memorySnapshot, int tempMemoryId) =>
    memorySnapshot.TempMemoriesInfo.Find(m => m.UniqueId == tempMemoryId);
/// <summary>Looks up an allocator record by id in a snapshot; null if absent.</summary>
private static AllocatorMemoryInfo FindAllocatorInSnapshot(MemorySnapshotReport memorySnapshot, int allocatorId) =>
    memorySnapshot.AllocatorsMemoryInfo.Find(m => m.UniqueId == allocatorId);
/// <summary>
/// Returns a "Name / Id: N" label for the allocator owning the given tensorData,
/// or an empty string when no allocator in the snapshot owns it.
/// </summary>
private static string FindTensorDataAllocatorInSnapshot(MemorySnapshotReport memorySnapshot, int tensorDataId)
{
    foreach (var allocator in memorySnapshot.AllocatorsMemoryInfo)
    {
        bool ownsIt = allocator.TensorDatasMemoryInfo.Exists(m => m.UniqueId == tensorDataId);
        if (ownsIt)
            return $"{allocator.Name} / Id: {allocator.UniqueId}";
    }
    return "";
}
/// <summary>
/// Finds a tensorData record by id: first via a tensor that currently points at
/// it, then by scanning each allocator's owned tensorDatas. Null if not present.
/// </summary>
private static TensorDataMemoryInfo FindTensorDataInSnapshot(MemorySnapshotReport memorySnapshot, int tensorDataId)
{
    // A live tensor may reference the tensorData directly.
    foreach (var tensor in memorySnapshot.TensorsMemoryInfo)
    {
        var data = tensor.tensorDataMemoryInfo;
        if (data != null && data.UniqueId == tensorDataId)
            return data;
    }
    // Otherwise it may still be parked inside one of the allocators.
    foreach (var allocator in memorySnapshot.AllocatorsMemoryInfo)
    {
        var data = allocator.TensorDatasMemoryInfo.Find(m => m.UniqueId == tensorDataId);
        if (data != null)
            return data;
    }
    return null;
}
/// <summary>
/// Collects every tensor (from the snapshot itself and from each allocator) whose
/// backing tensorData has the given id; results are deduplicated and ordered by
/// tensor UniqueId via a SortedSet.
/// </summary>
private static IEnumerable<TensorMemoryInfo> FindAllTensorsInSnapshotUsingTensorDataId(MemorySnapshotReport memorySnapshot, int tensorDataId)
{
    bool UsesTensorData(TensorMemoryInfo m) =>
        m.tensorDataMemoryInfo != null && m.tensorDataMemoryInfo.UniqueId == tensorDataId;
    var tensors = new SortedSet<TensorMemoryInfo>(Comparer<TensorMemoryInfo>.Create((a, b) => a.UniqueId.CompareTo(b.UniqueId)));
    tensors.UnionWith(memorySnapshot.TensorsMemoryInfo.FindAll(UsesTensorData));
    foreach (var allocator in memorySnapshot.AllocatorsMemoryInfo)
    {
        tensors.UnionWith(allocator.TensorsMemoryInfo.FindAll(UsesTensorData));
    }
    return tensors;
}
/// <summary>
/// Finds a tensor record by id, checking the snapshot's own tensors first and
/// then each allocator's tensors. Null if the id is not present anywhere.
/// </summary>
private static TensorMemoryInfo FindTensorInSnapshot(MemorySnapshotReport memorySnapshot, int tensorId)
{
    bool Matches(TensorMemoryInfo m) => m.UniqueId == tensorId;
    var tensor = memorySnapshot.TensorsMemoryInfo.Find(Matches);
    if (tensor != null)
        return tensor;
    foreach (var allocator in memorySnapshot.AllocatorsMemoryInfo)
    {
        tensor = allocator.TensorsMemoryInfo.Find(Matches);
        if (tensor != null)
            return tensor;
    }
    return null;
}
/// <summary>
/// Walks every snapshot and indexes each tensor, allocator, tensorData and temp
/// memory by its UniqueId. Later snapshots overwrite earlier entries for the same
/// id, so each map ends up holding the record as last seen, keyed in id order.
/// </summary>
private static void CollectAllAsFirstSeen(in List<MemorySnapshotReport> memorySnapshots,
    out SortedDictionary<int,TensorMemoryInfo> tensors,
    out SortedDictionary<int,AllocatorMemoryInfo> allocators,
    out SortedDictionary<int,TensorDataMemoryInfo> tensorDatas,
    out SortedDictionary<int,TempMemoryInfo> tempMemories)
{
    tensors = new SortedDictionary<int, TensorMemoryInfo>();
    allocators = new SortedDictionary<int, AllocatorMemoryInfo>();
    tensorDatas = new SortedDictionary<int, TensorDataMemoryInfo>();
    tempMemories = new SortedDictionary<int, TempMemoryInfo>();
    // Collect all unique tensors, tensorDatas, allocators and temp memories.
    foreach (var snapshot in memorySnapshots)
    {
        // From the snapshot's own tensors (and their backing tensorDatas).
        foreach (var tensor in snapshot.TensorsMemoryInfo)
        {
            tensors[tensor.UniqueId] = tensor;
            if (tensor.tensorDataMemoryInfo != null)
                tensorDatas[tensor.tensorDataMemoryInfo.UniqueId] = tensor.tensorDataMemoryInfo;
        }
        // From allocators: their tensors and their owned tensorDatas.
        foreach (var allocator in snapshot.AllocatorsMemoryInfo)
        {
            allocators[allocator.UniqueId] = allocator;
            foreach (var tensor in allocator.TensorsMemoryInfo)
            {
                tensors[tensor.UniqueId] = tensor;
                if (tensor.tensorDataMemoryInfo != null)
                    tensorDatas[tensor.tensorDataMemoryInfo.UniqueId] = tensor.tensorDataMemoryInfo;
            }
            foreach (var tensorData in allocator.TensorDatasMemoryInfo)
            {
                tensorDatas[tensorData.UniqueId] = tensorData;
            }
        }
        // From worker temporary memories.
        foreach (var tempMemoryInfo in snapshot.TempMemoriesInfo)
        {
            tempMemories[tempMemoryInfo.UniqueId] = tempMemoryInfo;
        }
    }
}
#endregion
#region Reports -> internal data format
/// <summary>
/// Builds one view per snapshot describing worker temporary memories: a row per
/// temp memory ever seen (size and GPU/CPU location) plus a total-bytes summary.
/// Rows for memories absent from a given snapshot are left blank.
/// </summary>
private static List<SnapshotView> GenerateTempMemoriesDatasViews(List<MemorySnapshotReport> memorySnapshots,
    SortedDictionary<int, TempMemoryInfo> allTempMemoryInfosAsFirstSeen)
{
    List<SnapshotView> views = new List<SnapshotView>();
    for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
    {
        long allTotal = 0L;
        var snapshot = memorySnapshots[memorySnapshotIndex];
        //Titles and contexts
        SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
        view.sections = new SnapshotFieldsWithContexts(
            fieldsTitles: new[]
            {
                "Allocated (bytes)",
                "On GPU"
            },
            contextTitles: new[] {"Name", "Id"});
        foreach (var tempMemoryInfo in allTempMemoryInfosAsFirstSeen)
        {
            var id = tempMemoryInfo.Key;
            view.sections.AddContext(id);
            view.sections.SetContext(id, "Name", tempMemoryInfo.Value.Name);
            view.sections.SetContext(id, "Id", id.ToString());
        }
        view.summary = new SnapshotFields(new[]
        {
            "Memory pressure in bytes (sum of all temp memory capacities)"
        });
        //Details: only memories actually present in this snapshot get their cells filled.
        foreach (var alloc in allTempMemoryInfosAsFirstSeen)
        {
            var tempMemory = FindTempMemoryInSnapshot(snapshot, alloc.Key);
            if (tempMemory != null)
            {
                allTotal += tempMemory.TotalBytes;
                view.sections[tempMemory.UniqueId, "Allocated (bytes)"] = tempMemory.TotalBytes.ToString();
                view.sections[tempMemory.UniqueId, "On GPU"] = tempMemory.IsGPUMem ? "GPU" : "CPU";
            }
        }
        //Summary
        view.summary["Memory pressure in bytes (sum of all temp memory capacities)"] = allTotal.ToString();
        views.Add(view);
    }
    return views;
}
/// <summary>
/// Builds one view per snapshot describing allocator state: per-allocator rows
/// (total/busy/used/fragmented/free bytes) plus cross-allocator totals in the
/// summary. Rows for allocators absent from a snapshot stay blank.
/// </summary>
private static List<SnapshotView> GenerateAllocatorViews(List<MemorySnapshotReport> memorySnapshots,
    SortedDictionary<int, AllocatorMemoryInfo> allAllocatorAsFirstSeen)
{
    List<SnapshotView> views = new List<SnapshotView>();
    for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
    {
        long allTotal = 0L;
        long allBusy = 0L;
        long allUsed = 0L;
        long allFragmented = 0L;
        long allFree = 0L;
        var snapshot = memorySnapshots[memorySnapshotIndex];
        //Titles and contexts
        SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
        view.sections = new SnapshotFieldsWithContexts(
            fieldsTitles: new[]
            {
                "Memory pressure in bytes (sum of allocated tensorDatas capacities)",
                "Busy bytes, for all allocators (sum of 'in use' tensorDatas capacities)",
                "Needed bytes, for all allocators (sum of sizes of the part of the tensorDatas used by Tensors)",
                "Unusable bytes, for all allocators (sum of the part of tensorData lost because of allocator fragmentation)",
                "Ready bytes, for all allocators (sum of capacities of tensorData not used but allocated)"
            },
            contextTitles: new[] {"Name", "Id"});
        foreach (var allocatorMemoryInfo in allAllocatorAsFirstSeen)
        {
            var id = allocatorMemoryInfo.Key;
            view.sections.AddContext(id);
            view.sections.SetContext(id, "Name", allocatorMemoryInfo.Value.Name);
            view.sections.SetContext(id, "Id", id.ToString());
        }
        view.summary = new SnapshotFields(new[]
        {
            "Memory pressure in bytes, for all allocators (sum of allocated tensorDatas capacities)",
            "Busy bytes, for all allocators (sum of 'in use' tensorDatas capacities)",
            "Needed bytes, for all allocators (sum of sizes of the part of the tensorDatas used by Tensors)",
            "Unusable bytes, for all allocators (sum of the part of tensorData lost because of allocator fragmentation)",
            "Ready bytes, for all allocators (sum of capacities of tensorData not used but allocated)"
        });
        //Details: only allocators present in this snapshot get their cells filled.
        foreach (var alloc in allAllocatorAsFirstSeen)
        {
            var allocator = FindAllocatorInSnapshot(snapshot, alloc.Key);
            if (allocator != null)
            {
                allTotal += allocator.TotalBytes;
                allBusy += allocator.BusyBytes;
                allUsed += allocator.UsedBytes;
                // NOTE(review): the summary accumulates BusyBytes-UsedBytes here, while the
                // per-allocator cell below shows allocator.BytesLostToFragmentation — confirm
                // these are the same quantity, otherwise summary and rows can disagree.
                allFragmented += allocator.BusyBytes-allocator.UsedBytes;
                allFree += allocator.FreeBytes;
                view.sections[allocator.UniqueId, "Memory pressure in bytes (sum of allocated tensorDatas capacities)"] = allocator.TotalBytes.ToString();
                view.sections[allocator.UniqueId, "Busy bytes, for all allocators (sum of 'in use' tensorDatas capacities)"] = allocator.BusyBytes.ToString();
                view.sections[allocator.UniqueId, "Needed bytes, for all allocators (sum of sizes of the part of the tensorDatas used by Tensors)"] = allocator.UsedBytes.ToString();
                view.sections[allocator.UniqueId, "Unusable bytes, for all allocators (sum of the part of tensorData lost because of allocator fragmentation)"] = allocator.BytesLostToFragmentation.ToString();
                view.sections[allocator.UniqueId, "Ready bytes, for all allocators (sum of capacities of tensorData not used but allocated)"] = allocator.FreeBytes.ToString();
            }
        }
        //Summary
        view.summary["Memory pressure in bytes, for all allocators (sum of allocated tensorDatas capacities)"] = allTotal.ToString();
        view.summary["Busy bytes, for all allocators (sum of 'in use' tensorDatas capacities)"] = allBusy.ToString();
        view.summary["Needed bytes, for all allocators (sum of sizes of the part of the tensorDatas used by Tensors)"] = allUsed.ToString();
        view.summary["Unusable bytes, for all allocators (sum of the part of tensorData lost because of allocator fragmentation)"] = allFragmented.ToString();
        view.summary["Ready bytes, for all allocators (sum of capacities of tensorData not used but allocated)"] = allFree.ToString();
        views.Add(view);
    }
    return views;
}
/// <summary>
/// Builds one view per snapshot describing every tensorData ever seen: usage,
/// capacity, location, owning allocator, tensors currently pointing at it, and
/// bytes lost to fragmentation ('in use' but larger than any tensor needs).
/// Summary rows total these quantities per device (GPU/CPU).
/// </summary>
private static List<SnapshotView> GenerateTensorDatasViews(List<MemorySnapshotReport> memorySnapshots,
    SortedDictionary<int,TensorDataMemoryInfo> allTensorDataAsFirstSeen)
{
    List<SnapshotView> views = new List<SnapshotView>();
    for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
    {
        long allGPUInBytes = 0L;
        long allCPUInBytes = 0L;
        long allUsedGPUInBytes = 0L;
        long allUsedCPUInBytes = 0L;
        long allFragmentedMemGPUInBytes = 0L;
        long allFragmentedMemCPUInBytes = 0L;
        var snapshot = memorySnapshots[memorySnapshotIndex];
        //Titles and contexts
        SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
        view.sections = new SnapshotFieldsWithContexts(
            fieldsTitles: new[]
            {
                "In use", "Capacity (bytes)", "On GPU", "Allocator",
                "Tensor(s) Id(s)", "Tensor(s) max bytes", "Fragmented bytes"
            },
            contextTitles: new[] {"Id"});
        foreach (var tensorData in allTensorDataAsFirstSeen)
        {
            var id = tensorData.Key;
            view.sections.AddContext(id);
            view.sections.SetContext(id, "Id", id.ToString());
        }
        view.summary = new SnapshotFields(new[]
        {
            "GPU sum of all allocated tensorData capacities (bytes)",
            "CPU sum of all allocated tensorData capacities (bytes)",
            "GPU sum of all 'in use' tensorData (bytes)",
            "CPU sum of all 'in use' tensorData (bytes)",
            "GPU sum of all 'fragmented' tensorData mem ('in use' but not by large enough tensors) (bytes)",
            "CPU sum of all 'fragmented' tensorData mem ('in use' but not by large enough tensors) (bytes)",
        });
        foreach (var tData in allTensorDataAsFirstSeen)
        {
            TensorDataMemoryInfo tensorData = FindTensorDataInSnapshot(snapshot, tData.Key);
            if (tensorData != null)
            {
                // List every tensor sharing this tensorData and the largest byte size any of them needs.
                var associatedTensors = FindAllTensorsInSnapshotUsingTensorDataId(snapshot, tensorData.UniqueId);
                string tensorNamesandIds = "";
                int tensorBytes = 0;
                bool first = true;
                foreach (var tensor in associatedTensors)
                {
                    if (!first)
                        tensorNamesandIds += " / ";
                    tensorNamesandIds += tensor.Name + " Id:" + tensor.UniqueId;
                    first = false;
                    tensorBytes = Math.Max(tensorBytes, tensor.Shape.length * sizeof(float));
                }
                // Fragmentation only counts while the tensorData is actually in use.
                int fragmentedTensorDataBytes = (tensorData.InUse) ? tensorData.MaxBytes - tensorBytes : 0;
                if (tensorData.IsGPUMem)
                {
                    allGPUInBytes += tensorData.MaxBytes;
                    if (tensorData.InUse)
                    {
                        allFragmentedMemGPUInBytes += fragmentedTensorDataBytes;
                        allUsedGPUInBytes += tensorData.MaxBytes;
                    }
                }
                else
                {
                    allCPUInBytes += tensorData.MaxBytes;
                    if (tensorData.InUse)
                    {
                        allFragmentedMemCPUInBytes += fragmentedTensorDataBytes;
                        allUsedCPUInBytes += tensorData.MaxBytes;
                    }
                }
                view.sections[tensorData.UniqueId, "In use"] = tensorData.InUse ? "Yes" : "";
                view.sections[tensorData.UniqueId, "Capacity (bytes)"] = tensorData.MaxBytes.ToString();
                view.sections[tensorData.UniqueId, "On GPU"] = tensorData.IsGPUMem ? "GPU" : "CPU";
                view.sections[tensorData.UniqueId, "Allocator"] = FindTensorDataAllocatorInSnapshot(snapshot, tensorData.UniqueId);
                view.sections[tensorData.UniqueId, "Tensor(s) Id(s)"] = tensorNamesandIds;
                view.sections[tensorData.UniqueId, "Tensor(s) max bytes"] = tensorBytes.ToString();
                view.sections[tensorData.UniqueId, "Fragmented bytes"] = fragmentedTensorDataBytes.ToString();
            }
        }
        //Summary
        view.summary["GPU sum of all allocated tensorData capacities (bytes)"] = allGPUInBytes.ToString();
        view.summary["CPU sum of all allocated tensorData capacities (bytes)"] = allCPUInBytes.ToString();
        view.summary["GPU sum of all 'in use' tensorData (bytes)"] = allUsedGPUInBytes.ToString();
        view.summary["CPU sum of all 'in use' tensorData (bytes)"] = allUsedCPUInBytes.ToString();
        view.summary["GPU sum of all 'fragmented' tensorData mem ('in use' but not by large enough tensors) (bytes)"] = allFragmentedMemGPUInBytes.ToString();
        view.summary["CPU sum of all 'fragmented' tensorData mem ('in use' but not by large enough tensors) (bytes)"] = allFragmentedMemCPUInBytes.ToString();
        views.Add(view);
    }
    return views;
}
/// <summary>
/// Builds one view per snapshot describing every tensor ever seen: name, shape,
/// CPU cache size, and (when allocated) its backing tensorData id/capacity.
/// Summary rows total tensor byte usage per device plus the CPU-side cache.
/// Fix: reuse the already-computed <c>dataBytes</c> local instead of recomputing
/// <c>tensor.Shape.length * sizeof(float)</c> inline with a redundant ToString().
/// </summary>
private static List<SnapshotView> GenerateTensorsViews(List<MemorySnapshotReport> memorySnapshots,
    SortedDictionary<int, TensorMemoryInfo> allTensorAsFirstSeen)
{
    List<SnapshotView> views = new List<SnapshotView>();
    for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
    {
        var snapshot = memorySnapshots[memorySnapshotIndex];
        //Titles and contexts
        SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
        view.sections = new SnapshotFieldsWithContexts(
            fieldsTitles: new[] {"Allocated (bytes)", "Name", "Shape", "Cache size (bytes)", "TensorData Id", "TensorData Capacity (bytes)"},
            contextTitles: new[] {"Id"});
        foreach (var tensorMemoryInfo in allTensorAsFirstSeen)
        {
            var id = tensorMemoryInfo.Key;
            view.sections.AddContext(id);
            view.sections.SetContext(id, "Id", id.ToString());
        }
        view.summary = new SnapshotFields(new[]
        {
            "Tensor memory on GPU (in bytes)",
            "Tensor memory on CPU (in bytes)",
            "On CPU tensor cache (in bytes)"
        });
        //Details: only tensors present in this snapshot get their cells filled.
        long cacheMemInBytes = 0L;
        long gpuMem = 0L;
        long cpuMem = 0L;
        foreach (var tensorFromDict in allTensorAsFirstSeen)
        {
            var tensor = FindTensorInSnapshot(snapshot, tensorFromDict.Key);
            if (tensor != null)
            {
                cacheMemInBytes += tensor.CacheBytes;
                var dataBytes = tensor.Shape.length * sizeof(float);
                string allocatedStr = "Yes";
                if (tensor.tensorDataMemoryInfo != null)
                {
                    // Was: $" ({(tensor.Shape.length * sizeof(float)).ToString()})" — same value as dataBytes.
                    allocatedStr += $" ({dataBytes})";
                    view.sections[tensor.UniqueId, "TensorData Id"] = tensor.tensorDataMemoryInfo.UniqueId.ToString();
                    view.sections[tensor.UniqueId, "TensorData Capacity (bytes)"] = tensor.tensorDataMemoryInfo.MaxBytes.ToString();
                    if (tensor.tensorDataMemoryInfo.IsGPUMem)
                        gpuMem += dataBytes;
                    else
                        cpuMem += dataBytes;
                }
                else
                {
                    // No backing storage yet: report zero allocated bytes.
                    allocatedStr += " (0)";
                }
                view.sections[tensor.UniqueId, "Name"] = tensor.Name;
                view.sections[tensor.UniqueId, "Shape"] = tensor.Shape.ToString();
                view.sections[tensor.UniqueId, "Cache size (bytes)"] = tensor.CacheBytes.ToString();
                view.sections[tensor.UniqueId, "Allocated (bytes)"] = allocatedStr;
            }
        }
        //Summary
        view.summary["Tensor memory on GPU (in bytes)"] = gpuMem.ToString();
        view.summary["Tensor memory on CPU (in bytes)"] = cpuMem.ToString();
        view.summary["On CPU tensor cache (in bytes)"] = cacheMemInBytes.ToString();
        views.Add(view);
    }
    return views;
}
/// <summary>
/// Builds one view per layer report containing its execution summary, dispatch
/// info and theoretical ALU/bandwidth costs. Reports at index >= numCompletedLayer
/// are flagged as uncompleted (their data may be partial).
/// </summary>
private static List<SnapshotView> GenerateExecutionViews(List<LayerExecutionReport> layerReports, int numCompletedLayer)
{
    List<SnapshotView> views = new List<SnapshotView>();
    for (var layerIndex = 0; layerIndex < layerReports.Count; layerIndex++)
    {
        var report = layerReports[layerIndex];
        //Titles
        SnapshotView view = new SnapshotView(layerIndex, report);
        view.sections = new SnapshotFieldsWithContexts(null, null);
        view.summary = new SnapshotFields(new[]
        {
            "Summary",
            "Compute Kernels(workItems:X,Y,Z)",
            "Theoretical ALU count",
            "Theoretical Bandwidth (bytes)",
            "Note"
        });
        //Summary (an empty report summary is shown as "NA")
        view.summary["Summary"] = report.Summary==""?"NA":report.Summary;
        view.summary["Compute Kernels(workItems:X,Y,Z)"] = report.DispatchInfos;
        view.summary["Theoretical ALU count"] = report.NumAlu.ToString();
        view.summary["Theoretical Bandwidth (bytes)"] = report.NumBytes.ToString();
        if (layerIndex >= numCompletedLayer)
            view.summary["Note"] = "UNCOMPLETED LAYER";
        views.Add(view);
    }
    return views;
}
/// <summary>
/// Builds the per-snapshot summary views: total GPU/CPU memory pressure
/// (tensorDatas + temp memories), CPU-side tensor cache, and — from the second
/// snapshot on — the set of tensors allocated/released since the previous
/// snapshot. Also tracks the peak GPU/CPU/combined usage across all snapshots,
/// returned through <paramref name="memoryPeakSummary"/>.
/// </summary>
private static List<SnapshotView> GenerateSummaryViews(List<MemorySnapshotReport> memorySnapshots,
    SortedDictionary<int, TensorMemoryInfo> allTensorsAsFirstSeen,
    SortedDictionary<int, TensorDataMemoryInfo> allTensorDatasAsFirstSeen,
    SortedDictionary<int, TempMemoryInfo> allTempMemoriesAsFirstSeen,
    out MemoryPeakSummary memoryPeakSummary)
{
    HashSet<int> previousSnapshotTensorIds = new HashSet<int>();
    List<SnapshotView> views = new List<SnapshotView>();
    long peakMemoryUsageGPU = 0;
    long peakMemoryUsageCPU = 0;
    long peakMemoryUsageGPUAndCPU = 0;
    for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
    {
        var snapshot = memorySnapshots[memorySnapshotIndex];
        //Titles and contexts (a single pseudo-row of type "Tensor" holds the alloc/release diff)
        SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
        view.sections = new SnapshotFieldsWithContexts(
            fieldsTitles: new[] {"Allocated", "Released"},
            contextTitles: new[] {"Type" });
        view.sections.AddContext(0);
        view.sections.SetContext(0, "Type", "Tensor");
        view.summary = new SnapshotFields(new[]
        {
            "Total memory pressure on GPU (in bytes)",
            "Total memory pressure on CPU (in bytes)",
            "On CPU tensor cache (in bytes)"
        });
        //Summary: tensor cache bytes plus the ids of tensors alive in this snapshot.
        HashSet<int> currentSnapshotTensorIds = new HashSet<int>();
        long cacheMemInBytes = 0L;
        foreach (var tensor in snapshot.TensorsMemoryInfo)
        {
            cacheMemInBytes += tensor.CacheBytes;
            currentSnapshotTensorIds.Add(tensor.UniqueId);
        }
        // Memory pressure = tensorData capacities + temp memory capacities, split by device.
        long gpuMem = 0L;
        long cpuMem = 0L;
        foreach (var tData in allTensorDatasAsFirstSeen)
        {
            TensorDataMemoryInfo tensorData = FindTensorDataInSnapshot(snapshot, tData.Key);
            if (tensorData != null)
            {
                if (tensorData.IsGPUMem)
                    gpuMem += tensorData.MaxBytes;
                else
                    cpuMem += tensorData.MaxBytes;
            }
        }
        foreach (var mData in allTempMemoriesAsFirstSeen)
        {
            TempMemoryInfo tempMemoryInfo = FindTempMemoryInSnapshot(snapshot, mData.Key);
            if (tempMemoryInfo != null)
            {
                if (tempMemoryInfo.IsGPUMem)
                    gpuMem += tempMemoryInfo.TotalBytes;
                else
                    cpuMem += tempMemoryInfo.TotalBytes;
            }
        }
        view.summary["Total memory pressure on GPU (in bytes)"] = gpuMem.ToString();
        view.summary["Total memory pressure on CPU (in bytes)"] = cpuMem.ToString();
        view.summary["On CPU tensor cache (in bytes)"] = cacheMemInBytes.ToString();
        peakMemoryUsageGPU = Math.Max(peakMemoryUsageGPU, gpuMem);
        peakMemoryUsageCPU = Math.Max(peakMemoryUsageCPU, cpuMem);
        peakMemoryUsageGPUAndCPU = Math.Max(peakMemoryUsageGPUAndCPU, gpuMem+cpuMem);
        if (memorySnapshotIndex != 0)
        {
            //Tensor allocated and freed (diff from snapshot to snapshot)
            var allocatedTensorsId = currentSnapshotTensorIds.Except(previousSnapshotTensorIds);
            var releasedTensorsId = previousSnapshotTensorIds.Except(currentSnapshotTensorIds);
            StringBuilder tensorDiff = new StringBuilder();
            bool first = true;
            foreach (var tensorId in allocatedTensorsId)
            {
                var tensor = FindTensorInSnapshot(snapshot, tensorId);
                string tensorDataInfo = "none";
                if (tensor.tensorDataMemoryInfo != null)
                {
                    var data = tensor.tensorDataMemoryInfo;
                    var memType = data.IsGPUMem ? "GPU" : "CPU";
                    tensorDataInfo = $"id:{data.UniqueId} bytes:{data.MaxBytes} on:{memType}";
                }
                if (!first) tensorDiff.Append(" / ");
                first = false;
                tensorDiff.Append($"{tensor.Name} {tensor.Shape} id:{tensor.UniqueId} tensorData:[{tensorDataInfo}]");
            }
            view.sections[0, "Allocated"] = tensorDiff.ToString();
            tensorDiff.Clear();
            first = true;
            // Released tensors are gone from this snapshot, so describe them from the first-seen map.
            foreach (var tensorId in releasedTensorsId)
            {
                var tensor = allTensorsAsFirstSeen[tensorId];
                if (!first) tensorDiff.Append(" / ");
                first = false;
                tensorDiff.Append($"{tensor.Name} {tensor.Shape} id:{tensor.UniqueId}");
            }
            view.sections[0, "Released"] = tensorDiff.ToString();
        }
        views.Add(view);
        previousSnapshotTensorIds = currentSnapshotTensorIds;
    }
    memoryPeakSummary = new MemoryPeakSummary(peakMemoryUsageGPU, peakMemoryUsageCPU, peakMemoryUsageGPUAndCPU);
    return views;
}
#endregion
#region Internal data format -> text
/// <summary>Appends <paramref name="str"/> to the builder <paramref name="repeatCount"/> times.</summary>
private static void Append(this StringBuilder sb, string str, int repeatCount)
{
    int remaining = repeatCount;
    while (remaining-- > 0)
        sb.Append(str);
}
/// <summary>Appends <paramref name="str"/> immediately followed by <paramref name="separator"/>.</summary>
private static void Append(this StringBuilder sb, string str, string separator)
{
    sb.Append(str).Append(separator);
}
/// <summary>
/// Renders a list of views into <paramref name="stringBuilder"/>. In spreadsheet
/// mode: one title row, then one separator-delimited row per view, with "|" cells
/// between sections. In text mode: an indented block per view (context line,
/// summary lines, then the section rows under <paramref name="sectionTitle"/>).
/// </summary>
/// <param name="isSummaryView">Text mode only: when true, section rows print just their
/// field values; when false, each row prints its context followed by "=> " and its values.</param>
private static void GenerateReportForViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat, string sectionTitle, bool isSummaryView)
{
    if (spreadSheetFormat)
    {
        //Columns Titles (taken from the first view; all views share the same layout)
        views[0].context.AddTitlesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
        views[0].summary.AddTitlesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
        stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
        foreach (var tensorFields in views[0].sections.Fields)
        {
            tensorFields.Value.AddTitlesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
            stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
        }
        stringBuilder.Append("\n");
        //All snapshots, one row each
        foreach (var view in views)
        {
            view.context.AddValuesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
            view.summary.AddValuesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
            stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
            foreach (var tensorFields in view.sections.Fields)
            {
                tensorFields.Value.AddValuesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
                stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
            }
            stringBuilder.Append("\n");
        }
    }
    else
    {
        string doubleIndentation = ModelExecutionsReporter.TextIndentation + ModelExecutionsReporter.TextIndentation;
        foreach (var view in views)
        {
            view.context.AddAllToReport(stringBuilder, ModelExecutionsReporter.TextFormatFieldSeparator);
            stringBuilder.Append("\n");
            view.summary.AddAllToReport(stringBuilder, suffix:"\n", prefix: ModelExecutionsReporter.TextIndentation);
            stringBuilder.Append("\n"+ModelExecutionsReporter.TextIndentation + sectionTitle +"\n");
            foreach (var context in view.sections.Contexts)
            {
                stringBuilder.Append(doubleIndentation);
                if (isSummaryView)
                {
                    view.sections.Fields[context.Key].AddAllToReport(stringBuilder, "\n"+doubleIndentation);
                }
                else
                {
                    context.Value.AddAllToReport(stringBuilder, ModelExecutionsReporter.TextFormatFieldSeparator);
                    stringBuilder.Append("\n"+doubleIndentation +"=> ");
                    view.sections.Fields[context.Key].AddAllToReport(stringBuilder, ModelExecutionsReporter.TextFormatFieldSeparator);
                    stringBuilder.Append("\n");
                }
            }
            stringBuilder.Append("\n");
        }
    }
}
/// <summary>
/// Emits the header line for the summary table. In text mode just a banner; in
/// spreadsheet mode the banner is padded so each section's "Type" label lands
/// above its group of columns.
/// </summary>
private static void GenerateHeaderForSummaryViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat)
{
    if (views.Count == 0)
    {
        stringBuilder.Append("<******** Summary info ********> NONE!\n");
        return;
    }
    if (!spreadSheetFormat)
    {
        stringBuilder.Append("<******** Summary info ********>\n");
        return;
    }
    //Columns names: pad past the context+summary columns, then label each section group.
    int ctxFieldCount = views[0].context.Titles.Length + views[0].summary.Titles.Length;
    int sectionFieldCount = views[0].sections.FieldTitles.Length;
    stringBuilder.Append("<******** Summary info ********>");
    stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
    stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
    foreach (var context in views[0].sections.Contexts)
    {
        stringBuilder.Append(context.Value["Type"], ModelExecutionsReporter.SpreadSheetFieldSeparator);
        stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, sectionFieldCount-1);
        stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
    }
    stringBuilder.Append("\n");
}
/// <summary>Emits the header line for the Tensors table.</summary>
private static void GenerateHeaderForTensorViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat) =>
    GenerateHeaderForViewsByID(stringBuilder, views, spreadSheetFormat, "Tensors");
/// <summary>Emits the header line for the TensorDatas table.</summary>
private static void GenerateHeaderForTensorDatasViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat) =>
    GenerateHeaderForViewsByID(stringBuilder, views, spreadSheetFormat, "TensorDatas");
/// <summary>
/// Shared header emitter for id-keyed tables (Tensors, TensorDatas). Text mode
/// prints only the banner; spreadsheet mode pads past the context+summary columns
/// and labels each section group with "Id: N".
/// </summary>
private static void GenerateHeaderForViewsByID(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat, string dataType)
{
    if (views.Count == 0)
    {
        stringBuilder.Append($"<******** {dataType} info ********> NONE!\n");
        return;
    }
    if (!spreadSheetFormat)
    {
        stringBuilder.Append($"<******** {dataType} info ********>\n");
        return;
    }
    //Columns names
    int ctxFieldCount = views[0].context.Titles.Length + views[0].summary.Titles.Length;
    int sectionFieldCount = views[0].sections.FieldTitles.Length;
    stringBuilder.Append($"<******** {dataType} info ********>");
    stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
    stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
    foreach (var context in views[0].sections.Contexts)
    {
        stringBuilder.Append("Id: ");
        stringBuilder.Append(context.Value["Id"], ModelExecutionsReporter.SpreadSheetFieldSeparator);
        stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, sectionFieldCount-1);
        stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
    }
    stringBuilder.Append("\n");
}
// Emits the header rows for the worker-temporary-memory views: a label row
// followed by a "<name> / Id: <id>" column-header cell per temp memory.
private static void GenerateHeaderForTempMemoriesViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat)
{
    if (views.Count == 0)
    {
        stringBuilder.Append("<******** Worker temporary memories info ********> NONE!\n");
        return;
    }
    if (!spreadSheetFormat)
    {
        stringBuilder.Append("<******** Worker temporary memories info ********>\n");
        return;
    }
    //Columns names
    int ctxFieldCount = views[0].context.Titles.Length + views[0].summary.Titles.Length;
    int sectionFieldCount = views[0].sections.FieldTitles.Length;
    // Pre-row labelling the section before the actual column headers.
    stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
    stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
    stringBuilder.Append("Temp memories names and ids:");
    stringBuilder.Append("\n");
    stringBuilder.Append("<******** Worker temporary memories info ********>");
    stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
    stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
    // One padded header cell per temp memory.
    foreach (var context in views[0].sections.Contexts)
    {
        stringBuilder.Append(context.Value["Name"], " / Id: ");
        stringBuilder.Append(context.Value["Id"], ModelExecutionsReporter.SpreadSheetFieldSeparator);
        stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, sectionFieldCount-1);
        stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
    }
    stringBuilder.Append("\n");
}
// Emits the header rows for the allocator views; same layout as the
// temp-memory header but labelled with allocator names and ids.
private static void GenerateHeaderForAllocatorsViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat)
{
    if (views.Count == 0)
    {
        stringBuilder.Append("<******** Allocators info ********> NONE!\n");
        return;
    }
    if (!spreadSheetFormat)
    {
        stringBuilder.Append("<******** Allocators info ********>\n");
        return;
    }
    //Columns names
    int ctxFieldCount = views[0].context.Titles.Length + views[0].summary.Titles.Length;
    int sectionFieldCount = views[0].sections.FieldTitles.Length;
    // Pre-row labelling the section before the actual column headers.
    stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
    stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
    stringBuilder.Append("Allocators names and shapes:");
    stringBuilder.Append("\n");
    stringBuilder.Append("<******** Allocators info ********>");
    stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
    stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
    // One padded header cell per allocator.
    foreach (var context in views[0].sections.Contexts)
    {
        stringBuilder.Append(context.Value["Name"], " / Id: ");
        stringBuilder.Append(context.Value["Id"], ModelExecutionsReporter.SpreadSheetFieldSeparator);
        stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, sectionFieldCount-1);
        stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
    }
    stringBuilder.Append("\n");
}
#endregion
}
} // namespace Unity.Barracuda
#endif //ENABLE_BARRACUDA_STATS

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 5b125a79bdbfb1b41adba78ef255dd80
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,196 +0,0 @@
#if ENABLE_BARRACUDA_STATS
using System.Collections.Generic;
using System.Text;
namespace Unity.Barracuda {
/// <summary>
/// Immutable memory snapshot of a single tensor data buffer.
/// </summary>
public class TensorDataMemoryInfo
{
    // Unique id of the underlying tensor data.
    public int UniqueId { get; }
    // Capacity in bytes, computed as maxCapacity * sizeof(float)
    // (assumes maxCapacity counts float elements — TODO confirm).
    // NOTE(review): int multiplication could overflow for very large buffers.
    public int MaxBytes { get; }
    // Whether the buffer was in use at snapshot time.
    public bool InUse { get; }
    // Whether the buffer resides in GPU memory.
    public bool IsGPUMem { get; }
    // Captures the relevant fields from the live statistics object.
    internal TensorDataMemoryInfo(ITensorDataStatistics tensorDataStatistics)
    {
        UniqueId = tensorDataStatistics.uniqueId;
        MaxBytes = tensorDataStatistics.maxCapacity * sizeof(float);
        InUse = tensorDataStatistics.inUse;
        IsGPUMem = tensorDataStatistics.isGPUMem;
    }
    public override string ToString()
    {
        return $"TensorData of maxBytes {MaxBytes}, inUse:{InUse}, onGPU:{IsGPUMem}, uniqueId:{UniqueId}";
    }
}
/// <summary>
/// Immutable snapshot of one worker temporary memory allocation.
/// </summary>
public class TempMemoryInfo
{
    // Unique id of the temporary memory.
    public int UniqueId { get; }
    // Human-readable name of the temporary memory.
    public string Name { get; }
    // Total size in bytes.
    public long TotalBytes { get; }
    // Whether the memory resides on the GPU.
    public bool IsGPUMem { get; }
    // Captures the relevant fields from the live statistics object.
    internal TempMemoryInfo(TempMemoryStatistics tempMemoryStatistics)
    {
        UniqueId = tempMemoryStatistics.uniqueId;
        Name = tempMemoryStatistics.name;
        TotalBytes = tempMemoryStatistics.size;
        IsGPUMem = tempMemoryStatistics.isGPUMem;
    }
    public override string ToString()
    {
        return $"Temp memory '{Name}' of totalBytes {TotalBytes}";
    }
}
/// <summary>
/// Immutable snapshot of one allocator's memory usage, including the tensor
/// datas and tensors it tracked at snapshot time.
/// </summary>
public class AllocatorMemoryInfo
{
    public int UniqueId { get; }
    public string Name { get; }
    // Bytes actually used by live tensors.
    public long UsedBytes { get; }
    // Bytes held by busy buffers (used + fragmentation).
    public long BusyBytes { get; }
    // Bytes held in free buffers available for reuse.
    public long FreeBytes { get; }
    // Total bytes owned by the allocator.
    public long TotalBytes { get; }
    public List<TensorDataMemoryInfo> TensorDatasMemoryInfo { get; }
    public List<TensorMemoryInfo> TensorsMemoryInfo { get; }
    // Busy-but-unused bytes, i.e. internal fragmentation.
    public long BytesLostToFragmentation => BusyBytes - UsedBytes;
    // Copies the allocator statistics and materializes per-buffer snapshots.
    internal AllocatorMemoryInfo(IAllocatorStatistics allocatorStatistics)
    {
        UniqueId = allocatorStatistics.uniqueId;
        Name = allocatorStatistics.name;
        UsedBytes = allocatorStatistics.usedBytes;
        BusyBytes = allocatorStatistics.busyBytes;
        FreeBytes = allocatorStatistics.freeBytes;
        TotalBytes = allocatorStatistics.totalBytes;
        TensorDatasMemoryInfo = new List<TensorDataMemoryInfo>();
        foreach (var tensorDataStatistics in allocatorStatistics.GetTensorDatasStatistics())
        {
            TensorDatasMemoryInfo.Add(new TensorDataMemoryInfo(tensorDataStatistics));
        }
        TensorsMemoryInfo = new List<TensorMemoryInfo>();
        foreach (var tensorStatistics in allocatorStatistics.GetTensorsStatistics())
        {
            TensorsMemoryInfo.Add(new TensorMemoryInfo(tensorStatistics));
        }
    }
    public override string ToString()
    {
        return $"Allocator '{Name}' of totalBytes {TotalBytes}, usedBytes:{UsedBytes}, lostToFragmentation:{BytesLostToFragmentation}, free:{FreeBytes}";
    }
}
/// <summary>
/// Immutable snapshot of one tensor's memory usage, with an optional snapshot
/// of its backing tensor data (null when the tensor had no data attached).
/// </summary>
public class TensorMemoryInfo
{
    public int UniqueId { get; }
    public string Name { get; }
    public TensorShape Shape { get; }
    // Bytes held in the tensor's CPU-side cache.
    public int CacheBytes { get; }
    // Snapshot of the backing buffer; null if none was attached.
    public TensorDataMemoryInfo tensorDataMemoryInfo { get; }
    internal TensorMemoryInfo(ITensorStatistics tensorStatistics)
    {
        UniqueId = tensorStatistics.uniqueId;
        Name = tensorStatistics.name;
        Shape = tensorStatistics.shape;
        CacheBytes = tensorStatistics.cacheBytes;
        var tensorDataStats = tensorStatistics.GetTensorDataStatistics();
        // Only materialize the data snapshot when a backing buffer exists.
        if (tensorDataStats != null)
            tensorDataMemoryInfo = new TensorDataMemoryInfo(tensorDataStats);
    }
    public override string ToString()
    {
        var tensorDataStr = (tensorDataMemoryInfo != null) ? tensorDataMemoryInfo.ToString() : "";
        return $"Tensor: {Name} of shape {Shape.ToString()}, cacheBytes: {CacheBytes} (data: {tensorDataStr})";
    }
}
/// <summary>
/// Snapshot of all memory tracked at one execution point: tensors, allocators
/// and worker temporary memories, tagged with the execution context
/// (e.g. which layer was being executed).
/// </summary>
public class MemorySnapshotReport
{
    // Context label; for a layer this includes its type (and activation).
    public string ContextType { get; }
    // Layer name, or "" when the snapshot is not tied to a layer.
    public string ContextName { get; }
    public List<TensorMemoryInfo> TensorsMemoryInfo { get; }
    public List<AllocatorMemoryInfo> AllocatorsMemoryInfo { get; }
    public List<TempMemoryInfo> TempMemoriesInfo { get; }
    internal MemorySnapshotReport(IOps ops, IVarsStatistics vars, string context, Layer layer)
    {
        ContextType = context;
        ContextName = "";
        // Enrich the context with layer information when available.
        if (layer != null)
        {
            ContextType += ": " + layer.type + ((layer.type == Layer.Type.Activation) ? ("." + layer.activation) : "");
            ContextName += layer.name;
        }
        TensorsMemoryInfo = new List<TensorMemoryInfo>();
        AllocatorsMemoryInfo = new List<AllocatorMemoryInfo>();
        TempMemoriesInfo = new List<TempMemoryInfo>();
        foreach (var allocatorsStatistic in vars.GetAllocatorsStatistics())
        {
            AllocatorsMemoryInfo.Add(new AllocatorMemoryInfo(allocatorsStatistic));
        }
        foreach (var tensorStatistic in vars.GetTensorsStatistics())
        {
            TensorsMemoryInfo.Add(new TensorMemoryInfo(tensorStatistic));
        }
        foreach (var tempMemoryStatistic in ops.GetTempMemoryStatistics())
        {
            TempMemoriesInfo.Add(new TempMemoryInfo(tempMemoryStatistic));
        }
    }
}
/// <summary>
/// Accumulates memory snapshots taken at successive execution points and
/// renders them as a textual (or spreadsheet-formatted) report.
/// </summary>
public class MemorySnapshotsReport
{
    public List<MemorySnapshotReport> MemorySnapshotsReports { get; private set; }

    public MemorySnapshotsReport() => Reset();

    /// <summary>Discards all recorded snapshots.</summary>
    public void Reset() => MemorySnapshotsReports = new List<MemorySnapshotReport>();

    /// <summary>
    /// Records a snapshot of the current memory state. Silently does nothing
    /// when the vars implementation does not expose statistics.
    /// </summary>
    public void TakeMemorySnapshot(IOps ops, IVars vars, string context, Layer layer)
    {
        if (vars is IVarsStatistics varsWithStatistics)
            MemorySnapshotsReports.Add(new MemorySnapshotReport(ops, varsWithStatistics, context, layer));
    }

    /// <summary>
    /// Appends the full snapshots report to the builder and returns the
    /// computed memory peak summary.
    /// </summary>
    public MemoryPeakSummary GenerateStringReport(StringBuilder stringBuilder, bool spreadSheetFormat)
    {
        stringBuilder.Append("**************** MEMORY SNAPSHOTS REPORTS - START ****************\n");
        stringBuilder.Append($"Number of snapshots : {MemorySnapshotsReports.Count}\n\n");
        var memoryPeakSummary = MemoryAndExecutionReportHelper.GenerateStringReport(stringBuilder, MemorySnapshotsReports, spreadSheetFormat);
        stringBuilder.Append("**************** MEMORY SNAPSHOTS REPORTS - STOP ****************\n");
        return memoryPeakSummary;
    }

    public override string ToString()
    {
        var builder = new StringBuilder(10000);
        GenerateStringReport(builder, spreadSheetFormat: false);
        return builder.ToString();
    }
}
} // namespace Unity.Barracuda
#endif //ENABLE_BARRACUDA_STATS

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 0e26059fb46b5a345a0a59a9fe3eafae
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,922 +0,0 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;
using UnityEngine;
using UnityEngine.Assertions;
using UnityEngine.Profiling;
[assembly: InternalsVisibleTo("Unity.Barracuda.ONNX")]
[assembly: InternalsVisibleTo("Unity.Barracuda.Editor")]
namespace Unity.Barracuda {
internal class ModelAnalyzer
{
/// <summary>
/// Picks the model's default input name: the single declared input when there
/// is exactly one, otherwise the first layer input that is neither produced by
/// an earlier layer nor fed by a memory. Returns "" when nothing qualifies.
/// </summary>
public static string GetDefaultInputName(Model model)
{
    bool modelHasOnlyOneInput = model.inputs.Count == 1;
    if (modelHasOnlyOneInput)
        return model.inputs[0].name;

    // Memory-fed names are not candidates for the default input.
    var memories = new HashSet<string>();
    foreach (var m in model.memories)
        memories.Add(m.input);

    // find the first unconnected input as a default model input
    var previousLayerNames = new HashSet<string>();
    foreach (var l in model.layers)
    {
        previousLayerNames.Add(l.name);

        bool layerDoesNotNeedInput = (l.type == Layer.Type.Load);
        if (layerDoesNotNeedInput)
            continue;

        foreach (var inputName in l.inputs)
        {
            bool inputIsUnconnected = !previousLayerNames.Contains(inputName);
            bool inputIsNotPartOfMemory = !memories.Contains(inputName);
            if (inputIsUnconnected && inputIsNotPartOfMemory)
                return inputName;
        }
    }
    return "";
}
/// <summary>
/// Picks the model's default output: the single declared output when there is
/// exactly one, otherwise the name of the last layer, or "" for an empty model.
/// </summary>
// Modifier order fixed from "static public" to the conventional "public static",
// consistent with the rest of this class.
public static string GetDefaultOutputName(Model model)
{
    if (model.outputs.Count == 1)
        return model.outputs[0];

    if (model.layers.Count > 0)
    {
        var lastLayer = model.layers[model.layers.Count - 1];
        return lastLayer.name;
    }

    return "";
}
/// <summary>
/// Convenience overload that discards the per-name shape map.
/// </summary>
public static TensorShape?[] ListTemporaryTensorShapes(Model model, IDictionary<string, TensorShape> inputShapes)
{
    return ListTemporaryTensorShapes(model, inputShapes, out _);
}
/// <summary>
/// Statically infers the output shape of every layer in the model given the
/// model input shapes. Returns one entry per layer processed (null when the
/// shape can only be known at runtime) and exposes all shapes keyed by
/// layer/input name via <paramref name="shapesByName"/>. Inference may stop
/// early (break) once a required shape is unknowable, in which case later
/// layers get no entry at all.
/// </summary>
public static TensorShape?[] ListTemporaryTensorShapes(Model model, IDictionary<string, TensorShape> inputShapes,
    out IDictionary<string, TensorShape?> shapesByName)
{
    Profiler.BeginSample ("Barracuda.ListTemporaryTensorShapes");
    var shapes = new List<TensorShape?>();
    shapesByName = new Dictionary<string, TensorShape?>();
    foreach (var entry in inputShapes)
        shapesByName.Add(entry.Key, entry.Value);

    TensorShape? Xn;
    shapesByName.TryGetValue(GetDefaultInputName(model), out Xn); // default input
    TensorShape? O = Xn;

    foreach (var l in model.layers)
    {
        // Resolve this layer's main input: its explicit first input when the
        // shape is known, otherwise the previous layer's output.
        if (l.inputs.Length > 0 && shapesByName.TryGetValue(l.inputs[0], out TensorShape? xShape))
            Xn = xShape;
        else
            Xn = O; // previous output is used, if-and-only-if layer has no explicit inputs

        // Unknown input -> this layer's output is unknown too.
        if (Xn == null)
        {
            shapes.Add(Xn);
            shapesByName.Add(l.name, Xn);
            continue;
        }

        TensorShape X = Xn.Value;

        if (l.type == Layer.Type.Dense)
        {
            Assert.IsNotNull(l.datasets);
            var W = l.datasets[0].shape;
            O = new TensorShape(X.flatHeight, W.flatWidth);
        }
        else if (l.type == Layer.Type.Dense3)
        {
            Assert.IsNotNull(l.datasets);
            var W = l.datasets[0].shape;
            O = new TensorShape(X.batch, 1, W.channels, X.channels);
        }
        else if (l.type == Layer.Type.MatMul)
        {
            // Second operand's shape is required; give up entirely if unknown.
            if (!shapesByName.ContainsKey(l.inputs[1]) || shapesByName[l.inputs[1]] == null)
            {
                O = null;
                break;
            }

            var Y = shapesByName[l.inputs[1]].Value;

            int rankX;
            int rankY;
            List<int> onnxXshape;
            List<int> onnxYshape;

            // Ranks come from the serialized pool when present, otherwise from
            // the legacy heuristic.
            if (l.pool == null || l.pool.Length == 0)
            {
                LegacyGetXYRanks(X, Y, out rankX, out rankY);
            }
            else
            {
                rankX = l.pool[0];
                rankY = l.pool[1];
            }

            onnxXshape = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaShapeToOnnxLayout(X, rankX);
            onnxYshape = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaShapeToOnnxLayout(Y, rankY);

            int rankO = Math.Max(rankX, rankY);

            // pad 1 on front of shape to both be rankO shape
            for (int i = 0; i < (rankX - rankY); i++)
                onnxYshape.Insert(0, 1);
            for (int i = 0; i < (rankY - rankX); i++)
                onnxXshape.Insert(0, 1);

            if (rankO == 2)
                O = new TensorShape(onnxXshape[0], 1, 1, onnxYshape[1]);
            else if (rankO == 3)
                O = new TensorShape(Math.Max(onnxXshape[0], onnxYshape[0]), 1, onnxYshape[2], onnxXshape[1]);
            else
                O = new TensorShape(Math.Max(onnxXshape[0], onnxYshape[0]), onnxXshape[2], onnxYshape[3], Math.Max(onnxXshape[1], onnxYshape[1]));
        }
        else if (
            l.type == Layer.Type.Conv2D ||
            l.type == Layer.Type.Conv3D ||
            l.type == Layer.Type.DepthwiseConv2D)
        {
            var K = l.datasets[0].shape;

            Assert.IsNotNull(l.stride);
            Assert.IsNotNull(l.pad);
            var pad = X.AdjustPadToKernel(K, l.stride, l.pad);
            O = X.ApplyKernel(K, l.stride, pad);
        }
        else if (
            l.type == Layer.Type.Conv2DTrans)
        {
            var K = l.datasets[0].shape;
            Assert.IsNotNull(l.stride);
            Assert.IsNotNull(l.pad);
            // pool size is treated as output_adjustment aka output_padding here
            var outputAdjustment = l.pool;
            var pad = X.AdjustPadToKernel(K, l.stride, l.pad);
            O = X.ApplyKernelInverse(K, l.stride, pad, outputAdjustment);
        }
        else if (
            l.type == Layer.Type.Upsample2D)
        {
            if(l.pool.Length != 2)
            {
                O = null;
            }
            else
            {
                // pool size is treated as upsample coefficient here
                Assert.IsNotNull(l.pool);
                Assert.AreEqual(l.pool.Length, 2);
                O = new TensorShape(X.batch, X.height * l.pool[1], X.width * l.pool[0], X.channels);
            }
        }
        else if (
            l.type == Layer.Type.Upsample3D)
        {
            // NOTE(review): guards on Length != 2 but then asserts Length == 3
            // — looks like the guard should be != 3; confirm intent.
            if(l.pool.Length != 2)
            {
                O = null;
            }
            else
            {
                // pool size is treated as upsample coefficient here
                Assert.IsNotNull(l.pool);
                Assert.AreEqual(l.pool.Length, 3);
                O = new TensorShape(1,1,X.batch, 1, X.depth * l.pool[2], X.height * l.pool[1], X.width * l.pool[0], X.channels);
            }
        }
        else if (
            l.type == Layer.Type.Resample2D)
        {
            if(l.pool.Length != 2)
            {
                O = null;
            }
            else
            {
                // pool is treated as resample size here
                var size = l.pool;
                Assert.IsNotNull(size);
                Assert.AreEqual(size.Length, 2);
                O = new TensorShape(X.batch, size[1], size[0], X.channels);
            }
        }
        else if (
            l.type == Layer.Type.DepthToSpace)
        {
            // pool size is treated as blocksize here
            Assert.IsNotNull(l.pool);
            Assert.AreEqual(l.pool.Length, 2);
            Assert.AreEqual(X.channels % (l.pool[0] * l.pool[1]), 0);
            O = new TensorShape(X.batch, X.height * l.pool[1], X.width * l.pool[0], X.channels / (l.pool[0] * l.pool[1]));
        }
        else if (
            l.type == Layer.Type.SpaceToDepth)
        {
            // pool size is treated as blocksize here
            Assert.IsNotNull(l.pool);
            Assert.AreEqual(l.pool.Length, 2);
            O = new TensorShape(X.batch, X.height / l.pool[1], X.width / l.pool[0], X.channels * (l.pool[0] * l.pool[1]));
        }
        else if (
            l.type == Layer.Type.MaxPool2D ||
            l.type == Layer.Type.AvgPool2D)
        {
            Assert.IsNotNull(l.pool);
            Assert.IsNotNull(l.stride);
            Assert.IsNotNull(l.pad);
            var pad = X.AdjustPadToPool(l.pool, l.stride, l.pad);
            O = X.ApplyPool(l.pool, l.stride, pad);
        }
        else if (
            l.type == Layer.Type.GlobalMaxPool2D ||
            l.type == Layer.Type.GlobalAvgPool2D)
        {
            O = new TensorShape(X.batch, 1, 1, X.channels);
        }
        else if (l.type == Layer.Type.Border3D)
        {
            Assert.IsNotNull(l.pad);
            // legacy support
            // NOTE(review): the legacy branch assigns X (not O), leaving O at
            // its previous value — looks like a bug; confirm before relying on
            // Border3D legacy shape inference.
            if (l.pad.Length == 6)
                X = X.ApplyBorder(new[] { l.pad[0], l.pad[1], l.pad[2], 0, l.pad[3], l.pad[4], l.pad[5], 0 });
            else
                O = X.ApplyBorder(l.pad);
        }
        else if (
            l.type == Layer.Type.Border2D ||
            l.type == Layer.Type.Pad2DReflect ||
            l.type == Layer.Type.Pad2DSymmetric ||
            l.type == Layer.Type.Pad2DEdge)
        {
            Assert.IsNotNull(l.pad);
            // legacy support
            // NOTE(review): same suspicious X (not O) assignment as Border3D.
            if (l.pad.Length == 4)
                X = X.ApplyBorder(new[] { l.pad[0], l.pad[1], 0, l.pad[2], l.pad[3], 0 });
            else
                O = X.ApplyBorder(l.pad);
        }
        else if (
            l.type == Layer.Type.Conv3D ||
            l.type == Layer.Type.Conv3DTrans ||
            l.type == Layer.Type.Upsample3D ||
            l.type == Layer.Type.MaxPool3D ||
            l.type == Layer.Type.AvgPool3D ||
            l.type == Layer.Type.GlobalMaxPool3D ||
            l.type == Layer.Type.GlobalAvgPool3D ||
            l.type == Layer.Type.Border3D)
        {
            // NOTE(review): Conv3D/Upsample3D/Border3D are already matched by
            // earlier branches, so this throw is unreachable for them.
            throw new NotImplementedException();
        }
        else if (
            l.type == Layer.Type.RandomNormal ||
            l.type == Layer.Type.RandomUniform)
        {
            Assert.IsNotNull(l.pool);
            // pool size is treated as shape constant, if not empty
            // otherwise shape of the previous tensor is used
            if (l.pool.Length > 0)
                O = new TensorShape(l.pool);
            else
                O = X;
        }
        else if (l.type == Layer.Type.ConstantOfShape)
        {
            // axis != 1 marks a dynamic shape: unknowable statically.
            if(l.axis != 1)
                O = null;
            else
                O = X;
        }
        else if (
            l.type == Layer.Type.Multinomial)
        {
            Assert.IsNotNull(l.pool);
            Assert.AreEqual(l.pool.Length, 1);
            O = new TensorShape(X.batch, l.pool[0]);
        }
        else if (
            l.type == Layer.Type.OneHot)
        {
            Assert.IsNotNull(l.pool);
            Assert.AreEqual(l.pool.Length, 1);
            int depth = l.pool[0];

            // axis carries the input rank; negative means "use X.dimensions".
            int inputRank = l.axis;
            inputRank = inputRank < 0 ? X.dimensions : inputRank;
            if (inputRank == 1)
                O = new TensorShape(X.flatHeight, depth);
            else if (inputRank == 2)
                O = new TensorShape(X.flatHeight, 1, depth, X.flatWidth);
            else
                O = new TensorShape(X.batch, X.height, depth, X.channels);
        }
        else if (l.type == Layer.Type.RoiAlign)
        {
            Assert.IsNotNull(l.pool);
            Assert.AreEqual(l.pool.Length, 2);

            if (shapesByName.TryGetValue(l.inputs[1], out TensorShape? shape) && shape != null)
            {
                int batches = shape.Value.flatHeight;
                O = new TensorShape(batches, l.pool[0], l.pool[1], X.channels);
            }
            else
                O = null;
        }
        else if (
            l.type == Layer.Type.Add ||
            l.type == Layer.Type.Sub ||
            l.type == Layer.Type.Mul ||
            l.type == Layer.Type.Div ||
            l.type == Layer.Type.Pow ||
            l.type == Layer.Type.Min ||
            l.type == Layer.Type.Max ||
            l.type == Layer.Type.Mean||
            l.type == Layer.Type.Greater ||
            l.type == Layer.Type.GreaterEqual ||
            l.type == Layer.Type.Less ||
            l.type == Layer.Type.LessEqual ||
            l.type == Layer.Type.Equal ||
            l.type == Layer.Type.LogicalOr ||
            l.type == Layer.Type.LogicalAnd ||
            l.type == Layer.Type.LogicalXor ||
            l.type == Layer.Type.Where)
        {
            // Broadcast ops: output is the element-wise max over input shapes.
            // gather shapes by names
            var list = new List<TensorShape>(l.inputs.Length);
            bool allShapesKnown = true;
            foreach (var i in l.inputs)
            {
                if (shapesByName.TryGetValue(i, out TensorShape? shape) && shape != null)
                    list.Add(shape.Value);
                else
                    allShapesKnown = false;
            }

            O = allShapesKnown ? TensorExtensions.Max(list.ToArray()) : default(TensorShape?);
        }
        else if (
            l.type == Layer.Type.ReduceL1 ||
            l.type == Layer.Type.ReduceL2 ||
            l.type == Layer.Type.ReduceLogSum ||
            l.type == Layer.Type.ReduceLogSumExp ||
            l.type == Layer.Type.ReduceMax ||
            l.type == Layer.Type.ReduceMean ||
            l.type == Layer.Type.ReduceMin ||
            l.type == Layer.Type.ReduceProd ||
            l.type == Layer.Type.ReduceSum ||
            l.type == Layer.Type.ReduceSumSquare ||
            l.type == Layer.Type.ArgMax ||
            l.type == Layer.Type.ArgMin)
        {
            O = X.Reduce(l.axis);
        }
        else if (
            l.type == Layer.Type.Flatten)
        {
            O = X.Flatten();
        }
        else if (
            l.type == Layer.Type.Reshape)
        {
            // pool size is treated as the shape, if not empty
            var size = l.pool;
            Assert.IsNotNull(size);

            if (size.Length == 0 && l.inputs.Length > 1)
            {
                switch (l.axis)
                {
                    // Legacy - use the shape of the input tensor as the shape
                    case -1:
                        if (shapesByName.TryGetValue(l.inputs[1], out TensorShape? shape))
                            size = shape.Value.ToArray();
                        break;

                    // Use the tensor values as the shape; Calculated at runtime
                    case 1:
                        O = null;
                        break;
                }

                // Runtime-computed shape: give up on the remaining layers.
                if (O == null)
                    break;
            }

            Assert.IsTrue( (size.Length == 4) || (size.Length == 8));
            O = X.Reshape(size);
        }
        else if (
            l.type == Layer.Type.Expand)
        {
            // pool size is treated as new shape
            var newShape = l.pool;
            Assert.IsNotNull(newShape);
            Assert.IsTrue(newShape.Length == 8 || newShape.Length == 4);

            O = new TensorShape(newShape);
        }
        else if (
            l.type == Layer.Type.Transpose)
        {
            var permutations = l.pool;
            // Null permutations means a plain 2D transpose.
            if (permutations == null)
                O = new TensorShape(X.flatWidth, X.flatHeight);
            else
            {
                Assert.IsTrue(permutations.Length == 8 || permutations.Length == 4);
                O = X.Permute(permutations);
            }
        }
        else if (
            l.type == Layer.Type.Gather)
        {
            if (!shapesByName.TryGetValue(l.inputs[0], out TensorShape? input0Shape) || input0Shape == null
                || !shapesByName.TryGetValue(l.inputs[1], out TensorShape? input1Shape) || input1Shape == null)
            {
                O = null;
                break;
            }

            int[] shape = input0Shape.Value.ToArray();
            shape[l.axis] = input1Shape.Value.length;

            O = new TensorShape(shape);

            // Multi-dimensional indices: splice the indices shape in at axis.
            if (l.pool != null && l.pool.Length == 2 && l.pool[1] > 1)
            {
                int xRank = l.pool[0];
                int indicesRank = l.pool[1];
                var oShape = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaShapeToList(O.Value, xRank);
                var indicesShape = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaShapeToList(input1Shape.Value, indicesRank);
                int axis = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaAxisToTensor(l.axis, xRank);
                oShape.InsertRange(axis, indicesShape);
                oShape.RemoveAt(axis + indicesShape.Count);
                O = (O.Value).Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaLayoutToTensorShapeLayout(oShape.ToArray()));
                // rank 2 -> 3
                if (xRank == 2 && oShape.Count == 3)
                    O = (O.Value).Permute(new int[] { 0, 1, 3, 2 });
            }
        }
        else if (l.type == Layer.Type.ScatterND)
        {
            O = X;
        }
        else if (
            l.type == Layer.Type.Squeeze ||
            l.type == Layer.Type.Unsqueeze)
        {
            O = X;
        }
        else if (
            l.type == Layer.Type.Concat)
        {
            // gather shapes by names
            var list = new List<TensorShape>(l.inputs.Length);
            bool allShapesKnown = true;
            foreach (var i in l.inputs)
            {
                if (!shapesByName.TryGetValue(i, out var shape) || shape == null)
                {
                    allShapesKnown = false;
                    continue;
                }
                list.Add(shape.Value);
            }

            O = allShapesKnown ? TensorExtensions.Concat(list.ToArray(), l.axis) : default(TensorShape?);
        }
        else if (
            l.type == Layer.Type.StridedSlice)
        {
            Assert.IsNotNull(l.pad);
            Assert.IsNotNull(l.pool);
            Assert.IsNotNull(l.stride);
            O = X.ApplyStridedSlice(l.pad, l.pool, l.stride);
        }
        else if (
            l.type == Layer.Type.Tile)
        {
            // pool size is treated as tiling coefficient here
            Assert.IsNotNull(l.pool);
            var scale = l.pool;
            O = X.Scale(scale);
        }
        else if (
            l.type == Layer.Type.Load)
        {
            O = l.datasets[0].shape;
        }
        else if (// elementwise operations
            l.type == Layer.Type.Nop ||
            l.type == Layer.Type.Activation ||
            l.type == Layer.Type.ScaleBias ||
            l.type == Layer.Type.Normalization ||
            l.type == Layer.Type.LRN ||
            l.type == Layer.Type.Dropout ||
            l.type == Layer.Type.LogicalNot ||
            l.type == Layer.Type.Sign)
        {
            // works in place, keeps the same shape size
            O = X;
        }
        else if (
            l.type == Layer.Type.TopKIndices ||
            l.type == Layer.Type.TopKValues ||
            l.type == Layer.Type.NonMaxSuppression ||
            l.type == Layer.Type.LSTM ||
            l.type == Layer.Type.NonZero)
        {
            // Calculated at runtime
            O = null;
        }
        else if (l.type == Layer.Type.Shape)
        {
            int shapeRank = l.axis > 0 ? 1 : X.length;
            O = new TensorShape(shapeRank, 1, 1, 1);
        }
        else if (
            l.type == Layer.Type.Conv3D ||
            l.type == Layer.Type.Conv3DTrans ||
            l.type == Layer.Type.Upsample3D ||
            l.type == Layer.Type.MaxPool3D ||
            l.type == Layer.Type.AvgPool3D ||
            l.type == Layer.Type.GlobalMaxPool3D ||
            l.type == Layer.Type.GlobalAvgPool3D ||
            l.type == Layer.Type.Border3D)
        {
            // NOTE(review): duplicate of the earlier 3D catch-all; unreachable.
            throw new NotImplementedException("3D operations are not implemented yet!");
        }
        else
        {
            throw new NotImplementedException($"Layer type {l.type} needs to be explicitly handled");
        }

        shapes.Add(O);
        shapesByName.Add(l.name, O);
    }

    Profiler.EndSample();
    return shapes.ToArray();
}
// TODO: Remove when the legacy importer / code path is no longer needed (i.e. when pool is always set)
/// <summary>
/// Legacy heuristic that guesses the ONNX ranks of two matmul operands from
/// their 4D Barracuda shapes (used when the importer did not serialize ranks).
/// Rank = index of the last non-1 dimension in the reconstructed ONNX layout.
/// </summary>
public static void LegacyGetXYRanks(TensorShape X, TensorShape Y, out int rankX, out int rankY)
{
    // ONNX rank 2 : N,C   => N,1,1,C
    //      rank 3 : one must be N C W, (batches = N) => N, 1, W, C
    //      rank 4 : one must be N C H W, (batches = N * C) => N H W C
    // X and Y can be different ranks
    var onnxXshape = new List<int> { X.batch, X.channels, X.height, X.width };
    if (X.height == 1) onnxXshape = new List<int> { X.batch, X.channels, X.width, 1 };
    var onnxYshape = new List<int> { Y.batch, Y.channels, Y.height, Y.width };
    if (Y.height == 1) onnxYshape = new List<int> { Y.batch, Y.channels, Y.width, 1 };

    // Highest index (plus one) of a non-trivial dimension gives the rank.
    rankX = 0;
    for (int i = 3; i >= 0; i--)
    {
        if (onnxXshape[i] != 1)
        {
            rankX = i + 1;
            break;
        }
    }
    rankY = 0;
    for (int i = 3; i >= 0; i--)
    {
        if (onnxYshape[i] != 1)
        {
            rankY = i + 1;
            break;
        }
    }
}
/// <summary>
/// Tries to statically infer the shape of the named output for the given
/// input shapes. Returns false when the shape is only known at runtime.
/// </summary>
public static bool TryGetOutputTensorShape(Model model, IDictionary<string, TensorShape> inputShapes, string output, out TensorShape shape)
{
    shape = new TensorShape();
    ListTemporaryTensorShapes(model, inputShapes, out var shapesByName);
    if (!shapesByName.TryGetValue(output, out var dynamicShape) || dynamicShape == null)
        return false;
    shape = dynamicShape.Value;
    return true;
}
/// <summary>
/// Overload that infers shapes from the model's own declared input shapes.
/// </summary>
public static bool TryGetOutputTensorShape(Model model, string output, out TensorShape shape)
{
    var inputShapes = model.inputs.ToDictionary(i => i.name, i => new TensorShape(i.shape));
    return TryGetOutputTensorShape(model, inputShapes, output, out shape);
}
/// <summary>
/// Looks up a layer by name. When not found, 'layer' is set to an empty Nop
/// placeholder and false is returned.
/// </summary>
public static bool FindLayerByName(Model model, string name, out Layer layer)
{
    var match = model.layers.FirstOrDefault(l => l.name == name);
    if (match != null)
    {
        layer = match;
        return true;
    }
    layer = new Layer("", Layer.Type.Nop);
    return false;
}
/// <summary>
/// Returns the set of layers whose outputs must be kept in persistent storage:
/// constants (Load/Nop), model/memory outputs, and any layer consumed by a
/// layer other than its immediate successor.
/// </summary>
public static HashSet<Layer> FindLayersThatRequireStorage(Model model)
{
    // Inputs consumed by a non-adjacent layer cannot live in the ping-pong
    // "previous output" slot and thus need their own storage.
    var allInputsExceptFromPreviousLayer = new HashSet<string>();
    Layer prevLayer = null;
    foreach (var layer in model.layers)
    {
        foreach (var input in layer.inputs)
            if (prevLayer != null && input != prevLayer.name)
                allInputsExceptFromPreviousLayer.Add(input);
        prevLayer = layer;
    }

    // Everything the outside world observes must also be preserved.
    var allOutputs = new HashSet<string>();
    foreach (var output in model.outputs)
        allOutputs.Add(output);
    foreach (var memory in model.memories)
        allOutputs.Add(memory.output);
    allOutputs.Add(GetDefaultOutputName(model));

    var requireStorage = new HashSet<Layer>();
    foreach (var layer in model.layers)
    {
        // loading constant tensor requires storage
        if (layer.type == Layer.Type.Load)
            requireStorage.Add(layer);

        // @TBD: implement safety check that ensures Nop never has input
        // otherwise it has to be treated as Load operation
        if (layer.type == Layer.Type.Nop)
            requireStorage.Add(layer);

        if (allInputsExceptFromPreviousLayer.Contains(layer.name) ||
            allOutputs.Contains(layer.name))
            requireStorage.Add(layer);
    }

    return requireStorage;
}
/// <summary>
/// Returns every layer reachable by walking the input edges backwards from
/// the given output names (breadth-first traversal over layer inputs).
/// </summary>
public static HashSet<Layer> FindUpstreamLayers(Model model, string[] outputs)
{
    // TODO: replace with var layersByName = model.layers.ToDictionary(i => i.name, i => i);
    var layersByName = new Dictionary<string, Layer>();
    foreach (var l in model.layers)
        layersByName.Add(l.name, l);

    var connected = new HashSet<Layer>();
    var layersToVisit = new HashSet<Layer>();
    // Seed the frontier with the requested outputs that are layers.
    foreach (var o in outputs)
        if (layersByName.ContainsKey(o))
        {
            layersToVisit.Add(layersByName[o]);
            connected.Add(layersByName[o]);
        }

    // BFS: expand each frontier layer into its producing layers.
    while (layersToVisit.Count > 0)
    {
        var visitNext = new HashSet<Layer>();
        foreach (var l in layersToVisit)
            foreach (var i in l.inputs)
                if (layersByName.ContainsKey(i))
                {
                    visitNext.Add(layersByName[i]);
                    connected.Add(layersByName[i]);
                }

        layersToVisit = visitNext;
    }

    return connected;
}
/// <summary>
/// Returns the largest statically-inferable intermediate tensor shape for the
/// given input shapes (layers with unknown shapes are ignored).
/// </summary>
public static TensorShape FindLargestNecessaryTensorShape(Model model, IDictionary<string, TensorShape> inputShapes)
{
    Profiler.BeginSample ("Barracuda.FindLargestNecessaryTensorShape");
    var maxTensorShape = new TensorShape(1, 1, 1, 1);
    foreach (var candidate in ListTemporaryTensorShapes(model, inputShapes))
    {
        if (candidate?.length > maxTensorShape.length)
            maxTensorShape = candidate.Value;
    }
    Profiler.EndSample ();
    return maxTensorShape;
}
/// <summary>
/// Returns the largest dataset (weights/constants) shape across all layers.
/// </summary>
public static TensorShape FindLargestArgumentTensorShape(Model model)
{
    var maxTensorShape = new TensorShape(1, 1, 1, 1);
    foreach (var arg in model.layers.SelectMany(layer => layer.datasets))
    {
        if (arg.shape.length > maxTensorShape.length)
            maxTensorShape = arg.shape;
    }
    return maxTensorShape;
}
/// <summary>
/// Returns the names of layers whose output nothing consumes: not another
/// layer, not a model output, not a memory, and not flagged as Preserve.
/// </summary>
public static string[] FindUnusedLayers(Model model)
{
    // Start by assuming every layer is unused, then mark consumers.
    var layerUsageByName = model.layers.ToDictionary(i => i.name, i => false);
    foreach (var layer in model.layers)
    {
        // Preserve-flagged layers are always kept.
        if (layer.flags.HasFlag(Layer.Flags.Preserve))
            layerUsageByName[layer.name] = true;

        foreach (var i in layer.inputs)
        {
            layerUsageByName[i] = true;
        }
    }

    foreach (var o in model.outputs)
    {
        layerUsageByName[o] = true;
    }

    foreach (var mem in model.memories)
    {
        layerUsageByName[mem.output] = true;
    }

    return layerUsageByName.Where(keyValue => !keyValue.Value).Select(keyValue => keyValue.Key).ToArray();
}
// Returns the subset of 'links' that no layer, model input, or memory input
// produces. NOTE: mutates the caller's set in place (ExceptWith) — all current
// callers pass a freshly-built set.
private static string[] FindBrokenLinks(Model model, HashSet<string> links)
{
    // Everything that can legitimately be referenced by name.
    var allVariables = new HashSet<string>(model.layers.Select(i => i.name));
    var globalInputs = new HashSet<string>(model.inputs.Select(i => i.name));
    var memoryInputs = new HashSet<string>(model.memories.Select(i => i.input));
    allVariables.UnionWith(globalInputs);
    allVariables.UnionWith(memoryInputs);

    var brokenLinks = links;
    brokenLinks.ExceptWith(allVariables);
    return brokenLinks.ToArray();
}
// Array convenience wrapper over the HashSet-based overload.
private static string[] FindBrokenLinks(Model model, string[] links)
    => FindBrokenLinks(model, new HashSet<string>(links));
/// <summary>
/// Returns every name referenced by a model output or layer input that no
/// layer, model input, or memory actually produces.
/// </summary>
public static string[] FindBrokenLinks(Model model)
{
    // Collect every referenced name: global outputs plus all layer inputs.
    var linksToInspect = new HashSet<string>(model.outputs);
    foreach (var layer in model.layers)
        linksToInspect.UnionWith(layer.inputs);
    return FindBrokenLinks(model, linksToInspect);
}
/// <summary>
/// Returns the declared model inputs that nothing consumes — neither a global
/// output nor any layer input.
/// </summary>
public static string[] FindUnconnectedInputs(Model model)
{
    var unconnected = model.inputs.ToDictionary(i => i.name, i => true);

    // Remove inputs referenced as global outputs...
    foreach (var output in model.outputs)
        unconnected.Remove(output);

    // ...and inputs consumed by any layer.
    foreach (var layer in model.layers)
        foreach (var input in layer.inputs)
            unconnected.Remove(input);

    return unconnected.Keys.ToArray();
}
/// <summary>
/// Returns the names of everything fed by the given layer: every layer that
/// lists it among its inputs, plus the layer itself when it is a global output.
/// </summary>
public static string[] FindLayerOutputs(Model model, string layerName)
{
    var consumers = model.layers.Where(x => x.inputs.Contains(layerName)).Select(x => x.name);
    var globalOutputs = model.outputs.Where(x => x == layerName);
    // Bug fix: previously the Union result was discarded (LINQ operators are
    // side-effect free), so global outputs never made it into the result.
    return consumers.Union(globalOutputs).ToArray();
}
/// <summary>
/// Returns declared model outputs that no layer, input, or memory produces.
/// </summary>
// Modifier order fixed from "static public" to the conventional "public static",
// consistent with the rest of this class.
public static string[] FindUnconnectedOutputs(Model model)
{
    return FindBrokenLinks(model, model.outputs.ToArray());
}
/// <summary>
/// True when the layer performs a broadcasting operation over its inputs.
/// (Name keeps its historical "Broacastable" misspelling — public API.)
/// </summary>
public static bool IsLayerBroacastable(Layer layer)
{
    return layer.type == Layer.Type.Add ||
        layer.type == Layer.Type.Sub ||
        layer.type == Layer.Type.Mul ||
        layer.type == Layer.Type.Div ||
        layer.type == Layer.Type.Pow ||
        layer.type == Layer.Type.Min ||
        layer.type == Layer.Type.Max ||
        layer.type == Layer.Type.Mean ||
        layer.type == Layer.Type.Greater ||
        layer.type == Layer.Type.GreaterEqual ||
        layer.type == Layer.Type.Less ||
        layer.type == Layer.Type.LessEqual ||
        layer.type == Layer.Type.Equal ||
        layer.type == Layer.Type.LogicalOr ||
        layer.type == Layer.Type.LogicalAnd ||
        layer.type == Layer.Type.LogicalXor ||
        layer.type == Layer.Type.Where ||
        layer.type == Layer.Type.Concat;
}
/// <summary>
/// True when the broadcast pass may skip this layer: a ConstantOfShape whose
/// shape is dynamic (axis != 1).
/// </summary>
public static bool IsLayerBroadcastSkippable(Layer layer)
{
    // dynamic shape support
    return layer.type == Layer.Type.ConstantOfShape && layer.axis != 1;
}
// Allow some unknown input dimensions during the shape-inference pass.
// An unknown batch dimension does not currently break shape inference, so it
// is the one dimension allowed to be unknown (<= 0).
// (Name keeps its historical "Know" misspelling — public API.)
public static bool IsInputShapeAcceptablyKnowForShapeInference(Model.Input input) // acceptable unknown shape : N
{
    for (var i = 0; i < input.shape.Length; ++i)
    {
        if (input.shape[i] <= 0 && i != TensorShape.DataBatch)
            return false;
    }
    return true;
}
/// <summary>
/// Compares the relative order of the non-trivial (size != 1) dimensions
/// before and after applying the given permutation.
/// NOTE(review): returns true when the active-dimension layouts MATCH, i.e.
/// when the transpose does NOT reorder data in memory — which reads inverted
/// relative to the method name; confirm against callers before changing.
/// </summary>
public static bool DoesTransposeChangeTensorLayout(TensorShape shape, int[] permutations)
{
    // Positions of the non-trivial dimensions in the original shape.
    var activeDimLayout = new List<int>();
    for (int i = 0; i < 8; i++)
    {
        if (shape[i] != 1)
            activeDimLayout.Add(i);
    }

    // Normalize a 4D permutation to the 8D layout.
    if (permutations.Length == 4)
        permutations = TensorExtensions.Get8DPermutationsForNHWCPermutationsAndShape(shape, permutations);

    var transposedLayout = TensorExtensions.Permute(new[] { 0, 1, 2, 3, 4, 5, 6, 7 }, permutations);
    var permutedShape = shape.Permute(permutations);
    // Original positions of the non-trivial dimensions after permutation.
    var premutedActiveDimLayout = new List<int>();
    for (int i = 0; i < 8; i++)
    {
        if (permutedShape[i] != 1)
            premutedActiveDimLayout.Add(transposedLayout[i]);
    }

    return activeDimLayout.SequenceEqual(premutedActiveDimLayout);
}
}
} // namespace Unity.Barracuda

View File

@@ -1,11 +0,0 @@
fileFormatVersion: 2
guid: 58838262534854657974303d5782ea38
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -1,253 +0,0 @@
#if ENABLE_BARRACUDA_STATS
using System.Collections.Generic;
using System.IO;
using System.Text;
using UnityEngine;
using UnityEngine.Assertions;
namespace Unity.Barracuda {
/// <summary>
/// Immutable description of one compute-shader dispatch: backend tag, kernel
/// name and work-item counts. Collected while gathering execution statistics.
/// </summary>
public readonly struct DispatchInfo
{
    public readonly string backend;
    public readonly string kernel;
    public readonly int workItemsX;
    public readonly int workItemsY;
    public readonly int workItemsZ;

    public DispatchInfo(string backend, string kernel, int workItemsX, int workItemsY, int workItemsZ)
    {
        this.kernel = kernel;
        this.backend = backend;
        this.workItemsZ = workItemsZ;
        this.workItemsY = workItemsY;
        this.workItemsX = workItemsX;
    }

    /// <summary>Formats as "backend:kernel(x,y,z)".</summary>
    public override string ToString() => $"{backend}:{kernel}({workItemsX},{workItemsY},{workItemsZ})";

    /// <summary>Builds a DispatchInfo from a compute function and its work-item counts.</summary>
    internal static DispatchInfo CreateFromComputeFunc(ComputeFunc computeFunc, int x, int y, int z)
    {
        // Reference-context kernels are tagged "REF", everything else "OPT".
        string backendTag;
        if (computeFunc.computeShaderContext == ComputeShaderContext.Reference)
            backendTag = "REF";
        else
            backendTag = "OPT";
        return new DispatchInfo(backendTag, computeFunc.kernelName, x, y, z);
    }
}
/// <summary>
/// Per-layer execution record: the layer's type and name, the dispatches it
/// issued, a free-form summary and ALU/memory statistics.
/// </summary>
public class LayerExecutionReport
{
    public string LayerType { get; }
    public string LayerName { get; }
    public string DispatchInfos { get; private set; }
    public string Summary { get; private set; }
    public long NumAlu { get; private set; }
    public long NumBytes { get; private set; }

    internal LayerExecutionReport(Layer l)
    {
        // Activation layers are labeled "Activation.<function>", others by type alone.
        var typeLabel = l.type.ToString();
        if (l.type == Layer.Type.Activation)
            typeLabel += "." + l.activation;
        LayerType = typeLabel;
        LayerName = l.name;
        Summary = "";
        DispatchInfos = "";
        NumAlu = 0;
        NumBytes = 0;
    }

    /// <summary>Replaces the free-form summary.</summary>
    internal void SetSummary(string message)
    {
        Summary = message;
    }

    /// <summary>Records ALU-op and byte counts for this layer.</summary>
    internal void SetALUAndMemStats(long alu, long bytes)
    {
        NumAlu = alu;
        NumBytes = bytes;
    }

    /// <summary>Appends one dispatch entry; successive entries are joined with " / ".</summary>
    internal void AddDispatch(DispatchInfo dispatchInfo)
    {
        if (DispatchInfos.Length != 0)
            DispatchInfos += " / ";
        DispatchInfos += dispatchInfo;
    }
}
/// <summary>
/// Execution report for one model run: the reports of every layer that
/// finished executing plus the report of the layer currently running.
/// </summary>
public class ModelExecutionReport
{
    /// <summary>Reports of all layers whose execution has completed.</summary>
    public List<LayerExecutionReport> CompletedLayerExecutionReports { get; }

    /// <summary>Report of the layer currently executing; null between layers.</summary>
    public LayerExecutionReport CurrentLayerExecutionReport { get; private set; }

    internal ModelExecutionReport()
    {
        CompletedLayerExecutionReports = new List<LayerExecutionReport>();
        CurrentLayerExecutionReport = null;
    }

    /// <summary>Opens a new layer report; the previous one must already be closed.</summary>
    internal void LayerExecutionStarted(Layer layer)
    {
        Assert.IsNull(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport = new LayerExecutionReport(layer);
    }

    /// <summary>Moves the current layer report into the completed list.</summary>
    internal void LayerExecutionCompleted()
    {
        CompletedLayerExecutionReports.Add(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport = null;
    }

    /// <summary>Sets the free-form summary on the current layer report.</summary>
    internal void SetLayerSummary(string message)
    {
        Assert.IsNotNull(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport.SetSummary(message);
    }

    /// <summary>Records ALU-op and byte counts on the current layer report.</summary>
    internal void SetLayerALUAndMemStats(long alu, long bytes)
    {
        Assert.IsNotNull(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport.SetALUAndMemStats(alu, bytes);
    }

    /// <summary>Appends a dispatch entry to the current layer report.</summary>
    internal void AddLayerDispatch(DispatchInfo dispatchInfo)
    {
        Assert.IsNotNull(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport.AddDispatch(dispatchInfo);
    }
}
/// <summary>
/// Collects execution reports across multiple model runs together with memory
/// snapshots, and renders everything as a plain-text or spreadsheet-importable
/// string report.
/// </summary>
public class ModelExecutionsReporter : IModelExecutionsReporter
{
//Tab separators make importing into spreadsheet software easy.
public static readonly string SpreadSheetFieldSeparator = "\t";
// Field separator used by the plain-text (non-spreadsheet) format.
public static readonly string TextFormatFieldSeparator = " / ";
// Indentation unit used by the plain-text format.
public static readonly string TextIndentation = " ";
/// <summary>Reports of all model executions that have completed.</summary>
public List<ModelExecutionReport> CompletedModelExecutionReports { get; private set; }
/// <summary>Report of the execution in progress; null between executions.</summary>
public ModelExecutionReport CurrentModelExecutionReport { get; private set; }
/// <summary>Memory snapshots collected during execution.</summary>
public MemorySnapshotsReport MemorySnapshotsReport { get; private set; }
public ModelExecutionsReporter()
{
Reset();
}
/// <summary>Discards all collected reports and snapshots.</summary>
public void Reset()
{
CompletedModelExecutionReports = new List<ModelExecutionReport>();
CurrentModelExecutionReport = null;
MemorySnapshotsReport = new MemorySnapshotsReport();
}
/// <summary>Records a memory snapshot for the given context and layer.</summary>
public void TakeMemorySnapshot(IOps ops, IVars vars, string context, Layer layer)
{
MemorySnapshotsReport.TakeMemorySnapshot(ops, vars, context, layer);
}
/// <summary>Begins a new execution report; the previous one must be completed first.</summary>
public void ModelExecutionStarted()
{
Assert.IsNull(CurrentModelExecutionReport);
CurrentModelExecutionReport = new ModelExecutionReport();
}
/// <summary>Moves the current execution report into the completed list.</summary>
public void ModelExecutionCompleted()
{
CompletedModelExecutionReports.Add(CurrentModelExecutionReport);
CurrentModelExecutionReport = null;
}
// The four methods below forward to the execution currently in progress and
// assert that one has actually been started.
public void LayerExecutionStarted(Layer layer)
{
Assert.IsNotNull(CurrentModelExecutionReport);
CurrentModelExecutionReport.LayerExecutionStarted(layer);
}
public void LayerExecutionCompleted()
{
Assert.IsNotNull(CurrentModelExecutionReport);
CurrentModelExecutionReport.LayerExecutionCompleted();
}
public void SetLayerSummary(string message)
{
Assert.IsNotNull(CurrentModelExecutionReport);
CurrentModelExecutionReport.SetLayerSummary(message);
}
public void SetLayerALUAndMemStats(long alu, long bytes)
{
Assert.IsNotNull(CurrentModelExecutionReport);
CurrentModelExecutionReport.SetLayerALUAndMemStats(alu, bytes);
}
public void AddLayerDispatch(DispatchInfo dispatchInfo)
{
Assert.IsNotNull(CurrentModelExecutionReport);
CurrentModelExecutionReport.AddLayerDispatch(dispatchInfo);
}
// Plain-text rendering; the peak-memory summary is discarded here.
public override string ToString()
{
return GenerateStringReport(out var memoryPeakSummary, false);
}
/// <summary>
/// Renders all completed executions, any in-progress execution, then the
/// memory snapshot reports, into one string.
/// </summary>
/// <param name="memoryPeakSummary">Receives the peak-memory summary computed from the snapshots.</param>
/// <param name="spreadsheetFormat">True for tab-separated output, false for plain text.</param>
/// <returns>The full report.</returns>
public string GenerateStringReport(out MemoryPeakSummary memoryPeakSummary, bool spreadsheetFormat)
{
var stringBuilder = new StringBuilder(1000);
//**************** MODEL EXECUTIONS REPORT - START ****************
stringBuilder.Append($"**************** MODEL EXECUTIONS REPORT - START ****************\n");
stringBuilder.Append($"Number of completed executions : {CompletedModelExecutionReports.Count}\n");
if (CurrentModelExecutionReport != null)
stringBuilder.Append("Warning: last model execution was not completed. It will be logged, but information might be incomplete.\n");
stringBuilder.Append("\n");
int i = 0;
for (; i < CompletedModelExecutionReports.Count; ++i)
{
stringBuilder.Append($"--------- Execution index : {i} - START ---------\n");
MemoryAndExecutionReportHelper.GenerateStringReport(stringBuilder, CompletedModelExecutionReports[i], spreadsheetFormat);
stringBuilder.Append($"--------- Execution index : {i} - STOP ---------\n");
stringBuilder.Append("\n");
}
if (CurrentModelExecutionReport != null)
{
stringBuilder.Append($"--------- Uncompleted execution - START ---------\n");
MemoryAndExecutionReportHelper.GenerateStringReport(stringBuilder, CurrentModelExecutionReport, spreadsheetFormat);
stringBuilder.Append($"--------- Uncompleted execution - STOP ---------\n");
stringBuilder.Append("\n");
}
// NOTE(review): this banner says "EXECUTION" while the START banner says
// "EXECUTIONS" — looks like a typo in the emitted text; left unchanged here.
stringBuilder.Append($"**************** MODEL EXECUTION REPORT - STOP ****************\n");
stringBuilder.Append("\n");
//**************** MODEL EXECUTIONS REPORT - STOP ****************
//**************** MEMORY SNAPSHOTS REPORTS - START ****************
memoryPeakSummary = MemorySnapshotsReport.GenerateStringReport(stringBuilder, spreadsheetFormat);
//**************** MEMORY SNAPSHOTS REPORTS - STOP ****************
return stringBuilder.ToString();
}
#if UNITY_EDITOR
/// <summary>
/// Writes the rendered report to a text file under Application.temporaryCachePath.
/// </summary>
/// <param name="report">Reporter to render.</param>
/// <param name="spreadsheetFormat">True for tab-separated output.</param>
/// <param name="memoryPeakSummary">Receives the peak-memory summary.</param>
/// <param name="filename">Optional file name; defaults to "ModelExecutionReport.txt".</param>
/// <returns>The full path of the written file.</returns>
public static string ToTextFile(IModelExecutionsReporter report, bool spreadsheetFormat, out MemoryPeakSummary memoryPeakSummary, string filename = null)
{
string stringToSave = report.GenerateStringReport(out memoryPeakSummary, spreadsheetFormat);
string fullPath = Application.temporaryCachePath;
if (filename == null)
{
fullPath = Path.Combine(fullPath, "ModelExecutionReport");
fullPath = Path.ChangeExtension(fullPath, "txt");
}
else
{
fullPath = Path.Combine(fullPath, filename);
}
File.WriteAllText(fullPath, stringToSave);
return fullPath;
}
#endif
}
} // namespace Unity.Barracuda
#endif //ENABLE_BARRACUDA_STATS

Some files were not shown because too many files have changed in this diff Show More