Resolve WES-100 "Natml integration"
This commit is contained in:
@@ -2,7 +2,8 @@
|
||||
"name": "InterfacesScripts",
|
||||
"rootNamespace": "",
|
||||
"references": [
|
||||
"GUID:5c2b5ba89f9e74e418232e154bc5cc7a"
|
||||
"GUID:5c2b5ba89f9e74e418232e154bc5cc7a",
|
||||
"GUID:d23f64cfd3b314bb4a18a8284c99bf5e"
|
||||
],
|
||||
"includePlatforms": [],
|
||||
"excludePlatforms": [],
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 7f2d0ee6dd21e1d4eb25b71b7a749d25
|
||||
folderAsset: yes
|
||||
DefaultImporter:
|
||||
AssemblyDefinitionImporter:
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
|
||||
@@ -6,6 +6,6 @@ using UnityEngine;
|
||||
/// </summary>
|
||||
public enum ModelIndex
|
||||
{
|
||||
FINGERSPELLING,
|
||||
NONE
|
||||
NONE,
|
||||
FINGERSPELLING
|
||||
}
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
using NatML;
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using UnityEngine;
|
||||
using Unity.Barracuda;
|
||||
/// <summary>
|
||||
/// This scriptable will hold tupples of Courseindices and models
|
||||
/// </summary>
|
||||
@@ -22,28 +21,49 @@ public class ModelList : ScriptableObject
|
||||
/// <summary>
|
||||
/// The model itself
|
||||
/// </summary>
|
||||
public NNModel model;
|
||||
public MLModelData modelWINDOWS;
|
||||
/// <summary>
|
||||
/// The model itself
|
||||
/// </summary>
|
||||
public MLModelData modelMAC;
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Index of the currently active model
|
||||
/// </summary>
|
||||
public int currentModelIndex = 0;
|
||||
|
||||
/// <summary>
|
||||
/// A list of all the models
|
||||
/// </summary>
|
||||
public List<ModelTuple> models = new List<ModelTuple>();
|
||||
|
||||
/// <summary>
|
||||
/// Index of the currently active model
|
||||
/// </summary>
|
||||
public int currentModelIndex = 0;
|
||||
|
||||
/// <summary>
|
||||
/// Get a model by modelindex
|
||||
/// </summary>
|
||||
/// <param name="modelIndex">ModelIndex of the model</param>
|
||||
/// <returns>Model associated with this index, null if no model was found</returns>
|
||||
public NNModel GetCurrentModel()
|
||||
public MLModelData GetCurrentModel()
|
||||
{
|
||||
return models.Find(x => x.model == models[currentModelIndex].model)?.model;
|
||||
|
||||
// Select Model based on OS
|
||||
#if (UNITY_STANDALONE_WIN || UNITY_EDITOR_WIN)
|
||||
return models.Find(x => x.modelWINDOWS == models[currentModelIndex].modelWINDOWS)?.modelWINDOWS;
|
||||
#elif (UNITY_STANDALONE_OSX || UNITY_EDITOR_OSX)
|
||||
return models.Find(x => x.modelMAC == models[currentModelIndex].modelMAC)?.modelMAC;
|
||||
#endif
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Function to check if the modelIndex has been set
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
public bool HasValidModel()
|
||||
{
|
||||
return models[currentModelIndex].index != (int)ModelIndex.NONE;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -27,6 +27,7 @@ public class Theme : ScriptableObject
|
||||
/// </summary>
|
||||
public ModelIndex modelIndex;
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// List of all learnable words/letters
|
||||
/// </summary>
|
||||
|
||||
@@ -7,10 +7,4 @@ ScriptedImporter:
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
script: {fileID: 11500000, guid: 683b6cb6d0a474744822c888b46772c9, type: 3}
|
||||
optimizeModel: 1
|
||||
forceArbitraryBatchSize: 1
|
||||
treatErrorsAsWarnings: 0
|
||||
importMode: 1
|
||||
weightsTypeMode: 0
|
||||
activationTypeMode: 0
|
||||
script: {fileID: 11500000, guid: 8264490bef67c46f2982e6dd3f5e46cd, type: 3}
|
||||
|
||||
BIN
Assets/Common/Models/FingerSpelling/model_A-Z2.onnx
Normal file
BIN
Assets/Common/Models/FingerSpelling/model_A-Z2.onnx
Normal file
Binary file not shown.
10
Assets/Common/Models/FingerSpelling/model_A-Z2.onnx.meta
Normal file
10
Assets/Common/Models/FingerSpelling/model_A-Z2.onnx.meta
Normal file
@@ -0,0 +1,10 @@
|
||||
fileFormatVersion: 2
|
||||
guid: fdbf401e965a6bf4a87637cd519f2715
|
||||
ScriptedImporter:
|
||||
internalIDToNameTable: []
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
script: {fileID: 11500000, guid: 8264490bef67c46f2982e6dd3f5e46cd, type: 3}
|
||||
@@ -15,7 +15,7 @@ MonoBehaviour:
|
||||
title: Handalfabet
|
||||
description: Van A tot Z
|
||||
index: 0
|
||||
model: {fileID: 5022602860645237092, guid: e6d85df707405ad4f97c23b07227ee99, type: 3}
|
||||
modelIndex: 1
|
||||
learnables:
|
||||
- name: A
|
||||
image: {fileID: 21300000, guid: 4eb4ef55f866f114dafb722f4bd05c76, type: 3}
|
||||
|
||||
@@ -6,8 +6,8 @@
|
||||
"UnityEditor.TestRunner",
|
||||
"CommonScripts",
|
||||
"InterfacesScripts",
|
||||
"Unity.Barracuda",
|
||||
"SignPredictor"
|
||||
"SignPredictor",
|
||||
"NatML.ML"
|
||||
],
|
||||
"includePlatforms": [
|
||||
"Editor"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
using NatML;
|
||||
using NUnit.Framework;
|
||||
using Unity.Barracuda;
|
||||
using UnityEngine;
|
||||
/// <summary>
|
||||
/// Test the ModelList class
|
||||
@@ -45,7 +45,11 @@ public class ModelListTest
|
||||
ModelIndex value = (ModelIndex)random.Next(modelList.models.Count);
|
||||
modelList.SetCurrentModel(value);
|
||||
|
||||
Assert.AreEqual(modelList.models[modelList.currentModelIndex].model, modelList.GetCurrentModel());
|
||||
#if (UNITY_STANDALONE_WIN || UNITY_EDITOR_WIN)
|
||||
Assert.AreEqual(modelList.models[modelList.currentModelIndex].modelWINDOWS, modelList.GetCurrentModel());
|
||||
#elif (UNITY_STANDALONE_OSX || UNITY_EDITOR_OSX)
|
||||
Assert.AreEqual(modelList.models[modelList.currentModelIndex].modelMAC, modelList.GetCurrentModel());
|
||||
#endif
|
||||
|
||||
// Check if empty model fails gracefully (returns null)
|
||||
Assert.IsNull(ScriptableObject.CreateInstance<ModelList>().GetCurrentModel());
|
||||
@@ -69,7 +73,11 @@ public class ModelListTest
|
||||
ModelList.ModelTuple m = modelList.models[modelList.currentModelIndex];
|
||||
|
||||
Assert.AreEqual(m.index, value);
|
||||
Assert.IsTrue(m.model is NNModel || m.model is null);
|
||||
#if (UNITY_STANDALONE_WIN || UNITY_EDITOR_WIN)
|
||||
Assert.IsTrue(m.modelWINDOWS is MLModelData || m.modelWINDOWS is null);
|
||||
#elif (UNITY_STANDALONE_OSX || UNITY_EDITOR_OSX)
|
||||
Assert.IsTrue(m.modelMAC is MLModelData || m.modelMAC is null);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
ModelList emptyList = ScriptableObject.CreateInstance<ModelList>();
|
||||
|
||||
@@ -5,8 +5,9 @@
|
||||
"Unity.TextMeshPro",
|
||||
"AccountsScripts",
|
||||
"InterfacesScripts",
|
||||
"Tween",
|
||||
"SignPredictor"
|
||||
"SignPredictor",
|
||||
"NatML.ML",
|
||||
"Tween"
|
||||
],
|
||||
"includePlatforms": [],
|
||||
"excludePlatforms": [],
|
||||
|
||||
@@ -152,8 +152,7 @@ public class CoursesController : AbstractFeedback
|
||||
void Start()
|
||||
{
|
||||
StartCourseController();
|
||||
|
||||
signPredictor.SetModel(course.theme.modelIndex);
|
||||
signPredictor.ChangeModel(course.theme.modelIndex);
|
||||
AddSelfAsListener();
|
||||
}
|
||||
/// <summary>
|
||||
|
||||
@@ -38,7 +38,7 @@ RenderSettings:
|
||||
m_ReflectionIntensity: 1
|
||||
m_CustomReflection: {fileID: 0}
|
||||
m_Sun: {fileID: 0}
|
||||
m_IndirectSpecularColor: {r: 0.37311918, g: 0.3807398, b: 0.35872716, a: 1}
|
||||
m_IndirectSpecularColor: {r: 0.37311953, g: 0.38074014, b: 0.3587274, a: 1}
|
||||
m_UseRadianceAmbientProbe: 0
|
||||
--- !u!157 &3
|
||||
LightmapSettings:
|
||||
@@ -6416,472 +6416,3 @@ CanvasRenderer:
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 2039368310}
|
||||
m_CullTransparentMesh: 1
|
||||
--- !u!114 &5233312447201393291
|
||||
MonoBehaviour:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312447201393293}
|
||||
m_Enabled: 1
|
||||
m_EditorHideFlags: 0
|
||||
m_Script: {fileID: 11500000, guid: 67db9e8f0e2ae9c40bc1e2b64352a6b4, type: 3}
|
||||
m_Name:
|
||||
m_EditorClassIdentifier:
|
||||
m_Navigation:
|
||||
m_Mode: 3
|
||||
m_WrapAround: 0
|
||||
m_SelectOnUp: {fileID: 0}
|
||||
m_SelectOnDown: {fileID: 0}
|
||||
m_SelectOnLeft: {fileID: 0}
|
||||
m_SelectOnRight: {fileID: 0}
|
||||
m_Transition: 1
|
||||
m_Colors:
|
||||
m_NormalColor: {r: 1, g: 1, b: 1, a: 1}
|
||||
m_HighlightedColor: {r: 0.9607843, g: 0.9607843, b: 0.9607843, a: 1}
|
||||
m_PressedColor: {r: 0.78431374, g: 0.78431374, b: 0.78431374, a: 1}
|
||||
m_SelectedColor: {r: 0.9607843, g: 0.9607843, b: 0.9607843, a: 1}
|
||||
m_DisabledColor: {r: 0.78431374, g: 0.78431374, b: 0.78431374, a: 0.5019608}
|
||||
m_ColorMultiplier: 1
|
||||
m_FadeDuration: 0.1
|
||||
m_SpriteState:
|
||||
m_HighlightedSprite: {fileID: 0}
|
||||
m_PressedSprite: {fileID: 0}
|
||||
m_SelectedSprite: {fileID: 0}
|
||||
m_DisabledSprite: {fileID: 0}
|
||||
m_AnimationTriggers:
|
||||
m_NormalTrigger: Normal
|
||||
m_HighlightedTrigger: Highlighted
|
||||
m_PressedTrigger: Pressed
|
||||
m_SelectedTrigger: Selected
|
||||
m_DisabledTrigger: Disabled
|
||||
m_Interactable: 1
|
||||
m_TargetGraphic: {fileID: 0}
|
||||
m_FillRect: {fileID: 5233312447919013132}
|
||||
m_HandleRect: {fileID: 0}
|
||||
m_Direction: 0
|
||||
m_MinValue: 0
|
||||
m_MaxValue: 1
|
||||
m_WholeNumbers: 0
|
||||
m_Value: 0
|
||||
m_OnValueChanged:
|
||||
m_PersistentCalls:
|
||||
m_Calls: []
|
||||
--- !u!224 &5233312447201393292
|
||||
RectTransform:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312447201393293}
|
||||
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
|
||||
m_LocalPosition: {x: 0, y: 0, z: 0}
|
||||
m_LocalScale: {x: 1, y: 1, z: 1}
|
||||
m_ConstrainProportionsScale: 0
|
||||
m_Children:
|
||||
- {fileID: 5233312448534255807}
|
||||
- {fileID: 5233312448785575104}
|
||||
m_Father: {fileID: 5233312447513285389}
|
||||
m_RootOrder: 1
|
||||
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
||||
m_AnchorMin: {x: 0, y: 0}
|
||||
m_AnchorMax: {x: 1, y: 0}
|
||||
m_AnchoredPosition: {x: 0, y: 0}
|
||||
m_SizeDelta: {x: 0, y: 50}
|
||||
m_Pivot: {x: 0.5, y: 0}
|
||||
--- !u!1 &5233312447201393293
|
||||
GameObject:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
serializedVersion: 6
|
||||
m_Component:
|
||||
- component: {fileID: 5233312447201393292}
|
||||
- component: {fileID: 5233312447201393291}
|
||||
m_Layer: 5
|
||||
m_Name: Progress
|
||||
m_TagString: Untagged
|
||||
m_Icon: {fileID: 0}
|
||||
m_NavMeshLayer: 0
|
||||
m_StaticEditorFlags: 0
|
||||
m_IsActive: 1
|
||||
--- !u!114 &5233312447513285388
|
||||
MonoBehaviour:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312447513285390}
|
||||
m_Enabled: 1
|
||||
m_EditorHideFlags: 0
|
||||
m_Script: {fileID: 11500000, guid: 44e682a32ee15cc489bf50f3a06f717b, type: 3}
|
||||
m_Name:
|
||||
m_EditorClassIdentifier:
|
||||
feedbackText: {fileID: 0}
|
||||
feedbackProgress: {fileID: 0}
|
||||
feedbackProgressImage: {fileID: 0}
|
||||
signPredictor: {fileID: 1991376311}
|
||||
--- !u!224 &5233312447513285389
|
||||
RectTransform:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312447513285390}
|
||||
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
|
||||
m_LocalPosition: {x: 0, y: 0, z: 0}
|
||||
m_LocalScale: {x: 1, y: 1, z: 1}
|
||||
m_ConstrainProportionsScale: 0
|
||||
m_Children:
|
||||
- {fileID: 5233312448025626847}
|
||||
- {fileID: 5233312447201393292}
|
||||
m_Father: {fileID: 0}
|
||||
m_RootOrder: 5
|
||||
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
||||
m_AnchorMin: {x: 0.5, y: 0}
|
||||
m_AnchorMax: {x: 0.5, y: 0}
|
||||
m_AnchoredPosition: {x: 960, y: 200}
|
||||
m_SizeDelta: {x: 500, y: 150}
|
||||
m_Pivot: {x: 0.5, y: 0}
|
||||
--- !u!1 &5233312447513285390
|
||||
GameObject:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
serializedVersion: 6
|
||||
m_Component:
|
||||
- component: {fileID: 5233312447513285389}
|
||||
- component: {fileID: 5233312447513285388}
|
||||
m_Layer: 5
|
||||
m_Name: Feedback
|
||||
m_TagString: Untagged
|
||||
m_Icon: {fileID: 0}
|
||||
m_NavMeshLayer: 0
|
||||
m_StaticEditorFlags: 0
|
||||
m_IsActive: 1
|
||||
--- !u!224 &5233312447919013132
|
||||
RectTransform:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312447919013135}
|
||||
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
|
||||
m_LocalPosition: {x: 0, y: 0, z: 0}
|
||||
m_LocalScale: {x: 1, y: 1, z: 1}
|
||||
m_ConstrainProportionsScale: 0
|
||||
m_Children: []
|
||||
m_Father: {fileID: 5233312448785575104}
|
||||
m_RootOrder: 0
|
||||
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
||||
m_AnchorMin: {x: 0, y: 0}
|
||||
m_AnchorMax: {x: 0, y: 0}
|
||||
m_AnchoredPosition: {x: 0, y: 0}
|
||||
m_SizeDelta: {x: 10, y: 0}
|
||||
m_Pivot: {x: 0.5, y: 0.5}
|
||||
--- !u!222 &5233312447919013133
|
||||
CanvasRenderer:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312447919013135}
|
||||
m_CullTransparentMesh: 1
|
||||
--- !u!114 &5233312447919013134
|
||||
MonoBehaviour:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312447919013135}
|
||||
m_Enabled: 1
|
||||
m_EditorHideFlags: 0
|
||||
m_Script: {fileID: 11500000, guid: fe87c0e1cc204ed48ad3b37840f39efc, type: 3}
|
||||
m_Name:
|
||||
m_EditorClassIdentifier:
|
||||
m_Material: {fileID: 0}
|
||||
m_Color: {r: 1, g: 0, b: 0, a: 1}
|
||||
m_RaycastTarget: 1
|
||||
m_RaycastPadding: {x: 0, y: 0, z: 0, w: 0}
|
||||
m_Maskable: 1
|
||||
m_OnCullStateChanged:
|
||||
m_PersistentCalls:
|
||||
m_Calls: []
|
||||
m_Sprite: {fileID: 10905, guid: 0000000000000000f000000000000000, type: 0}
|
||||
m_Type: 1
|
||||
m_PreserveAspect: 0
|
||||
m_FillCenter: 1
|
||||
m_FillMethod: 4
|
||||
m_FillAmount: 1
|
||||
m_FillClockwise: 1
|
||||
m_FillOrigin: 0
|
||||
m_UseSpriteMesh: 0
|
||||
m_PixelsPerUnitMultiplier: 1
|
||||
--- !u!1 &5233312447919013135
|
||||
GameObject:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
serializedVersion: 6
|
||||
m_Component:
|
||||
- component: {fileID: 5233312447919013132}
|
||||
- component: {fileID: 5233312447919013133}
|
||||
- component: {fileID: 5233312447919013134}
|
||||
m_Layer: 5
|
||||
m_Name: Fill
|
||||
m_TagString: Untagged
|
||||
m_Icon: {fileID: 0}
|
||||
m_NavMeshLayer: 0
|
||||
m_StaticEditorFlags: 0
|
||||
m_IsActive: 1
|
||||
--- !u!222 &5233312448025626832
|
||||
CanvasRenderer:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312448025626834}
|
||||
m_CullTransparentMesh: 1
|
||||
--- !u!114 &5233312448025626833
|
||||
MonoBehaviour:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312448025626834}
|
||||
m_Enabled: 1
|
||||
m_EditorHideFlags: 0
|
||||
m_Script: {fileID: 11500000, guid: f4688fdb7df04437aeb418b961361dc5, type: 3}
|
||||
m_Name:
|
||||
m_EditorClassIdentifier:
|
||||
m_Material: {fileID: 0}
|
||||
m_Color: {r: 1, g: 1, b: 1, a: 1}
|
||||
m_RaycastTarget: 1
|
||||
m_RaycastPadding: {x: 0, y: 0, z: 0, w: 0}
|
||||
m_Maskable: 1
|
||||
m_OnCullStateChanged:
|
||||
m_PersistentCalls:
|
||||
m_Calls: []
|
||||
m_text: Detecteren ...
|
||||
m_isRightToLeft: 0
|
||||
m_fontAsset: {fileID: 11400000, guid: 8f586378b4e144a9851e7b34d9b748ee, type: 2}
|
||||
m_sharedMaterial: {fileID: 2180264, guid: 8f586378b4e144a9851e7b34d9b748ee, type: 2}
|
||||
m_fontSharedMaterials: []
|
||||
m_fontMaterial: {fileID: 0}
|
||||
m_fontMaterials: []
|
||||
m_fontColor32:
|
||||
serializedVersion: 2
|
||||
rgba: 4282188031
|
||||
m_fontColor: {r: 0.5803922, g: 0.58431375, b: 0.6, a: 1}
|
||||
m_enableVertexGradient: 0
|
||||
m_colorMode: 3
|
||||
m_fontColorGradient:
|
||||
topLeft: {r: 1, g: 1, b: 1, a: 1}
|
||||
topRight: {r: 1, g: 1, b: 1, a: 1}
|
||||
bottomLeft: {r: 1, g: 1, b: 1, a: 1}
|
||||
bottomRight: {r: 1, g: 1, b: 1, a: 1}
|
||||
m_fontColorGradientPreset: {fileID: 0}
|
||||
m_spriteAsset: {fileID: 0}
|
||||
m_tintAllSprites: 0
|
||||
m_StyleSheet: {fileID: 0}
|
||||
m_TextStyleHashCode: -1183493901
|
||||
m_overrideHtmlColors: 0
|
||||
m_faceColor:
|
||||
serializedVersion: 2
|
||||
rgba: 4294967295
|
||||
m_fontSize: 48
|
||||
m_fontSizeBase: 48
|
||||
m_fontWeight: 400
|
||||
m_enableAutoSizing: 0
|
||||
m_fontSizeMin: 18
|
||||
m_fontSizeMax: 72
|
||||
m_fontStyle: 1
|
||||
m_HorizontalAlignment: 2
|
||||
m_VerticalAlignment: 512
|
||||
m_textAlignment: 65535
|
||||
m_characterSpacing: 0
|
||||
m_wordSpacing: 0
|
||||
m_lineSpacing: 0
|
||||
m_lineSpacingMax: 0
|
||||
m_paragraphSpacing: 0
|
||||
m_charWidthMaxAdj: 0
|
||||
m_enableWordWrapping: 1
|
||||
m_wordWrappingRatios: 0.4
|
||||
m_overflowMode: 0
|
||||
m_linkedTextComponent: {fileID: 0}
|
||||
parentLinkedComponent: {fileID: 0}
|
||||
m_enableKerning: 1
|
||||
m_enableExtraPadding: 0
|
||||
checkPaddingRequired: 0
|
||||
m_isRichText: 1
|
||||
m_parseCtrlCharacters: 1
|
||||
m_isOrthographic: 1
|
||||
m_isCullingEnabled: 0
|
||||
m_horizontalMapping: 0
|
||||
m_verticalMapping: 0
|
||||
m_uvLineOffset: 0
|
||||
m_geometrySortingOrder: 0
|
||||
m_IsTextObjectScaleStatic: 0
|
||||
m_VertexBufferAutoSizeReduction: 0
|
||||
m_useMaxVisibleDescender: 1
|
||||
m_pageToDisplay: 1
|
||||
m_margin: {x: 0, y: 0, z: 0, w: 0}
|
||||
m_isUsingLegacyAnimationComponent: 0
|
||||
m_isVolumetricText: 0
|
||||
m_hasFontAssetChanged: 0
|
||||
m_baseMaterial: {fileID: 0}
|
||||
m_maskOffset: {x: 0, y: 0, z: 0, w: 0}
|
||||
--- !u!1 &5233312448025626834
|
||||
GameObject:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
serializedVersion: 6
|
||||
m_Component:
|
||||
- component: {fileID: 5233312448025626847}
|
||||
- component: {fileID: 5233312448025626832}
|
||||
- component: {fileID: 5233312448025626833}
|
||||
m_Layer: 5
|
||||
m_Name: Text
|
||||
m_TagString: Untagged
|
||||
m_Icon: {fileID: 0}
|
||||
m_NavMeshLayer: 0
|
||||
m_StaticEditorFlags: 0
|
||||
m_IsActive: 1
|
||||
--- !u!224 &5233312448025626847
|
||||
RectTransform:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312448025626834}
|
||||
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
|
||||
m_LocalPosition: {x: 0, y: 0, z: 0}
|
||||
m_LocalScale: {x: 1, y: 1, z: 1}
|
||||
m_ConstrainProportionsScale: 0
|
||||
m_Children: []
|
||||
m_Father: {fileID: 5233312447513285389}
|
||||
m_RootOrder: 0
|
||||
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
||||
m_AnchorMin: {x: 0.5, y: 1}
|
||||
m_AnchorMax: {x: 0.5, y: 1}
|
||||
m_AnchoredPosition: {x: 0, y: 0}
|
||||
m_SizeDelta: {x: 500, y: 100}
|
||||
m_Pivot: {x: 0.5, y: 1}
|
||||
--- !u!1 &5233312448534255792
|
||||
GameObject:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
serializedVersion: 6
|
||||
m_Component:
|
||||
- component: {fileID: 5233312448534255807}
|
||||
- component: {fileID: 5233312448534255805}
|
||||
- component: {fileID: 5233312448534255806}
|
||||
m_Layer: 5
|
||||
m_Name: Background
|
||||
m_TagString: Untagged
|
||||
m_Icon: {fileID: 0}
|
||||
m_NavMeshLayer: 0
|
||||
m_StaticEditorFlags: 0
|
||||
m_IsActive: 1
|
||||
--- !u!222 &5233312448534255805
|
||||
CanvasRenderer:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312448534255792}
|
||||
m_CullTransparentMesh: 1
|
||||
--- !u!114 &5233312448534255806
|
||||
MonoBehaviour:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312448534255792}
|
||||
m_Enabled: 1
|
||||
m_EditorHideFlags: 0
|
||||
m_Script: {fileID: 11500000, guid: fe87c0e1cc204ed48ad3b37840f39efc, type: 3}
|
||||
m_Name:
|
||||
m_EditorClassIdentifier:
|
||||
m_Material: {fileID: 0}
|
||||
m_Color: {r: 1, g: 1, b: 1, a: 1}
|
||||
m_RaycastTarget: 1
|
||||
m_RaycastPadding: {x: 0, y: 0, z: 0, w: 0}
|
||||
m_Maskable: 1
|
||||
m_OnCullStateChanged:
|
||||
m_PersistentCalls:
|
||||
m_Calls: []
|
||||
m_Sprite: {fileID: 10907, guid: 0000000000000000f000000000000000, type: 0}
|
||||
m_Type: 1
|
||||
m_PreserveAspect: 0
|
||||
m_FillCenter: 1
|
||||
m_FillMethod: 4
|
||||
m_FillAmount: 1
|
||||
m_FillClockwise: 1
|
||||
m_FillOrigin: 0
|
||||
m_UseSpriteMesh: 0
|
||||
m_PixelsPerUnitMultiplier: 1
|
||||
--- !u!224 &5233312448534255807
|
||||
RectTransform:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312448534255792}
|
||||
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
|
||||
m_LocalPosition: {x: 0, y: 0, z: 0}
|
||||
m_LocalScale: {x: 1, y: 1, z: 1}
|
||||
m_ConstrainProportionsScale: 0
|
||||
m_Children: []
|
||||
m_Father: {fileID: 5233312447201393292}
|
||||
m_RootOrder: 0
|
||||
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
||||
m_AnchorMin: {x: 0, y: 0}
|
||||
m_AnchorMax: {x: 1, y: 1}
|
||||
m_AnchoredPosition: {x: 0, y: 0}
|
||||
m_SizeDelta: {x: 0, y: 0}
|
||||
m_Pivot: {x: 0.5, y: 0.5}
|
||||
--- !u!224 &5233312448785575104
|
||||
RectTransform:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 5233312448785575105}
|
||||
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
|
||||
m_LocalPosition: {x: 0, y: 0, z: 0}
|
||||
m_LocalScale: {x: 1, y: 1, z: 1}
|
||||
m_ConstrainProportionsScale: 0
|
||||
m_Children:
|
||||
- {fileID: 5233312447919013132}
|
||||
m_Father: {fileID: 5233312447201393292}
|
||||
m_RootOrder: 1
|
||||
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
||||
m_AnchorMin: {x: 0, y: 0}
|
||||
m_AnchorMax: {x: 1, y: 1}
|
||||
m_AnchoredPosition: {x: 0, y: 0}
|
||||
m_SizeDelta: {x: 0, y: 0}
|
||||
m_Pivot: {x: 0.5, y: 0.5}
|
||||
--- !u!1 &5233312448785575105
|
||||
GameObject:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
serializedVersion: 6
|
||||
m_Component:
|
||||
- component: {fileID: 5233312448785575104}
|
||||
m_Layer: 5
|
||||
m_Name: Fill Area
|
||||
m_TagString: Untagged
|
||||
m_Icon: {fileID: 0}
|
||||
m_NavMeshLayer: 0
|
||||
m_StaticEditorFlags: 0
|
||||
m_IsActive: 1
|
||||
|
||||
@@ -244,7 +244,7 @@ public class HangmanController : AbstractFeedback
|
||||
{
|
||||
StartController();
|
||||
|
||||
signPredictor.SetModel(ModelIndex.FINGERSPELLING);
|
||||
signPredictor.ChangeModel(ModelIndex.FINGERSPELLING);
|
||||
AddSelfAsListener();
|
||||
}
|
||||
/// <summary>
|
||||
|
||||
@@ -74,12 +74,15 @@ public class KeypointManager
|
||||
}
|
||||
|
||||
|
||||
if (width > height){
|
||||
delta_x = ((float)0.1)*width;
|
||||
delta_y = delta_x + ((width - height)/2);
|
||||
}else{
|
||||
delta_y = ((float)0.1)*height;
|
||||
delta_x = delta_y + ((height - width)/2);
|
||||
if (width > height)
|
||||
{
|
||||
delta_x = ((float)0.1) * width;
|
||||
delta_y = delta_x + ((width - height) / 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
delta_y = ((float)0.1) * height;
|
||||
delta_x = delta_y + ((height - width) / 2);
|
||||
}
|
||||
|
||||
float starting_x = min_x - delta_x;
|
||||
@@ -124,10 +127,10 @@ public class KeypointManager
|
||||
float eye_left_x = pose_x[1];
|
||||
float eye_left_y = pose_y[1];
|
||||
|
||||
float starting_x = shoulder_center_x - (bbox_size/2) * shoulder_distance;
|
||||
float starting_y = eye_left_y - shoulder_distance/2;
|
||||
float starting_x = shoulder_center_x - (bbox_size / 2) * shoulder_distance;
|
||||
float starting_y = eye_left_y - shoulder_distance / 2;
|
||||
|
||||
float ending_x = shoulder_center_x + (bbox_size/2) * shoulder_distance;
|
||||
float ending_x = shoulder_center_x + (bbox_size / 2) * shoulder_distance;
|
||||
float ending_y = starting_y + (bbox_size - ((float)0.5)) * shoulder_distance;
|
||||
|
||||
float bbox_center_x = (starting_x + ending_x) / 2;
|
||||
|
||||
@@ -15,6 +15,8 @@ MonoBehaviour:
|
||||
currentModelIndex: 0
|
||||
models:
|
||||
- index: 0
|
||||
model: {fileID: 5022602860645237092, guid: e6d85df707405ad4f97c23b07227ee99, type: 3}
|
||||
modelWINDOWS: {fileID: 0}
|
||||
modelMAC: {fileID: 0}
|
||||
- index: 1
|
||||
model: {fileID: 0}
|
||||
modelWINDOWS: {fileID: 8538825877217656561, guid: fdbf401e965a6bf4a87637cd519f2715, type: 3}
|
||||
modelMAC: {fileID: 0}
|
||||
|
||||
@@ -1,11 +1,5 @@
|
||||
using DigitalRuby.Tween;
|
||||
using Mediapipe.Unity.Tutorial;
|
||||
using System;
|
||||
using System.Collections;
|
||||
using TMPro;
|
||||
using UnityEngine;
|
||||
using UnityEngine.Events;
|
||||
using UnityEngine.UI;
|
||||
|
||||
/// <summary>
|
||||
/// Class to display feedback during a course
|
||||
|
||||
@@ -3,12 +3,12 @@
|
||||
"rootNamespace": "",
|
||||
"references": [
|
||||
"GUID:6055be8ebefd69e48b49212b09b47b2f",
|
||||
"GUID:5c2b5ba89f9e74e418232e154bc5cc7a",
|
||||
"GUID:04c4d86a70aa56c55a78c61f1ab1a56d",
|
||||
"GUID:edc93f477bb73a743a97d6882ed330b3",
|
||||
"GUID:58e104b97fb3752438ada2902a36dcbf",
|
||||
"GUID:7f2d0ee6dd21e1d4eb25b71b7a749d25",
|
||||
"GUID:f55a02e98b01bc849b30d9650ccd8f15"
|
||||
"GUID:f55a02e98b01bc849b30d9650ccd8f15",
|
||||
"GUID:d23f64cfd3b314bb4a18a8284c99bf5e"
|
||||
],
|
||||
"includePlatforms": [],
|
||||
"excludePlatforms": [],
|
||||
|
||||
@@ -1,334 +1,362 @@
|
||||
// Copyright (c) 2021 homuler
|
||||
//
|
||||
// Use of this source code is governed by an MIT-style
|
||||
// license that can be found in the LICENSE file or at
|
||||
// https://opensource.org/licenses/MIT.
|
||||
|
||||
// ATTENTION!: This code is for a tutorial.
|
||||
|
||||
using Mediapipe;
|
||||
using Mediapipe.Unity;
|
||||
using NatML;
|
||||
using NatML.Features;
|
||||
using NatML.Internal;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using Unity.Barracuda;
|
||||
using System.Threading.Tasks;
|
||||
using UnityEngine;
|
||||
using UnityEngine.UI;
|
||||
|
||||
namespace Mediapipe.Unity.Tutorial
|
||||
/// <summary>
|
||||
///
|
||||
/// </summary>
|
||||
public class SignPredictor : MonoBehaviour
|
||||
{
|
||||
public class SignPredictor : MonoBehaviour
|
||||
/// <summary>
|
||||
/// Predictor class which is used to predict the sign using an MLEdgeModel
|
||||
/// </summary>
|
||||
public class NatMLSignPredictor : IMLPredictor<List<float>>
|
||||
{
|
||||
/// <summary>
|
||||
/// ModelList, used to change model using ModelIndex
|
||||
/// The MLEdgeModel used for predictions
|
||||
/// </summary>
|
||||
public ModelList modelList;
|
||||
private readonly MLEdgeModel edgeModel;
|
||||
|
||||
/// <summary>
|
||||
/// Reference to the model info file
|
||||
/// The type used to create features which are input for the model
|
||||
/// </summary>
|
||||
public TextAsset modelInfoFile;
|
||||
private MLFeatureType featureType;
|
||||
|
||||
/// <summary>
|
||||
/// Config file to set up the graph
|
||||
/// Creation of a NatMLSignPredictor instance
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private TextAsset configAsset;
|
||||
|
||||
/// <summary>
|
||||
/// Index to indicate which camera is being used
|
||||
/// </summary>
|
||||
private int camdex = 0;
|
||||
|
||||
/// <summary>
|
||||
/// The screen object on which the video is displayed
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private RawImage screen;
|
||||
|
||||
/// <summary>
|
||||
/// A secondary optional screen object on which the video is displayed
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private RawImage screen2;
|
||||
|
||||
/// <summary>
|
||||
/// MediaPipe graph
|
||||
/// </summary>
|
||||
private CalculatorGraph graph;
|
||||
|
||||
/// <summary>
|
||||
/// Resource manager for graph resources
|
||||
/// </summary>
|
||||
private ResourceManager resourceManager;
|
||||
|
||||
/// <summary>
|
||||
/// Webcam texture
|
||||
/// </summary>
|
||||
private WebCamTexture webcamTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Input texture
|
||||
/// </summary>
|
||||
private Texture2D inputTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Screen pixel data
|
||||
/// </summary>
|
||||
private Color32[] pixelData;
|
||||
|
||||
/// <summary>
|
||||
/// Stopwatch to give a timestamp to video frames
|
||||
/// </summary>
|
||||
private Stopwatch stopwatch;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the pose landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> posestream;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the left hand landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> leftstream;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the right hand landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> rightstream;
|
||||
|
||||
/// <summary>
|
||||
/// create precense stream
|
||||
/// </summary>
|
||||
public OutputStream<DetectionVectorPacket, List<Detection>> presenceStream;
|
||||
|
||||
/// <summary>
|
||||
/// A keypointmanager which does normalization stuff, keeps track of the landmarks
|
||||
/// </summary>
|
||||
private KeypointManager keypointManager;
|
||||
|
||||
/// <summary>
|
||||
/// The worker on which we schedule the signpredictor model execution
|
||||
/// </summary>
|
||||
private IWorker worker;
|
||||
|
||||
/// <summary>
|
||||
/// Width of th webcam
|
||||
/// </summary>
|
||||
private int width;
|
||||
|
||||
/// <summary>
|
||||
/// Height of the webcam
|
||||
/// </summary>
|
||||
private int height;
|
||||
|
||||
/// <summary>
|
||||
/// The enumerator of the worker which executes the sign predictor model
|
||||
/// </summary>
|
||||
private IEnumerator enumerator;
|
||||
|
||||
/// <summary>
|
||||
/// The prediction of the sign predictor model
|
||||
/// </summary>
|
||||
public Dictionary<string, float> learnableProbabilities;
|
||||
|
||||
/// <summary>
|
||||
/// Bool indicating whether or not the resource manager has already been initialized
|
||||
/// </summary>
|
||||
private static bool resourceManagerIsInitialized = false;
|
||||
|
||||
/// <summary>
|
||||
/// an inputTensor for the sign predictor
|
||||
/// </summary>
|
||||
private Tensor inputTensor;
|
||||
|
||||
public List<Listener> listeners = new List<Listener>();
|
||||
|
||||
/// <summary>
|
||||
/// Google Mediapipe setup & run
|
||||
/// </summary>
|
||||
/// <returns>IEnumerator</returns>
|
||||
/// <exception cref="System.Exception"></exception>
|
||||
private IEnumerator Start()
|
||||
/// <param name="edgeModel"></param>
|
||||
public NatMLSignPredictor(MLEdgeModel edgeModel)
|
||||
{
|
||||
// Webcam setup
|
||||
if (WebCamTexture.devices.Length == 0)
|
||||
{
|
||||
throw new System.Exception("Web Camera devices are not found");
|
||||
}
|
||||
// Start the webcam
|
||||
WebCamDevice webCamDevice = WebCamTexture.devices[0];
|
||||
webcamTexture = new WebCamTexture(webCamDevice.name);
|
||||
|
||||
webcamTexture.Play();
|
||||
|
||||
|
||||
yield return new WaitUntil(() => webcamTexture.width > 16);
|
||||
|
||||
// Set webcam aspect ratio
|
||||
width = webcamTexture.width;
|
||||
height = webcamTexture.height;
|
||||
float webcamAspect = (float)webcamTexture.width / (float)webcamTexture.height;
|
||||
screen.rectTransform.sizeDelta = new Vector2(screen.rectTransform.sizeDelta.y * webcamAspect, (screen.rectTransform.sizeDelta.y));
|
||||
screen.texture = webcamTexture;
|
||||
if (screen2 != null)
|
||||
{
|
||||
screen2.rectTransform.sizeDelta = new Vector2(screen2.rectTransform.sizeDelta.y * webcamAspect, (screen2.rectTransform.sizeDelta.y));
|
||||
}
|
||||
|
||||
if (modelList.GetCurrentModel() != null)
|
||||
{
|
||||
// TODO this method is kinda meh you should use
|
||||
inputTexture = new Texture2D(width, height, TextureFormat.RGBA32, false);
|
||||
pixelData = new Color32[width * height];
|
||||
|
||||
if (!resourceManagerIsInitialized)
|
||||
{
|
||||
resourceManager = new StreamingAssetsResourceManager();
|
||||
yield return resourceManager.PrepareAssetAsync("pose_detection.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("pose_landmark_full.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("face_landmark.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("hand_landmark_full.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("face_detection_short_range.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("hand_recrop.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("handedness.txt");
|
||||
resourceManagerIsInitialized = true;
|
||||
}
|
||||
|
||||
stopwatch = new Stopwatch();
|
||||
|
||||
// Setting up the graph
|
||||
graph = new CalculatorGraph(configAsset.text);
|
||||
|
||||
posestream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "pose_landmarks", "pose_landmarks_presence");
|
||||
leftstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "left_hand_landmarks", "left_hand_landmarks_presence");
|
||||
rightstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "right_hand_landmarks", "right_hand_landmarks_presence");
|
||||
|
||||
posestream.StartPolling().AssertOk();
|
||||
leftstream.StartPolling().AssertOk();
|
||||
rightstream.StartPolling().AssertOk();
|
||||
|
||||
graph.StartRun().AssertOk();
|
||||
stopwatch.Start();
|
||||
|
||||
|
||||
keypointManager = new KeypointManager(modelInfoFile);
|
||||
// check if model exists at path
|
||||
//var model = ModelLoader.Load(Resources.Load<NNModel>("Models/Fingerspelling/model_A-L"));
|
||||
worker = modelList.GetCurrentModel().CreateWorker();
|
||||
|
||||
StartCoroutine(SignRecognitionCoroutine());
|
||||
StartCoroutine(MediapipeCoroutine());
|
||||
}
|
||||
}
|
||||
/// <summary>
|
||||
/// Called at the start of course/Minigame, will set the model before the start of SIgnPredictor is called.
|
||||
/// </summary>
|
||||
/// <param name="index">The index of the model to be used</param>
|
||||
public void SetModel(ModelIndex index)
|
||||
{
|
||||
this.modelList.SetCurrentModel(index);
|
||||
this.edgeModel = edgeModel;
|
||||
featureType = edgeModel.inputs[0];
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Coroutine which executes the mediapipe pipeline
|
||||
/// Predicts the sign using the MLEdgeModel
|
||||
/// </summary>
|
||||
/// <param name="inputs"></param>
|
||||
/// <returns></returns>
|
||||
private IEnumerator MediapipeCoroutine()
|
||||
public List<float> Predict(params MLFeature[] inputs)
|
||||
{
|
||||
while (true)
|
||||
List<float> predictions = null;
|
||||
IMLEdgeFeature iedgeFeature = (IMLEdgeFeature)inputs[0];
|
||||
MLEdgeFeature edgeFeature = iedgeFeature.Create(featureType);
|
||||
MLFeatureCollection<MLEdgeFeature> result = edgeModel.Predict(edgeFeature);
|
||||
if (0 < result.Count)
|
||||
{
|
||||
inputTexture.SetPixels32(webcamTexture.GetPixels32(pixelData));
|
||||
var imageFrame = new ImageFrame(ImageFormat.Types.Format.Srgba, width, height, width * 4, inputTexture.GetRawTextureData<byte>());
|
||||
var currentTimestamp = stopwatch.ElapsedTicks / (System.TimeSpan.TicksPerMillisecond / 1000);
|
||||
graph.AddPacketToInputStream("input_video", new ImageFramePacket(imageFrame, new Timestamp(currentTimestamp))).AssertOk();
|
||||
//Debug.Log(Time.timeAsDouble + " Added new packet to mediapipe graph");
|
||||
yield return new WaitForEndOfFrame();
|
||||
|
||||
NormalizedLandmarkList _poseLandmarks = null;
|
||||
NormalizedLandmarkList _leftHandLandmarks = null;
|
||||
NormalizedLandmarkList _rightHandLandmarks = null;
|
||||
|
||||
//Debug.Log("Extracting keypoints");
|
||||
|
||||
yield return new WaitUntil(() => { posestream.TryGetNext(out _poseLandmarks, false); return true; });
|
||||
yield return new WaitUntil(() => { leftstream.TryGetNext(out _leftHandLandmarks, false); return true; });
|
||||
yield return new WaitUntil(() => { rightstream.TryGetNext(out _rightHandLandmarks, false); return true; });
|
||||
//Debug.Log(Time.timeAsDouble + " Retrieved landmarks ");
|
||||
|
||||
keypointManager.AddLandmarks(_poseLandmarks, _leftHandLandmarks, _rightHandLandmarks);
|
||||
predictions = new MLArrayFeature<float>(result[0]).Flatten().ToArray().ToList();
|
||||
predictions = predictions.ConvertAll((c) => Mathf.Exp(c));
|
||||
float sum = predictions.Sum();
|
||||
predictions = predictions.ConvertAll((c) => c / sum);
|
||||
}
|
||||
edgeFeature.Dispose();
|
||||
result.Dispose();
|
||||
return predictions;
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Coroutine which calls the sign predictor model
|
||||
/// Disposing the MLEdgeModel
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
private IEnumerator SignRecognitionCoroutine()
|
||||
public void Dispose()
|
||||
{
|
||||
while (true)
|
||||
edgeModel.Dispose();
|
||||
}
|
||||
}
|
||||
|
||||
public List<Listener> listeners = new List<Listener>();
|
||||
|
||||
/// <summary>
|
||||
/// Predictor which is used to create the asyncPredictor (should not be used if asyncPredictor exists)
|
||||
/// </summary>
|
||||
private NatMLSignPredictor predictor;
|
||||
|
||||
/// <summary>
|
||||
/// The asynchronous predictor which is used to predict the sign using an MLEdgemodel
|
||||
/// </summary>
|
||||
private MLAsyncPredictor<List<float>> asyncPredictor;
|
||||
|
||||
/// <summary>
|
||||
/// Reference to the model used in the SignPredictor
|
||||
/// </summary>
|
||||
private MLEdgeModel model;
|
||||
|
||||
/// <summary>
|
||||
/// Modellist used to change model using ModelIndex
|
||||
/// </summary>
|
||||
public ModelList modelList;
|
||||
|
||||
/// <summary>
|
||||
/// Chosen model data based on the operating system
|
||||
/// </summary>
|
||||
private MLModelData modelData;
|
||||
|
||||
/// <summary>
|
||||
/// Reference to the model info file
|
||||
/// </summary>
|
||||
public TextAsset modelInfoFile;
|
||||
|
||||
/// <summary>
|
||||
/// Config file to set up the graph
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private TextAsset configAsset;
|
||||
|
||||
/// <summary>
|
||||
/// Index to indicate which camera is being used
|
||||
/// </summary>
|
||||
private int camdex = 0;
|
||||
|
||||
/// <summary>
|
||||
/// The screen object on which the video is displayed
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private RawImage screen;
|
||||
|
||||
/// <summary>
|
||||
/// A secondary optional screen object on which the video is displayed
|
||||
/// </summary>
|
||||
[SerializeField]
|
||||
private RawImage screen2;
|
||||
|
||||
/// <summary>
|
||||
/// MediaPipe graph
|
||||
/// </summary>
|
||||
private CalculatorGraph graph;
|
||||
|
||||
/// <summary>
|
||||
/// Resource manager for graph resources
|
||||
/// </summary>
|
||||
private ResourceManager resourceManager;
|
||||
|
||||
/// <summary>
|
||||
/// Webcam texture
|
||||
/// </summary>
|
||||
private WebCamTexture webcamTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Input texture
|
||||
/// </summary>
|
||||
private Texture2D inputTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Screen pixel data
|
||||
/// </summary>
|
||||
private Color32[] pixelData;
|
||||
|
||||
/// <summary>
|
||||
/// Stopwatch to give a timestamp to video frames
|
||||
/// </summary>
|
||||
private Stopwatch stopwatch;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the pose landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> posestream;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the left hand landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> leftstream;
|
||||
|
||||
/// <summary>
|
||||
/// The mediapipe stream which contains the right hand landmarks
|
||||
/// </summary>
|
||||
private OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList> rightstream;
|
||||
|
||||
/// <summary>
|
||||
/// create precense stream
|
||||
/// </summary>
|
||||
public OutputStream<DetectionVectorPacket, List<Detection>> presenceStream;
|
||||
|
||||
/// <summary>
|
||||
/// A keypointmanager which does normalization stuff, keeps track of the landmarks
|
||||
/// </summary>
|
||||
private KeypointManager keypointManager;
|
||||
|
||||
/// <summary>
|
||||
/// Width of th webcam
|
||||
/// </summary>
|
||||
private int width;
|
||||
|
||||
/// <summary>
|
||||
/// Height of the webcam
|
||||
/// </summary>
|
||||
private int height;
|
||||
|
||||
/// <summary>
|
||||
/// The prediction of the sign predictor model
|
||||
/// </summary>
|
||||
public Dictionary<string, float> learnableProbabilities;
|
||||
|
||||
/// <summary>
|
||||
/// Bool indicating whether or not the resource manager has already been initialized
|
||||
/// </summary>
|
||||
private static bool resourceManagerIsInitialized = false;
|
||||
|
||||
/// <summary>
|
||||
/// Google Mediapipe setup & run
|
||||
/// </summary>
|
||||
/// <returns>IEnumerator</returns>
|
||||
/// <exception cref="System.Exception"></exception>
|
||||
private IEnumerator Start()
|
||||
{
|
||||
// Webcam setup
|
||||
if (WebCamTexture.devices.Length == 0)
|
||||
{
|
||||
throw new System.Exception("Web Camera devices are not found");
|
||||
}
|
||||
// Start the webcam
|
||||
WebCamDevice webCamDevice = WebCamTexture.devices[0];
|
||||
webcamTexture = new WebCamTexture(webCamDevice.name);
|
||||
|
||||
webcamTexture.Play();
|
||||
|
||||
yield return new WaitUntil(() => webcamTexture.width > 16);
|
||||
|
||||
// Set webcam aspect ratio
|
||||
width = webcamTexture.width;
|
||||
height = webcamTexture.height;
|
||||
float webcamAspect = (float)webcamTexture.width / (float)webcamTexture.height;
|
||||
screen.rectTransform.sizeDelta = new Vector2(screen.rectTransform.sizeDelta.y * webcamAspect, (screen.rectTransform.sizeDelta.y));
|
||||
screen.texture = webcamTexture;
|
||||
if (screen2 != null)
|
||||
{
|
||||
screen2.rectTransform.sizeDelta = new Vector2(screen2.rectTransform.sizeDelta.y * webcamAspect, (screen2.rectTransform.sizeDelta.y));
|
||||
}
|
||||
|
||||
// TODO this method is kinda meh you should use
|
||||
inputTexture = new Texture2D(width, height, TextureFormat.RGBA32, false);
|
||||
pixelData = new Color32[width * height];
|
||||
|
||||
if (!resourceManagerIsInitialized)
|
||||
{
|
||||
resourceManager = new StreamingAssetsResourceManager();
|
||||
yield return resourceManager.PrepareAssetAsync("pose_detection.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("pose_landmark_full.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("face_landmark.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("hand_landmark_full.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("face_detection_short_range.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("hand_recrop.bytes");
|
||||
yield return resourceManager.PrepareAssetAsync("handedness.txt");
|
||||
resourceManagerIsInitialized = true;
|
||||
}
|
||||
|
||||
stopwatch = new Stopwatch();
|
||||
|
||||
// Setting up the graph
|
||||
graph = new CalculatorGraph(configAsset.text);
|
||||
|
||||
posestream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "pose_landmarks", "pose_landmarks_presence");
|
||||
leftstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "left_hand_landmarks", "left_hand_landmarks_presence");
|
||||
rightstream = new OutputStream<NormalizedLandmarkListPacket, NormalizedLandmarkList>(graph, "right_hand_landmarks", "right_hand_landmarks_presence");
|
||||
|
||||
posestream.StartPolling().AssertOk();
|
||||
leftstream.StartPolling().AssertOk();
|
||||
rightstream.StartPolling().AssertOk();
|
||||
|
||||
graph.StartRun().AssertOk();
|
||||
stopwatch.Start();
|
||||
|
||||
// Creating a KeypointManager
|
||||
keypointManager = new KeypointManager(modelInfoFile);
|
||||
|
||||
// Check if a model is ready to load
|
||||
yield return new WaitUntil(() => modelList.HasValidModel());
|
||||
|
||||
// Create Model
|
||||
Task<MLEdgeModel> t = Task.Run(() => MLEdgeModel.Create(modelList.GetCurrentModel()));
|
||||
yield return new WaitUntil(() => t.IsCompleted);
|
||||
model = t.Result;
|
||||
predictor = new NatMLSignPredictor(model);
|
||||
asyncPredictor = predictor.ToAsync();
|
||||
|
||||
// Start the Coroutine
|
||||
StartCoroutine(SignRecognitionCoroutine());
|
||||
StartCoroutine(MediapipeCoroutine());
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Coroutine which executes the mediapipe pipeline
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
private IEnumerator MediapipeCoroutine()
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
inputTexture.SetPixels32(webcamTexture.GetPixels32(pixelData));
|
||||
var imageFrame = new ImageFrame(ImageFormat.Types.Format.Srgba, width, height, width * 4, inputTexture.GetRawTextureData<byte>());
|
||||
var currentTimestamp = stopwatch.ElapsedTicks / (System.TimeSpan.TicksPerMillisecond / 1000);
|
||||
graph.AddPacketToInputStream("input_video", new ImageFramePacket(imageFrame, new Timestamp(currentTimestamp))).AssertOk();
|
||||
yield return new WaitForEndOfFrame();
|
||||
|
||||
NormalizedLandmarkList _poseLandmarks = null;
|
||||
NormalizedLandmarkList _leftHandLandmarks = null;
|
||||
NormalizedLandmarkList _rightHandLandmarks = null;
|
||||
|
||||
yield return new WaitUntil(() => { posestream.TryGetNext(out _poseLandmarks); return true; });
|
||||
yield return new WaitUntil(() => { leftstream.TryGetNext(out _leftHandLandmarks); return true; });
|
||||
yield return new WaitUntil(() => { rightstream.TryGetNext(out _rightHandLandmarks); return true; });
|
||||
|
||||
keypointManager.AddLandmarks(_poseLandmarks, _leftHandLandmarks, _rightHandLandmarks);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Coroutine which calls the sign predictor model
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
private IEnumerator SignRecognitionCoroutine()
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
List<List<float>> inputData = keypointManager.GetKeypoints();
|
||||
if (inputData != null && asyncPredictor.readyForPrediction)
|
||||
{
|
||||
List<List<float>> input = keypointManager.GetKeypoints();
|
||||
if (input != null)
|
||||
// Getting the size of the input data
|
||||
int framecount = inputData.Count;
|
||||
int keypointsPerFrame = inputData[0].Count;
|
||||
|
||||
// Creating ArrayFeature
|
||||
int[] shape = { framecount, keypointsPerFrame };
|
||||
float[] input = new float[framecount * keypointsPerFrame];
|
||||
int i = 0;
|
||||
inputData.ForEach((e) => e.ForEach((f) => input[i++] = f));
|
||||
MLArrayFeature<float> feature = new MLArrayFeature<float>(input, shape);
|
||||
|
||||
// Predicting
|
||||
Task<List<float>> task = Task.Run(async () => await asyncPredictor.Predict(feature));
|
||||
yield return new WaitUntil(() => task.IsCompleted);
|
||||
List<float> result = task.Result;
|
||||
if (0 < result.Count)
|
||||
{
|
||||
|
||||
//UnityEngine.Debug.Log("input: " + input.Count);
|
||||
|
||||
int frameCount = input.Count;
|
||||
int keypoints_per_frame = input[0].Count;
|
||||
|
||||
// Create a tensor with the input
|
||||
inputTensor = new Tensor(frameCount, keypoints_per_frame);
|
||||
|
||||
// Fill the tensor with the input
|
||||
for (int i = 0; i < frameCount; i++)
|
||||
{
|
||||
for (int j = 0; j < keypoints_per_frame; j++)
|
||||
{
|
||||
inputTensor[i, j] = input[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
int stepsPerFrame = 190;
|
||||
enumerator = worker.StartManualSchedule(inputTensor);
|
||||
int step = 0;
|
||||
while (enumerator.MoveNext())
|
||||
{
|
||||
if (++step % stepsPerFrame == 0)
|
||||
{
|
||||
//Debug.Log(Time.timeAsDouble + " : " + step);
|
||||
yield return null;
|
||||
}
|
||||
}
|
||||
|
||||
var output = worker.PeekOutput();
|
||||
|
||||
inputTensor.Dispose();
|
||||
|
||||
// Get the output as an array
|
||||
float[] outputArray = output.ToReadOnlyArray();
|
||||
//Debug.Log($"out = [{outputArray.Aggregate(" ", (t, f) => $"{t}{f} ")}]");
|
||||
|
||||
// Calculate the softmax of the output
|
||||
float max = outputArray.Max();
|
||||
float[] softmaxedOutput = outputArray.Select(x => Mathf.Exp(x - max)).ToArray();
|
||||
float sum = softmaxedOutput.Sum();
|
||||
float[] softmaxedOutput2 = softmaxedOutput.Select(x => x / sum).ToArray();
|
||||
|
||||
// Get the index of the highest probability
|
||||
int maxIndex = softmaxedOutput2.ToList().IndexOf(softmaxedOutput2.Max());
|
||||
|
||||
// Get the letter from the index
|
||||
char letter = (char)(maxIndex + 65);
|
||||
float accuracy = (Mathf.RoundToInt(softmaxedOutput2[maxIndex] * 100));
|
||||
|
||||
// Set the letterProbabilities, currently used by Courses
|
||||
learnableProbabilities = new Dictionary<string, float>();
|
||||
for (int i = 0; i < softmaxedOutput2.Length; i++)
|
||||
|
||||
// Temporary fix
|
||||
List<string> signs = new List<string>()
|
||||
{
|
||||
learnableProbabilities.Add(((char)(i + 65)).ToString(), softmaxedOutput2[i]);
|
||||
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
|
||||
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"
|
||||
};
|
||||
|
||||
|
||||
|
||||
for (int j = 0; j < result.Count; j++)
|
||||
{
|
||||
learnableProbabilities.Add(signs[j].ToUpper(), result[j]);
|
||||
}
|
||||
//Debug.Log($"prob = [{learnableProbabilities.Aggregate(" ", (t, kv) => $"{t}{kv.Key}:{kv.Value} ")}]");
|
||||
foreach(Listener listener in listeners)
|
||||
foreach (Listener listener in listeners)
|
||||
{
|
||||
yield return listener.ProcessIncomingCall();
|
||||
}
|
||||
@@ -339,77 +367,85 @@ namespace Mediapipe.Unity.Tutorial
|
||||
yield return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Propper destruction on the Mediapipegraph
|
||||
/// </summary>
|
||||
private void OnDestroy()
|
||||
{
|
||||
if (webcamTexture != null)
|
||||
{
|
||||
webcamTexture.Stop();
|
||||
}
|
||||
|
||||
if (graph != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
graph.CloseInputStream("input_video").AssertOk();
|
||||
graph.WaitUntilDone().AssertOk();
|
||||
}
|
||||
finally
|
||||
{
|
||||
|
||||
graph.Dispose();
|
||||
}
|
||||
}
|
||||
// inputTensor must still be disposed, if it exists
|
||||
inputTensor?.Dispose();
|
||||
worker?.Dispose();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// So long as there are cameras to use, you swap the camera you are using to another in the list.
|
||||
/// </summary>
|
||||
public void SwapCam()
|
||||
{
|
||||
if (WebCamTexture.devices.Length > 0)
|
||||
{
|
||||
// Stop the old camera
|
||||
// If there was no camera playing before, then you dont have to reset the texture, as it wasn't assigned in the first place.
|
||||
if (webcamTexture.isPlaying)
|
||||
{
|
||||
screen.texture = null;
|
||||
webcamTexture.Stop();
|
||||
webcamTexture = null;
|
||||
}
|
||||
// Find the new camera
|
||||
camdex += 1;
|
||||
camdex %= WebCamTexture.devices.Length;
|
||||
// Start the new camera
|
||||
WebCamDevice device = WebCamTexture.devices[camdex];
|
||||
webcamTexture = new WebCamTexture(device.name);
|
||||
screen.texture = webcamTexture;
|
||||
|
||||
webcamTexture.Play();
|
||||
}
|
||||
}
|
||||
/// <summary>
|
||||
/// Swaps the display screens
|
||||
/// </summary>
|
||||
public void SwapScreen()
|
||||
{
|
||||
if(screen2.texture == null && screen.texture != null)
|
||||
{
|
||||
screen2.texture = webcamTexture;
|
||||
screen.texture = null;
|
||||
}
|
||||
else if (screen2.texture != null && screen.texture == null)
|
||||
{
|
||||
screen.texture = webcamTexture;
|
||||
screen2.texture = null;
|
||||
}
|
||||
yield return null;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Propper destruction on the Mediapipegraph
|
||||
/// </summary>
|
||||
private void OnDestroy()
|
||||
{
|
||||
if (webcamTexture != null)
|
||||
{
|
||||
webcamTexture.Stop();
|
||||
}
|
||||
|
||||
if (graph != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
graph.CloseInputStream("input_video").AssertOk();
|
||||
graph.WaitUntilDone().AssertOk();
|
||||
}
|
||||
finally
|
||||
{
|
||||
graph.Dispose();
|
||||
}
|
||||
}
|
||||
if (asyncPredictor != null)
|
||||
{
|
||||
asyncPredictor.Dispose();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// So long as there are cameras to use, you swap the camera you are using to another in the list.
|
||||
/// </summary>
|
||||
public void SwapCam()
|
||||
{
|
||||
if (WebCamTexture.devices.Length > 0)
|
||||
{
|
||||
// Stop the old camera
|
||||
// If there was no camera playing before, then you dont have to reset the texture, as it wasn't assigned in the first place.
|
||||
if (webcamTexture.isPlaying)
|
||||
{
|
||||
screen.texture = null;
|
||||
webcamTexture.Stop();
|
||||
webcamTexture = null;
|
||||
}
|
||||
// Find the new camera
|
||||
camdex += 1;
|
||||
camdex %= WebCamTexture.devices.Length;
|
||||
// Start the new camera
|
||||
WebCamDevice device = WebCamTexture.devices[camdex];
|
||||
webcamTexture = new WebCamTexture(device.name);
|
||||
screen.texture = webcamTexture;
|
||||
|
||||
webcamTexture.Play();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Swaps the display screens
|
||||
/// </summary>
|
||||
public void SwapScreen()
|
||||
{
|
||||
if (screen2.texture == null && screen.texture != null)
|
||||
{
|
||||
screen2.texture = webcamTexture;
|
||||
screen.texture = null;
|
||||
}
|
||||
else if (screen2.texture != null && screen.texture == null)
|
||||
{
|
||||
screen.texture = webcamTexture;
|
||||
screen2.texture = null;
|
||||
}
|
||||
}
|
||||
public void ChangeModel(ModelIndex index)
|
||||
{
|
||||
this.modelList.SetCurrentModel(index);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -179,7 +179,7 @@ public partial class SpellingBeeController : AbstractFeedback
|
||||
{
|
||||
StartController();
|
||||
|
||||
signPredictor.SetModel(currentTheme.modelIndex);
|
||||
signPredictor.ChangeModel(ModelIndex.FINGERSPELLING);
|
||||
AddSelfAsListener();
|
||||
}
|
||||
/// <summary>
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: f6ebab52a13ea425ba87006839f1d776
|
||||
folderAsset: yes
|
||||
DefaultImporter:
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,148 +0,0 @@
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using Onnx;
|
||||
using UnityEditor;
|
||||
using UnityEngine.Analytics;
|
||||
|
||||
namespace Unity.Barracuda.Editor
|
||||
{
|
||||
internal class BarracudaAnalytics
|
||||
{
|
||||
static bool s_EventRegistered = false;
|
||||
const int k_MaxEventsPerHour = 1000;
|
||||
const int k_MaxNumberOfElements = 1000;
|
||||
const string k_VendorKey = "unity.barracuda";
|
||||
const string k_ImportEventName = "uBarracudaImport";
|
||||
|
||||
static bool EnableAnalytics()
|
||||
{
|
||||
AnalyticsResult result = EditorAnalytics.RegisterEventWithLimit(k_ImportEventName, k_MaxEventsPerHour, k_MaxNumberOfElements, k_VendorKey);
|
||||
if (result == AnalyticsResult.Ok)
|
||||
s_EventRegistered = true;
|
||||
|
||||
return s_EventRegistered;
|
||||
}
|
||||
|
||||
struct BarracudaImportAnalyticsData
|
||||
{
|
||||
public string model_type;
|
||||
public string original_layers;
|
||||
public string imported_layers;
|
||||
public string import_warnings;
|
||||
}
|
||||
|
||||
public static void SendBarracudaImportEvent(object originalModel, Model importedModel)
|
||||
{
|
||||
//The event shouldn't be able to report if this is disabled but if we know we're not going to report
|
||||
//Lets early out and not waste time gathering all the data
|
||||
if (!EditorAnalytics.enabled)
|
||||
return;
|
||||
|
||||
if (!EnableAnalytics())
|
||||
return;
|
||||
|
||||
|
||||
var data = new BarracudaImportAnalyticsData();
|
||||
|
||||
try
|
||||
{
|
||||
data.original_layers = AnalyzeONNXModel(originalModel);
|
||||
data.imported_layers = AnalyzeNNModel(importedModel);
|
||||
data.model_type = string.IsNullOrEmpty(data.original_layers) ? "NN" : "ONNX";
|
||||
data.import_warnings = AnalyzeWarnings(importedModel);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
D.LogError($"Failed collecting Barracuda analytics: {e}");
|
||||
}
|
||||
|
||||
EditorAnalytics.SendEventWithLimit(k_ImportEventName, data);
|
||||
}
|
||||
|
||||
static string AnalyzeONNXModel(object originalModel)
|
||||
{
|
||||
if (!(originalModel is ModelProto))
|
||||
return "";
|
||||
|
||||
var layers = new Dictionary<string, int>();
|
||||
|
||||
var onnxModel = originalModel as ModelProto;
|
||||
foreach (var node in onnxModel.Graph.Node)
|
||||
{
|
||||
var layerDescription = node.OpType;
|
||||
|
||||
if (!layers.ContainsKey(layerDescription))
|
||||
layers[layerDescription] = 1;
|
||||
else
|
||||
layers[layerDescription] += 1;
|
||||
}
|
||||
|
||||
return DictionaryToJson(layers);
|
||||
}
|
||||
|
||||
static string AnalyzeNNModel(Model importedModel)
|
||||
{
|
||||
var layers = new Dictionary<string, int>();
|
||||
|
||||
foreach (Layer layer in importedModel.layers)
|
||||
{
|
||||
var layerDescription = LayerToString(layer);
|
||||
|
||||
if (!layers.ContainsKey(layerDescription))
|
||||
layers[layerDescription] = 1;
|
||||
else
|
||||
layers[layerDescription] += 1;
|
||||
}
|
||||
|
||||
return DictionaryToJson(layers);
|
||||
}
|
||||
|
||||
static string LayerToString(Layer layer)
|
||||
{
|
||||
var layerDescription = layer.type.ToString();
|
||||
|
||||
if (layer.type == Layer.Type.Conv2D || layer.type == Layer.Type.Conv2DTrans ||
|
||||
layer.type == Layer.Type.Conv3D || layer.type == Layer.Type.Conv3DTrans ||
|
||||
layer.type == Layer.Type.DepthwiseConv2D)
|
||||
{
|
||||
layerDescription += "_" + ConvShapeToString(layer);
|
||||
}
|
||||
|
||||
if (layer.activation != Layer.Activation.None)
|
||||
layerDescription += "_" + layer.activation.ToString();
|
||||
|
||||
return layerDescription;
|
||||
}
|
||||
|
||||
static string ConvShapeToString(Layer layer)
|
||||
{
|
||||
if (layer.type == Layer.Type.Conv2D ||
|
||||
layer.type == Layer.Type.DepthwiseConv2D ||
|
||||
layer.type == Layer.Type.Conv2DTrans)
|
||||
return string.Join("_",
|
||||
layer.datasets.Where(d => d.name.EndsWith("/K")).Select(it =>
|
||||
$"{it.shape.kernelHeight}x{it.shape.kernelWidth}x{it.shape.kernelDepth}x{it.shape.kernelCount}"));
|
||||
|
||||
if (layer.type == Layer.Type.Conv3D ||
|
||||
layer.type == Layer.Type.Conv3DTrans)
|
||||
return string.Join("_",
|
||||
layer.datasets.Where(d => d.name.EndsWith("/K")).Select(it =>
|
||||
$"{it.shape.kernelSpatialDepth}x{it.shape.kernelHeight}x{it.shape.kernelWidth}x{it.shape.kernelDepth}x{it.shape.kernelCount}"));
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
static string AnalyzeWarnings(Model importedModel)
|
||||
{
|
||||
return "[" + string.Join(",",importedModel.Warnings.Select(item => $"'{item.LayerName}:{item.Message}'")) + "]";
|
||||
}
|
||||
|
||||
static string DictionaryToJson(Dictionary<string, int> dict)
|
||||
{
|
||||
var entries = dict.Select(d => $"\"{d.Key}\":{string.Join(",", d.Value)}");
|
||||
return "{" + string.Join(",", entries) + "}";
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 92cb0e57f8c0c4255a2d2d93f844424d
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 2.3 KiB |
@@ -1,106 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 8682ff569c4c7457a8a8e3a527aad537
|
||||
TextureImporter:
|
||||
fileIDToRecycleName: {}
|
||||
externalObjects: {}
|
||||
serializedVersion: 4
|
||||
mipmaps:
|
||||
mipMapMode: 0
|
||||
enableMipMap: 0
|
||||
sRGBTexture: 0
|
||||
linearTexture: 0
|
||||
fadeOut: 0
|
||||
borderMipMap: 0
|
||||
mipMapsPreserveCoverage: 0
|
||||
alphaTestReferenceValue: 0.5
|
||||
mipMapFadeDistanceStart: 1
|
||||
mipMapFadeDistanceEnd: 3
|
||||
bumpmap:
|
||||
convertToNormalMap: 0
|
||||
externalNormalMap: 0
|
||||
heightScale: 0.25
|
||||
normalMapFilter: 0
|
||||
isReadable: 0
|
||||
grayScaleToAlpha: 0
|
||||
generateCubemap: 6
|
||||
cubemapConvolution: 0
|
||||
seamlessCubemap: 0
|
||||
textureFormat: 1
|
||||
maxTextureSize: 2048
|
||||
textureSettings:
|
||||
serializedVersion: 2
|
||||
filterMode: -1
|
||||
aniso: 1
|
||||
mipBias: -1
|
||||
wrapU: 1
|
||||
wrapV: 1
|
||||
wrapW: -1
|
||||
nPOTScale: 0
|
||||
lightmap: 0
|
||||
compressionQuality: 50
|
||||
spriteMode: 0
|
||||
spriteExtrude: 1
|
||||
spriteMeshType: 1
|
||||
alignment: 0
|
||||
spritePivot: {x: 0.5, y: 0.5}
|
||||
spritePixelsToUnits: 100
|
||||
spriteBorder: {x: 0, y: 0, z: 0, w: 0}
|
||||
spriteGenerateFallbackPhysicsShape: 1
|
||||
alphaUsage: 1
|
||||
alphaIsTransparency: 1
|
||||
spriteTessellationDetail: -1
|
||||
textureType: 2
|
||||
textureShape: 1
|
||||
maxTextureSizeSet: 0
|
||||
compressionQualitySet: 0
|
||||
textureFormatSet: 0
|
||||
platformSettings:
|
||||
- buildTarget: DefaultTexturePlatform
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 1
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- buildTarget: Standalone
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 1
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- buildTarget: iPhone
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 1
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- buildTarget: Android
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 1
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
spriteSheet:
|
||||
serializedVersion: 2
|
||||
sprites: []
|
||||
outline: []
|
||||
physicsShape: []
|
||||
spritePackingTag:
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,63 +0,0 @@
|
||||
using System.IO;
|
||||
using Unity.Barracuda.Editor;
|
||||
using UnityEditor;
|
||||
using UnityEngine;
|
||||
#if UNITY_2020_2_OR_NEWER
|
||||
using UnityEditor.AssetImporters;
|
||||
using UnityEditor.Experimental.AssetImporters;
|
||||
#else
|
||||
using UnityEditor.Experimental.AssetImporters;
|
||||
#endif
|
||||
|
||||
namespace Unity.Barracuda
|
||||
{
|
||||
/// <summary>
|
||||
/// Asset Importer of barracuda models.
|
||||
/// </summary>
|
||||
[ScriptedImporter(3, new[] {"nn"})]
|
||||
public class NNModelImporter : ScriptedImporter {
|
||||
private const string iconName = "NNModelIcon";
|
||||
|
||||
private Texture2D iconTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Scripted importer callback
|
||||
/// </summary>
|
||||
/// <param name="ctx">Asset import context</param>
|
||||
public override void OnImportAsset(AssetImportContext ctx)
|
||||
{
|
||||
var model = File.ReadAllBytes(ctx.assetPath);
|
||||
|
||||
// Analyze model and send analytics if enabled
|
||||
var nnModel = ModelLoader.Load(ctx.assetPath, skipWeights:true);
|
||||
BarracudaAnalytics.SendBarracudaImportEvent(null, nnModel);
|
||||
|
||||
var assetData = ScriptableObject.CreateInstance<NNModelData>();
|
||||
assetData.Value = model;
|
||||
assetData.name = "Data";
|
||||
assetData.hideFlags = HideFlags.HideInHierarchy;
|
||||
|
||||
var asset = ScriptableObject.CreateInstance<NNModel>();
|
||||
asset.modelData = assetData;
|
||||
ctx.AddObjectToAsset("main obj", asset, LoadIconTexture());
|
||||
ctx.AddObjectToAsset("model data", assetData);
|
||||
|
||||
ctx.SetMainObject(asset);
|
||||
}
|
||||
|
||||
private Texture2D LoadIconTexture()
|
||||
{
|
||||
if (iconTexture == null)
|
||||
{
|
||||
string[] allCandidates = AssetDatabase.FindAssets(iconName);
|
||||
|
||||
if (allCandidates.Length > 0)
|
||||
{
|
||||
iconTexture = AssetDatabase.LoadAssetAtPath(AssetDatabase.GUIDToAssetPath(allCandidates[0]), typeof(Texture2D)) as Texture2D;
|
||||
}
|
||||
}
|
||||
return iconTexture;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 19ed1486aa27d4903b34839f37b8f69f
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 4.6 KiB |
@@ -1,165 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 44179f4142e33e24ca4feb8dfe55e56c
|
||||
TextureImporter:
|
||||
fileIDToRecycleName: {}
|
||||
externalObjects: {}
|
||||
serializedVersion: 9
|
||||
mipmaps:
|
||||
mipMapMode: 0
|
||||
enableMipMap: 0
|
||||
sRGBTexture: 1
|
||||
linearTexture: 0
|
||||
fadeOut: 0
|
||||
borderMipMap: 0
|
||||
mipMapsPreserveCoverage: 0
|
||||
alphaTestReferenceValue: 0.5
|
||||
mipMapFadeDistanceStart: 1
|
||||
mipMapFadeDistanceEnd: 3
|
||||
bumpmap:
|
||||
convertToNormalMap: 0
|
||||
externalNormalMap: 0
|
||||
heightScale: 0.25
|
||||
normalMapFilter: 0
|
||||
isReadable: 0
|
||||
streamingMipmaps: 0
|
||||
streamingMipmapsPriority: 0
|
||||
grayScaleToAlpha: 0
|
||||
generateCubemap: 6
|
||||
cubemapConvolution: 0
|
||||
seamlessCubemap: 0
|
||||
textureFormat: 1
|
||||
maxTextureSize: 2048
|
||||
textureSettings:
|
||||
serializedVersion: 2
|
||||
filterMode: -1
|
||||
aniso: -1
|
||||
mipBias: -100
|
||||
wrapU: -1
|
||||
wrapV: -1
|
||||
wrapW: -1
|
||||
nPOTScale: 1
|
||||
lightmap: 0
|
||||
compressionQuality: 50
|
||||
spriteMode: 0
|
||||
spriteExtrude: 1
|
||||
spriteMeshType: 1
|
||||
alignment: 0
|
||||
spritePivot: {x: 0.5, y: 0.5}
|
||||
spritePixelsToUnits: 100
|
||||
spriteBorder: {x: 0, y: 0, z: 0, w: 0}
|
||||
spriteGenerateFallbackPhysicsShape: 1
|
||||
alphaUsage: 1
|
||||
alphaIsTransparency: 0
|
||||
spriteTessellationDetail: -1
|
||||
textureType: 0
|
||||
textureShape: 1
|
||||
singleChannelComponent: 0
|
||||
maxTextureSizeSet: 0
|
||||
compressionQualitySet: 0
|
||||
textureFormatSet: 0
|
||||
platformSettings:
|
||||
- serializedVersion: 2
|
||||
buildTarget: DefaultTexturePlatform
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- serializedVersion: 2
|
||||
buildTarget: Standalone
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- serializedVersion: 2
|
||||
buildTarget: iPhone
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- serializedVersion: 2
|
||||
buildTarget: tvOS
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- serializedVersion: 2
|
||||
buildTarget: Android
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- serializedVersion: 2
|
||||
buildTarget: PS4
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- serializedVersion: 2
|
||||
buildTarget: Windows Store Apps
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
- serializedVersion: 2
|
||||
buildTarget: WebGL
|
||||
maxTextureSize: 2048
|
||||
resizeAlgorithm: 0
|
||||
textureFormat: -1
|
||||
textureCompression: 0
|
||||
compressionQuality: 50
|
||||
crunchedCompression: 0
|
||||
allowsAlphaSplitting: 0
|
||||
overridden: 0
|
||||
androidETC2FallbackOverride: 0
|
||||
spriteSheet:
|
||||
serializedVersion: 2
|
||||
sprites: []
|
||||
outline: []
|
||||
physicsShape: []
|
||||
bones: []
|
||||
spriteID:
|
||||
vertices: []
|
||||
indices:
|
||||
edges: []
|
||||
weights: []
|
||||
spritePackingTag:
|
||||
pSDRemoveMatte: 0
|
||||
pSDShowRemoveMatteOption: 0
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,106 +0,0 @@
|
||||
using UnityEngine;
|
||||
using UnityEditor;
|
||||
#if UNITY_2020_2_OR_NEWER
|
||||
using UnityEditor.AssetImporters;
|
||||
using UnityEditor.Experimental.AssetImporters;
|
||||
#else
|
||||
using UnityEditor.Experimental.AssetImporters;
|
||||
#endif
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Runtime.CompilerServices;
|
||||
using Unity.Barracuda.Editor;
|
||||
using Unity.Barracuda.ONNX;
|
||||
|
||||
[assembly: InternalsVisibleToAttribute("Barracuda.EditorTests")]
|
||||
[assembly: InternalsVisibleToAttribute("Unity.Barracuda.Tests")]
|
||||
|
||||
namespace Unity.Barracuda
|
||||
{
|
||||
/// <summary>
|
||||
/// Asset Importer for Open Neural Network Exchange (ONNX) files.
|
||||
/// For more information about ONNX file format see: https://github.com/onnx/onnx
|
||||
/// </summary>
|
||||
[ScriptedImporter(34, new[] { "onnx" })]
|
||||
public class ONNXModelImporter : ScriptedImporter
|
||||
{
|
||||
// Configuration
|
||||
/// <summary>
|
||||
/// Enable ONNX model optimization during import. Set via importer UI
|
||||
/// </summary>
|
||||
public bool optimizeModel = true;
|
||||
|
||||
/// <summary>
|
||||
/// Fix batch size for ONNX models. Set via importer UI
|
||||
/// </summary>
|
||||
public bool forceArbitraryBatchSize = true;
|
||||
|
||||
/// <summary>
|
||||
/// Treat errors as warnings. Set via importer UI
|
||||
/// </summary>
|
||||
public bool treatErrorsAsWarnings = false;
|
||||
|
||||
[SerializeField, HideInInspector]
|
||||
internal ONNXModelConverter.ImportMode importMode = ONNXModelConverter.ImportMode.Standard;
|
||||
|
||||
[SerializeField, HideInInspector]
|
||||
internal ONNXModelConverter.DataTypeMode weightsTypeMode = ONNXModelConverter.DataTypeMode.Default;
|
||||
[SerializeField, HideInInspector]
|
||||
internal ONNXModelConverter.DataTypeMode activationTypeMode = ONNXModelConverter.DataTypeMode.Default;
|
||||
|
||||
internal const string iconName = "ONNXModelIcon";
|
||||
|
||||
|
||||
private Texture2D m_IconTexture;
|
||||
|
||||
/// <summary>
|
||||
/// Scripted importer callback
|
||||
/// </summary>
|
||||
/// <param name="ctx">Asset import context</param>
|
||||
public override void OnImportAsset(AssetImportContext ctx)
|
||||
{
|
||||
ONNXModelConverter.ModelImported += BarracudaAnalytics.SendBarracudaImportEvent;
|
||||
var converter = new ONNXModelConverter(optimizeModel, treatErrorsAsWarnings, forceArbitraryBatchSize, importMode);
|
||||
|
||||
var model = converter.Convert(ctx.assetPath);
|
||||
|
||||
if (weightsTypeMode == ONNXModelConverter.DataTypeMode.ForceHalf)
|
||||
model.ConvertWeights(DataType.Half);
|
||||
else if (weightsTypeMode == ONNXModelConverter.DataTypeMode.ForceFloat)
|
||||
model.ConvertWeights(DataType.Float);
|
||||
|
||||
NNModelData assetData = ScriptableObject.CreateInstance<NNModelData>();
|
||||
using (var memoryStream = new MemoryStream())
|
||||
using (var writer = new BinaryWriter(memoryStream))
|
||||
{
|
||||
ModelWriter.Save(writer, model);
|
||||
assetData.Value = memoryStream.ToArray();
|
||||
}
|
||||
assetData.name = "Data";
|
||||
assetData.hideFlags = HideFlags.HideInHierarchy;
|
||||
|
||||
NNModel asset = ScriptableObject.CreateInstance<NNModel>();
|
||||
asset.modelData = assetData;
|
||||
|
||||
ctx.AddObjectToAsset("main obj", asset, LoadIconTexture());
|
||||
ctx.AddObjectToAsset("model data", assetData);
|
||||
|
||||
ctx.SetMainObject(asset);
|
||||
}
|
||||
|
||||
// Icon helper
|
||||
private Texture2D LoadIconTexture()
|
||||
{
|
||||
if (m_IconTexture == null)
|
||||
{
|
||||
string[] allCandidates = AssetDatabase.FindAssets(iconName);
|
||||
|
||||
if (allCandidates.Length > 0)
|
||||
{
|
||||
m_IconTexture = AssetDatabase.LoadAssetAtPath(AssetDatabase.GUIDToAssetPath(allCandidates[0]), typeof(Texture2D)) as Texture2D;
|
||||
}
|
||||
}
|
||||
return m_IconTexture;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 683b6cb6d0a474744822c888b46772c9
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,461 +0,0 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using UnityEditor;
|
||||
#if UNITY_2020_2_OR_NEWER
|
||||
using UnityEditor.AssetImporters;
|
||||
using UnityEditor.Experimental.AssetImporters;
|
||||
#else
|
||||
using UnityEditor.Experimental.AssetImporters;
|
||||
#endif
|
||||
using UnityEngine;
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Reflection;
|
||||
using Unity.Barracuda.ONNX;
|
||||
using ImportMode=Unity.Barracuda.ONNX.ONNXModelConverter.ImportMode;
|
||||
using DataTypeMode=Unity.Barracuda.ONNX.ONNXModelConverter.DataTypeMode;
|
||||
|
||||
namespace Unity.Barracuda.Editor
|
||||
{
|
||||
/// <summary>
|
||||
/// Asset Importer Editor of ONNX models
|
||||
/// </summary>
|
||||
[CustomEditor(typeof(ONNXModelImporter))]
|
||||
[CanEditMultipleObjects]
|
||||
public class ONNXModelImporterEditor : ScriptedImporterEditor
|
||||
{
|
||||
static PropertyInfo s_InspectorModeInfo;
|
||||
static ONNXModelImporterEditor()
|
||||
{
|
||||
s_InspectorModeInfo = typeof(SerializedObject).GetProperty("inspectorMode", BindingFlags.NonPublic | BindingFlags.Instance);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Scripted importer editor UI callback
|
||||
/// </summary>
|
||||
public override void OnInspectorGUI()
|
||||
{
|
||||
var onnxModelImporter = target as ONNXModelImporter;
|
||||
if (onnxModelImporter == null)
|
||||
return;
|
||||
|
||||
InspectorMode inspectorMode = InspectorMode.Normal;
|
||||
if (s_InspectorModeInfo != null)
|
||||
inspectorMode = (InspectorMode)s_InspectorModeInfo.GetValue(assetSerializedObject);
|
||||
|
||||
serializedObject.Update();
|
||||
|
||||
bool debugView = inspectorMode != InspectorMode.Normal;
|
||||
SerializedProperty iterator = serializedObject.GetIterator();
|
||||
for (bool enterChildren = true; iterator.NextVisible(enterChildren); enterChildren = false)
|
||||
{
|
||||
if (iterator.propertyPath != "m_Script")
|
||||
EditorGUILayout.PropertyField(iterator, true);
|
||||
}
|
||||
|
||||
// Additional options exposed from ImportMode
|
||||
SerializedProperty importModeProperty = serializedObject.FindProperty(nameof(onnxModelImporter.importMode));
|
||||
bool skipMetadataImport = ((ImportMode)importModeProperty.intValue).HasFlag(ImportMode.SkipMetadataImport);
|
||||
if (EditorGUILayout.Toggle("Skip Metadata Import", skipMetadataImport) != skipMetadataImport)
|
||||
{
|
||||
importModeProperty.intValue ^= (int)ImportMode.SkipMetadataImport;
|
||||
}
|
||||
|
||||
if (debugView)
|
||||
{
|
||||
importModeProperty.intValue = (int)(ImportMode)EditorGUILayout.EnumFlagsField("Import Mode", (ImportMode)importModeProperty.intValue);
|
||||
|
||||
SerializedProperty weightsTypeMode = serializedObject.FindProperty(nameof(onnxModelImporter.weightsTypeMode));
|
||||
SerializedProperty activationTypeMode = serializedObject.FindProperty(nameof(onnxModelImporter.activationTypeMode));
|
||||
weightsTypeMode.intValue = (int)(DataTypeMode)EditorGUILayout.EnumPopup("Weights type", (DataTypeMode)weightsTypeMode.intValue);
|
||||
activationTypeMode.intValue = (int)(DataTypeMode)EditorGUILayout.EnumPopup("Activation type", (DataTypeMode)activationTypeMode.intValue);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (onnxModelImporter.optimizeModel)
|
||||
EditorGUILayout.HelpBox("Model optimizations are on\nRemove and re-import model if you observe incorrect behavior", MessageType.Info);
|
||||
|
||||
if (onnxModelImporter.importMode == ImportMode.Legacy)
|
||||
EditorGUILayout.HelpBox("Legacy importer is in use", MessageType.Warning);
|
||||
}
|
||||
|
||||
serializedObject.ApplyModifiedProperties();
|
||||
|
||||
ApplyRevertGUI();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Asset Importer Editor of NNModel (the serialized file generated by ONNXModelImporter)
|
||||
/// </summary>
|
||||
[CustomEditor(typeof(NNModel))]
|
||||
public class NNModelEditor : UnityEditor.Editor
|
||||
{
|
||||
// Use a static store for the foldouts, so it applies to all inspectors
|
||||
static Dictionary<string, bool> s_UIHelperFoldouts = new Dictionary<string, bool>();
|
||||
|
||||
private Model m_Model;
|
||||
private List<string> m_Inputs = new List<string>();
|
||||
private List<string> m_InputsDesc = new List<string>();
|
||||
private List<string> m_Outputs = new List<string>();
|
||||
private List<string> m_OutputsDesc = new List<string>();
|
||||
private List<string> m_Memories = new List<string>();
|
||||
private List<string> m_MemoriesDesc = new List<string>();
|
||||
private List<string> m_Layers = new List<string>();
|
||||
private List<string> m_LayersDesc = new List<string>();
|
||||
private List<string> m_Constants = new List<string>();
|
||||
private List<string> m_ConstantsDesc = new List<string>();
|
||||
|
||||
Dictionary<string, string> m_Metadata = new Dictionary<string, string>();
|
||||
Vector2 m_MetadataScrollPosition = Vector2.zero;
|
||||
// warnings
|
||||
private Dictionary<string, string> m_WarningsNeutral = new Dictionary<string, string>();
|
||||
private Dictionary<string, string> m_WarningsInfo = new Dictionary<string, string>();
|
||||
private Dictionary<string, string> m_WarningsWarning = new Dictionary<string, string>();
|
||||
private Dictionary<string, string> m_WarningsError = new Dictionary<string, string>();
|
||||
private Vector2 m_WarningsNeutralScrollPosition = Vector2.zero;
|
||||
private Vector2 m_WarningsInfoScrollPosition = Vector2.zero;
|
||||
private Vector2 m_WarningsWarningScrollPosition = Vector2.zero;
|
||||
private Vector2 m_WarningsErrorScrollPosition = Vector2.zero;
|
||||
|
||||
|
||||
private long m_NumEmbeddedWeights;
|
||||
private long m_NumConstantWeights;
|
||||
private long m_TotalWeightsSizeInBytes;
|
||||
|
||||
private Vector2 m_InputsScrollPosition = Vector2.zero;
|
||||
private Vector2 m_OutputsScrollPosition = Vector2.zero;
|
||||
private Vector2 m_MemoriesScrollPosition = Vector2.zero;
|
||||
private Vector2 m_LayerScrollPosition = Vector2.zero;
|
||||
private Vector2 m_ConstantScrollPosition = Vector2.zero;
|
||||
private const float k_Space = 5f;
|
||||
|
||||
private Texture2D m_IconTexture;
|
||||
private Texture2D LoadIconTexture()
|
||||
{
|
||||
if (m_IconTexture != null)
|
||||
return m_IconTexture;
|
||||
|
||||
string[] allCandidates = AssetDatabase.FindAssets(ONNXModelImporter.iconName);
|
||||
if (allCandidates.Length > 0)
|
||||
m_IconTexture = AssetDatabase.LoadAssetAtPath(AssetDatabase.GUIDToAssetPath(allCandidates[0]), typeof(Texture2D)) as Texture2D;
|
||||
|
||||
return m_IconTexture;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Editor static preview rendering callback
|
||||
/// </summary>
|
||||
/// <param name="assetPath">Asset path</param>
|
||||
/// <param name="subAssets">Child assets</param>
|
||||
/// <param name="width">width</param>
|
||||
/// <param name="height">height</param>
|
||||
/// <returns></returns>
|
||||
public override Texture2D RenderStaticPreview(string assetPath, UnityEngine.Object[] subAssets, int width, int height)
|
||||
{
|
||||
Texture2D icon = LoadIconTexture();
|
||||
if (icon == null)
|
||||
return null;
|
||||
Texture2D tex = new Texture2D(width, height);
|
||||
EditorUtility.CopySerialized(icon, tex);
|
||||
return tex;
|
||||
}
|
||||
|
||||
private void AddDimension(StringBuilder stringBuilder, string name, int value, bool lastDim=false)
|
||||
{
|
||||
string strValue = (value >= 1) ? value.ToString() : "*";
|
||||
stringBuilder.AppendFormat("{0}:{1}", name, strValue);
|
||||
if (!lastDim)
|
||||
stringBuilder.Append(", ");
|
||||
}
|
||||
|
||||
private string GetUIStringFromShape(int[] shape)
|
||||
{
|
||||
StringBuilder stringBuilder = new StringBuilder("shape: (", 50);
|
||||
if (shape.Length == 8)
|
||||
{
|
||||
bool is8D = (shape[0] > 1 || shape[1] > 1 || shape[3] > 1 || shape[4] > 1);
|
||||
if (is8D) AddDimension(stringBuilder, "s", shape[0]);
|
||||
if (is8D) AddDimension(stringBuilder, "r", shape[1]);
|
||||
AddDimension(stringBuilder, "n", shape[2]);
|
||||
if (is8D) AddDimension(stringBuilder, "t", shape[3]);
|
||||
if (is8D) AddDimension(stringBuilder, "d", shape[4]);
|
||||
AddDimension(stringBuilder, "h", shape[5]);
|
||||
AddDimension(stringBuilder, "w", shape[6]);
|
||||
AddDimension(stringBuilder, "c", shape[7], true);
|
||||
}
|
||||
else
|
||||
{
|
||||
UnityEngine.Debug.Assert(shape.Length == 4);
|
||||
AddDimension(stringBuilder, "n", shape[0]);
|
||||
AddDimension(stringBuilder, "h", shape[1]);
|
||||
AddDimension(stringBuilder, "w", shape[2]);
|
||||
AddDimension(stringBuilder, "c", shape[3], true);
|
||||
}
|
||||
stringBuilder.Append(")");
|
||||
return stringBuilder.ToString();
|
||||
}
|
||||
|
||||
void OnEnable()
|
||||
{
|
||||
var nnModel = target as NNModel;
|
||||
if (nnModel == null)
|
||||
return;
|
||||
if (nnModel.modelData == null)
|
||||
return;
|
||||
|
||||
m_Model = nnModel.GetDeserializedModel();
|
||||
if (m_Model == null)
|
||||
return;
|
||||
|
||||
m_Inputs = m_Model.inputs.Select(i => i.name).ToList();
|
||||
m_InputsDesc = m_Model.inputs.Select(i => GetUIStringFromShape(i.shape)).ToList();
|
||||
m_Outputs = m_Model.outputs.ToList();
|
||||
|
||||
bool allKnownInputShapes = true;
|
||||
var inputShapes = new Dictionary<string, TensorShape>();
|
||||
foreach (var i in m_Model.inputs)
|
||||
{
|
||||
allKnownInputShapes = allKnownInputShapes && ModelAnalyzer.IsInputShapeAcceptablyKnowForShapeInference(i);
|
||||
if (!allKnownInputShapes)
|
||||
break;
|
||||
inputShapes.Add(i.name, new TensorShape(i.shape));
|
||||
}
|
||||
if (allKnownInputShapes)
|
||||
{
|
||||
m_OutputsDesc = m_Model.outputs.Select(i => {
|
||||
string output = "shape: (n:*, h:*, w:*, c:*)";
|
||||
try
|
||||
{
|
||||
TensorShape shape;
|
||||
if (ModelAnalyzer.TryGetOutputTensorShape(m_Model, inputShapes, i, out shape))
|
||||
output = GetUIStringFromShape(shape.ToArray());
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Debug.LogError($"Unexpected error while evaluating model output {i}. {e}");
|
||||
}
|
||||
return output; }).ToList();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_OutputsDesc = m_Model.outputs.Select(i => "shape: (n:*, h:*, w:*, c:*)").ToList();
|
||||
}
|
||||
|
||||
m_Memories = m_Model.memories.Select(i => i.input).ToList();
|
||||
m_MemoriesDesc = m_Model.memories.Select(i => $"shape:{i.shape.ToString()} output:{i.output}").ToList();
|
||||
|
||||
var layers = m_Model.layers.Where(i => i.type != Layer.Type.Load);
|
||||
var constants = m_Model.layers.Where(i => i.type == Layer.Type.Load);
|
||||
|
||||
m_Layers = layers.Select(i => i.type.ToString()).ToList();
|
||||
m_LayersDesc = layers.Select(i => i.ToString()).ToList();
|
||||
m_Constants = constants.Select(i => i.type.ToString()).ToList();
|
||||
m_ConstantsDesc = constants.Select(i => i.ToString()).ToList();
|
||||
|
||||
m_NumEmbeddedWeights = layers.Sum(l => (long)l.datasets.Sum(ds => (long)ds.length));
|
||||
m_NumConstantWeights = constants.Sum(l => (long)l.datasets.Sum(ds => (long)ds.length));
|
||||
|
||||
// weights are not loaded for UI, recompute size
|
||||
m_TotalWeightsSizeInBytes = 0;
|
||||
for (var l = 0; l < m_Model.layers.Count; ++l)
|
||||
for (var d = 0; d < m_Model.layers[l].datasets.Length; ++d)
|
||||
m_TotalWeightsSizeInBytes += m_Model.layers[l].datasets[d].length * m_Model.layers[l].datasets[d].itemSizeInBytes;
|
||||
|
||||
m_Metadata = new Dictionary<string, string>(m_Model.Metadata);
|
||||
|
||||
for (int i = 0; i < m_Model.Warnings.Count; i++)
|
||||
{
|
||||
var warning = m_Model.Warnings[i].LayerName;
|
||||
var warningDesc = m_Model.Warnings[i].Message;
|
||||
MessageType messageType = MessageType.Warning;
|
||||
if(warningDesc.StartsWith("MessageType"))
|
||||
{
|
||||
messageType = (MessageType)(warningDesc[12] - '0');
|
||||
warningDesc = warningDesc.Substring(13);
|
||||
}
|
||||
|
||||
switch (messageType)
|
||||
{
|
||||
case MessageType.None:
|
||||
m_WarningsNeutral[warning] = warningDesc;
|
||||
break;
|
||||
case MessageType.Info:
|
||||
m_WarningsInfo[warning] = warningDesc;
|
||||
break;
|
||||
case MessageType.Warning:
|
||||
m_WarningsWarning[warning] = warningDesc;
|
||||
break;
|
||||
case MessageType.Error:
|
||||
m_WarningsError[warning] = warningDesc;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void OpenNNModelAsTempFileButton(NNModel nnModel)
|
||||
{
|
||||
if (nnModel == null)
|
||||
return;
|
||||
if (nnModel.modelData == null)
|
||||
return;
|
||||
|
||||
if (GUILayout.Button("Open imported NN model as temp file"))
|
||||
{
|
||||
string tempPath = Application.temporaryCachePath;
|
||||
string filePath = Path.Combine(tempPath, nnModel.name);
|
||||
string filePathWithExtension = Path.ChangeExtension(filePath, "nn");
|
||||
File.WriteAllBytes(filePathWithExtension, nnModel.modelData.Value);
|
||||
System.Diagnostics.Process.Start(filePathWithExtension);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Editor UI rendering callback
|
||||
/// </summary>
|
||||
public override void OnInspectorGUI()
|
||||
{
|
||||
if (m_Model == null)
|
||||
return;
|
||||
|
||||
// HACK: When inspector settings are applied and the file is re-imported there doesn't seem to be a clean way to
|
||||
// get a notification from Unity, so we detect this change
|
||||
var nnModel = target as NNModel;
|
||||
if (nnModel && m_Model != nnModel.GetDeserializedModel())
|
||||
OnEnable(); // Model data changed underneath while inspector was active, so reload
|
||||
|
||||
GUI.enabled = true;
|
||||
OpenNNModelAsTempFileButton(nnModel);
|
||||
GUILayout.Label($"Source: {m_Model.IrSource}");
|
||||
GUILayout.Label($"Version: {m_Model.IrVersion}");
|
||||
GUILayout.Label($"Producer Name: {m_Model.ProducerName}");
|
||||
|
||||
if (m_Metadata.Any())
|
||||
{
|
||||
ListUIHelper($"Metadata {m_Metadata.Count}",
|
||||
m_Metadata.Keys.ToList(), m_Metadata.Values.ToList(), ref m_MetadataScrollPosition);
|
||||
}
|
||||
|
||||
if(m_WarningsError.Any())
|
||||
{
|
||||
ListUIHelper($"Errors {m_WarningsError.Count.ToString()}", m_WarningsError.Keys.ToList(), m_WarningsError.Values.ToList(), ref m_WarningsErrorScrollPosition);
|
||||
EditorGUILayout.HelpBox("Model contains errors. Behavior might be incorrect", MessageType.Error, true);
|
||||
}
|
||||
if(m_WarningsWarning.Any())
|
||||
{
|
||||
ListUIHelper($"Warnings {m_WarningsWarning.Count.ToString()}", m_WarningsWarning.Keys.ToList(), m_WarningsWarning.Values.ToList(), ref m_WarningsWarningScrollPosition);
|
||||
EditorGUILayout.HelpBox("Model contains warnings. Behavior might be incorrect", MessageType.Warning, true);
|
||||
}
|
||||
if(m_WarningsInfo.Any())
|
||||
{
|
||||
ListUIHelper($"Information: ", m_WarningsInfo.Keys.ToList(), m_WarningsInfo.Values.ToList(), ref m_WarningsInfoScrollPosition);
|
||||
EditorGUILayout.HelpBox("Model contains import information.", MessageType.Info, true);
|
||||
}
|
||||
if(m_WarningsNeutral.Any())
|
||||
{
|
||||
ListUIHelper($"Comments: ", m_WarningsNeutral.Keys.ToList(), m_WarningsNeutral.Values.ToList(), ref m_WarningsNeutralScrollPosition);
|
||||
}
|
||||
var constantWeightInfo = m_Constants.Count > 0 ? $" using {m_NumConstantWeights:n0} weights" : "";
|
||||
ListUIHelper($"Inputs ({m_Inputs.Count})", m_Inputs, m_InputsDesc, ref m_InputsScrollPosition);
|
||||
ListUIHelper($"Outputs ({m_Outputs.Count})", m_Outputs, m_OutputsDesc, ref m_OutputsScrollPosition);
|
||||
ListUIHelper($"Memories ({m_Memories.Count})", m_Memories, m_MemoriesDesc, ref m_MemoriesScrollPosition);
|
||||
ListUIHelper($"Layers ({m_Layers.Count} using {m_NumEmbeddedWeights:n0} embedded weights)", m_Layers, m_LayersDesc, ref m_LayerScrollPosition, m_Constants.Count == 0 ? 1.5f: 1f);
|
||||
ListUIHelper($"Constants ({m_Constants.Count}{constantWeightInfo})", m_Constants, m_ConstantsDesc, ref m_ConstantScrollPosition);
|
||||
|
||||
GUILayout.Label($"Total weight size: {m_TotalWeightsSizeInBytes:n0} bytes");
|
||||
}
|
||||
|
||||
private static void ListUIHelper(string sectionTitle, IReadOnlyList<string> names, IReadOnlyList<string> descriptions, ref Vector2 scrollPosition, float maxHeightMultiplier = 1f)
|
||||
{
|
||||
int n = names.Count();
|
||||
UnityEngine.Debug.Assert(descriptions.Count == n);
|
||||
if (descriptions.Count < n)
|
||||
return;
|
||||
|
||||
GUILayout.Space(k_Space);
|
||||
if (!s_UIHelperFoldouts.TryGetValue(sectionTitle, out bool foldout))
|
||||
foldout = true;
|
||||
|
||||
foldout = EditorGUILayout.Foldout(foldout, sectionTitle, true, EditorStyles.foldoutHeader);
|
||||
s_UIHelperFoldouts[sectionTitle] = foldout;
|
||||
if (foldout)
|
||||
{
|
||||
// GUILayout.Label(sectionTitle, EditorStyles.boldLabel);
|
||||
float height = Mathf.Min(n * 20f + 2f, 150f * maxHeightMultiplier);
|
||||
if (n == 0)
|
||||
return;
|
||||
|
||||
scrollPosition = GUILayout.BeginScrollView(scrollPosition, GUI.skin.box, GUILayout.MinHeight(height));
|
||||
Event e = Event.current;
|
||||
float lineHeight = 16.0f;
|
||||
|
||||
StringBuilder fullText = new StringBuilder();
|
||||
fullText.Append(sectionTitle);
|
||||
fullText.AppendLine();
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
string name = names[i];
|
||||
string description = descriptions[i];
|
||||
fullText.Append($"{name} {description}");
|
||||
fullText.AppendLine();
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
Rect r = EditorGUILayout.GetControlRect(false, lineHeight);
|
||||
|
||||
string name = names[i];
|
||||
string description = descriptions[i];
|
||||
|
||||
// Context menu, "Copy"
|
||||
if (e.type == EventType.ContextClick && r.Contains(e.mousePosition))
|
||||
{
|
||||
e.Use();
|
||||
var menu = new GenericMenu();
|
||||
|
||||
// need to copy current value to be used in delegate
|
||||
// (C# closures close over variables, not their values)
|
||||
menu.AddItem(new GUIContent($"Copy current line"), false, delegate
|
||||
{
|
||||
EditorGUIUtility.systemCopyBuffer = $"{name} {description}";
|
||||
});
|
||||
menu.AddItem(new GUIContent($"Copy section"), false, delegate
|
||||
{
|
||||
EditorGUIUtility.systemCopyBuffer = fullText.ToString();
|
||||
});
|
||||
menu.ShowAsContext();
|
||||
}
|
||||
|
||||
// Color even line for readability
|
||||
if (e.type == EventType.Repaint)
|
||||
{
|
||||
GUIStyle st = "CN EntryBackEven";
|
||||
if ((i & 1) == 0)
|
||||
st.Draw(r, false, false, false, false);
|
||||
}
|
||||
|
||||
// layer name on the right side
|
||||
Rect locRect = r;
|
||||
locRect.xMax = locRect.xMin;
|
||||
GUIContent gc = new GUIContent(name.ToString(CultureInfo.InvariantCulture));
|
||||
|
||||
// calculate size so we can left-align it
|
||||
Vector2 size = EditorStyles.miniBoldLabel.CalcSize(gc);
|
||||
locRect.xMax += size.x;
|
||||
GUI.Label(locRect, gc, EditorStyles.miniBoldLabel);
|
||||
locRect.xMax += 2;
|
||||
|
||||
// message
|
||||
Rect msgRect = r;
|
||||
msgRect.xMin = locRect.xMax;
|
||||
GUI.Label(msgRect, new GUIContent(description.ToString(CultureInfo.InvariantCulture)), EditorStyles.miniLabel);
|
||||
}
|
||||
|
||||
GUILayout.EndScrollView();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 08ecb3218a86c6741aed5b2a299b203b
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"name": "Unity.Barracuda.Editor",
|
||||
"references": [
|
||||
"Unity.Barracuda",
|
||||
"Unity.Barracuda.ONNX"
|
||||
],
|
||||
"optionalUnityReferences": [],
|
||||
"includePlatforms": [
|
||||
"Editor"
|
||||
],
|
||||
"excludePlatforms": [],
|
||||
"allowUnsafeCode": false,
|
||||
"overrideReferences": false,
|
||||
"precompiledReferences": [],
|
||||
"autoReferenced": true,
|
||||
"defineConstraints": []
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 9f1e7d835703842dda0e25142ed6c3c9
|
||||
AssemblyDefinitionImporter:
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,8 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: a03a1fa0e3b784e19a9e9d31b945b252
|
||||
folderAsset: yes
|
||||
DefaultImporter:
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,8 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 5bec48e8f6ff349488387cf35fbae752
|
||||
folderAsset: yes
|
||||
DefaultImporter:
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,7 +0,0 @@
|
||||
using System.Reflection;
|
||||
|
||||
// DON'T EDIT
|
||||
// Will be replaced by Tools/Build/build.py
|
||||
[assembly: AssemblyVersion("3.0.0.0")]
|
||||
[assembly: AssemblyFileVersion("3.0.0.0")]
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: f7f9574517c146ada866c486dc392731
|
||||
timeCreated: 1533296387
|
||||
@@ -1,8 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 12a6bedd18899cd4189f66d8188f29ff
|
||||
folderAsset: yes
|
||||
DefaultImporter:
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 67f00a1befd4144eca5685250d893f09
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,194 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq; // ToList()
|
||||
using UnityEngine;
|
||||
using UnityEngine.Assertions;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
|
||||
internal class BarracudaBackendsFactory
|
||||
{
|
||||
public static WorkerFactory.Type ResolveAutoType(WorkerFactory.Type type)
|
||||
{
|
||||
if (type != WorkerFactory.Type.Auto)
|
||||
return type;
|
||||
return GetBestTypeForDevice(WorkerFactory.Device.Auto);
|
||||
}
|
||||
|
||||
internal static WorkerFactory.Type GetBestTypeForDevice(WorkerFactory.Device device)
|
||||
{
|
||||
switch (device)
|
||||
{
|
||||
case WorkerFactory.Device.Auto:
|
||||
case WorkerFactory.Device.GPU:
|
||||
return WorkerFactory.Type.ComputePrecompiled;
|
||||
default:
|
||||
return WorkerFactory.Type.CSharpBurst;
|
||||
}
|
||||
}
|
||||
|
||||
internal static WorkerFactory.Type ValidateType(WorkerFactory.Type type)
|
||||
{
|
||||
type = ResolveAutoType(type);
|
||||
Assert.AreNotEqual(type, WorkerFactory.Type.Auto);
|
||||
|
||||
if (WorkerFactory.IsType(type, WorkerFactory.Device.GPU) && !ComputeShaderSingleton.Instance.supported)
|
||||
{
|
||||
type = WorkerFactory.Type.PixelShader;
|
||||
}
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
private static IOps CreateOps(WorkerFactory.Type type, ITensorAllocator allocator, bool verbose)
|
||||
{
|
||||
switch(type)
|
||||
{
|
||||
case WorkerFactory.Type.ComputePrecompiled:
|
||||
return new PrecompiledComputeOps(allocator, verbose);
|
||||
|
||||
case WorkerFactory.Type.Compute:
|
||||
return new ComputeOps(allocator, verbose);
|
||||
|
||||
case WorkerFactory.Type.ComputeRef:
|
||||
return new ReferenceComputeOps(allocator);
|
||||
|
||||
case WorkerFactory.Type.PixelShader:
|
||||
return new PixelShaderOps(allocator);
|
||||
|
||||
case WorkerFactory.Type.CSharpBurst:
|
||||
return new BurstCPUOps(allocator);
|
||||
|
||||
case WorkerFactory.Type.CSharp:
|
||||
return new UnsafeArrayCPUOps(allocator);
|
||||
|
||||
default:
|
||||
return new ReferenceCPUOps(allocator);
|
||||
}
|
||||
}
|
||||
|
||||
internal static IWorker CreateWorker(WorkerFactory.Type type, Model model, string[] additionalOutputs, string[] trimOutputs, WorkerFactory.WorkerConfiguration workerConfiguration, IModelExecutionsReporter modelExecutionsReporter = null)
|
||||
{
|
||||
type = ResolveAutoType(type);
|
||||
var compareAgainstType = ResolveAutoType(workerConfiguration.compareAgainstType);
|
||||
Assert.AreNotEqual(type, WorkerFactory.Type.Auto);
|
||||
Assert.AreNotEqual(compareAgainstType, WorkerFactory.Type.Auto);
|
||||
|
||||
bool compare = type != compareAgainstType;
|
||||
|
||||
if (WorkerFactory.IsType(type, WorkerFactory.Device.GPU) && !SystemInfo.supportsComputeShaders && !Application.isEditor)
|
||||
{
|
||||
type = WorkerFactory.Type.PixelShader;
|
||||
}
|
||||
|
||||
IVars vars;
|
||||
// PixelShader worker uses Blit/Textures, cannot re-use vars unless the dispatch mechanism allows rendering to sub part of the texture
|
||||
if ((type == WorkerFactory.Type.PixelShader) || (compareAgainstType == WorkerFactory.Type.PixelShader))
|
||||
vars = new GenericVarsWithReuse();
|
||||
else
|
||||
{
|
||||
if (WorkerFactory.IsType(type, WorkerFactory.Device.GPU) || WorkerFactory.IsType(compareAgainstType, WorkerFactory.Device.GPU))
|
||||
vars = new ComputeVarsWithSharedModel();
|
||||
else
|
||||
vars = new DefaultVars();
|
||||
}
|
||||
|
||||
ITensorAllocator allocator = vars.GetAllocator();
|
||||
if ((type == WorkerFactory.Type.PixelShader) || (compareAgainstType == WorkerFactory.Type.PixelShader))
|
||||
allocator = new TensorCachingByShapeAllocator();
|
||||
|
||||
if (workerConfiguration.verbose)
|
||||
D.Log($"Storage type: {vars.GetType()}. Allocator type: {allocator.GetType()}.");
|
||||
|
||||
IOps ops = CreateOps(type, allocator, workerConfiguration.verbose);
|
||||
|
||||
if (compare)
|
||||
ops = new CompareOps(ops,
|
||||
CreateOps(compareAgainstType, allocator, workerConfiguration.verbose), workerConfiguration.compareLogLevel, workerConfiguration.compareEpsilon);
|
||||
|
||||
if (workerConfiguration.verbose || modelExecutionsReporter != null)
|
||||
ops = new VerboseOps(ops, workerConfiguration.verbose);
|
||||
|
||||
if (Application.isEditor || modelExecutionsReporter != null)
|
||||
ops = new StatsOps(ops);
|
||||
|
||||
model = ValidateModel(
|
||||
PatchModel(model, additionalOutputs, trimOutputs));
|
||||
|
||||
ops.SetModelExecutionsReporter(modelExecutionsReporter);
|
||||
return new GenericWorker(model, ops, vars, workerConfiguration.verbose, workerConfiguration.takeoverWeights);
|
||||
}
|
||||
|
||||
internal static Model PatchModel(Model model, string[] additionalOutputs, string[] trimOutputs = null)
|
||||
{
|
||||
bool trimModel = trimOutputs != null;
|
||||
|
||||
if (trimOutputs != null)
|
||||
{
|
||||
foreach (var o in trimOutputs.Except(model.outputs))
|
||||
if (additionalOutputs == null || !additionalOutputs.Contains(o))
|
||||
D.LogWarning($"Output specified in trimOutputs was not found in the model: {o}");
|
||||
|
||||
var newModel = model.ShallowCopy();
|
||||
newModel.outputs = trimOutputs.Intersect(model.outputs).ToList();
|
||||
model = newModel;
|
||||
}
|
||||
|
||||
if (additionalOutputs != null)
|
||||
{
|
||||
foreach (var o in additionalOutputs.Except(model.layers.Select(l => l.name)))
|
||||
D.LogWarning($"Layer specified in additionalOutputs was not found in the model: {o}");
|
||||
|
||||
// 'new' means that output name does not yet exist in model.outputs
|
||||
// 'valid' means that output name matches one of the existing model.layer names
|
||||
var newAndValidAdditionalOutputs =
|
||||
additionalOutputs.Except(model.outputs).Intersect(model.layers.Select(l => l.name));
|
||||
|
||||
var newModel = model.ShallowCopy();
|
||||
newModel.outputs.AddRange(newAndValidAdditionalOutputs);
|
||||
model = newModel;
|
||||
}
|
||||
|
||||
if (trimModel)
|
||||
{
|
||||
var newModel = model.ShallowCopy();
|
||||
var upstream = ModelAnalyzer.FindUpstreamLayers(model, newModel.outputs.ToArray());
|
||||
foreach (var l in model.layers)
|
||||
if (!upstream.Contains(l))
|
||||
newModel.layers.Remove(l);
|
||||
|
||||
model = newModel;
|
||||
}
|
||||
|
||||
model = ModelOptimizer.RemoveNoop(model);
|
||||
|
||||
return model;
|
||||
}
|
||||
|
||||
internal static Model ValidateModel(Model model)
|
||||
{
|
||||
// validate, model contains no broken links
|
||||
var brokenLinks = ModelAnalyzer.FindBrokenLinks(model);
|
||||
if (brokenLinks.Length > 0)
|
||||
D.LogWarning($"Model contains {brokenLinks.Length} broken links: {string.Join(",", brokenLinks)}");
|
||||
|
||||
// validate, all model outputs are unique
|
||||
// https://stackoverflow.com/questions/18547354/c-sharp-linq-find-duplicates-in-list
|
||||
var duplicateOutputs = model.outputs.GroupBy(x => x)
|
||||
.Where(g => g.Count() > 1)
|
||||
.Select(y => y.Key);
|
||||
foreach (var o in duplicateOutputs)
|
||||
D.LogWarning($"Output is specified more than once in the model: {o}");
|
||||
|
||||
// validate, model contains no unconnected layers
|
||||
var unconnectedOutputs = ModelAnalyzer.FindUnconnectedOutputs(model);
|
||||
foreach (var o in unconnectedOutputs)
|
||||
D.LogWarning($"Layer is specified as output, but is missing in the model: {o}");
|
||||
|
||||
return model;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 355dc370391814b1c874848bb843b91c
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,245 +0,0 @@
|
||||
using System.Threading;
|
||||
using UnityEngine;
|
||||
using Unity.Jobs;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
// BarracudaBurstCPU.Core.cs -- definition of class BurstCPUOps, Pin(), BurstTensorData
|
||||
// BarracudaBurstCPU.Ops.cs -- impl. IOps, job schedulers
|
||||
// BarracudaBurstCPU.Jobs.cs -- impl. jobs
|
||||
|
||||
/// <summary>
|
||||
/// Burst specific internal `Tensor` data storage
|
||||
/// </summary>
|
||||
public class BurstTensorData : UnsafeArrayTensorData, IDependableTensorData
|
||||
{
|
||||
private JobHandle m_ReadFence;
|
||||
private JobHandle m_WriteFence;
|
||||
private bool m_SafeToDispose = true;
|
||||
|
||||
/// <inheritdoc/>
|
||||
public JobHandle fence { get { return m_ReadFence; } set { m_ReadFence = value; m_WriteFence = value; m_SafeToDispose = false; } }
|
||||
|
||||
/// <inheritdoc/>
|
||||
public JobHandle reuse { get { return m_WriteFence; } set { m_WriteFence = BurstCPUOps.Dependencies(value, m_WriteFence); m_SafeToDispose = false; } }
|
||||
|
||||
/// <inheritdoc/>
|
||||
public unsafe void* rawPtr => array.RawAddressAt(offset);
|
||||
|
||||
/// <summary>
|
||||
/// Creates new array
|
||||
/// </summary>
|
||||
/// <param name="count">count</param>
|
||||
public BurstTensorData(int count, DataType dataType) : base(count, dataType)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Creates new array
|
||||
/// </summary>
|
||||
/// <param name="shape">shape</param>
|
||||
public BurstTensorData(TensorShape shape, DataType dataType) : base(shape, dataType)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Uses shared array
|
||||
/// </summary>
|
||||
/// <param name="sharedArray">shared array</param>
|
||||
public BurstTensorData(ArrayTensorData sharedArray) : base(sharedArray)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Uses shared array
|
||||
/// </summary>
|
||||
/// <param name="sharedArray">shared array</param>
|
||||
public BurstTensorData(SharedArrayTensorData sharedArray) : base(sharedArray)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Uses unsafe array
|
||||
/// </summary>
|
||||
/// <param name="unsafeArray">unsafe array</param>
|
||||
public BurstTensorData(UnsafeArrayTensorData unsafeArray) : base(unsafeArray.array, unsafeArray.offset, unsafeArray.count, unsafeArray.m_Readonly)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Finalizer
|
||||
/// </summary>
|
||||
~BurstTensorData()
|
||||
{
|
||||
if (!m_SafeToDispose)
|
||||
D.LogWarning($"Found unreferenced, but undisposed Tensor data that potentially participates in an unfinished job and might lead to hazardous memory overwrites: {ToString()}");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Dispose contents
|
||||
/// </summary>
|
||||
public override void Dispose()
|
||||
{
|
||||
// It isn't safe to Complete jobs from a finalizer thread, so
|
||||
if (Thread.CurrentThread == BurstCPUOps.MainThread)
|
||||
CompleteAllPendingOperations();
|
||||
|
||||
base.Dispose();
|
||||
}
|
||||
|
||||
internal void CompleteAllPendingOperations()
|
||||
{
|
||||
fence.Complete();
|
||||
reuse.Complete();
|
||||
m_SafeToDispose = true;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Reserve (allocate) storage for `count` elements
|
||||
/// </summary>
|
||||
/// <param name="count">count</param>
|
||||
public override void Reserve(int count)
|
||||
{
|
||||
if (count > maxCapacity)
|
||||
{
|
||||
// going to reallocate memory in base.Reserve()
|
||||
// thus need to finish current work
|
||||
CompleteAllPendingOperations();
|
||||
}
|
||||
|
||||
base.Reserve(count);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Upload data to internal storage
|
||||
/// </summary>
|
||||
/// <param name="data">data</param>
|
||||
/// <param name="shape">shape</param>
|
||||
/// <param name="managedBufferStartIndex">`data` start index</param>
|
||||
public override void Upload(float[] data, TensorShape shape, int managedBufferStartIndex = 0)
|
||||
{
|
||||
CompleteAllPendingOperations();
|
||||
base.Upload(data, shape, managedBufferStartIndex);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Return data from internal storage
|
||||
/// </summary>
|
||||
/// <param name="shape">shape</param>
|
||||
/// <returns>managed array</returns>
|
||||
public override float[] Download(TensorShape shape)
|
||||
{
|
||||
// Download() as optimization gives direct access to the internal buffer
|
||||
// thus need to prepare internal buffer for potential writes
|
||||
CompleteAllPendingOperations();
|
||||
return base.Download(shape);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Return shared array from internal storage
|
||||
/// </summary>
|
||||
/// <returns>shared array from internal storage</returns>
|
||||
public override BarracudaArray SharedAccess(out int offset)
|
||||
{
|
||||
// SharedAccess() by design gives direct access to the interna
|
||||
// thus need to prepare internal buffer for potential writes
|
||||
CompleteAllPendingOperations();
|
||||
return base.SharedAccess(out offset);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Schedule async internal data download
|
||||
/// </summary>
|
||||
/// <param name="count">count to download</param>
|
||||
/// <returns>`true` if download is completed</returns>
|
||||
public override bool ScheduleAsyncDownload(int count)
|
||||
{
|
||||
return fence.IsCompleted;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Object summary as string
|
||||
/// </summary>
|
||||
/// <returns>object summary</returns>
|
||||
public override string ToString()
|
||||
{
|
||||
string readyToRead = m_SafeToDispose ? "true": "unknown";
|
||||
string readyForReuse = m_SafeToDispose ? "true": "unknown";
|
||||
try
|
||||
{
|
||||
readyToRead = fence.IsCompleted.ToString();
|
||||
readyForReuse = reuse.IsCompleted.ToString();
|
||||
}
|
||||
catch (UnityException) {}
|
||||
return string.Format("(CPU burst: {0} length: {1} offset: {2} uploaded: {3} ready-to-read: {4} ready-for-reuse: {5})",
|
||||
GetHashCode(), m_Array?.Length, m_Offset, m_Count, readyToRead, readyForReuse);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Burst specific implementation of `IOps`
|
||||
/// </summary>
|
||||
public partial class BurstCPUOps : UnsafeArrayCPUOps
|
||||
{
|
||||
/// <summary>
|
||||
/// Create `BurstCPUOps`
|
||||
/// </summary>
|
||||
/// <param name="allocator">allocator</param>
|
||||
public BurstCPUOps(ITensorAllocator allocator = null)
|
||||
: base(allocator)
|
||||
{
|
||||
if (PreferBLAS == BLAS.Native && !blas.IsNative())
|
||||
PreferBLAS = BLAS.Disabled;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Pin `Tensor` to Burst backend device, if `uploadCache` is false, data is not uploaded to device
|
||||
/// </summary>
|
||||
/// <param name="X">`Tensor`</param>
|
||||
/// <param name="uploadCache">`bool`</param>
|
||||
/// <returns>`BurstTensorData`</returns>
|
||||
new public static BurstTensorData Pin(Tensor X, bool uploadCache = true)
|
||||
{
|
||||
X.FlushCache(uploadCache);
|
||||
|
||||
var onDevice = X.tensorOnDevice as BurstTensorData;
|
||||
if (onDevice == null)
|
||||
{
|
||||
// try to adopt CPU arrays
|
||||
var asUnsafeArray = X.tensorOnDevice as UnsafeArrayTensorData;
|
||||
var asSharedArray = X.tensorOnDevice as SharedArrayTensorData;
|
||||
var asArray = X.tensorOnDevice as ArrayTensorData;
|
||||
if (asUnsafeArray != null) X.AttachToDevice(new BurstTensorData(asUnsafeArray));
|
||||
else if (asSharedArray != null) X.AttachToDevice(new BurstTensorData(asSharedArray));
|
||||
else if (asArray != null) X.AttachToDevice(new BurstTensorData(asArray));
|
||||
else
|
||||
{
|
||||
if (uploadCache)
|
||||
X.UploadToDevice(new BurstTensorData(X.shape, X.dataType)); // device is not compatible, create new array and upload
|
||||
else
|
||||
X.AllocateOnDevice(new BurstTensorData(X.shape, X.dataType)); // device is not compatible, create new array but do not upload
|
||||
}
|
||||
}
|
||||
|
||||
return X.tensorOnDevice as BurstTensorData;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Prepare `Tensor` for use with Burst backend
|
||||
/// </summary>
|
||||
/// <param name="X">`Tensor`</param>
|
||||
/// <returns>`Tensor`</returns>
|
||||
public override Tensor Prepare(Tensor X)
|
||||
{
|
||||
Pin(X);
|
||||
return X;
|
||||
}
|
||||
|
||||
public override Tensor PrepareNoAlloc(Tensor X)
|
||||
{
|
||||
Pin(X, uploadCache: false);
|
||||
return X;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Barracuda
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: f44c1c453c1754aaeb1e8608df82452b
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,471 +0,0 @@
|
||||
using UnityEngine;
|
||||
using UnityEngine.Assertions;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Unity.Collections;
|
||||
using Unity.Collections.LowLevel.Unsafe;
|
||||
using Unity.Jobs;
|
||||
using Unity.Mathematics;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
//#region Job output context helper
|
||||
|
||||
internal static class BurstSchedulingHelper
|
||||
{
|
||||
#region Private scheduling helpers with pointer aliasing verification
|
||||
|
||||
private static unsafe JobHandle ScheduleXSBOInternal<T>(T jobData,
|
||||
JobHandle fenceBeforeJobStart,
|
||||
void* ptrX,
|
||||
void* ptrS,
|
||||
void* ptrB,
|
||||
void* ptrO,
|
||||
int arrayLength, int innerloopBatchCount)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXSBO
|
||||
{
|
||||
T jobDataInternalCopy = jobData;
|
||||
jobDataInternalCopy.X = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrX};
|
||||
jobDataInternalCopy.S = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrS};
|
||||
jobDataInternalCopy.B = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrB};
|
||||
jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
|
||||
return jobDataInternalCopy.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
|
||||
}
|
||||
|
||||
private static unsafe JobHandle ScheduleXBOInternal<T>(T jobData,
|
||||
JobHandle fenceBeforeJobStart,
|
||||
void* ptrX,
|
||||
void* ptrB,
|
||||
void* ptrO,
|
||||
int arrayLength, int innerloopBatchCount)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXBO
|
||||
{
|
||||
T jobDataInternalCopy = jobData;
|
||||
jobDataInternalCopy.X = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrX};
|
||||
jobDataInternalCopy.B = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrB};
|
||||
jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
|
||||
return jobDataInternalCopy.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
|
||||
}
|
||||
|
||||
private static unsafe JobHandle ScheduleXOInternal<T>(T jobData,
|
||||
JobHandle fenceBeforeJobStart,
|
||||
void* ptrX,
|
||||
void* ptrO,
|
||||
int arrayLength, int innerloopBatchCount)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXO
|
||||
{
|
||||
T jobDataInternalCopy = jobData;
|
||||
jobDataInternalCopy.X = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrX};
|
||||
jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
|
||||
return jobDataInternalCopy.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
|
||||
}
|
||||
|
||||
private static unsafe JobHandle ScheduleXOInternal<T>(T jobData,
|
||||
JobHandle fenceBeforeJobStart,
|
||||
void* ptrX,
|
||||
void* ptrO)
|
||||
where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationXO
|
||||
{
|
||||
Assert.IsTrue(ptrO != ptrX);
|
||||
T jobDataInternalCopy = jobData;
|
||||
jobDataInternalCopy.X = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrX};
|
||||
jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
|
||||
return jobDataInternalCopy.Schedule(fenceBeforeJobStart);
|
||||
}
|
||||
|
||||
private static unsafe JobHandle ScheduleOInternal<T>(T jobData,
|
||||
JobHandle fenceBeforeJobStart,
|
||||
void* ptrO)
|
||||
where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationO
|
||||
{
|
||||
T jobDataInternalCopy = jobData;
|
||||
jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
|
||||
return jobDataInternalCopy.Schedule(fenceBeforeJobStart);
|
||||
}
|
||||
|
||||
private static unsafe JobHandle ScheduleOInternal<T>(T jobData,
|
||||
JobHandle fenceBeforeJobStart,
|
||||
void* ptrO,
|
||||
int arrayLength, int innerloopBatchCount)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationO
|
||||
{
|
||||
T jobDataInternalCopy = jobData;
|
||||
jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
|
||||
return jobDataInternalCopy.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Private fencing helper for readability
|
||||
private static JobHandle GetFenceBeforeJobStartXSBO(
|
||||
IDependableMemoryResource pinX,
|
||||
IDependableMemoryResource pinS,
|
||||
IDependableMemoryResource pinB,
|
||||
IDependableMemoryResource pinO)
|
||||
{
|
||||
return BurstCPUOps.Dependencies(pinX.fence, pinS.fence, pinB.fence, pinO.reuse);
|
||||
}
|
||||
|
||||
private static JobHandle GetFenceBeforeJobStartXBO(
|
||||
IDependableMemoryResource pinX,
|
||||
IDependableMemoryResource pinB,
|
||||
IDependableMemoryResource pinO)
|
||||
{
|
||||
return BurstCPUOps.Dependencies(pinX.fence, pinB.fence, pinO.reuse);
|
||||
}
|
||||
|
||||
private static JobHandle GetFenceBeforeJobStartXO(
|
||||
IDependableMemoryResource pinX,
|
||||
IDependableMemoryResource pinO)
|
||||
{
|
||||
return BurstCPUOps.Dependencies(pinX.fence, pinO.reuse);
|
||||
}
|
||||
|
||||
private static void SetXSBOFences(this JobHandle jobFence,
|
||||
IDependableMemoryResource pinX,
|
||||
IDependableMemoryResource pinS,
|
||||
IDependableMemoryResource pinB,
|
||||
IDependableMemoryResource pinO)
|
||||
{
|
||||
pinX.reuse = jobFence;
|
||||
pinS.reuse = jobFence;
|
||||
pinB.reuse = jobFence;
|
||||
pinO.fence = jobFence;
|
||||
}
|
||||
|
||||
private static void SetXBOFences(this JobHandle jobFence,
|
||||
IDependableMemoryResource pinX,
|
||||
IDependableMemoryResource pinB,
|
||||
IDependableMemoryResource pinO)
|
||||
{
|
||||
pinX.reuse = jobFence;
|
||||
pinB.reuse = jobFence;
|
||||
pinO.fence = jobFence;
|
||||
}
|
||||
|
||||
private static void SetXOFences(this JobHandle jobFence,
|
||||
IDependableMemoryResource pinX,
|
||||
IDependableMemoryResource pinO)
|
||||
{
|
||||
pinX.reuse = jobFence;
|
||||
pinO.fence = jobFence;
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region Immediate scheduling helper
|
||||
internal enum FencingHelperMode
|
||||
{
|
||||
UpdateResourcesFencesOnScheduling,
|
||||
CustomResourcesFencesHandling,
|
||||
}
|
||||
|
||||
internal static unsafe JobHandle ScheduleXSBO<T>(this T jobData,
|
||||
IDependableMemoryResource rX,
|
||||
IDependableMemoryResource rS,
|
||||
IDependableMemoryResource rB,
|
||||
IDependableMemoryResource rO,
|
||||
int arrayLength, int innerloopBatchCount,
|
||||
FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXSBO
|
||||
{
|
||||
var fenceBeforeJobStart = GetFenceBeforeJobStartXSBO(rX, rS, rB, rO);
|
||||
|
||||
JobHandle jobFence;
|
||||
{
|
||||
jobFence = ScheduleXSBOInternal(jobData, fenceBeforeJobStart, rX.rawPtr, rS.rawPtr, rB.rawPtr, rO.rawPtr, arrayLength, innerloopBatchCount);
|
||||
}
|
||||
|
||||
if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
jobFence.SetXSBOFences(rX, rS, rB, rO);
|
||||
}
|
||||
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
internal static unsafe JobHandle ScheduleXBO<T>(this T jobData,
|
||||
IDependableMemoryResource X,
|
||||
IDependableMemoryResource B,
|
||||
IDependableMemoryResource O,
|
||||
int arrayLength, int innerloopBatchCount,
|
||||
FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXBO
|
||||
{
|
||||
var fenceBeforeJobStart = GetFenceBeforeJobStartXBO(X, B, O);
|
||||
|
||||
JobHandle jobFence;
|
||||
{
|
||||
jobFence = ScheduleXBOInternal(jobData, fenceBeforeJobStart, X.rawPtr, B.rawPtr, O.rawPtr, arrayLength, innerloopBatchCount);
|
||||
}
|
||||
|
||||
if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
jobFence.SetXBOFences(X, B, O);
|
||||
}
|
||||
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
internal static unsafe JobHandle ScheduleO<T>(this T jobData,
|
||||
IDependableMemoryResource O,
|
||||
FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationO
|
||||
{
|
||||
var fenceBeforeJobStart = O.reuse;
|
||||
|
||||
JobHandle jobFence;
|
||||
{
|
||||
jobFence = ScheduleOInternal(jobData, fenceBeforeJobStart, O.rawPtr);
|
||||
}
|
||||
|
||||
if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
O.fence = jobFence;
|
||||
}
|
||||
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
internal static unsafe JobHandle ScheduleXO<T>(this T jobData,
|
||||
IDependableMemoryResource X,
|
||||
IDependableMemoryResource O,
|
||||
int arrayLength, int innerloopBatchCount,
|
||||
FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXO
|
||||
{
|
||||
var fenceBeforeJobStart = GetFenceBeforeJobStartXO(X, O);
|
||||
|
||||
JobHandle jobFence;
|
||||
{
|
||||
jobFence = ScheduleXOInternal(jobData, fenceBeforeJobStart, X.rawPtr, O.rawPtr, arrayLength, innerloopBatchCount);
|
||||
}
|
||||
|
||||
if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
jobFence.SetXOFences(X, O);
|
||||
}
|
||||
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
internal static unsafe JobHandle ScheduleO<T>(this T jobData,
|
||||
BurstTensorData pinO,
|
||||
int offsetO,
|
||||
int arrayLength, int innerloopBatchCount,
|
||||
FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationO
|
||||
{
|
||||
var fenceBeforeJobStart = pinO.reuse;
|
||||
|
||||
JobHandle jobFence;
|
||||
{
|
||||
void* ptrO = pinO.array.RawAddressAt(pinO.offset+offsetO);
|
||||
jobFence = ScheduleOInternal(jobData, fenceBeforeJobStart, ptrO, arrayLength, innerloopBatchCount);
|
||||
}
|
||||
|
||||
if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
pinO.fence = jobFence;
|
||||
}
|
||||
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
internal static unsafe JobHandle ScheduleXO<T>(this T jobData,
|
||||
BurstTensorData pinX,
|
||||
int offsetX,
|
||||
BurstTensorData pinO,
|
||||
int offsetO,
|
||||
FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationXO
|
||||
{
|
||||
var fenceBeforeJobStart = GetFenceBeforeJobStartXO(pinX, pinO);
|
||||
|
||||
JobHandle jobFence;
|
||||
{
|
||||
void* ptrX = pinX.array.RawAddressAt(pinX.offset+offsetX);
|
||||
void* ptrO = pinO.array.RawAddressAt(pinO.offset+offsetO);
|
||||
jobFence = ScheduleXOInternal(jobData, fenceBeforeJobStart, ptrX, ptrO);
|
||||
}
|
||||
|
||||
if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
jobFence.SetXOFences(pinX, pinO);
|
||||
}
|
||||
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
internal static unsafe JobHandle ScheduleXO<T>(this T jobData,
|
||||
IDependableMemoryResource X,
|
||||
IDependableMemoryResource O,
|
||||
FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationXO
|
||||
{
|
||||
var fenceBeforeJobStart = GetFenceBeforeJobStartXO(X, O);
|
||||
|
||||
JobHandle jobFence;
|
||||
{
|
||||
jobFence = ScheduleXOInternal(jobData, fenceBeforeJobStart, X.rawPtr, O.rawPtr);
|
||||
}
|
||||
|
||||
if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
jobFence.SetXOFences(X, O);
|
||||
}
|
||||
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
|
||||
#region Schedulling helper for parrallel jobs
|
||||
|
||||
internal struct ParallelJobsContext : IDisposable
|
||||
{
|
||||
internal static Dictionary<IDependableMemoryResource, JobHandle> s_ReadDependencyTracker =
|
||||
new Dictionary<IDependableMemoryResource, JobHandle>(100);
|
||||
|
||||
private readonly IDependableMemoryResource outputResource;
|
||||
private JobHandle combinedJobFence;
|
||||
|
||||
public ParallelJobsContext(IDependableMemoryResource output)
|
||||
{
|
||||
outputResource = output;
|
||||
combinedJobFence = new JobHandle();
|
||||
Assert.AreEqual(0, s_ReadDependencyTracker.Count,
|
||||
"s_ReadDependencyTracker should be empty meaning ParrallelJobs was not disposed properly.");
|
||||
}
|
||||
|
||||
//For now only CopyStrideJobHelper and tests need ParallelJobsContext. If this code need to be duplicated for more case in the future:
|
||||
//- Maybe add generic version by having CopyStrideJobHelper and other helper struct implement an interface (but beware of GC).
|
||||
//- Or make ParallelJobsContext partial and code generated by jobs template.
|
||||
public JobHandle ScheduleXO(
|
||||
BurstCPUOps.CopyStrideJobHelper jobData,//See comment above.
|
||||
BurstTensorData pinX, int offsetX,
|
||||
BurstTensorData pinO, int offsetO)
|
||||
{
|
||||
Assert.IsTrue(pinO == outputResource);
|
||||
var jobFence = jobData.ScheduleXO(pinX, offsetX, pinO, offsetO, BurstSchedulingHelper.FencingHelperMode.CustomResourcesFencesHandling);
|
||||
TrackJobReadDependencies(pinX, jobFence);
|
||||
AddJobDependencyToOutputFence(jobFence);
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
public JobHandle ScheduleXO<T>(
|
||||
T jobData,
|
||||
BurstTensorData pinX,
|
||||
BurstTensorData pinO,
|
||||
int arrayLength, int innerloopBatchCount)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXO
|
||||
{
|
||||
Assert.IsTrue(pinO == outputResource);
|
||||
var jobFence = jobData.ScheduleXO(pinX, pinO, arrayLength, innerloopBatchCount, BurstSchedulingHelper.FencingHelperMode.CustomResourcesFencesHandling);
|
||||
TrackJobReadDependencies(pinX, jobFence);
|
||||
AddJobDependencyToOutputFence(jobFence);
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
|
||||
public JobHandle ScheduleXBO<T>(
|
||||
T jobData,
|
||||
BurstTensorData pinX,
|
||||
BurstTensorData pinB,
|
||||
BurstTensorData pinO,
|
||||
int arrayLength, int innerloopBatchCount)
|
||||
where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXBO
|
||||
{
|
||||
Assert.IsTrue(pinO == outputResource);
|
||||
var jobFence = jobData.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerloopBatchCount, BurstSchedulingHelper.FencingHelperMode.CustomResourcesFencesHandling);
|
||||
TrackJobReadDependencies(pinX, jobFence);
|
||||
TrackJobReadDependencies(pinB, jobFence);
|
||||
AddJobDependencyToOutputFence(jobFence);
|
||||
return jobFence;
|
||||
}
|
||||
|
||||
internal void AddJobDependencyToOutputFence(JobHandle jobFence)
|
||||
{
|
||||
//Once all jobs writing to O will be done, further jobs will be able to read from O.
|
||||
//We combine job fences from all job writing to O here and assign to O.fence in Dispose().
|
||||
combinedJobFence = JobHandle.CombineDependencies(combinedJobFence, jobFence);
|
||||
}
|
||||
|
||||
internal void TrackJobReadDependencies(IDependableMemoryResource T, JobHandle jobFence)
|
||||
{
|
||||
//Once all jobs reading from T will be done, further jobs will be able to write to T.
|
||||
//We combine job fences from all jobs reading from T here and assign to T.reuse in Dispose().
|
||||
if (T != null)
|
||||
{
|
||||
if (s_ReadDependencyTracker.ContainsKey(T))
|
||||
s_ReadDependencyTracker[T] = JobHandle.CombineDependencies(s_ReadDependencyTracker[T], jobFence);
|
||||
else
|
||||
s_ReadDependencyTracker[T] = jobFence;
|
||||
}
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
foreach (var key in s_ReadDependencyTracker.Keys)
|
||||
{
|
||||
key.reuse = s_ReadDependencyTracker[key];
|
||||
}
|
||||
outputResource.fence = combinedJobFence;
|
||||
s_ReadDependencyTracker.Clear();
|
||||
}
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Memory allocation wrapper usable by job fencing helpers
|
||||
|
||||
internal unsafe class FencedMemoryAlloc : IDependableMemoryResource
|
||||
{
|
||||
private JobHandle m_ReadFence;
|
||||
private JobHandle m_WriteFence;
|
||||
private void* data;
|
||||
public void* rawPtr => data;
|
||||
public half* halfdata { get { Assert.AreEqual(DataType.Half, type); return (half*) data; } }
|
||||
public float* floatdata { get { Assert.AreEqual(DataType.Float, type);return (float*) data; } }
|
||||
public DataType type;
|
||||
public int elementCount;
|
||||
public int elementSize;
|
||||
|
||||
/// <inheritdoc/>
|
||||
public JobHandle fence { get { return m_ReadFence; } set { m_ReadFence = value; m_WriteFence = value; } }
|
||||
|
||||
/// <inheritdoc/>
|
||||
public JobHandle reuse { get { return m_WriteFence; } set { m_WriteFence = value; } }
|
||||
|
||||
public void Allocate(int numElement, DataType dataType, int alignment, Allocator allocator)
|
||||
{
|
||||
m_ReadFence = new JobHandle();
|
||||
m_WriteFence = new JobHandle();
|
||||
elementCount = numElement;
|
||||
elementSize = BarracudaArray.DataItemSize(dataType);
|
||||
type = dataType;
|
||||
Assert.IsTrue(data == null, "Please call ClearState() when freeing underlying memory.");
|
||||
Assert.IsTrue(alignment % elementSize == 0);
|
||||
data = UnsafeUtility.Malloc(elementCount * elementSize, alignment, allocator);
|
||||
Assert.IsTrue(data != null);
|
||||
}
|
||||
|
||||
public void ClearState()
|
||||
{
|
||||
m_ReadFence = new JobHandle();
|
||||
m_WriteFence = new JobHandle();
|
||||
elementCount = 0;
|
||||
elementSize = 0;
|
||||
type = DataType.Float;
|
||||
data = null;
|
||||
}
|
||||
|
||||
public FencedMemoryAlloc()
|
||||
{
|
||||
ClearState();
|
||||
}
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
} // namespace Barracuda
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 5071bbeadb81d034f827f20e95c52ee6
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 5211ff135b3b87f42be25a8505a28df7
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: d05274a6ecc82404abe715a573ea8e74
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,864 +0,0 @@
|
||||
// This is auto-generated -- do not modify directly
|
||||
using UnityEngine;
|
||||
using System;
|
||||
using Unity.Burst;
|
||||
using Unity.Burst.Intrinsics;
|
||||
using Unity.Collections;
|
||||
using Unity.Jobs;
|
||||
using Unity.Mathematics;
|
||||
using static Unity.Burst.Intrinsics.X86.Avx;
|
||||
using static Unity.Burst.Intrinsics.X86.Fma;
|
||||
using Unity.Collections.LowLevel.Unsafe;
|
||||
using Unity.Jobs.LowLevel.Unsafe;
|
||||
using FencingHelperMode = Unity.Barracuda.BurstSchedulingHelper.FencingHelperMode;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
public partial class BurstCPUOps
|
||||
{
|
||||
#region Dense/Conv jobs declaration for mode: _Full_Float
|
||||
|
||||
internal partial struct DepthwiseConv2DJobHelper
|
||||
{
|
||||
public JobHandle ScheduleXSBO(Tensor X, Tensor S, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
var pinX = Pin(X);
|
||||
var pinS = Pin(S);
|
||||
var pinB = Pin(B);
|
||||
var pinO = Pin(O, uploadCache: false);
|
||||
return ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
public JobHandle ScheduleXSBO(BurstTensorData pinX, BurstTensorData pinS, BurstTensorData pinB, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
bool AHalf = pinX.array.Type == DataType.Half;
|
||||
bool WHalf = pinS.array.Type == DataType.Half;
|
||||
bool BHalf = pinB.array.Type == DataType.Half;
|
||||
bool OHalf = pinO.array.Type == DataType.Half;
|
||||
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
||||
UnityEngine.Assertions.Assert.AreEqual(WHalf, BHalf);
|
||||
if (AHalf && WHalf)
|
||||
{
|
||||
var job = new DepthwiseConv2DJob_Full_Half();
|
||||
job.data = this;
|
||||
return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
else if (!AHalf && WHalf)
|
||||
{
|
||||
var job = new DepthwiseConv2DJob_ActAsFloat_WeightAsHalf();
|
||||
job.data = this;
|
||||
return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
else if (!AHalf && !WHalf)
|
||||
{
|
||||
var job = new DepthwiseConv2DJob_Full_Float();
|
||||
job.data = this;
|
||||
return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
else //if (AHalf && !WHalf)
|
||||
{
|
||||
UnityEngine.Assertions.Assert.IsTrue(false, "DepthwiseConv2DJob does not support activation as half while weights are floats.");
|
||||
return new JobHandle();
|
||||
}
|
||||
}
|
||||
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct DepthwiseConv2DJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXSBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
||||
public ReadOnlyMemResource S { get; set; } float* Sptr => S.ptrfloat;
|
||||
public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;
|
||||
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
||||
public DepthwiseConv2DJobHelper data;
|
||||
|
||||
const int unrollSize = 16;
|
||||
public void Execute(int y)
|
||||
{
|
||||
int accumulatorMemSize = data.kernelCount * sizeof(float);
|
||||
float* outputAccumulators = (float*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
|
||||
for (int n = 0; n < data.outBatch; ++n)
|
||||
for (int x = 0; x < data.outWidth; ++x)
|
||||
{
|
||||
// reset accumulators to 0
|
||||
UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
|
||||
|
||||
// gather X * K results in accumulators
|
||||
for (int dy = 0; dy < data.kernelHeight; ++dy)
|
||||
{
|
||||
int readY = y * data.strideY + dy - data.padY;
|
||||
if (readY < 0) continue;
|
||||
if (readY >= data.inHeight) continue;
|
||||
|
||||
for (int dx = 0; dx < data.kernelWidth; ++dx)
|
||||
{
|
||||
int readX = x * data.strideX + dx - data.padY;
|
||||
if (readX < 0) continue;
|
||||
if (readX >= data.inWidth) continue;
|
||||
|
||||
float* dst = outputAccumulators;
|
||||
float* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
|
||||
float* kernel = Sptr + dy * data.kernelStrideH + dx * data.kernelStrideW;
|
||||
|
||||
int k = 0;
|
||||
for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++, kernel++)
|
||||
*dst += (float)((*src) * (*kernel));
|
||||
for (; k < data.kernelCount; k++, src++, dst++, kernel++) // remainder of kernelCount loop
|
||||
*dst += (float)((*src) * (*kernel));
|
||||
}
|
||||
}
|
||||
|
||||
{ // write accumulators to memory and add bias
|
||||
int k = 0;
|
||||
float* src = outputAccumulators;
|
||||
float* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
|
||||
float* bias = Bptr;
|
||||
for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++, bias++)
|
||||
*dst = (float)((*src) + (*bias));
|
||||
for (; k < data.kernelCount; k++, src++, dst++, bias++) // remainder of kernelCount loop
|
||||
*dst = (float)((*src) + (*bias));
|
||||
}
|
||||
}
|
||||
|
||||
UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
|
||||
}
|
||||
}
|
||||
|
||||
internal partial struct Dense3JobHelper
|
||||
{
|
||||
public JobHandle ScheduleXSBO(Tensor X, Tensor S, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
var pinX = Pin(X);
|
||||
var pinS = Pin(S);
|
||||
var pinB = Pin(B);
|
||||
var pinO = Pin(O, uploadCache: false);
|
||||
return ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
public JobHandle ScheduleXSBO(BurstTensorData pinX, BurstTensorData pinS, BurstTensorData pinB, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
||||
{
|
||||
bool AHalf = pinX.array.Type == DataType.Half;
|
||||
bool WHalf = pinS.array.Type == DataType.Half;
|
||||
bool BHalf = pinB.array.Type == DataType.Half;
|
||||
bool OHalf = pinO.array.Type == DataType.Half;
|
||||
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
||||
UnityEngine.Assertions.Assert.AreEqual(WHalf, BHalf);
|
||||
if (AHalf && WHalf)
|
||||
{
|
||||
var job = new Dense3Job_Full_Half();
|
||||
job.data = this;
|
||||
return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
else if (!AHalf && WHalf)
|
||||
{
|
||||
var job = new Dense3Job_ActAsFloat_WeightAsHalf();
|
||||
job.data = this;
|
||||
return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
else if (!AHalf && !WHalf)
|
||||
{
|
||||
var job = new Dense3Job_Full_Float();
|
||||
job.data = this;
|
||||
return job.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
||||
}
|
||||
else //if (AHalf && !WHalf)
|
||||
{
|
||||
UnityEngine.Assertions.Assert.IsTrue(false, "Dense3Job does not support activation as half while weights are floats.");
|
||||
return new JobHandle();
|
||||
}
|
||||
}
|
||||
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct Dense3Job_Full_Float : IJobParallelFor, IJobResourceDeclarationXSBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
||||
public ReadOnlyMemResource S { get; set; } float* Sptr => S.ptrfloat;
|
||||
public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;
|
||||
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
||||
public Dense3JobHelper data;
|
||||
|
||||
public const int blockSize = 16;
|
||||
public void Execute(int threadID)
|
||||
{
|
||||
float* A = this.Xptr;
|
||||
float* B = this.Sptr;
|
||||
float* C = this.Bptr;
|
||||
float* S = this.Optr;
|
||||
int AM = data.AM;
|
||||
int BM = data.BM;
|
||||
int SM = data.SM;
|
||||
int AN = data.AN;
|
||||
int BN = data.BN;
|
||||
int SN = data.SN;
|
||||
|
||||
int dispatchThreadXY = data.dispatchThreadX * data.dispatchThreadY;
|
||||
|
||||
int batch = (threadID / dispatchThreadXY);
|
||||
int i = (threadID % dispatchThreadXY) % data.dispatchThreadX;
|
||||
int j = (threadID % dispatchThreadXY) / data.dispatchThreadX;
|
||||
|
||||
int batchOffSetA = (batch * AM * AN);
|
||||
int batchOffSetS = (batch * SM * SN);
|
||||
|
||||
int rowA = i * blockSize;
|
||||
int colB = j * blockSize;
|
||||
|
||||
unsafe
|
||||
{
|
||||
float* blockTempA = null;
|
||||
float* blockTempB = null;
|
||||
float* blockTempS = null;
|
||||
|
||||
float* blockS = S + rowA + SM * colB + batchOffSetS;
|
||||
int strideS = SM;
|
||||
|
||||
if (rowA + blockSize > SM || colB + blockSize > SN) // copy remainder of C into zero-padded block
|
||||
{
|
||||
blockTempS = AllocBlock(blockSize, blockSize);
|
||||
strideS = blockSize;
|
||||
blockS = blockTempS;
|
||||
}
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
blockS[x + strideS * y] = (float)((colB + y) < BN ? C[colB + y] : 0.0f);
|
||||
|
||||
for (int l = 0; l < AN; l += blockSize) // inner-loop
|
||||
{
|
||||
float* blockA = A + rowA + AM * l + batchOffSetA;
|
||||
float* blockB = B + l * BN + colB;
|
||||
int strideA = AM;
|
||||
int strideB = BN;
|
||||
|
||||
if (rowA + blockSize > AM || l + blockSize > AN) // copy remainder of A into zero-padded block
|
||||
{
|
||||
if (blockTempA == null)
|
||||
blockTempA = AllocBlock(blockSize, blockSize);
|
||||
strideA = blockSize;
|
||||
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
blockTempA[x + blockSize * y] = (float)(((rowA + x) < AM && (l + y < AN)) ? blockA[x + AM * y] : 0.0f);
|
||||
|
||||
blockA = blockTempA;
|
||||
}
|
||||
|
||||
if (colB + blockSize > BN || l + blockSize > BM) // copy remainder of B into zero-padded block
|
||||
{
|
||||
if (blockTempB == null)
|
||||
blockTempB = AllocBlock(blockSize, blockSize);
|
||||
strideB = blockSize;
|
||||
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
blockTempB[x + blockSize * y] = (float)(((colB + x) < BN && (l + y < BM)) ? blockB[x + BN * y] : 0.0f);
|
||||
|
||||
blockB = blockTempB;
|
||||
}
|
||||
|
||||
MultiplyBlockUnrollHx16(blockA, strideA, blockB, strideB, blockS, strideS);
|
||||
}
|
||||
|
||||
if (blockS == blockTempS) // copy back
|
||||
{
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
{
|
||||
if (((rowA + x) < SM) && ((colB + y) < SN))
|
||||
S[(rowA + x) + SM * (colB + y) + batchOffSetS] = blockTempS[x + blockSize * y];
|
||||
}
|
||||
}
|
||||
|
||||
FreeBlock(blockTempA);
|
||||
FreeBlock(blockTempB);
|
||||
FreeBlock(blockTempS);
|
||||
}
|
||||
}
|
||||
|
||||
static void MultiplyBlockUnrollHx16(float* Ap, int Astride, float* Bp, int Bstride, float* Sp, int Sstride)
|
||||
{
|
||||
for (int i = 0; i < blockSize; i++)
|
||||
{
|
||||
float sum0 = *(Sp + i + Sstride * 0);
|
||||
float sum1 = *(Sp + i + Sstride * 1);
|
||||
float sum2 = *(Sp + i + Sstride * 2);
|
||||
float sum3 = *(Sp + i + Sstride * 3);
|
||||
float sum4 = *(Sp + i + Sstride * 4);
|
||||
float sum5 = *(Sp + i + Sstride * 5);
|
||||
float sum6 = *(Sp + i + Sstride * 6);
|
||||
float sum7 = *(Sp + i + Sstride * 7);
|
||||
float sum8 = *(Sp + i + Sstride * 8);
|
||||
float sum9 = *(Sp + i + Sstride * 9);
|
||||
float sumA = *(Sp + i + Sstride * 10);
|
||||
float sumB = *(Sp + i + Sstride * 11);
|
||||
float sumC = *(Sp + i + Sstride * 12);
|
||||
float sumD = *(Sp + i + Sstride * 13);
|
||||
float sumE = *(Sp + i + Sstride * 14);
|
||||
float sumF = *(Sp + i + Sstride * 15);
|
||||
|
||||
for (int l = 0; l < blockSize; l++)
|
||||
{
|
||||
float A = *(Ap + i + Astride * l);
|
||||
|
||||
float B0 = *(Bp + l * Bstride + 0);
|
||||
float B1 = *(Bp + l * Bstride + 1);
|
||||
float B2 = *(Bp + l * Bstride + 2);
|
||||
float B3 = *(Bp + l * Bstride + 3);
|
||||
float B4 = *(Bp + l * Bstride + 4);
|
||||
float B5 = *(Bp + l * Bstride + 5);
|
||||
float B6 = *(Bp + l * Bstride + 6);
|
||||
float B7 = *(Bp + l * Bstride + 7);
|
||||
float B8 = *(Bp + l * Bstride + 8);
|
||||
float B9 = *(Bp + l * Bstride + 9);
|
||||
float BA = *(Bp + l * Bstride + 10);
|
||||
float BB = *(Bp + l * Bstride + 11);
|
||||
float BC = *(Bp + l * Bstride + 12);
|
||||
float BD = *(Bp + l * Bstride + 13);
|
||||
float BE = *(Bp + l * Bstride + 14);
|
||||
float BF = *(Bp + l * Bstride + 15);
|
||||
|
||||
|
||||
sum0 += A * B0;
|
||||
sum1 += A * B1;
|
||||
sum2 += A * B2;
|
||||
sum3 += A * B3;
|
||||
sum4 += A * B4;
|
||||
sum5 += A * B5;
|
||||
sum6 += A * B6;
|
||||
sum7 += A * B7;
|
||||
sum8 += A * B8;
|
||||
sum9 += A * B9;
|
||||
sumA += A * BA;
|
||||
sumB += A * BB;
|
||||
sumC += A * BC;
|
||||
sumD += A * BD;
|
||||
sumE += A * BE;
|
||||
sumF += A * BF;
|
||||
}
|
||||
|
||||
*(Sp + i + Sstride * 0 ) = (float)(sum0);
|
||||
*(Sp + i + Sstride * 1 ) = (float)(sum1);
|
||||
*(Sp + i + Sstride * 2 ) = (float)(sum2);
|
||||
*(Sp + i + Sstride * 3 ) = (float)(sum3);
|
||||
*(Sp + i + Sstride * 4 ) = (float)(sum4);
|
||||
*(Sp + i + Sstride * 5 ) = (float)(sum5);
|
||||
*(Sp + i + Sstride * 6 ) = (float)(sum6);
|
||||
*(Sp + i + Sstride * 7 ) = (float)(sum7);
|
||||
*(Sp + i + Sstride * 8 ) = (float)(sum8);
|
||||
*(Sp + i + Sstride * 9 ) = (float)(sum9);
|
||||
*(Sp + i + Sstride * 10) = (float)(sumA);
|
||||
*(Sp + i + Sstride * 11) = (float)(sumB);
|
||||
*(Sp + i + Sstride * 12) = (float)(sumC);
|
||||
*(Sp + i + Sstride * 13) = (float)(sumD);
|
||||
*(Sp + i + Sstride * 14) = (float)(sumE);
|
||||
*(Sp + i + Sstride * 15) = (float)(sumF);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endregion
|
||||
#region Dense/Conv jobs declaration for mode: _ActAsFloat_WeightAsHalf
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct DepthwiseConv2DJob_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXSBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
||||
public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
|
||||
public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
||||
public DepthwiseConv2DJobHelper data;
|
||||
|
||||
const int unrollSize = 16;
|
||||
public void Execute(int y)
|
||||
{
|
||||
int accumulatorMemSize = data.kernelCount * sizeof(float);
|
||||
float* outputAccumulators = (float*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
|
||||
for (int n = 0; n < data.outBatch; ++n)
|
||||
for (int x = 0; x < data.outWidth; ++x)
|
||||
{
|
||||
// reset accumulators to 0
|
||||
UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
|
||||
|
||||
// gather X * K results in accumulators
|
||||
for (int dy = 0; dy < data.kernelHeight; ++dy)
|
||||
{
|
||||
int readY = y * data.strideY + dy - data.padY;
|
||||
if (readY < 0) continue;
|
||||
if (readY >= data.inHeight) continue;
|
||||
|
||||
for (int dx = 0; dx < data.kernelWidth; ++dx)
|
||||
{
|
||||
int readX = x * data.strideX + dx - data.padY;
|
||||
if (readX < 0) continue;
|
||||
if (readX >= data.inWidth) continue;
|
||||
|
||||
float* dst = outputAccumulators;
|
||||
float* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
|
||||
half* kernel = Sptr + dy * data.kernelStrideH + dx * data.kernelStrideW;
|
||||
|
||||
int k = 0;
|
||||
for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++, kernel++)
|
||||
*dst += (float)((*src) * (*kernel));
|
||||
for (; k < data.kernelCount; k++, src++, dst++, kernel++) // remainder of kernelCount loop
|
||||
*dst += (float)((*src) * (*kernel));
|
||||
}
|
||||
}
|
||||
|
||||
{ // write accumulators to memory and add bias
|
||||
int k = 0;
|
||||
float* src = outputAccumulators;
|
||||
float* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
|
||||
half* bias = Bptr;
|
||||
for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++, bias++)
|
||||
*dst = (float)((*src) + (*bias));
|
||||
for (; k < data.kernelCount; k++, src++, dst++, bias++) // remainder of kernelCount loop
|
||||
*dst = (float)((*src) + (*bias));
|
||||
}
|
||||
}
|
||||
|
||||
UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct Dense3Job_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXSBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
||||
public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
|
||||
public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
||||
public Dense3JobHelper data;
|
||||
|
||||
public const int blockSize = 16;
|
||||
public void Execute(int threadID)
|
||||
{
|
||||
float* A = this.Xptr;
|
||||
half* B = this.Sptr;
|
||||
half* C = this.Bptr;
|
||||
float* S = this.Optr;
|
||||
int AM = data.AM;
|
||||
int BM = data.BM;
|
||||
int SM = data.SM;
|
||||
int AN = data.AN;
|
||||
int BN = data.BN;
|
||||
int SN = data.SN;
|
||||
|
||||
int dispatchThreadXY = data.dispatchThreadX * data.dispatchThreadY;
|
||||
|
||||
int batch = (threadID / dispatchThreadXY);
|
||||
int i = (threadID % dispatchThreadXY) % data.dispatchThreadX;
|
||||
int j = (threadID % dispatchThreadXY) / data.dispatchThreadX;
|
||||
|
||||
int batchOffSetA = (batch * AM * AN);
|
||||
int batchOffSetS = (batch * SM * SN);
|
||||
|
||||
int rowA = i * blockSize;
|
||||
int colB = j * blockSize;
|
||||
|
||||
unsafe
|
||||
{
|
||||
float* blockTempA = null;
|
||||
half* blockTempB = null;
|
||||
float* blockTempS = null;
|
||||
|
||||
float* blockS = S + rowA + SM * colB + batchOffSetS;
|
||||
int strideS = SM;
|
||||
|
||||
if (rowA + blockSize > SM || colB + blockSize > SN) // copy remainder of C into zero-padded block
|
||||
{
|
||||
blockTempS = AllocBlock(blockSize, blockSize);
|
||||
strideS = blockSize;
|
||||
blockS = blockTempS;
|
||||
}
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
blockS[x + strideS * y] = (float)((colB + y) < BN ? C[colB + y] : 0.0f);
|
||||
|
||||
for (int l = 0; l < AN; l += blockSize) // inner-loop
|
||||
{
|
||||
float* blockA = A + rowA + AM * l + batchOffSetA;
|
||||
half* blockB = B + l * BN + colB;
|
||||
int strideA = AM;
|
||||
int strideB = BN;
|
||||
|
||||
if (rowA + blockSize > AM || l + blockSize > AN) // copy remainder of A into zero-padded block
|
||||
{
|
||||
if (blockTempA == null)
|
||||
blockTempA = AllocBlock(blockSize, blockSize);
|
||||
strideA = blockSize;
|
||||
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
blockTempA[x + blockSize * y] = (float)(((rowA + x) < AM && (l + y < AN)) ? blockA[x + AM * y] : 0.0f);
|
||||
|
||||
blockA = blockTempA;
|
||||
}
|
||||
|
||||
if (colB + blockSize > BN || l + blockSize > BM) // copy remainder of B into zero-padded block
|
||||
{
|
||||
if (blockTempB == null)
|
||||
blockTempB = AllocBlockHalf(blockSize, blockSize);
|
||||
strideB = blockSize;
|
||||
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
blockTempB[x + blockSize * y] = (half)(((colB + x) < BN && (l + y < BM)) ? blockB[x + BN * y] : 0.0f);
|
||||
|
||||
blockB = blockTempB;
|
||||
}
|
||||
|
||||
MultiplyBlockUnrollHx16(blockA, strideA, blockB, strideB, blockS, strideS);
|
||||
}
|
||||
|
||||
if (blockS == blockTempS) // copy back
|
||||
{
|
||||
for (int y = 0; y < blockSize; y++)
|
||||
for (int x = 0; x < blockSize; x++)
|
||||
{
|
||||
if (((rowA + x) < SM) && ((colB + y) < SN))
|
||||
S[(rowA + x) + SM * (colB + y) + batchOffSetS] = blockTempS[x + blockSize * y];
|
||||
}
|
||||
}
|
||||
|
||||
FreeBlock(blockTempA);
|
||||
FreeBlock(blockTempB);
|
||||
FreeBlock(blockTempS);
|
||||
}
|
||||
}
|
||||
|
||||
static void MultiplyBlockUnrollHx16(float* Ap, int Astride, half* Bp, int Bstride, float* Sp, int Sstride)
|
||||
{
|
||||
for (int i = 0; i < blockSize; i++)
|
||||
{
|
||||
float sum0 = *(Sp + i + Sstride * 0);
|
||||
float sum1 = *(Sp + i + Sstride * 1);
|
||||
float sum2 = *(Sp + i + Sstride * 2);
|
||||
float sum3 = *(Sp + i + Sstride * 3);
|
||||
float sum4 = *(Sp + i + Sstride * 4);
|
||||
float sum5 = *(Sp + i + Sstride * 5);
|
||||
float sum6 = *(Sp + i + Sstride * 6);
|
||||
float sum7 = *(Sp + i + Sstride * 7);
|
||||
float sum8 = *(Sp + i + Sstride * 8);
|
||||
float sum9 = *(Sp + i + Sstride * 9);
|
||||
float sumA = *(Sp + i + Sstride * 10);
|
||||
float sumB = *(Sp + i + Sstride * 11);
|
||||
float sumC = *(Sp + i + Sstride * 12);
|
||||
float sumD = *(Sp + i + Sstride * 13);
|
||||
float sumE = *(Sp + i + Sstride * 14);
|
||||
float sumF = *(Sp + i + Sstride * 15);
|
||||
|
||||
for (int l = 0; l < blockSize; l++)
|
||||
{
|
||||
float A = *(Ap + i + Astride * l);
|
||||
|
||||
float B0 = *(Bp + l * Bstride + 0);
|
||||
float B1 = *(Bp + l * Bstride + 1);
|
||||
float B2 = *(Bp + l * Bstride + 2);
|
||||
float B3 = *(Bp + l * Bstride + 3);
|
||||
float B4 = *(Bp + l * Bstride + 4);
|
||||
float B5 = *(Bp + l * Bstride + 5);
|
||||
float B6 = *(Bp + l * Bstride + 6);
|
||||
float B7 = *(Bp + l * Bstride + 7);
|
||||
float B8 = *(Bp + l * Bstride + 8);
|
||||
float B9 = *(Bp + l * Bstride + 9);
|
||||
float BA = *(Bp + l * Bstride + 10);
|
||||
float BB = *(Bp + l * Bstride + 11);
|
||||
float BC = *(Bp + l * Bstride + 12);
|
||||
float BD = *(Bp + l * Bstride + 13);
|
||||
float BE = *(Bp + l * Bstride + 14);
|
||||
float BF = *(Bp + l * Bstride + 15);
|
||||
|
||||
|
||||
sum0 += A * B0;
|
||||
sum1 += A * B1;
|
||||
sum2 += A * B2;
|
||||
sum3 += A * B3;
|
||||
sum4 += A * B4;
|
||||
sum5 += A * B5;
|
||||
sum6 += A * B6;
|
||||
sum7 += A * B7;
|
||||
sum8 += A * B8;
|
||||
sum9 += A * B9;
|
||||
sumA += A * BA;
|
||||
sumB += A * BB;
|
||||
sumC += A * BC;
|
||||
sumD += A * BD;
|
||||
sumE += A * BE;
|
||||
sumF += A * BF;
|
||||
}
|
||||
|
||||
*(Sp + i + Sstride * 0 ) = (float)(sum0);
|
||||
*(Sp + i + Sstride * 1 ) = (float)(sum1);
|
||||
*(Sp + i + Sstride * 2 ) = (float)(sum2);
|
||||
*(Sp + i + Sstride * 3 ) = (float)(sum3);
|
||||
*(Sp + i + Sstride * 4 ) = (float)(sum4);
|
||||
*(Sp + i + Sstride * 5 ) = (float)(sum5);
|
||||
*(Sp + i + Sstride * 6 ) = (float)(sum6);
|
||||
*(Sp + i + Sstride * 7 ) = (float)(sum7);
|
||||
*(Sp + i + Sstride * 8 ) = (float)(sum8);
|
||||
*(Sp + i + Sstride * 9 ) = (float)(sum9);
|
||||
*(Sp + i + Sstride * 10) = (float)(sumA);
|
||||
*(Sp + i + Sstride * 11) = (float)(sumB);
|
||||
*(Sp + i + Sstride * 12) = (float)(sumC);
|
||||
*(Sp + i + Sstride * 13) = (float)(sumD);
|
||||
*(Sp + i + Sstride * 14) = (float)(sumE);
|
||||
*(Sp + i + Sstride * 15) = (float)(sumF);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endregion
|
||||
#region Dense/Conv jobs declaration for mode: _Full_Half
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct DepthwiseConv2DJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXSBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
|
||||
public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public DepthwiseConv2DJobHelper data;
|
||||
|
||||
const int unrollSize = 16;
|
||||
public void Execute(int y)
|
||||
{
|
||||
int accumulatorMemSize = data.kernelCount * sizeof(half);
|
||||
half* outputAccumulators = (half*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
|
||||
for (int n = 0; n < data.outBatch; ++n)
|
||||
for (int x = 0; x < data.outWidth; ++x)
|
||||
{
|
||||
// reset accumulators to 0
|
||||
UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
|
||||
|
||||
// gather X * K results in accumulators
|
||||
for (int dy = 0; dy < data.kernelHeight; ++dy)
|
||||
{
|
||||
int readY = y * data.strideY + dy - data.padY;
|
||||
if (readY < 0) continue;
|
||||
if (readY >= data.inHeight) continue;
|
||||
|
||||
for (int dx = 0; dx < data.kernelWidth; ++dx)
|
||||
{
|
||||
int readX = x * data.strideX + dx - data.padY;
|
||||
if (readX < 0) continue;
|
||||
if (readX >= data.inWidth) continue;
|
||||
|
||||
half* dst = outputAccumulators;
|
||||
half* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
|
||||
half* kernel = Sptr + dy * data.kernelStrideH + dx * data.kernelStrideW;
|
||||
|
||||
int k = 0;
|
||||
for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++, kernel++)
|
||||
*dst += (half)((*src) * (*kernel));
|
||||
for (; k < data.kernelCount; k++, src++, dst++, kernel++) // remainder of kernelCount loop
|
||||
*dst += (half)((*src) * (*kernel));
|
||||
}
|
||||
}
|
||||
|
||||
{ // write accumulators to memory and add bias
|
||||
int k = 0;
|
||||
half* src = outputAccumulators;
|
||||
half* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
|
||||
half* bias = Bptr;
|
||||
for (; k < data.kernelCount - unrollSize + 1; k += unrollSize) // unroll of kernelCount loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++, bias++)
|
||||
*dst = (half)((*src) + (*bias));
|
||||
for (; k < data.kernelCount; k++, src++, dst++, bias++) // remainder of kernelCount loop
|
||||
*dst = (half)((*src) + (*bias));
|
||||
}
|
||||
}
|
||||
|
||||
UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct Dense3Job_Full_Half : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
    public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
    public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
    public Dense3JobHelper data;

    public const int blockSize = 16;

    // Blocked half-precision GEMM with bias: one thread computes one
    // blockSize x blockSize tile of S = A*B + C for one batch.
    // Tiles that overhang a matrix edge are staged through zero-padded
    // scratch blocks so the inner kernel never branches on bounds.
    public void Execute(int threadID)
    {
        half* A = this.Xptr;
        half* B = this.Sptr;
        half* C = this.Bptr;
        half* S = this.Optr;
        int AM = data.AM;
        int BM = data.BM;
        int SM = data.SM;
        int AN = data.AN;
        int BN = data.BN;
        int SN = data.SN;

        // Map the flat thread id onto (batch, tile-row, tile-column).
        int threadsPerBatch = data.dispatchThreadX * data.dispatchThreadY;

        int batch = (threadID / threadsPerBatch);
        int i = (threadID % threadsPerBatch) % data.dispatchThreadX;
        int j = (threadID % threadsPerBatch) / data.dispatchThreadX;

        int batchOffSetA = (batch * AM * AN);
        int batchOffSetS = (batch * SM * SN);

        int rowA = i * blockSize;
        int colB = j * blockSize;

        unsafe
        {
            half* paddedA = null;
            half* paddedB = null;
            half* paddedS = null;

            // Default: accumulate directly into the output tile (column-major, stride SM).
            half* blockS = S + rowA + SM * colB + batchOffSetS;
            int strideS = SM;

            if (rowA + blockSize > SM || colB + blockSize > SN) // tile overhangs S -> stage in scratch
            {
                paddedS = AllocBlockHalf(blockSize, blockSize);
                strideS = blockSize;
                blockS = paddedS;
            }
            // Seed the accumulator tile with the (broadcast) bias; zero where past BN.
            for (int y = 0; y < blockSize; y++)
            for (int x = 0; x < blockSize; x++)
                blockS[x + strideS * y] = (half)((colB + y) < BN ? C[colB + y] : 0.0f);

            for (int l = 0; l < AN; l += blockSize) // march along the shared dimension
            {
                half* blockA = A + rowA + AM * l + batchOffSetA;
                half* blockB = B + l * BN + colB;
                int strideA = AM;
                int strideB = BN;

                if (rowA + blockSize > AM || l + blockSize > AN) // A tile overhangs -> zero-padded copy
                {
                    if (paddedA == null)
                        paddedA = AllocBlockHalf(blockSize, blockSize);
                    strideA = blockSize;

                    for (int y = 0; y < blockSize; y++)
                    for (int x = 0; x < blockSize; x++)
                        paddedA[x + blockSize * y] = (half)(((rowA + x) < AM && (l + y < AN)) ? blockA[x + AM * y] : 0.0f);

                    blockA = paddedA;
                }

                if (colB + blockSize > BN || l + blockSize > BM) // B tile overhangs -> zero-padded copy
                {
                    if (paddedB == null)
                        paddedB = AllocBlockHalf(blockSize, blockSize);
                    strideB = blockSize;

                    for (int y = 0; y < blockSize; y++)
                    for (int x = 0; x < blockSize; x++)
                        paddedB[x + blockSize * y] = (half)(((colB + x) < BN && (l + y < BM)) ? blockB[x + BN * y] : 0.0f);

                    blockB = paddedB;
                }

                MultiplyBlockUnrollHx16(blockA, strideA, blockB, strideB, blockS, strideS);
            }

            if (blockS == paddedS) // result was staged: copy the in-bounds part back to S
            {
                for (int y = 0; y < blockSize; y++)
                for (int x = 0; x < blockSize; x++)
                {
                    if (((rowA + x) < SM) && ((colB + y) < SN))
                        S[(rowA + x) + SM * (colB + y) + batchOffSetS] = paddedS[x + blockSize * y];
                }
            }

            FreeBlock(paddedA);
            FreeBlock(paddedB);
            FreeBlock(paddedS);
        }
    }

    // Inner kernel: S[i, 0..15] += A[i, k] * B[k, 0..15] for a 16-wide tile,
    // with the 16 column accumulators fully unrolled into float registers.
    static void MultiplyBlockUnrollHx16(half* Ap, int Astride, half* Bp, int Bstride, half* Sp, int Sstride)
    {
        for (int i = 0; i < blockSize; i++)
        {
            float sum0 = *(Sp + i + Sstride * 0);
            float sum1 = *(Sp + i + Sstride * 1);
            float sum2 = *(Sp + i + Sstride * 2);
            float sum3 = *(Sp + i + Sstride * 3);
            float sum4 = *(Sp + i + Sstride * 4);
            float sum5 = *(Sp + i + Sstride * 5);
            float sum6 = *(Sp + i + Sstride * 6);
            float sum7 = *(Sp + i + Sstride * 7);
            float sum8 = *(Sp + i + Sstride * 8);
            float sum9 = *(Sp + i + Sstride * 9);
            float sumA = *(Sp + i + Sstride * 10);
            float sumB = *(Sp + i + Sstride * 11);
            float sumC = *(Sp + i + Sstride * 12);
            float sumD = *(Sp + i + Sstride * 13);
            float sumE = *(Sp + i + Sstride * 14);
            float sumF = *(Sp + i + Sstride * 15);

            for (int k = 0; k < blockSize; k++)
            {
                float a = *(Ap + i + Astride * k);

                float b0 = *(Bp + k * Bstride + 0);
                float b1 = *(Bp + k * Bstride + 1);
                float b2 = *(Bp + k * Bstride + 2);
                float b3 = *(Bp + k * Bstride + 3);
                float b4 = *(Bp + k * Bstride + 4);
                float b5 = *(Bp + k * Bstride + 5);
                float b6 = *(Bp + k * Bstride + 6);
                float b7 = *(Bp + k * Bstride + 7);
                float b8 = *(Bp + k * Bstride + 8);
                float b9 = *(Bp + k * Bstride + 9);
                float bA = *(Bp + k * Bstride + 10);
                float bB = *(Bp + k * Bstride + 11);
                float bC = *(Bp + k * Bstride + 12);
                float bD = *(Bp + k * Bstride + 13);
                float bE = *(Bp + k * Bstride + 14);
                float bF = *(Bp + k * Bstride + 15);

                sum0 += a * b0;
                sum1 += a * b1;
                sum2 += a * b2;
                sum3 += a * b3;
                sum4 += a * b4;
                sum5 += a * b5;
                sum6 += a * b6;
                sum7 += a * b7;
                sum8 += a * b8;
                sum9 += a * b9;
                sumA += a * bA;
                sumB += a * bB;
                sumC += a * bC;
                sumD += a * bD;
                sumE += a * bE;
                sumF += a * bF;
            }

            *(Sp + i + Sstride * 0 ) = (half)(sum0);
            *(Sp + i + Sstride * 1 ) = (half)(sum1);
            *(Sp + i + Sstride * 2 ) = (half)(sum2);
            *(Sp + i + Sstride * 3 ) = (half)(sum3);
            *(Sp + i + Sstride * 4 ) = (half)(sum4);
            *(Sp + i + Sstride * 5 ) = (half)(sum5);
            *(Sp + i + Sstride * 6 ) = (half)(sum6);
            *(Sp + i + Sstride * 7 ) = (half)(sum7);
            *(Sp + i + Sstride * 8 ) = (half)(sum8);
            *(Sp + i + Sstride * 9 ) = (half)(sum9);
            *(Sp + i + Sstride * 10) = (half)(sumA);
            *(Sp + i + Sstride * 11) = (half)(sumB);
            *(Sp + i + Sstride * 12) = (half)(sumC);
            *(Sp + i + Sstride * 13) = (half)(sumD);
            *(Sp + i + Sstride * 14) = (half)(sumE);
            *(Sp + i + Sstride * 15) = (half)(sumF);
        }
    }
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 417ca864422a2384ab3013114bf9f845
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 30d1de61c64693a4895a66fecf45a004
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,890 +0,0 @@
|
||||
// This is auto-generated -- do not modify directly
|
||||
using UnityEngine;
|
||||
using System;
|
||||
using Unity.Burst;
|
||||
using Unity.Burst.Intrinsics;
|
||||
using Unity.Collections;
|
||||
using Unity.Jobs;
|
||||
using Unity.Mathematics;
|
||||
using static Unity.Burst.Intrinsics.X86.Avx;
|
||||
using static Unity.Burst.Intrinsics.X86.Fma;
|
||||
using Unity.Collections.LowLevel.Unsafe;
|
||||
using Unity.Jobs.LowLevel.Unsafe;
|
||||
using FencingHelperMode = Unity.Barracuda.BurstSchedulingHelper.FencingHelperMode;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
public partial class BurstCPUOps
|
||||
{
|
||||
#region Reduce jobs declaration for mode: _Full_Float
|
||||
|
||||
internal partial struct ReduceMaxJobHelper
{
    // Dispatches the float or half ReduceMax job variant depending on the
    // data type of the pinned input; input and output precision must match.
    public JobHandle ScheduleXO(BurstTensorData pinX, FencedMemoryAlloc pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool inputIsHalf = pinX.array.Type == DataType.Half;
        bool outputIsHalf = pinO.type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(inputIsHalf, outputIsHalf);
        if (!inputIsHalf)
        {
            var floatJob = new ReduceMaxJob_Full_Float { data = this };
            return floatJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var halfJob = new ReduceMaxJob_Full_Half { data = this };
        return halfJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
internal partial struct ReduceMaxJobHelper
{
    // Convenience overload: pins both tensors, then forwards to the
    // BurstTensorData overload below.
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        return ScheduleXO(Pin(X), Pin(O, uploadCache: false), arrayLength, innerBatchCount, fencingMode);
    }
    // Dispatches the float or half ReduceMax job variant depending on the
    // pinned data type; input and output precision must match.
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool inputIsHalf = pinX.array.Type == DataType.Half;
        bool outputIsHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(inputIsHalf, outputIsHalf);
        if (!inputIsHalf)
        {
            var floatJob = new ReduceMaxJob_Full_Float { data = this };
            return floatJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var halfJob = new ReduceMaxJob_Full_Half { data = this };
        return halfJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ReduceMaxJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public ReduceMaxJobHelper data;

    // One invocation reduces a single (outer, inner) slot: the max over
    // `reduceDim` elements strided by `offsetReduce` in X.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = i / stride;
        int readBase = outer * stride * data.reduceDim + inner;

        float best = float.MinValue;
        for (int z = 0; z < data.reduceDim; ++z)
        {
            float candidate = Xptr[readBase + z * stride];
            best = math.max(best, candidate);
        }
        Optr[outer * stride + inner] = best;
    }
}
|
||||
|
||||
internal partial struct ReduceSumJobHelper
{
    // Convenience overload: pins both tensors, then forwards to the
    // BurstTensorData overload below.
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        return ScheduleXO(Pin(X), Pin(O, uploadCache: false), arrayLength, innerBatchCount, fencingMode);
    }
    // Dispatches the float or half ReduceSum job variant depending on the
    // pinned data type; input and output precision must match.
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool inputIsHalf = pinX.array.Type == DataType.Half;
        bool outputIsHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(inputIsHalf, outputIsHalf);
        if (!inputIsHalf)
        {
            var floatJob = new ReduceSumJob_Full_Float { data = this };
            return floatJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var halfJob = new ReduceSumJob_Full_Half { data = this };
        return halfJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ReduceSumJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public ReduceSumJobHelper data;

    // One invocation reduces a single (outer, inner) slot: the sum over
    // `reduceDim` elements strided by `offsetReduce` in X.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = i / stride;
        int readBase = outer * stride * data.reduceDim + inner;

        float total = 0;
        for (int z = 0; z < data.reduceDim; ++z)
            total += Xptr[readBase + z * stride];
        Optr[outer * stride + inner] = total;
    }
}
|
||||
|
||||
internal partial struct ReduceMeanJobHelper
{
    // Convenience overload: pins both tensors, then forwards to the
    // BurstTensorData overload below.
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        return ScheduleXO(Pin(X), Pin(O, uploadCache: false), arrayLength, innerBatchCount, fencingMode);
    }
    // Dispatches the float or half ReduceMean job variant depending on the
    // pinned data type; input and output precision must match.
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool inputIsHalf = pinX.array.Type == DataType.Half;
        bool outputIsHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(inputIsHalf, outputIsHalf);
        if (!inputIsHalf)
        {
            var floatJob = new ReduceMeanJob_Full_Float { data = this };
            return floatJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var halfJob = new ReduceMeanJob_Full_Half { data = this };
        return halfJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ReduceMeanJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public ReduceMeanJobHelper data;

    // One invocation reduces a single (outer, inner) slot: the arithmetic
    // mean over `reduceDim` elements strided by `offsetReduce` in X.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = i / stride;
        int readBase = outer * stride * data.reduceDim + inner;

        float total = 0;
        for (int z = 0; z < data.reduceDim; ++z)
            total += Xptr[readBase + z * stride];
        Optr[outer * stride + inner] = total / (float)data.reduceDim;
    }
}
|
||||
|
||||
internal partial struct ExpBiasReduceJobHelper
{
    // Dispatches the ExpBiasReduce job variant matching the activation and
    // weight precisions. Half activations over float weights is the one
    // combination with no generated kernel and asserts instead.
    public JobHandle ScheduleXBO(BurstTensorData pinX, FencedMemoryAlloc pinB, FencedMemoryAlloc pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool actHalf = pinX.array.Type == DataType.Half;
        bool weightsHalf = pinB.type == DataType.Half;
        bool outHalf = pinO.type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(actHalf, outHalf);
        if (actHalf && !weightsHalf)
        {
            UnityEngine.Assertions.Assert.IsTrue(false, "ExpBiasReduceJob does not support activation as half while weights are floats.");
            return new JobHandle();
        }
        if (actHalf)
        {
            var halfJob = new ExpBiasReduceJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        if (weightsHalf)
        {
            var mixedJob = new ExpBiasReduceJob_ActAsFloat_WeightAsHalf { data = this };
            return mixedJob.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var floatJob = new ExpBiasReduceJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ExpBiasReduceJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public ExpBiasReduceJobHelper data;

    // One invocation computes sum_z exp(X[...,z,...] - bias) for one
    // (outer, inner) slot; the bias is constant across the reduced axis.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = i / stride;
        int readBase = outer * stride * data.reduceDim + inner;
        float bias = Bptr[outer * stride + inner]; // invariant over z

        float accum = 0.0f;
        for (int z = 0; z < data.reduceDim; ++z)
            accum += math.exp(Xptr[readBase + z * stride] - bias);
        Optr[outer * stride + inner] = accum;
    }
}
|
||||
|
||||
internal partial struct SoftmaxEndJobHelper
{
    // Dispatches the SoftmaxEnd job variant matching the activation and
    // weight precisions. Half activations over float weights is the one
    // combination with no generated kernel and asserts instead.
    public JobHandle ScheduleXSBO(BurstTensorData pinX, FencedMemoryAlloc pinS, FencedMemoryAlloc pinB, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool actHalf = pinX.array.Type == DataType.Half;
        bool weightsHalf = pinS.type == DataType.Half;
        bool biasHalf = pinB.type == DataType.Half;
        bool outHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(actHalf, outHalf);
        UnityEngine.Assertions.Assert.AreEqual(weightsHalf, biasHalf);
        if (actHalf && !weightsHalf)
        {
            UnityEngine.Assertions.Assert.IsTrue(false, "SoftmaxEndJob does not support activation as half while weights are floats.");
            return new JobHandle();
        }
        if (actHalf)
        {
            var halfJob = new SoftmaxEndJob_Full_Half { data = this };
            return halfJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        if (weightsHalf)
        {
            var mixedJob = new SoftmaxEndJob_ActAsFloat_WeightAsHalf { data = this };
            return mixedJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var floatJob = new SoftmaxEndJob_Full_Float { data = this };
        return floatJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct SoftmaxEndJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource S { get; set; } float* Sptr => S.ptrfloat;
    public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public SoftmaxEndJobHelper data;

    // Finishes softmax per element: O[i] = exp(X[i] - max) / sum, where the
    // per-slice max (B) and sum (S) were computed by the preceding reduce jobs.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = (i / stride) / data.reduceDim;
        int reduced = outer * stride + inner; // index into the reduced max/sum buffers

        Optr[i] = math.exp(Xptr[i] - Bptr[reduced]) / Sptr[reduced];
    }
}
|
||||
|
||||
internal partial struct LogSoftmaxEndJobHelper
{
    // Dispatches the LogSoftmaxEnd job variant matching the activation and
    // weight precisions. Half activations over float weights is the one
    // combination with no generated kernel and asserts instead.
    public JobHandle ScheduleXSBO(BurstTensorData pinX, FencedMemoryAlloc pinS, FencedMemoryAlloc pinB, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool actHalf = pinX.array.Type == DataType.Half;
        bool weightsHalf = pinS.type == DataType.Half;
        bool biasHalf = pinB.type == DataType.Half;
        bool outHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(actHalf, outHalf);
        UnityEngine.Assertions.Assert.AreEqual(weightsHalf, biasHalf);
        if (actHalf && !weightsHalf)
        {
            UnityEngine.Assertions.Assert.IsTrue(false, "LogSoftmaxEndJob does not support activation as half while weights are floats.");
            return new JobHandle();
        }
        if (actHalf)
        {
            var halfJob = new LogSoftmaxEndJob_Full_Half { data = this };
            return halfJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        if (weightsHalf)
        {
            var mixedJob = new LogSoftmaxEndJob_ActAsFloat_WeightAsHalf { data = this };
            return mixedJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var floatJob = new LogSoftmaxEndJob_Full_Float { data = this };
        return floatJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct LogSoftmaxEndJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource S { get; set; } float* Sptr => S.ptrfloat;
    public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public LogSoftmaxEndJobHelper data;

    // Finishes log-softmax per element: O[i] = (X[i] - max) - log(sum), where
    // the per-slice max (B) and sum (S) come from the preceding reduce jobs.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = (i / stride) / data.reduceDim;
        int reduced = outer * stride + inner; // index into the reduced max/sum buffers

        Optr[i] = (Xptr[i] - Bptr[reduced]) - math.log(Sptr[reduced]);
    }
}
|
||||
|
||||
internal partial struct MaxPool2DJobHelper
{
    // Convenience overload: pins both tensors, then forwards to the
    // BurstTensorData overload below.
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        return ScheduleXO(Pin(X), Pin(O, uploadCache: false), arrayLength, innerBatchCount, fencingMode);
    }
    // Dispatches the float or half MaxPool2D job variant depending on the
    // pinned data type; input and output precision must match.
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool inputIsHalf = pinX.array.Type == DataType.Half;
        bool outputIsHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(inputIsHalf, outputIsHalf);
        if (!inputIsHalf)
        {
            var floatJob = new MaxPool2DJob_Full_Float { data = this };
            return floatJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var halfJob = new MaxPool2DJob_Full_Half { data = this };
        return halfJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct MaxPool2DJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public MaxPool2DJobHelper data;

    const int unrollSize = 16;

    // Computes one output row `y` of 2D max pooling for every batch and output
    // column. Per-channel maxima are gathered in a temporary accumulator
    // buffer and then written to O; kernel taps outside X are skipped.
    public void Execute(int y)
    {
        int accumulatorMemSize = data.inChannels * sizeof(float);
        float* outputAccumulators = (float*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
        for (int n = 0; n < data.outBatch; ++n)
        for (int x = 0; x < data.outWidth; ++x)
        {
            bool firstNotRejectedPixelInKernel = true;
            // gather max results in accumulators
            for (int dy = 0; dy < data.kernelHeight; ++dy)
            {
                int readY = y * data.strideY + dy - data.padY;
                if (readY < 0) continue;
                if (readY >= data.inHeight) continue;

                for (int dx = 0; dx < data.kernelWidth; ++dx)
                {
                    // BUGFIX: the horizontal offset previously subtracted data.padY;
                    // the X axis must use the horizontal padding data.padX
                    // (asymmetric padding produced shifted reads before).
                    int readX = x * data.strideX + dx - data.padX;
                    if (readX < 0) continue;
                    if (readX >= data.inWidth) continue;

                    float* dst = outputAccumulators;
                    float* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;

                    int k = 0;
                    if (firstNotRejectedPixelInKernel) // first pass, write-through
                    {
                        for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                            for (int q = 0; q < unrollSize; q++, src++, dst++)
                                *dst = *src;
                        for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                            *dst = *src;
                    }
                    else
                    {
                        for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                            for (int q = 0; q < unrollSize; q++, src++, dst++)
                                *dst = (*dst) > (*src) ? (*dst) : (*src);
                        for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                            *dst = (*dst) > (*src) ? (*dst) : (*src);
                    }
                    firstNotRejectedPixelInKernel = false;
                }
            }

            // safety net, if kernel was completely outside of X
            // fill with padding_value (0) to avoid uninitialized memory
            if (firstNotRejectedPixelInKernel)
                UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);

            { // write accumulators to memory
                int k = 0;
                float* src = outputAccumulators;
                float* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
                for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                    for (int q = 0; q < unrollSize; q++, src++, dst++)
                        *dst = *src;
                for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                    *dst = *src;
            }
        }

        UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
    }
}
|
||||
|
||||
internal partial struct AvgPool2DJobHelper
{
    // Convenience overload: pins both tensors, then forwards to the
    // BurstTensorData overload below.
    public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        return ScheduleXO(Pin(X), Pin(O, uploadCache: false), arrayLength, innerBatchCount, fencingMode);
    }
    // Dispatches the float or half AvgPool2D job variant depending on the
    // pinned data type; input and output precision must match.
    public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool inputIsHalf = pinX.array.Type == DataType.Half;
        bool outputIsHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(inputIsHalf, outputIsHalf);
        if (!inputIsHalf)
        {
            var floatJob = new AvgPool2DJob_Full_Float { data = this };
            return floatJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
        }
        var halfJob = new AvgPool2DJob_Full_Half { data = this };
        return halfJob.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct AvgPool2DJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public AvgPool2DJobHelper data;

    const int unrollSize = 16;

    // Computes one output row `y` of 2D average pooling for every batch and
    // output column. Per-channel sums are gathered in a temporary accumulator
    // and divided by the count of in-bounds kernel taps (exclusive-pad average).
    public void Execute(int y)
    {
        int accumulatorMemSize = data.inChannels * sizeof(float);
        float* outputAccumulators = (float*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);

        for (int n = 0; n < data.outBatch; ++n)
        for (int x = 0; x < data.outWidth; ++x)
        {
            // reset accumulators & counter
            int counter = 0;
            UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);

            // gather sums in accumulators
            for (int dy = 0; dy < data.kernelHeight; ++dy)
            {
                int readY = y * data.strideY + dy - data.padY;
                if (readY < 0) continue;
                if (readY >= data.inHeight) continue;

                for (int dx = 0; dx < data.kernelWidth; ++dx)
                {
                    // BUGFIX: the horizontal offset previously subtracted data.padY;
                    // the X axis must use the horizontal padding data.padX
                    // (asymmetric padding produced shifted reads before).
                    int readX = x * data.strideX + dx - data.padX;
                    if (readX < 0) continue;
                    if (readX >= data.inWidth) continue;

                    float* dst = outputAccumulators;
                    float* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;

                    int k = 0;
                    for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                        for (int q = 0; q < unrollSize; q++, src++, dst++)
                            *dst += *src;
                    for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                        *dst += *src;
                    counter++;
                }
            }

            // safety net, if kernel was completely outside of X
            counter = math.max(1, counter);

            { // write accumulators to memory
                int k = 0;
                float invCounter = 1f / counter;
                float* src = outputAccumulators;
                float* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
                for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
                    for (int q = 0; q < unrollSize; q++, src++, dst++)
                        *dst = (float)(*src * invCounter);
                for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
                    *dst = (float)(*src * invCounter);
            }
        }

        UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
    }
}
|
||||
|
||||
#endregion
|
||||
#region Reduce jobs declaration for mode: _ActAsFloat_WeightAsHalf
|
||||
|
||||
|
||||
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct ExpBiasReduceJob_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public ExpBiasReduceJobHelper data;

    // Mixed-precision variant: float activations, half bias. One invocation
    // computes sum_z exp(X[...,z,...] - bias) for one (outer, inner) slot.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = i / stride;
        int readBase = outer * stride * data.reduceDim + inner;
        float bias = Bptr[outer * stride + inner]; // invariant over z

        float accum = 0.0f;
        for (int z = 0; z < data.reduceDim; ++z)
            accum += math.exp(Xptr[readBase + z * stride] - bias);
        Optr[outer * stride + inner] = accum;
    }
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct SoftmaxEndJob_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public SoftmaxEndJobHelper data;

    // Mixed-precision softmax finish: O[i] = exp(X[i] - max) / sum with half
    // max (B) and sum (S) buffers from the preceding reduce jobs.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = (i / stride) / data.reduceDim;
        int reduced = outer * stride + inner; // index into the reduced max/sum buffers
        float sliceMax = Bptr[reduced];
        float sliceSum = Sptr[reduced];

        Optr[i] = math.exp(Xptr[i] - sliceMax) / sliceSum;
    }
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct LogSoftmaxEndJob_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
    public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
    public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
    public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
    public LogSoftmaxEndJobHelper data;

    // Mixed-precision log-softmax finish: O[i] = (X[i] - max) - log(sum) with
    // half max (B) and sum (S) buffers from the preceding reduce jobs.
    public void Execute(int i)
    {
        int stride = data.offsetReduce;
        int inner = i % stride;
        int outer = (i / stride) / data.reduceDim;
        int reduced = outer * stride + inner; // index into the reduced max/sum buffers
        float sliceMax = Bptr[reduced];
        float sliceSum = Sptr[reduced];

        Optr[i] = (Xptr[i] - sliceMax) - math.log(sliceSum);
    }
}
|
||||
|
||||
|
||||
|
||||
#endregion
|
||||
#region Reduce jobs declaration for mode: _Full_Half
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct ReduceMaxJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public ReduceMaxJobHelper data;
|
||||
|
||||
public void Execute(int i)
|
||||
{
|
||||
int x = i % data.offsetReduce;
|
||||
int y = i / data.offsetReduce;
|
||||
|
||||
float maxV = float.MinValue;
|
||||
for (int z = 0; z < data.reduceDim; ++z)
|
||||
{
|
||||
float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
|
||||
maxV = math.max(maxV, v);
|
||||
}
|
||||
Optr[y * data.offsetReduce + x] = (half)maxV;
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct ReduceSumJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public ReduceSumJobHelper data;
|
||||
|
||||
public void Execute(int i)
|
||||
{
|
||||
int x = i % data.offsetReduce;
|
||||
int y = i / data.offsetReduce;
|
||||
|
||||
float sumV = 0;
|
||||
for (int z = 0; z < data.reduceDim; ++z)
|
||||
{
|
||||
float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
|
||||
sumV += v;
|
||||
}
|
||||
Optr[y * data.offsetReduce + x] = (half)(sumV);
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct ReduceMeanJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public ReduceMeanJobHelper data;
|
||||
|
||||
public void Execute(int i)
|
||||
{
|
||||
int x = i % data.offsetReduce;
|
||||
int y = i / data.offsetReduce;
|
||||
|
||||
float sumV = 0;
|
||||
for (int z = 0; z < data.reduceDim; ++z)
|
||||
{
|
||||
float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
|
||||
sumV += v;
|
||||
}
|
||||
Optr[y * data.offsetReduce + x] = (half)(sumV / (float)data.reduceDim);
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct ExpBiasReduceJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public ExpBiasReduceJobHelper data;
|
||||
|
||||
public void Execute(int i)
|
||||
{
|
||||
int x = i % data.offsetReduce;
|
||||
int y = i / data.offsetReduce;
|
||||
|
||||
float accum = 0.0f;
|
||||
for (int z = 0; z < data.reduceDim; ++z)
|
||||
{
|
||||
float v = Xptr[y * data.offsetReduce * data.reduceDim + z * data.offsetReduce + x];
|
||||
float b = Bptr[y * data.offsetReduce + x];
|
||||
accum += math.exp(v - b);
|
||||
}
|
||||
Optr[y * data.offsetReduce + x] = (half)accum;
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
||||
unsafe struct SoftmaxEndJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXSBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
|
||||
public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public SoftmaxEndJobHelper data;
|
||||
|
||||
public void Execute(int i)
|
||||
{
|
||||
int x = i % data.offsetReduce;
|
||||
int y = ((i / data.offsetReduce) % data.reduceDim);
|
||||
int z = ((i / data.offsetReduce) / data.reduceDim);
|
||||
|
||||
Optr[i] = (half)(math.exp(Xptr[i] - Bptr[z * data.offsetReduce + x]) / Sptr[z * data.offsetReduce + x]);
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
||||
unsafe struct LogSoftmaxEndJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXSBO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadOnlyMemResource S { get; set; } half* Sptr => S.ptrhalf;
|
||||
public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public LogSoftmaxEndJobHelper data;
|
||||
|
||||
public void Execute(int i)
|
||||
{
|
||||
int x = i % data.offsetReduce;
|
||||
int y = ((i / data.offsetReduce) % data.reduceDim);
|
||||
int z = ((i / data.offsetReduce) / data.reduceDim);
|
||||
|
||||
Optr[i] = (half)((Xptr[i] - Bptr[z * data.offsetReduce + x]) - math.log(Sptr[z * data.offsetReduce + x]));
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct MaxPool2DJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public MaxPool2DJobHelper data;
|
||||
|
||||
const int unrollSize = 16;
|
||||
public void Execute(int y)
|
||||
{
|
||||
int accumulatorMemSize = data.inChannels * sizeof(half);
|
||||
half* outputAccumulators = (half*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
|
||||
for (int n = 0; n < data.outBatch; ++n)
|
||||
for (int x = 0; x < data.outWidth; ++x)
|
||||
{
|
||||
bool firstNotRejectedPixelInKernel = true;
|
||||
// gather max results in accumulators
|
||||
for (int dy = 0; dy < data.kernelHeight; ++dy)
|
||||
{
|
||||
int readY = y * data.strideY + dy - data.padY;
|
||||
if (readY < 0) continue;
|
||||
if (readY >= data.inHeight) continue;
|
||||
|
||||
for (int dx = 0; dx < data.kernelWidth; ++dx)
|
||||
{
|
||||
int readX = x * data.strideX + dx - data.padY;
|
||||
if (readX < 0) continue;
|
||||
if (readX >= data.inWidth) continue;
|
||||
|
||||
half* dst = outputAccumulators;
|
||||
half* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
|
||||
|
||||
int k = 0;
|
||||
if (firstNotRejectedPixelInKernel) // first pass, write-through
|
||||
{
|
||||
for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++)
|
||||
*dst = *src;
|
||||
for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
|
||||
*dst = *src;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++)
|
||||
*dst = (*dst) > (*src) ? (*dst) : (*src);
|
||||
for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
|
||||
*dst = (*dst) > (*src) ? (*dst) : (*src);
|
||||
}
|
||||
firstNotRejectedPixelInKernel = false;
|
||||
}
|
||||
}
|
||||
|
||||
// safety net, if kernel was completely outside of X
|
||||
// fill with padding_value (0) to avoid uninitialized memory
|
||||
if (firstNotRejectedPixelInKernel)
|
||||
UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
|
||||
|
||||
{ // write accumulators to memory
|
||||
int k = 0;
|
||||
half* src = outputAccumulators;
|
||||
half* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
|
||||
for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++)
|
||||
*dst = *src;
|
||||
for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
|
||||
*dst = *src;
|
||||
}
|
||||
}
|
||||
|
||||
UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
|
||||
}
|
||||
}
|
||||
|
||||
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
|
||||
unsafe struct AvgPool2DJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
||||
{
|
||||
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
||||
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
||||
public AvgPool2DJobHelper data;
|
||||
|
||||
const int unrollSize = 16;
|
||||
public void Execute(int y)
|
||||
{
|
||||
int accumulatorMemSize = data.inChannels * sizeof(half);
|
||||
half* outputAccumulators = (half*)UnsafeUtility.Malloc(accumulatorMemSize, JobsUtility.CacheLineSize, Allocator.TempJob);
|
||||
|
||||
for (int n = 0; n < data.outBatch; ++n)
|
||||
for (int x = 0; x < data.outWidth; ++x)
|
||||
{
|
||||
// reset accumulators & counter
|
||||
int counter = 0;
|
||||
UnsafeUtility.MemClear(outputAccumulators, accumulatorMemSize);
|
||||
|
||||
// gather sums in accumulators
|
||||
for (int dy = 0; dy < data.kernelHeight; ++dy)
|
||||
{
|
||||
int readY = y * data.strideY + dy - data.padY;
|
||||
if (readY < 0) continue;
|
||||
if (readY >= data.inHeight) continue;
|
||||
|
||||
for (int dx = 0; dx < data.kernelWidth; ++dx)
|
||||
{
|
||||
int readX = x * data.strideX + dx - data.padY;
|
||||
if (readX < 0) continue;
|
||||
if (readX >= data.inWidth) continue;
|
||||
|
||||
half* dst = outputAccumulators;
|
||||
half* src = Xptr + n * data.inStrideN + readY * data.inStrideH + readX * data.inStrideW;
|
||||
|
||||
int k = 0;
|
||||
for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++)
|
||||
*dst += *src;
|
||||
for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
|
||||
*dst += *src;
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
|
||||
// safety net, if kernel was completely outside of X
|
||||
counter = math.max(1, counter);
|
||||
|
||||
{ // write accumulators to memory
|
||||
int k = 0;
|
||||
float invCounter = 1f / counter;
|
||||
half* src = outputAccumulators;
|
||||
half* dst = Optr + n * data.outStrideN + y * data.outStrideH + x * data.outStrideW;
|
||||
for (; k < data.inChannels - unrollSize + 1; k += unrollSize) // unroll of inChannels loop
|
||||
for (int q = 0; q < unrollSize; q++, src++, dst++)
|
||||
*dst = (half)(*src * invCounter);
|
||||
for (; k < data.inChannels; k++, src++, dst++) // remainder of inChannels loop
|
||||
*dst = (half)(*src * invCounter);
|
||||
}
|
||||
}
|
||||
|
||||
UnsafeUtility.Free(outputAccumulators, Allocator.TempJob);
|
||||
}
|
||||
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: f555ca3db5aa9674f9cdba4d5b715e79
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 1f9c24a13966b425fa5bfd1a4007c3f4
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: dd2cfd0651655b44ca226eb4f0b952aa
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 6bc05bfa1b9544e8a813df0c3eaab6b0
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: badd0d6a0383049eab2cb58e1d0d6fa9
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,143 +0,0 @@
|
||||
using System.Diagnostics;
|
||||
using UnityEngine;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
internal class ComputeDebugUtils
|
||||
{
|
||||
/// <summary>
|
||||
/// DEBUG ONLY: `debugKernels` allow to track out of bound read/write and assertion in kernels.
|
||||
/// When set to true be sure to define KERNEL_ASSERTS or FORCE_DEBUG in the particular kernel(s)
|
||||
/// you want to debug (see in DebugUtils.cginc).
|
||||
/// Production code should not set this to 'true' as this will significantly degrade performances.
|
||||
/// </summary>
|
||||
public static bool debugKernels = false;
|
||||
|
||||
/// <summary>
|
||||
/// DEBUG ONLY: if ComputeDebugUtils.debugKernels is true and debugger is attached, debugger will break when a kernel assertion is catch.
|
||||
/// </summary>
|
||||
public static bool breakOnAssertion = false;
|
||||
|
||||
//Keep in sync with DebugUtils.cginc KERNEL_ASSERT_CONTEXT defines
|
||||
private enum KernelAssertContext
|
||||
{
|
||||
ReadOnlyTensor_Read = 0,
|
||||
ReadWriteTensor_Read = 1,
|
||||
ReadWriteTensor_Write = 2,
|
||||
SharedTensor_Read = 3,
|
||||
Assertion = 4,
|
||||
AssertionWithValue = 5
|
||||
}
|
||||
|
||||
static ComputeDebugUtils()
|
||||
{
|
||||
string[] args = System.Environment.GetCommandLineArgs ();
|
||||
for (int i = 0; i < args.Length; i++) {
|
||||
if (args [i] == "-barracuda-debug-gpu-kernels")
|
||||
{
|
||||
debugKernels = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
[StructLayout(LayoutKind.Sequential, Pack = 1)]
|
||||
public struct KernelAssertInfo
|
||||
{
|
||||
public KernelAssertInfo(uint[] data)
|
||||
{
|
||||
UnityEngine.Debug.Assert(numUintInKernelAssertInfo == data.Length);
|
||||
UnityEngine.Debug.Assert(numUintInKernelAssertInfo == 8,
|
||||
"Please change KernelAssertInfo constructor if altering the struct.");
|
||||
lockValue = data[0];
|
||||
lineNumber = data[1];
|
||||
context = data[2];
|
||||
index = data[3];
|
||||
bufferSize = data[4];
|
||||
debugValue = data[5];
|
||||
padding1 = data[6];
|
||||
padding2 = data[7];
|
||||
}
|
||||
|
||||
public readonly uint lockValue;
|
||||
public readonly uint lineNumber;
|
||||
public readonly uint context;
|
||||
public readonly uint index;
|
||||
public readonly uint bufferSize;
|
||||
public readonly uint debugValue;
|
||||
public readonly uint padding1;
|
||||
public readonly uint padding2;
|
||||
}
|
||||
private static readonly int numUintInKernelAssertInfo = Marshal.SizeOf(typeof(KernelAssertInfo))/sizeof(uint);
|
||||
|
||||
private static ComputeBuffer kernelDebugInfo = null;
|
||||
|
||||
private static void LogAssertion(KernelAssertInfo info, string kernelName)
|
||||
{
|
||||
if (info.lockValue != 0)
|
||||
{
|
||||
string source;
|
||||
switch (info.context)
|
||||
{
|
||||
case (int) KernelAssertContext.ReadOnlyTensor_Read:
|
||||
source = $"Out of bound while Reading a ReadonlyTensor of length {info.bufferSize} at index {info.index} (at Tensor.cginc line {info.lineNumber})";
|
||||
break;
|
||||
case (int) KernelAssertContext.ReadWriteTensor_Read:
|
||||
source = $"Out of bound while Reading a ReadWriteTensor of length {info.bufferSize} at index {info.index} (at Tensor.cginc line {info.lineNumber})";
|
||||
break;
|
||||
case (int) KernelAssertContext.ReadWriteTensor_Write:
|
||||
source = $"Out of bound while Writing to a ReadWriteTensor of length {info.bufferSize} at index {info.index} (at Tensor.cginc line {info.lineNumber})";
|
||||
break;
|
||||
case (int) KernelAssertContext.SharedTensor_Read:
|
||||
source = $"Out of bound while Reading a SharedTensor of length {info.bufferSize} at index {info.index} (at Tensor.cginc line {info.lineNumber})";
|
||||
break;
|
||||
case (int) KernelAssertContext.Assertion:
|
||||
source = $"Assertion at line {info.lineNumber}";
|
||||
break;
|
||||
case (int) KernelAssertContext.AssertionWithValue:
|
||||
source = $"Assertion at line {info.lineNumber}, debug value is {info.debugValue}";
|
||||
break;
|
||||
default:
|
||||
source = "Unknown error";
|
||||
break;
|
||||
}
|
||||
|
||||
string message = $"{source} in kernel {kernelName}.";
|
||||
D.LogError(message);
|
||||
|
||||
if (breakOnAssertion)
|
||||
{
|
||||
Debugger.Break();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static void PrepareDispatch()
|
||||
{
|
||||
//Lazy alloc, will be released by GC.
|
||||
if (debugKernels && kernelDebugInfo == null)
|
||||
{
|
||||
kernelDebugInfo = new ComputeBuffer(1, numUintInKernelAssertInfo*sizeof(uint));
|
||||
}
|
||||
|
||||
if (debugKernels)
|
||||
{
|
||||
Shader.SetGlobalBuffer("KernelAssertInfoBuffer", kernelDebugInfo);
|
||||
kernelDebugInfo.SetData(new uint[numUintInKernelAssertInfo]); //TODO use a kernel to zero out the buffer to avoid a extra sync.
|
||||
}
|
||||
}
|
||||
|
||||
public static void VerifyDispatch(string kernelName)
|
||||
{
|
||||
if (debugKernels)
|
||||
{
|
||||
UnityEngine.Debug.Assert(kernelDebugInfo != null);
|
||||
var data = new uint[numUintInKernelAssertInfo];
|
||||
kernelDebugInfo.GetData(data, 0, 0, numUintInKernelAssertInfo);
|
||||
LogAssertion(new KernelAssertInfo(data), kernelName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 72797c6856a1f9642a53f0b22d65e5dc
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 1126b6ab4d825624a9135b0501f4d793
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 5fea18c74a3be4c7680b4ee28cbe1a86
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,12 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: e7398940fb81d45ee8e648e0b0f467f2
|
||||
timeCreated: 1503433373
|
||||
licenseType: Pro
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 3e48b2167ab1b453bb10a8fdac9dc531
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: c077f9591cc6d4804bc89b66a2a67c0d
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,12 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 3d3848101f7774555899e75a86641621
|
||||
timeCreated: 1506427659
|
||||
licenseType: Pro
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,93 +0,0 @@
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
/// <summary>
|
||||
/// `CompareOps` utilities
|
||||
/// </summary>
|
||||
public class CompareOpsUtils
|
||||
{
|
||||
/// <summary>
|
||||
/// `CompareOps` log level enum
|
||||
/// </summary>
|
||||
public enum LogLevel
|
||||
{
|
||||
/// <summary>
|
||||
/// Warning
|
||||
/// </summary>
|
||||
Warning,
|
||||
|
||||
/// <summary>
|
||||
/// Error
|
||||
/// </summary>
|
||||
Error
|
||||
}
|
||||
|
||||
static internal void CheckSame(Tensor X, Tensor Y, Layer.Type type, LogLevel logLevel, float epsilon=0.0001f, params Tensor[] inputs)
|
||||
{
|
||||
CheckSame(X, Y, type.ToString(), logLevel, epsilon, inputs);
|
||||
}
|
||||
|
||||
static internal void CheckSame(Tensor X, Tensor Y, string opName, LogLevel logLevel, float epsilon=0.0001f, params Tensor[] inputs)
|
||||
{
|
||||
if (!X.Approximately(Y, epsilon))
|
||||
{
|
||||
if (logLevel == LogLevel.Error)
|
||||
{
|
||||
string mainLogMessage = $"Tensors not equal after {opName}, epsilon {epsilon}";
|
||||
D.LogError(mainLogMessage);
|
||||
}
|
||||
else
|
||||
{
|
||||
string mainLogMessage = $"Tensors not equal after {opName} max error: {X.MaxDifference(Y)}";
|
||||
D.LogWarning(mainLogMessage);
|
||||
|
||||
D.Log("First: " + X.shape);
|
||||
D.Log("Second:" + Y.shape);
|
||||
|
||||
X.PrintDataPart(X.channels * X.width * 2);
|
||||
Y.PrintDataPart(Y.channels * Y.width * 2);
|
||||
|
||||
for (var i = 0; i < inputs.Length; i++)
|
||||
{
|
||||
inputs[i].PrintDataPart(32, "input_" + i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
if (X.tensorOnDevice != Y.tensorOnDevice)
|
||||
Y.Dispose();
|
||||
}
|
||||
|
||||
static internal bool CheckApproximately(Tensor X, Tensor Y, int count, float epsilon, Layer.Type type, LogLevel logLevel)
|
||||
{
|
||||
return CheckApproximately(X, Y, count, epsilon, type.ToString(), logLevel);
|
||||
}
|
||||
|
||||
static internal bool CheckApproximately(Tensor X, Tensor Y, int count, float epsilon, string opName, LogLevel logLevel)
|
||||
{
|
||||
if (!X.Approximately(Y, epsilon, count))
|
||||
{
|
||||
string mainLogMessage = $"Tensors not equal after {opName}";
|
||||
if (logLevel == LogLevel.Error)
|
||||
D.LogError(mainLogMessage);
|
||||
else
|
||||
D.LogWarning(mainLogMessage);
|
||||
|
||||
D.Log("First: " + X.shape);
|
||||
D.Log("Second:" + Y.shape);
|
||||
|
||||
if (count < 0)
|
||||
count = X.channels * X.width * 2;
|
||||
X.PrintDataPart(count);
|
||||
Y.PrintDataPart(count);
|
||||
return false;
|
||||
}
|
||||
if (X.tensorOnDevice != Y.tensorOnDevice)
|
||||
Y.Dispose();
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 5e3e5424b979b5c43997409257895b6b
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,132 +0,0 @@
|
||||
using UnityEngine;
|
||||
using UnityEngine.Rendering;
|
||||
|
||||
namespace Unity.Barracuda
|
||||
{
|
||||
/// <summary>
|
||||
/// GPU compute info
|
||||
/// </summary>
|
||||
public class ComputeInfo
|
||||
{
|
||||
/// <summary>
|
||||
/// Channel order enum
|
||||
/// </summary>
|
||||
public enum ChannelsOrder
|
||||
{
|
||||
/// <summary>
|
||||
/// Channels last
|
||||
/// </summary>
|
||||
NHWC,
|
||||
|
||||
/// <summary>
|
||||
/// Channels first
|
||||
/// </summary>
|
||||
NCHW
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// GPU supports shared memory
|
||||
/// </summary>
|
||||
public static bool supportsComputeSharedMemory = true;
|
||||
|
||||
/// <summary>
|
||||
/// GPU supports Dense 32x32 kernels
|
||||
/// </summary>
|
||||
public static bool supportsDense32x32 = true;
|
||||
|
||||
/// <summary>
|
||||
/// GPU supports Dense 64x64 kernels
|
||||
/// </summary>
|
||||
public static bool supportsDense64x64 = true;
|
||||
|
||||
/// <summary>
|
||||
/// GPU supports compute
|
||||
/// </summary>
|
||||
public static bool supportsCompute = true;
|
||||
|
||||
/// <summary>
|
||||
/// Max compute work group size supported by GPU
|
||||
/// </summary>
|
||||
public static uint maxComputeWorkGroupSize = 1024;
|
||||
|
||||
/// <summary>
|
||||
/// GPU vendor
|
||||
/// </summary>
|
||||
public static string graphicsDeviceVendor = "";
|
||||
|
||||
/// <summary>
|
||||
/// Helper for hardware selection
|
||||
/// </summary>
|
||||
public static bool IsMobileGPU() { return
|
||||
(Application.platform == RuntimePlatform.Android) ||
|
||||
(Application.platform == RuntimePlatform.IPhonePlayer) ||
|
||||
graphicsDeviceVendor.Contains("Intel");
|
||||
}
|
||||
public static bool IsiPhoneGPU() { return
|
||||
(Application.platform == RuntimePlatform.IPhonePlayer);
|
||||
}
|
||||
public static bool IsQualcommGPU() { return
|
||||
(Application.platform == RuntimePlatform.Android) && graphicsDeviceVendor.Contains("Qualcomm");
|
||||
}
|
||||
public static bool IsARMGPU() { return
|
||||
(Application.platform == RuntimePlatform.Android) && graphicsDeviceVendor.Contains("ARM");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// EXPERIMENTAL: Select Channel order of the compute backends.
|
||||
/// Production code should stick to default (NHWC) for now.
|
||||
/// </summary>
|
||||
public static ChannelsOrder channelsOrder = ChannelsOrder.NHWC;
|
||||
|
||||
/// <summary>
|
||||
/// Static constructor, initializes and caches data
|
||||
/// </summary>
|
||||
static ComputeInfo()
|
||||
{
|
||||
string[] args = System.Environment.GetCommandLineArgs ();
|
||||
for (int i = 0; i < args.Length; i++) {
|
||||
if (args [i] == "-barracuda-compute-use-nchw")
|
||||
{
|
||||
channelsOrder = ChannelsOrder.NCHW;
|
||||
}
|
||||
}
|
||||
|
||||
supportsCompute = SystemInfo.supportsComputeShaders;
|
||||
|
||||
graphicsDeviceVendor = SystemInfo.graphicsDeviceVendor;
|
||||
|
||||
// TODO switch to SystemInfo.maxComputeWorkGroupSize when we bump min spec to 2019.3
|
||||
if (Application.platform == RuntimePlatform.Android)
|
||||
{
|
||||
maxComputeWorkGroupSize = (SystemInfo.graphicsDeviceType == GraphicsDeviceType.Vulkan) ? 256u : 128u;
|
||||
|
||||
var gpuName = SystemInfo.graphicsDeviceName ?? "";
|
||||
var osName = SystemInfo.operatingSystem ?? "";
|
||||
|
||||
// Known issue with Adreno Vulkan drivers on Android 8.x
|
||||
if (gpuName.Contains("Adreno") && osName.StartsWith("Android OS 8") &&
|
||||
SystemInfo.graphicsDeviceType == GraphicsDeviceType.Vulkan)
|
||||
maxComputeWorkGroupSize = 128u;
|
||||
}
|
||||
else if (Application.platform == RuntimePlatform.IPhonePlayer || Application.platform == RuntimePlatform.tvOS)
|
||||
{
|
||||
var gpuName = SystemInfo.graphicsDeviceName;
|
||||
if (gpuName != null && gpuName.StartsWith("Apple A"))
|
||||
{
|
||||
int gpuNumber = 0, idx = "Apple A".Length;
|
||||
while (idx < gpuName.Length && '0' <= gpuName[idx] && gpuName[idx] <= '9')
|
||||
{
|
||||
gpuNumber = gpuNumber * 10 + gpuName[idx++] - '0';
|
||||
}
|
||||
|
||||
// TODO check on lower end iOS devices
|
||||
maxComputeWorkGroupSize = (gpuNumber <= 10) ? 224u : 256u;
|
||||
}
|
||||
else
|
||||
{
|
||||
maxComputeWorkGroupSize = 256u;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 96aee99fc4154e2a991ac0edd6056c2b
|
||||
timeCreated: 1558541124
|
||||
@@ -1,404 +0,0 @@
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using UnityEngine;
|
||||
using UnityEngine.Profiling;
|
||||
|
||||
namespace Unity.Barracuda
|
||||
{
|
||||
|
||||
internal enum ComputeShaderContext
|
||||
{
|
||||
Reference,
|
||||
Optimized
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Stores compute kernel cache for GPU compute backends
|
||||
/// </summary>
|
||||
public sealed class ComputeShaderSingleton
|
||||
{
|
||||
/// <summary>
|
||||
/// Enable kernel usage tracking
|
||||
/// </summary>
|
||||
public bool EnableDebug = false;
|
||||
|
||||
private static readonly ComputeShaderSingleton instance = new ComputeShaderSingleton ();
|
||||
|
||||
// Maps kernel name -> shader name
|
||||
private Dictionary<string, string> mKernelToShaderName = new Dictionary<string, string>();
|
||||
|
||||
// Maps shader name -> ComputeShader
|
||||
private Dictionary<string, ComputeShader> mShaderNameToComputeShader = new Dictionary<string, ComputeShader>();
|
||||
|
||||
private HashSet<string> mUsedOptimizedKernels = new HashSet<string>();
|
||||
private HashSet<string> mUsedReferenceKernels = new HashSet<string>();
|
||||
|
||||
private ComputeShaderSingleton()
|
||||
{
|
||||
RegisterKernels("Barracuda/TextureUtils",
|
||||
new[] {"TextureToTensor", "TensorToTextureNoLUT", "TensorToTexture3DLUT"});
|
||||
|
||||
RegisterKernels("Barracuda/ActivationA",
|
||||
new[]
|
||||
{
|
||||
"Relu_Flat", "Relu_FlatStrict", "Relu_Loop", "Relu6_Flat", "Relu6_FlatStrict", "Relu6_Loop",
|
||||
"Tanh_Flat", "Tanh_FlatStrict", "Tanh_Loop", "Swish_Flat", "Swish_FlatStrict", "Swish_Loop",
|
||||
"Sigmoid_Flat", "Sigmoid_FlatStrict", "Sigmoid_Loop", "LeakyRelu_Flat", "LeakyRelu_FlatStrict",
|
||||
"LeakyRelu_Loop", "Clip_Flat", "Clip_FlatStrict", "Clip_Loop", "PRelu_Flat", "PRelu_Loop"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/ActivationB",
|
||||
new[]
|
||||
{
|
||||
"Reciprocal_Flat", "Reciprocal_FlatStrict", "Reciprocal_Loop", "Sqrt_Flat", "Sqrt_FlatStrict",
|
||||
"Sqrt_Loop", "HardSigmoid_Flat", "HardSigmoid_FlatStrict", "HardSigmoid_Loop"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/ActivationBase",
|
||||
new string[]
|
||||
{
|
||||
"Abs_Flat", "Abs_FlatStrict", "Abs_Loop", "Neg_Flat", "Neg_FlatStrict", "Neg_Loop", "Ceil_Flat",
|
||||
"Ceil_FlatStrict", "Ceil_Loop", "Floor_Flat", "Floor_FlatStrict", "Floor_Loop",
|
||||
"Round_Flat", "Round_FlatStrict", "Round_Loop", "Selu_Flat",
|
||||
"Selu_FlatStrict", "Selu_Loop", "Softplus_Flat", "Softplus_FlatStrict", "Softplus_Loop", "Elu_Flat",
|
||||
"Elu_FlatStrict", "Elu_Loop", "Exp_Flat", "Exp_FlatStrict", "Exp_Loop", "Log_Flat",
|
||||
"Log_FlatStrict", "Log_Loop", "Pow_Flat", "Pow_FlatStrict", "Pow_Loop", "LogicalNot_Flat",
|
||||
"LogicalNot_FlatStrict", "LogicalNot_Loop", "Sign_Flat", "Sign_FlatStrict", "Sign_Loop",
|
||||
"Acos_Flat", "Acos_FlatStrict", "Acos_Loop",
|
||||
"Acosh_Flat", "Acosh_FlatStrict", "Acosh_Loop", "Asin_Flat", "Asin_FlatStrict", "Asin_Loop",
|
||||
"Asinh_Flat", "Asinh_FlatStrict", "Asinh_Loop", "Atan_Flat", "Atan_FlatStrict", "Atan_Loop",
|
||||
"Atanh_Flat", "Atanh_FlatStrict", "Atanh_Loop", "Cos_Flat", "Cos_FlatStrict", "Cos_Loop",
|
||||
"Cosh_Flat", "Cosh_FlatStrict", "Cosh_Loop", "Sin_Flat", "Sin_FlatStrict", "Sin_Loop", "Sinh_Flat",
|
||||
"Sinh_FlatStrict", "Sinh_Loop", "Tan_Flat", "Tan_FlatStrict", "Tan_Loop", "Erf_Flat", "Erf_FlatStrict", "Erf_Loop",
|
||||
"Relu_NHWC", "Relu_NCHW", "Relu_CNyx_NHWC", "Relu_Nyxc_NHWC", "Relu6_NHWC", "Relu6_NCHW", "Relu6_CNyx_NHWC",
|
||||
"Relu6_Nyxc_NHWC", "PRelu_NHWC", "PRelu_NCHW", "PRelu_CNyx2_NHWC", "Selu_NHWC", "Selu_NCHW",
|
||||
"Selu_CNyx_NHWC", "Selu_Nyxc_NHWC", "Tanh_NHWC", "Tanh_NCHW", "Tanh_CNyx_NHWC", "Tanh_Nyxc_NHWC",
|
||||
"Swish_NHWC", "Swish_NCHW", "Swish_CNyx_NHWC", "Swish_Nyxc_NHWC", "Softplus_NHWC", "Softplus_NCHW",
|
||||
"Softplus_CNyx_NHWC", "Softplus_Nyxc_NHWC", "Sigmoid_NHWC", "Sigmoid_NCHW", "Sigmoid_CNyx_NHWC",
|
||||
"Sigmoid_Nyxc_NHWC", "HardSigmoid_NHWC", "HardSigmoid_NCHW", "HardSigmoid_CNyx_NHWC", "HardSigmoid_Nyxc_NHWC",
|
||||
"Elu_NHWC", "Elu_NCHW", "Elu_CNyx_NHWC", "Elu_Nyxc_NHWC", "LeakyRelu_NHWC",
|
||||
"LeakyRelu_NCHW", "LeakyRelu_CNyx_NHWC", "LeakyRelu_Nyxc_NHWC", "Exp_NHWC", "Exp_NCHW",
|
||||
"Exp_CNyx_NHWC", "Exp_Nyxc_NHWC", "Log_NHWC", "Log_NCHW", "Log_CNyx_NHWC", "Log_Nyxc_NHWC",
|
||||
"Sqrt_NHWC", "Sqrt_NCHW", "Sqrt_CNyx_NHWC", "Sqrt_Nyxc_NHWC", "Pow_NHWC", "Pow_NCHW",
|
||||
"Pow_CNyx_NHWC", "Pow_Nyxc_NHWC",
|
||||
"Clip_NHWC", "Clip_NCHW", "Clip_CNyx_NHWC", "Clip_Nyxc_NHWC", "Acos_NHWC",
|
||||
"Acos_NCHW", "Acos_CNyx_NHWC", "Acos_Nyxc_NHWC", "Acosh_NHWC", "Acosh_NCHW", "Acosh_CNyx_NHWC",
|
||||
"Acosh_Nyxc_NHWC", "Asin_NHWC", "Asin_NCHW", "Asin_CNyx_NHWC", "Asin_Nyxc_NHWC", "Asinh_NHWC",
|
||||
"Asinh_NCHW", "Asinh_CNyx_NHWC", "Asinh_Nyxc_NHWC", "Atan_NHWC", "Atan_NCHW", "Atan_CNyx_NHWC",
|
||||
"Atan_Nyxc_NHWC", "Atanh_NHWC", "Atanh_NCHW", "Atanh_CNyx_NHWC", "Atanh_Nyxc_NHWC", "Cos_NHWC",
|
||||
"Cos_NCHW", "Cos_CNyx_NHWC", "Cos_Nyxc_NHWC", "Cosh_NHWC", "Cosh_NCHW", "Cosh_CNyx_NHWC",
|
||||
"Cosh_Nyxc_NHWC", "Sin_NHWC", "Sin_NCHW", "Sin_CNyx_NHWC", "Sin_Nyxc_NHWC", "Sinh_NHWC",
|
||||
"Sinh_NCHW", "Sinh_CNyx_NHWC", "Sinh_Nyxc_NHWC", "Tan_NHWC", "Tan_NCHW", "Tan_CNyx_NHWC",
|
||||
"Tan_Nyxc_NHWC", "Erf_NHWC", "Erf_NCHW", "Erf_CNyx_NHWC", "Erf_Nyxc_NHWC"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Broadcast_NHWC",
|
||||
new[]
|
||||
{
|
||||
"BroadcastAdd_NHWC", "BroadcastSub_NHWC", "BroadcastMul_NHWC", "BroadcastDiv_NHWC",
|
||||
"BroadcastPow_NHWC", "BroadcastMin_NHWC", "BroadcastMax_NHWC", "BroadcastMean_NHWC",
|
||||
"BroadcastGreater_NHWC", "BroadcastGreaterEqual_NHWC", "BroadcastLess_NHWC",
|
||||
"BroadcastLessEqual_NHWC", "BroadcastEqual_NHWC", "BroadcastLogicalOr_NHWC",
|
||||
"BroadcastLogicalAnd_NHWC", "BroadcastLogicalXor_NHWC", "BroadcastWhere_NHWC",
|
||||
"BroadcastDivExpSub_NHWC", "LogSoftmaxEnd_NHWC"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Broadcast_NCHW",
|
||||
new[]
|
||||
{
|
||||
"BroadcastAdd_NCHW", "BroadcastSub_NCHW", "BroadcastMul_NCHW", "BroadcastDiv_NCHW",
|
||||
"BroadcastPow_NCHW", "BroadcastMin_NCHW", "BroadcastMax_NCHW", "BroadcastMean_NCHW",
|
||||
"BroadcastGreater_NCHW", "BroadcastGreaterEqual_NCHW", "BroadcastLess_NCHW",
|
||||
"BroadcastLessEqual_NCHW", "BroadcastEqual_NCHW", "BroadcastLogicalOr_NCHW",
|
||||
"BroadcastLogicalAnd_NCHW", "BroadcastLogicalXor_NCHW", "BroadcastWhere_NCHW",
|
||||
"BroadcastDivExpSub_NCHW", "LogSoftmaxEnd_NCHW"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Conv2dA_NHWC",
|
||||
new[]
|
||||
{
|
||||
"Conv2D_NHWC", "Conv2D_RegisterBlock4x2_NHWC", "DepthwiseConv2D_NHWC",
|
||||
"Conv2DKernelKxK_StrictC16K64_T16x16_R4x4_NHWC", "Conv2DKernelKxK_T16x16_R4x4_NHWC",
|
||||
"Conv2DKernel1x1_StrictC16K64_T16x16_R4x4_NHWC"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Conv2dA_NCHW",
|
||||
new[]
|
||||
{
|
||||
"Conv2D_NCHW", "Conv2D_RegisterBlock4x2_NCHW", "DepthwiseConv2D_NCHW",
|
||||
"Conv2DKernelKxK_StrictC16K64_T16x16_R4x4_NCHW", "Conv2DKernelKxK_T16x16_R4x4_NCHW",
|
||||
"Conv2DKernel1x1_StrictC16K64_T16x16_R4x4_NCHW"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Conv2dBase",
|
||||
new[]
|
||||
{
|
||||
"Conv2DKernelKxK_StrictC16StrictK64_T8x8_R8x8_NHWC",
|
||||
"Conv2DKernelKxK_StrictC16StrictK64_T8x8_R8x8_NCHW",
|
||||
"Conv2DKernelKxK_StrictC16LaxK64_T8x8_R8x8_NHWC", "Conv2DKernelKxK_StrictC16LaxK64_T8x8_R8x8_NCHW",
|
||||
"Conv2DKernelKxK_StrictC4StrictK16_T2x32_R8x8_NHWC",
|
||||
"Conv2DKernelKxK_StrictC4StrictK16_T2x32_R8x8_NCHW",
|
||||
"Conv2DKernelKxK_LaxC4StrictK16_T2x32_R8x8_NHWC", "Conv2DKernelKxK_LaxC4StrictK16_T2x32_R8x8_NCHW",
|
||||
"Conv2DKernelKxK_StrictC4LaxK16_T2x32_R8x8_NHWC", "Conv2DKernelKxK_StrictC4LaxK16_T2x32_R8x8_NCHW",
|
||||
"Conv2DTrans_NHWC", "Conv2DTrans_NCHW", "Conv2DTrans_KernelCached_K5x5_T16x16_NHWC",
|
||||
"Conv2DTrans_KernelCached_K5x5_T16x16_NCHW", "Conv2DTransFlipKernel", "Conv2DTransPadFill_NHWC",
|
||||
"Conv2DTransPadFill_NCHW", "KernelWinograd_3x3",
|
||||
"Conv2DWinograd_2x2_Kernel3x3_StrictC8StrictK16_T16x16_R4x4_NCHW",
|
||||
"Conv2DWinograd_2x2_Kernel3x3_StrictC8LaxK16_T16x16_R4x4_NCHW"
|
||||
});
|
||||
RegisterKernels("Barracuda/Conv2dMobile",
|
||||
new[]
|
||||
{
|
||||
//"Conv2D_Default_T8x8_R4x4_NHWC",
|
||||
//"Conv2D_Default_T8x8_R4x4_NHWC",
|
||||
"Conv2D_Winograd_2x2_Kernel3x3_LDS_NHWC",
|
||||
"Conv2D_Winograd_2x2_Kernel3x3_LDS_NHWC",
|
||||
//"Conv2D_Winograd_2x2_Kernel3x3_NHWC",
|
||||
//"Conv2D_Winograd_2x2_Kernel3x3_NHWC",
|
||||
//"Conv2D_Kernel1x1_1x4x4_NHWC",
|
||||
//"Conv2D_Kernel1x1_1x4x4_NCHW",
|
||||
"Conv2D_KernelKxK_T16x16_R4x4_NHWC",
|
||||
"Conv2D_KernelKxK_T16x16_R4x4_NCHW",
|
||||
"Conv2D_Kernel1x1_T16x16_R4x4_NHWC",
|
||||
"Conv2D_Kernel1x1_T16x16_R4x4_NCHW",
|
||||
"Conv2D_KernelKxK_T8x8_R4x4_NHWC",
|
||||
"Conv2D_KernelKxK_T8x8_R4x4_NCHW",
|
||||
"Conv2D_Kernel1x1_T8x8_R4x4_NHWC",
|
||||
"Conv2D_Kernel1x1_T8x8_R4x4_NCHW",
|
||||
"DepthwiseConv2D_Default_NHWC",
|
||||
"DepthwiseConv2D_Default_NCHW",
|
||||
"DepthwiseConv2D_Winograd_2x2_Kernel3x3_NHWC",
|
||||
"DepthwiseConv2D_Winograd_2x2_Kernel3x3_NCHW",
|
||||
//"DepthwiseConv2D_Winograd_2x2_Kernel5x5_NHWC",
|
||||
//"DepthwiseConv2D_Winograd_2x2_Kernel5x5_NCHW",
|
||||
//"KernelWinograd_5x5"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Conv3d",
|
||||
new[]
|
||||
{
|
||||
"Conv3D_NHWC", "Conv3D_NCHW", "Conv3DKernelKxK_LaxC8LaxK32_T8x16_R4x4_NHWC",
|
||||
"Conv3DKernelKxK_LaxC8LaxK32_T8x16_R4x4_NCHW", "Conv3DKernelKxK_StrictC8LaxK32_T8x16_R4x4_NHWC",
|
||||
"Conv3DKernelKxK_StrictC8LaxK32_T8x16_R4x4_NCHW",
|
||||
"Conv3DKernelKxK_StrictC8StrictK32_T8x16_R4x4_NHWC",
|
||||
"Conv3DKernelKxK_StrictC8StrictK32_T8x16_R4x4_NCHW"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Dense",
|
||||
new[]
|
||||
{
|
||||
"Dense_L1Cached64", "DenseTiled16x16", "DenseTiled32x32", "DenseTiled64x64", "Dense_T8x8_R4x4",
|
||||
"Dense_T16x16_R4x4", "Dense_Tilled2x2_Cached", "Dense_Tilled4x4_Cached", "MatMulPackB0Bias",
|
||||
"Dense_V_L1Cached64"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/MatMul",
|
||||
new[]
|
||||
{
|
||||
"MultidimMatMul_T16x16_R4x4_AR3_BR2_NHWC", "MultidimMatMul_T16x16_R4x4_AR3_BR2_NCHW",
|
||||
"MultidimMatMul_T8x8_R8x8_AR3_BR2_NHWC", "MultidimMatMul_T8x8_R8x8_AR3_BR2_NCHW",
|
||||
"MultidimMatMul_L1Cached64_AR3_BR2_NHWC", "MultidimMatMul_L1Cached64_AR3_BR2_NCHW"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Dense3",
|
||||
new[]
|
||||
{
|
||||
"Dense3_T8x8_R8x8_NHWC", "Dense3_T8x8_R8x8_NCHW",
|
||||
"Dense3_T8x16_R4x4_NHWC", "Dense3_T8x16_R4x4_NCHW",
|
||||
"Dense3_L1Cached64_NHWC", "Dense3_L1Cached64_NCHW"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Generic",
|
||||
new[]
|
||||
{
|
||||
"ScaleBias_NHWC", "ScaleBias_NCHW", "ScaleBias_CNyx_NHWC", "ScaleBias_CNyx2_NHWC",
|
||||
"ScaleBias_Flat_NHWC", "ScaleBias_Flat_NCHW", "ScaleBias_Loop_NHWC", "ScaleBias_Loop_NCHW",
|
||||
"InstanceNormTail_CNyx2_NHWC", "InstanceNormTail_Flat_NHWC", "InstanceNormTail_Flat_NCHW",
|
||||
"InstanceNormTail_Loop_NHWC", "InstanceNormTail_Loop_NCHW", "Upsample2D_NHWC", "Upsample2D_NCHW",
|
||||
"UpsampleBilinear2D_NHWC", "UpsampleBilinear2D_NCHW", "UpsampleBilinear2D_2x2_NHWC",
|
||||
"UpsampleBilinear2D_2x2_NCHW", "Copy_NHWC", "Copy_NCHW", "ReshapeFromNHWCModel_Flat_NCHW",
|
||||
"ReshapeFromNHWCModel_Loop_NCHW", "TransposeToChannelFirst"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Pad",
|
||||
new[]
|
||||
{
|
||||
"Border2D_NHWC", "Border2D_NCHW", "Pad2DEdge_NHWC", "Pad2DEdge_NCHW", "Pad2DReflect_NHWC",
|
||||
"Pad2DReflect_NCHW", "Pad2DSymmetric_NHWC", "Pad2DSymmetric_NCHW"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Transpose",
|
||||
new[]
|
||||
{
|
||||
"Transpose2D_NHWC","Transpose2D_NCHW","Transpose_NHWC","Transpose_NCHW","Transpose8D"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Pool_NHWC",
|
||||
new[]
|
||||
{
|
||||
"AvgPool2D_NHWC", "MaxPool2D_NHWC", "AvgPool2DReduce_NHWC", "MaxPool2DReduce_NHWC",
|
||||
"GlobalAvgPool2D_NHWC", "GlobalMaxPool2D_NHWC", "AvgVariancePool2DReduce_NHWC",
|
||||
"GlobalAvgVariancePool2D_NHWC"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Pool_NCHW",
|
||||
new[]
|
||||
{
|
||||
"AvgPool2D_NCHW", "MaxPool2D_NCHW", "AvgPool2DReduce_NCHW", "MaxPool2DReduce_NCHW",
|
||||
"GlobalAvgPool2D_NCHW", "GlobalMaxPool2D_NCHW", "AvgVariancePool2DReduce_NCHW",
|
||||
"GlobalAvgVariancePool2D_NCHW"
|
||||
});
|
||||
|
||||
RegisterKernels("Barracuda/Reduce",
|
||||
new[]
|
||||
{
|
||||
"PartialReduceMin", "PartialReduceMin_Loop",
|
||||
"GlobalReduceMin", "GlobalReduceMin_Loop",
|
||||
|
||||
"PartialReduceMax", "PartialReduceMax_Loop",
|
||||
"GlobalReduceMax", "GlobalReduceMax_Loop",
|
||||
|
||||
"PartialReduceSum", "PartialReduceSum_Loop",
|
||||
"GlobalReduceSum", "GlobalReduceSum_Loop",
|
||||
|
||||
"PartialReduceMean", "PartialReduceMean_Loop",
|
||||
"GlobalReduceMean", "GlobalReduceMean_Loop",
|
||||
|
||||
"PartialReduceProd", "PartialReduceProd_Loop",
|
||||
"GlobalReduceProd", "GlobalReduceProd_Loop",
|
||||
|
||||
"PartialReduceExpBias", "PartialReduceExpBias_Loop",
|
||||
"GlobalReduceExpBias", "GlobalReduceExpBias_Loop"
|
||||
});
|
||||
RegisterKernels("Barracuda/ReduceSlow",
|
||||
new[]
|
||||
{
|
||||
"ArgMax_NHWC", "ArgMax_NCHW", "ArgMin_NHWC", "ArgMin_NCHW"
|
||||
});
|
||||
}
|
||||
|
||||
private void RegisterKernels(string shaderName, string[] kernels)
|
||||
{
|
||||
foreach (var kernel in kernels)
|
||||
{
|
||||
mKernelToShaderName[kernel] = shaderName;
|
||||
}
|
||||
}
|
||||
|
||||
internal ComputeShader FindComputeShader(ComputeShaderContext ctx, string kernelName)
|
||||
{
|
||||
if (ctx == ComputeShaderContext.Optimized)
|
||||
return FindOptimizedComputeShader(kernelName);
|
||||
|
||||
return FindReferenceComputeShader(kernelName);
|
||||
}
|
||||
|
||||
private ComputeShader FindReferenceComputeShader(string kernelName)
|
||||
{
|
||||
if (EnableDebug) mUsedReferenceKernels.Add(kernelName);
|
||||
|
||||
return FindComputeShader("Barracuda/BarracudaReferenceImpl");
|
||||
}
|
||||
|
||||
private ComputeShader FindOptimizedComputeShader(string kernelName)
|
||||
{
|
||||
string shaderName = null;
|
||||
mKernelToShaderName.TryGetValue(kernelName, out shaderName);
|
||||
|
||||
// Kernel not found
|
||||
if (shaderName == null)
|
||||
return null;
|
||||
|
||||
if (EnableDebug) mUsedOptimizedKernels.Add(kernelName);
|
||||
|
||||
return FindComputeShader(shaderName);
|
||||
}
|
||||
|
||||
private ComputeShader FindComputeShader(string shaderName)
|
||||
{
|
||||
if (!mShaderNameToComputeShader.ContainsKey(shaderName))
|
||||
{
|
||||
Profiler.BeginSample(shaderName);
|
||||
mShaderNameToComputeShader[shaderName] = Resources.Load<ComputeShader>(shaderName);
|
||||
Profiler.EndSample();
|
||||
}
|
||||
|
||||
return mShaderNameToComputeShader[shaderName];
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Warmup reference kernels
|
||||
/// </summary>
|
||||
/// <param name="kernels">list of kernels to warm up</param>
|
||||
/// <returns>IEnumerator</returns>
|
||||
public IEnumerator WarmupReferenceKernels(List<string> kernels)
|
||||
{
|
||||
if (kernels?.Count > 0)
|
||||
FindComputeShader("Barracuda/BarracudaReferenceImpl");
|
||||
|
||||
yield break;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Warmup optimized kernels
|
||||
/// </summary>
|
||||
/// <param name="kernels">list of kernels to warm up</param>
|
||||
/// <returns>IEnumerator</returns>
|
||||
public IEnumerator WarmupOptimizedKernels(List<string> kernels)
|
||||
{
|
||||
foreach (var kernel in kernels)
|
||||
{
|
||||
var shader = mKernelToShaderName[kernel];
|
||||
if (!mShaderNameToComputeShader.ContainsKey(shader))
|
||||
{
|
||||
FindComputeShader(shader);
|
||||
yield return null;
|
||||
}
|
||||
}
|
||||
yield break;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Get used reference kernels list
|
||||
/// </summary>
|
||||
/// <returns>list of kernels</returns>
|
||||
public List<string> GetUsedReferenceKernels()
|
||||
{
|
||||
if (!EnableDebug)
|
||||
{
|
||||
D.LogWarning("List of used kernels was requested while ComputeShaderSingleton.EnableDebug == false");
|
||||
return null;
|
||||
}
|
||||
|
||||
return mUsedReferenceKernels.ToList();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Get used optimized kernels list
|
||||
/// </summary>
|
||||
/// <returns>list of kernels</returns>
|
||||
public List<string> GetUsedOptimizedKernels()
|
||||
{
|
||||
if (!EnableDebug)
|
||||
{
|
||||
D.LogWarning("List of used kernels was requested while ComputeShaderSingleton.EnableDebug == false");
|
||||
return null;
|
||||
}
|
||||
|
||||
return mUsedOptimizedKernels.ToList();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Singleton
|
||||
/// </summary>
|
||||
public static ComputeShaderSingleton Instance {
|
||||
get { return instance; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Check if GPU compute is supported
|
||||
/// </summary>
|
||||
public bool supported { get { return SystemInfo.supportsComputeShaders; } }
|
||||
}
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 815b6432da283415d87dabe9ef715cd9
|
||||
timeCreated: 1495620775
|
||||
licenseType: Pro
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,12 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: f7473266805a8439287433d3dac88945
|
||||
timeCreated: 1506427659
|
||||
licenseType: Pro
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,758 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq; // ToArray(), ToDictionary()
|
||||
|
||||
namespace Unity.Barracuda
|
||||
{
|
||||
internal class LinearLayerFusing
|
||||
{
|
||||
public static bool IsLayerLinear(Layer layer, Dictionary<string, Layer> constantLayers)
|
||||
{
|
||||
var constInputs = layer.inputs.Count(x => constantLayers.ContainsKey(x));
|
||||
bool allConstInputsButOne = (layer.inputs.Length - constInputs) == 1;
|
||||
|
||||
return layer.type == Layer.Type.Dense ||
|
||||
layer.type == Layer.Type.Conv2D || //TODO Conv3D
|
||||
layer.type == Layer.Type.DepthwiseConv2D ||
|
||||
layer.type == Layer.Type.ScaleBias ||
|
||||
IsLayerLinearMathOp(layer) && allConstInputsButOne;
|
||||
}
|
||||
|
||||
public static bool IsLayerLinearMathOp(Layer layer)
|
||||
{
|
||||
return layer.type == Layer.Type.Add ||
|
||||
layer.type == Layer.Type.Mul;
|
||||
}
|
||||
|
||||
public bool AreLayersFusable(Layer l0, Layer l1)
|
||||
{
|
||||
bool conditions = true;
|
||||
if ((l0.type == Layer.Type.DepthwiseConv2D) || (l0.type == Layer.Type.Conv2D) || (l0.type == Layer.Type.ScaleBias) &&
|
||||
(l1.type == Layer.Type.Conv2D) || (l1.type == Layer.Type.DepthwiseConv2D))
|
||||
conditions = conditions && !l1.pad.Any(x => x != 0); // padding breaks bias merging for non-zero bias
|
||||
if (IsLayerLinearMathOp(l0) && (l1.type == Layer.Type.Conv2D))
|
||||
{
|
||||
if (l0.datasets == null || l0.datasets.Length != 1)
|
||||
return false;
|
||||
conditions = conditions && (l0.datasets[0].shape.length == 1) ||
|
||||
(l0.datasets[0].shape.batch == 1 && l0.datasets[0].shape.height == 1 && l0.datasets[0].shape.width == 1 && l0.datasets[0].shape.channels == l1.datasets[0].shape.kernelCount);
|
||||
}
|
||||
if ((l0.type == Layer.Type.Conv2D) && IsLayerLinearMathOp(l1))
|
||||
{
|
||||
if (l1.datasets == null || l1.datasets.Length != 1)
|
||||
return false;
|
||||
conditions = conditions && (l1.datasets[0].shape.length == 1) ||
|
||||
(l1.datasets[0].shape.batch == 1 && l1.datasets[0].shape.height == 1 && l1.datasets[0].shape.width == 1 && l1.datasets[0].shape.channels == l0.datasets[0].shape.kernelCount);
|
||||
}
|
||||
|
||||
return m_LayerFusers.ContainsKey((l0.type, l1.type)) && conditions;
|
||||
}
|
||||
|
||||
private readonly BurstCPUOps m_Ops = new BurstCPUOps();
|
||||
|
||||
private readonly Dictionary<(Layer.Type, Layer.Type), Func<Layer, Layer, Layer>> m_LayerFusers =
|
||||
new Dictionary<(Layer.Type, Layer.Type), Func<Layer, Layer, Layer>>();
|
||||
|
||||
private void Add((Layer.Type, Layer.Type) layersType, Func<Layer, Layer, Layer> opFuseAction)
|
||||
{
|
||||
m_LayerFusers.Add(layersType, opFuseAction);
|
||||
}
|
||||
public LinearLayerFusing()
|
||||
{
|
||||
Add((Layer.Type.Add, Layer.Type.Add), (l0, l1) =>
|
||||
{
|
||||
Tensor bias0 = l0.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(0);
|
||||
|
||||
int rankO = Math.Max(bias0.dimensions, bias1.dimensions);
|
||||
if (l0.axis >= 0 && l1.axis >= 0) // legacy tests don't store constant rank in axis
|
||||
{
|
||||
// broadcast rule
|
||||
int rank0 = l0.axis;
|
||||
List<int> shape0 = Compiler.IRShapeInferenceHelper.ShapeInference.ShapeToOnnxLayout(bias0.shape, rank0);
|
||||
rank0 = Math.Max(rank0, 1);
|
||||
int rank1 = l1.axis;
|
||||
List<int> shape1 = Compiler.IRShapeInferenceHelper.ShapeInference.ShapeToOnnxLayout(bias1.shape, rank1);
|
||||
rank1 = Math.Max(rank1, 1);
|
||||
|
||||
rankO = Math.Max(rank0, rank1);
|
||||
for (int k = 0; k < rankO - rank0; k++)
|
||||
shape0.Insert(0, 1);
|
||||
for (int k = 0; k < rankO - rank1; k++)
|
||||
shape1.Insert(0, 1);
|
||||
|
||||
bias0 = bias0.Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.OnnxLayoutToTensorShape(shape0.ToArray()));
|
||||
bias1 = bias1.Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.OnnxLayoutToTensorShape(shape1.ToArray()));
|
||||
}
|
||||
|
||||
TensorShape biasShape = TensorExtensions.MaxShape(new [] { bias0, bias1 });
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = new Layer.DataSet[1];
|
||||
lmerged.datasets[0].name = l0.datasets[0].name;
|
||||
lmerged.datasets[0].shape = biasShape;
|
||||
lmerged.datasets[0].itemSizeInBytes = 4;
|
||||
lmerged.datasets[0].length = biasShape.length;
|
||||
lmerged.datasets[0].offset = 0;
|
||||
lmerged.weights = new BarracudaArray(biasShape.length);
|
||||
lmerged.axis = rankO;
|
||||
|
||||
Tensor bias = m_Ops.Add(new [] { bias0, bias1 });
|
||||
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, 0, bias.length);
|
||||
|
||||
bias.Dispose();
|
||||
bias0.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Mul, Layer.Type.Mul), (l0, l1) =>
|
||||
{
|
||||
Tensor scale0 = l0.DataSetToTensor(0);
|
||||
Tensor scale1 = l1.DataSetToTensor(0);
|
||||
|
||||
int rankO = Math.Max(scale0.dimensions, scale1.dimensions);
|
||||
if (l0.axis >= 0 && l1.axis >= 0) // legacy tests don't store constant rank in axis
|
||||
{
|
||||
// broadcast rule
|
||||
int rank0 = l0.axis;
|
||||
List<int> shape0 = Compiler.IRShapeInferenceHelper.ShapeInference.ShapeToOnnxLayout(scale0.shape, rank0);
|
||||
rank0 = Math.Max(rank0, 1);
|
||||
int rank1 = l1.axis;
|
||||
List<int> shape1 = Compiler.IRShapeInferenceHelper.ShapeInference.ShapeToOnnxLayout(scale1.shape, rank1);
|
||||
rank1 = Math.Max(rank1, 1);
|
||||
|
||||
rankO = Math.Max(rank0, rank1);
|
||||
for (int k = 0; k < rankO - rank0; k++)
|
||||
shape0.Insert(0, 1);
|
||||
for (int k = 0; k < rankO - rank1; k++)
|
||||
shape1.Insert(0, 1);
|
||||
|
||||
scale0 = scale0.Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.OnnxLayoutToTensorShape(shape0.ToArray()));
|
||||
scale1 = scale1.Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.OnnxLayoutToTensorShape(shape1.ToArray()));
|
||||
}
|
||||
|
||||
TensorShape biasShape = TensorExtensions.MaxShape(new[] { scale0, scale1 });
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = new Layer.DataSet[1];
|
||||
lmerged.datasets[0].name = l0.datasets[0].name;
|
||||
lmerged.datasets[0].shape = biasShape;
|
||||
lmerged.datasets[0].itemSizeInBytes = 4;
|
||||
lmerged.datasets[0].length = biasShape.length;
|
||||
lmerged.datasets[0].offset = 0;
|
||||
lmerged.weights = new BarracudaArray(biasShape.length);
|
||||
lmerged.axis = rankO;
|
||||
|
||||
Tensor bias = m_Ops.Mul(new[] { scale0, scale1 });
|
||||
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, 0, bias.length);
|
||||
|
||||
bias.Dispose();
|
||||
scale0.Dispose();
|
||||
scale1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.ScaleBias, Layer.Type.ScaleBias), (l0, l1) =>
|
||||
{
|
||||
Tensor scale0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor scale1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l0.datasets;
|
||||
lmerged.weights = new BarracudaArray(l0.weights.Length);
|
||||
|
||||
// s1*(s0*x + b0)+b1 = s1*s0*x + s1*b0+b1
|
||||
Tensor scale = m_Ops.Mul(new [] { scale1, scale0});
|
||||
Tensor bias = m_Ops.ScaleBias(bias0, scale1, bias1);
|
||||
|
||||
BarracudaArray.Copy(scale.ToReadOnlyArray(), 0, lmerged.weights, 0, scale.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, scale.length, bias.length);
|
||||
|
||||
scale.Dispose();
|
||||
bias.Dispose();
|
||||
scale0.Dispose();
|
||||
bias0.Dispose();
|
||||
scale1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.ScaleBias, Layer.Type.Dense), (l0, l1) =>
|
||||
{
|
||||
Tensor scale0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor weights1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l1.type);
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l1.datasets;
|
||||
lmerged.weights = new BarracudaArray(l1.weights.Length);
|
||||
|
||||
// b = W1 x b0 + b1
|
||||
Tensor bias = m_Ops.Dense(bias0, weights1, bias1, Layer.FusedActivation.None);
|
||||
|
||||
// W = W1 x s
|
||||
Tensor weights = new Tensor(weights1.shape);
|
||||
for (int x = 0; x < weights1.flatWidth; ++x)
|
||||
for (int i = 0; i < weights1.flatHeight; ++i)
|
||||
{
|
||||
int c = i % bias0.length;
|
||||
float gamma = scale0[c];
|
||||
|
||||
float w = weights1[i, x];
|
||||
weights[i, x] = w * gamma;
|
||||
}
|
||||
|
||||
BarracudaArray.Copy(weights.ToReadOnlyArray(), 0, lmerged.weights, 0, weights.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, weights.length, bias.length);
|
||||
|
||||
bias.Dispose();
|
||||
weights.Dispose();
|
||||
scale0.Dispose();
|
||||
bias0.Dispose();
|
||||
weights1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Dense, Layer.Type.ScaleBias), (l0, l1) =>
|
||||
{
|
||||
Tensor weights0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor scale1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l0.datasets;
|
||||
lmerged.weights = new BarracudaArray(l0.weights.Length);
|
||||
|
||||
// w = s1*w0
|
||||
Tensor weights = m_Ops.Mul(new [] { scale1, weights0 });
|
||||
// b = s1*b0+b1
|
||||
Tensor bias = m_Ops.ScaleBias(bias0, scale1, bias1);
|
||||
|
||||
BarracudaArray.Copy(weights.ToReadOnlyArray(), 0, lmerged.weights, 0, weights.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, weights.length, bias.length);
|
||||
|
||||
weights.Dispose();
|
||||
bias.Dispose();
|
||||
weights0.Dispose();
|
||||
bias0.Dispose();
|
||||
scale1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Mul, Layer.Type.Conv2D), (l0, l1) =>
|
||||
{
|
||||
Tensor scale0 = l0.DataSetToTensor(0);
|
||||
|
||||
Tensor kernel1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l1.type);
|
||||
lmerged.pad = l1.pad;
|
||||
lmerged.stride = l1.stride;
|
||||
lmerged.pool = l1.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l1.datasets;
|
||||
lmerged.weights = new BarracudaArray(l1.weights.Length);
|
||||
|
||||
// k = k * s
|
||||
Tensor kernel = new Tensor(kernel1.shape);
|
||||
|
||||
for (int y = 0; y < kernel1.kernelHeight; ++y)
|
||||
for (int x = 0; x < kernel1.kernelWidth; ++x)
|
||||
for (int c = 0; c < kernel1.kernelDepth; ++c)
|
||||
{
|
||||
float gamma = scale0[scale0.IndexWithBroadcast(0, 0, 0, c)];
|
||||
for (int k = 0; k < kernel1.kernelCount; ++k)
|
||||
{
|
||||
float w = kernel1[y, x, c, k];
|
||||
kernel[y, x, c, k] = gamma * w;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
|
||||
BarracudaArray.Copy(bias1.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias1.length);
|
||||
|
||||
kernel.Dispose();
|
||||
scale0.Dispose();
|
||||
kernel1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Conv2D, Layer.Type.Mul), (l0, l1) =>
|
||||
{
|
||||
Tensor kernel0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor scale1 = l1.DataSetToTensor(0);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.pad = l0.pad;
|
||||
lmerged.stride = l0.stride;
|
||||
lmerged.pool = l0.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l0.datasets;
|
||||
lmerged.weights = new BarracudaArray(l0.weights.Length);
|
||||
|
||||
// k = s1*k0
|
||||
Tensor kernel = m_Ops.Mul(new[] { scale1, kernel0 });
|
||||
// b = s1*b0
|
||||
Tensor bias = m_Ops.Mul(new[] { scale1, bias0 });
|
||||
|
||||
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
|
||||
|
||||
kernel.Dispose();
|
||||
bias.Dispose();
|
||||
kernel0.Dispose();
|
||||
bias0.Dispose();
|
||||
scale1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Add, Layer.Type.Conv2D), (l0, l1) =>
|
||||
{
|
||||
Tensor bias0 = l0.DataSetToTensor(0);
|
||||
|
||||
Tensor kernel1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l1.type);
|
||||
lmerged.pad = l1.pad;
|
||||
lmerged.stride = l1.stride;
|
||||
lmerged.pool = l1.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l1.datasets;
|
||||
lmerged.weights = new BarracudaArray(l1.weights.Length);
|
||||
|
||||
// k = k
|
||||
// b = Sum_k[wk * beta] + b
|
||||
Tensor bias = new Tensor(bias1.shape, bias1.ToReadOnlyArray());
|
||||
for (int y = 0; y < kernel1.kernelHeight; ++y)
|
||||
for (int x = 0; x < kernel1.kernelWidth; ++x)
|
||||
for (int c = 0; c < kernel1.kernelDepth; ++c)
|
||||
{
|
||||
float beta = bias0[bias0.IndexWithBroadcast(0, 0, 0, c)];
|
||||
for (int k = 0; k < kernel1.kernelCount; ++k)
|
||||
{
|
||||
float w = kernel1[y, x, c, k];
|
||||
bias[k] += w * beta;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
BarracudaArray.Copy(kernel1.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel1.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel1.length, bias.length);
|
||||
|
||||
bias.Dispose();
|
||||
bias0.Dispose();
|
||||
kernel1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Conv2D, Layer.Type.Add), (l0, l1) =>
|
||||
{
|
||||
Tensor kernel0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor bias1 = l1.DataSetToTensor(0);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.pad = l0.pad;
|
||||
lmerged.stride = l0.stride;
|
||||
lmerged.pool = l0.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l0.datasets;
|
||||
lmerged.weights = new BarracudaArray(l0.weights.Length);
|
||||
|
||||
// b = b0+b1
|
||||
Tensor bias = m_Ops.Add( new [] { bias0, bias1 });
|
||||
|
||||
BarracudaArray.Copy(kernel0.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel0.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel0.length, bias.length);
|
||||
|
||||
bias.Dispose();
|
||||
kernel0.Dispose();
|
||||
bias0.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Conv2D, Layer.Type.ScaleBias), (l0, l1) =>
|
||||
{
|
||||
Tensor kernel0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor scale1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.pad = l0.pad;
|
||||
lmerged.stride = l0.stride;
|
||||
lmerged.pool = l0.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l0.datasets;
|
||||
lmerged.weights = new BarracudaArray(l0.weights.Length);
|
||||
|
||||
// k = s1*k0
|
||||
Tensor kernel = m_Ops.Mul(new[] { scale1, kernel0 });
|
||||
// b = s1*b0+b1
|
||||
Tensor bias = m_Ops.ScaleBias(bias0, scale1, bias1);
|
||||
|
||||
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
|
||||
|
||||
kernel.Dispose();
|
||||
bias.Dispose();
|
||||
kernel0.Dispose();
|
||||
bias0.Dispose();
|
||||
scale1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.ScaleBias, Layer.Type.Conv2D), (l0, l1) =>
|
||||
{
|
||||
Tensor scale0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor kernel1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l1.type);
|
||||
lmerged.pad = l1.pad;
|
||||
lmerged.stride = l1.stride;
|
||||
lmerged.pool = l1.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l1.datasets;
|
||||
lmerged.weights = new BarracudaArray(l1.weights.Length);
|
||||
|
||||
// k = k * s
|
||||
Tensor kernel = new Tensor(kernel1.shape);
|
||||
// b = Sum_k[wk * beta] + b
|
||||
Tensor bias = new Tensor(bias1.shape, bias1.ToReadOnlyArray());
|
||||
for (int y = 0; y < kernel1.kernelHeight; ++y)
|
||||
for (int x = 0; x < kernel1.kernelWidth; ++x)
|
||||
for (int c = 0; c < kernel1.kernelDepth; ++c)
|
||||
{
|
||||
float beta = bias0[0, 0, 0, c];
|
||||
float gamma = scale0[0, 0, 0, c];
|
||||
for (int k = 0; k < kernel1.kernelCount; ++k)
|
||||
{
|
||||
float w = kernel1[y, x, c, k];
|
||||
kernel[y, x, c, k] = gamma * w;
|
||||
bias[k] += w * beta;
|
||||
}
|
||||
}
|
||||
|
||||
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
|
||||
|
||||
kernel.Dispose();
|
||||
bias.Dispose();
|
||||
scale0.Dispose();
|
||||
bias0.Dispose();
|
||||
kernel1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.DepthwiseConv2D, Layer.Type.ScaleBias), (l0, l1) =>
|
||||
{
|
||||
Tensor kernel0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor scale1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l0.type);
|
||||
lmerged.pad = l0.pad;
|
||||
lmerged.stride = l0.stride;
|
||||
lmerged.pool = l0.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l0.datasets;
|
||||
lmerged.weights = new BarracudaArray(l0.weights.Length);
|
||||
|
||||
// k = s1*k0
|
||||
Tensor kernel = m_Ops.Mul(new[] { scale1, kernel0 });
|
||||
// b = s1*b0+b1
|
||||
Tensor bias = m_Ops.ScaleBias(bias0, scale1, bias1);
|
||||
|
||||
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
|
||||
|
||||
kernel.Dispose();
|
||||
bias.Dispose();
|
||||
kernel0.Dispose();
|
||||
bias0.Dispose();
|
||||
scale1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.ScaleBias, Layer.Type.DepthwiseConv2D), (l0, l1) =>
|
||||
{
|
||||
Tensor scale0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
Tensor kernel1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l1.type);
|
||||
lmerged.pad = l1.pad;
|
||||
lmerged.stride = l1.stride;
|
||||
lmerged.pool = l1.pool;
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = l1.datasets;
|
||||
lmerged.weights = new BarracudaArray(l1.weights.Length);
|
||||
|
||||
// k = k * s
|
||||
Tensor kernel = new Tensor(kernel1.shape);
|
||||
// b = Sum_k[wk * beta] + b
|
||||
Tensor bias = new Tensor(bias1.shape);
|
||||
for (int k = 0; k < kernel1.kernelCount; ++k)
|
||||
{
|
||||
float b = bias1[k];
|
||||
|
||||
float beta = bias0[0, 0, 0, k];
|
||||
float gamma = scale0[0, 0, 0, k];
|
||||
for (int y = 0; y < kernel1.kernelHeight; ++y)
|
||||
for (int x = 0; x < kernel1.kernelWidth; ++x)
|
||||
{
|
||||
float w = kernel1[y, x, 0, k];
|
||||
kernel[y, x, 0, k] = gamma * w;
|
||||
b += w * beta;
|
||||
}
|
||||
|
||||
bias[k] = b;
|
||||
}
|
||||
|
||||
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
|
||||
|
||||
kernel.Dispose();
|
||||
bias.Dispose();
|
||||
scale0.Dispose();
|
||||
bias0.Dispose();
|
||||
kernel1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Dense, Layer.Type.Dense), (l0, l1) =>
|
||||
{
|
||||
var weights0 = l0.DataSetToTensor(0);
|
||||
var bias0 = l0.DataSetToTensor(1);
|
||||
|
||||
var weights1 = l1.DataSetToTensor(0);
|
||||
var bias1 = l1.DataSetToTensor(1);
|
||||
|
||||
TensorShape weightsShape = new TensorShape(weights0.shape.flatHeight, weights1.shape.flatWidth);
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l1.type);
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.datasets = new Layer.DataSet[2];
|
||||
lmerged.datasets[0].name = weights0.name;
|
||||
lmerged.datasets[0].shape = weightsShape;
|
||||
lmerged.datasets[0].itemSizeInBytes = 4;
|
||||
lmerged.datasets[0].length = weightsShape.length;
|
||||
lmerged.datasets[0].offset = 0;
|
||||
|
||||
lmerged.datasets[1].name = bias0.name;
|
||||
lmerged.datasets[1].shape = bias1.shape;
|
||||
lmerged.datasets[1].itemSizeInBytes = 4;
|
||||
lmerged.datasets[1].length = bias1.length;
|
||||
lmerged.datasets[1].offset = weightsShape.length;
|
||||
lmerged.weights = new BarracudaArray(weightsShape.length + bias1.shape.length);
|
||||
|
||||
// W = W1 x W0
|
||||
Tensor weights = m_Ops.MatMul(weights0, false, weights1, false);
|
||||
// b = W1 x b0 + b1
|
||||
Tensor bias = m_Ops.Dense(bias0, weights1, bias1, Layer.FusedActivation.None);
|
||||
|
||||
BarracudaArray.Copy(weights.ToReadOnlyArray(), 0, lmerged.weights, 0, weights.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, weights.length, bias.length);
|
||||
|
||||
weights.Dispose();
|
||||
bias.Dispose();
|
||||
weights0.Dispose();
|
||||
bias0.Dispose();
|
||||
weights1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
Add((Layer.Type.Conv2D, Layer.Type.Conv2D), (l0, l1) =>
|
||||
{
|
||||
Tensor kernel0 = l0.DataSetToTensor(0);
|
||||
Tensor bias0 = l0.DataSetToTensor(1);
|
||||
var strides0 = l0.stride;
|
||||
var pad0 = l0.pad;
|
||||
|
||||
Tensor kernel1 = l1.DataSetToTensor(0);
|
||||
Tensor bias1 = l1.DataSetToTensor(1);
|
||||
var strides1 = l1.stride;
|
||||
var pad1 = l1.pad;
|
||||
|
||||
|
||||
// Y = (X * K0 + b0) * K1 + b1
|
||||
// = (X * K0) * K1 + (b0 * K1 + b1)
|
||||
// = X * (K0 * k1) + (b0 * K1 + b1)
|
||||
// = X * K2 + b2
|
||||
// K2 dimensions:
|
||||
// kernelDepth and kernelCount:
|
||||
// X = [n, . , . , c0], K0 = [ . , . , c0, d0] , K1 = [ . , . , c1, d1]
|
||||
// => Km = [ x , x , c0, d1]
|
||||
// kernelHeight and kernelHeight:
|
||||
// Y = (((X + 2*p0 - k0)/s0 + 1) + 2*p1 - k1)/s1 + 1
|
||||
// = ((X + 2*p0 - k0 + s0 + 2*p1*s0 - k1*s0)/s0)/s1 + 1
|
||||
// = (X + 2*p0 - k0 + s0 + 2*p1*s0 - k1*s0) / (s0*s1) + 1
|
||||
// = (X + 2*(p0+p1*s0) - (k0 + k1*s0 - s0)) / (s0*s1) + 1
|
||||
// => pad = p0 + p1*s0
|
||||
// kernel = k0 + s0*(k1 - 1)
|
||||
// stride = s0*s1
|
||||
TensorShape kernelShape = new TensorShape(kernel0.kernelHeight + (kernel1.kernelHeight - 1) * strides0[0],
|
||||
kernel0.kernelWidth + (kernel1.kernelWidth - 1) * strides0[1],
|
||||
kernel0.kernelDepth, kernel1.kernelCount);
|
||||
|
||||
var pad = new int[4] { pad0[0] + pad1[0] * strides0[0], pad0[1] + pad1[1] * strides0[1],
|
||||
pad0[2] + pad1[2] * strides0[0], pad0[3] + pad1[3] * strides0[1] };
|
||||
var strides = new int[2] { strides0[0] * strides1[0], strides0[1] * strides1[1] };
|
||||
|
||||
TensorShape biasShape = bias1.shape;
|
||||
|
||||
|
||||
Layer lmerged = new Layer(l0.name, l1.type);
|
||||
lmerged.inputs = l0.inputs;
|
||||
lmerged.stride = strides;
|
||||
lmerged.pad = pad;
|
||||
lmerged.datasets = new Layer.DataSet[2];
|
||||
lmerged.datasets[0].name = kernel0.name;
|
||||
lmerged.datasets[0].shape = kernelShape;
|
||||
lmerged.datasets[0].itemSizeInBytes = 4;
|
||||
lmerged.datasets[0].length = kernelShape.length;
|
||||
lmerged.datasets[0].offset = 0;
|
||||
|
||||
lmerged.datasets[1].name = bias0.name;
|
||||
lmerged.datasets[1].shape = biasShape;
|
||||
lmerged.datasets[1].itemSizeInBytes = 4;
|
||||
lmerged.datasets[1].length = biasShape.length;
|
||||
lmerged.datasets[1].offset = kernelShape.length;
|
||||
lmerged.weights = new BarracudaArray(kernelShape.length + biasShape.length);
|
||||
|
||||
|
||||
Tensor kernel = new Tensor(kernelShape); // 0-filled by default
|
||||
// |x0 x1 x3 | x4 |y0 y1| y2 |z0| z1
|
||||
// |x5 x6 x7 | x8 * k0 k1 => |y3 y4| y5 * l0 l1 => z2 z3
|
||||
// |x9 x10 x11| x12 k2 k3 y6 y7 y8 l2 l3
|
||||
// x13 x14 x15 x13
|
||||
//
|
||||
// in order to compute z0, we need to do 2 convolutions
|
||||
//
|
||||
// |y0 y1/
|
||||
// | |x0 /x1| x3/ |
|
||||
// | |x5 /x6| x7/ |
|
||||
// | x9 x10 x11 |
|
||||
//
|
||||
// |x0 x1| is convolved with K and then * l0
|
||||
// |x5 x6|
|
||||
// /x1 x3/ is convolved with K and then * l1
|
||||
// /x6 x7/
|
||||
//
|
||||
// by unwrapping the whole process
|
||||
// z0 = [x0 * k0 * l0 + x1 * k1 * l0 + ....] + [x1 * k1 * l1 + ....]
|
||||
// l0 * y0-block l1 * y1-block
|
||||
// resulting conv kernel is the following
|
||||
//
|
||||
// z0 = | x0 x1 x3 | * | [k0*l0] [k1*l0 + k1*l1] [l2*l1] |
|
||||
// | x5 x6 x7 | | [k2*l0 + k2*l2] [k3*l0 + k2*l1 + k1*l2 + k0*l3] [k3*l1 + k3*l3] |
|
||||
// | x9 x10 x11 | | [k2*l2] [k2*l0 + k2*l3 [k3*l3] |
|
||||
Tensor kernel0T = m_Ops.Transpose(kernel0, new[] { 2, 0, 1, 3 });
|
||||
Tensor emptyB = new Tensor(new TensorShape(1, 1, 1, kernel.kernelCount));
|
||||
for (int y1 = 0; y1 < kernel1.kernelHeight; ++y1)
|
||||
for (int x1 = 0; x1 < kernel1.kernelWidth; ++x1)
|
||||
{
|
||||
Tensor kernel1XY = m_Ops.StridedSlice(kernel1, new[] { y1, x1, 0, 0 }, new[] { y1 + 1, x1 + 1, kernel1.kernelDepth, kernel.kernelCount }, new[] { 1, 1, 1, 1 });
|
||||
Tensor kernelk = m_Ops.Conv2D(kernel0T, kernel1XY, emptyB, new[] { 1, 1 }, new[] { 0, 0, 0, 0 }, Layer.FusedActivation.None);
|
||||
|
||||
for (int y0 = 0; y0 < kernel0.kernelHeight; ++y0)
|
||||
for (int x0 = 0; x0 < kernel0.kernelWidth; ++x0)
|
||||
{
|
||||
int ox = x0 + strides0[0] * x1;
|
||||
int oy = y0 + strides0[1] * y1;
|
||||
for (int c = 0; c < kernel.kernelDepth; ++c)
|
||||
for (int k = 0; k < kernel.kernelCount; ++k)
|
||||
{
|
||||
kernel[oy, ox, c, k] += kernelk[c,y0,x0,k];
|
||||
}
|
||||
}
|
||||
kernel1XY.Dispose();
|
||||
kernelk.Dispose();
|
||||
}
|
||||
|
||||
// |y0 y1| * l0 l1 + bl = z0
|
||||
// |y3 y4| l2 l3
|
||||
// y0 = Sum_k() + bk, y1 = Sum_k() + bk
|
||||
// y2 = Sum_k() + bk, y2 = Sum_k() + bk
|
||||
//
|
||||
// moving b from the convolution process leads
|
||||
// z0 = | x0 x1 x3 | * M + bl + l0*bk + l1*bk + l2*bk + l3*bk
|
||||
// | x5 x6 x7 |
|
||||
// | x9 x10 x11 |
|
||||
// N.B: as you can see this breaks if there is some amount of zero-padding to the second conv layer
|
||||
// because some weights of L will be * 0, essentialy masking out bk
|
||||
Tensor bias = new Tensor(biasShape, bias1.ToReadOnlyArray());
|
||||
for (int x1 = 0; x1 < kernel1.kernelWidth; ++x1)
|
||||
for (int y1 = 0; y1 < kernel1.kernelHeight; ++y1)
|
||||
for (int c = 0; c < kernel1.kernelDepth; ++c)
|
||||
{
|
||||
float bias0c = bias0[c];
|
||||
for (var k = 0; k < kernel.kernelCount; ++k)
|
||||
{
|
||||
bias[k] += kernel1[y1, x1, c, k] * bias0c;
|
||||
}
|
||||
}
|
||||
|
||||
BarracudaArray.Copy(kernel.ToReadOnlyArray(), 0, lmerged.weights, 0, kernel.length);
|
||||
BarracudaArray.Copy(bias.ToReadOnlyArray(), 0, lmerged.weights, kernel.length, bias.length);
|
||||
|
||||
kernel0T.Dispose();
|
||||
emptyB.Dispose();
|
||||
kernel.Dispose();
|
||||
bias.Dispose();
|
||||
kernel0.Dispose();
|
||||
bias0.Dispose();
|
||||
kernel1.Dispose();
|
||||
bias1.Dispose();
|
||||
|
||||
return lmerged;
|
||||
});
|
||||
}
|
||||
|
||||
public Layer FuseLayers(Layer l0, Layer l1)
|
||||
{
|
||||
var fnFuse = m_LayerFusers[(l0.type, l1.type)];
|
||||
return fnFuse(l0, l1);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: b940ee731fee3c3478e90a161a7a7288
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,259 +0,0 @@
|
||||
using System;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Threading.Tasks;
|
||||
using UnityEngine.Assertions;
|
||||
using UnityEngine.Scripting;
|
||||
|
||||
using Unity.Collections;
|
||||
using Unity.Collections.LowLevel.Unsafe;
|
||||
using Unity.Jobs;
|
||||
|
||||
[assembly: InternalsVisibleTo("Unity.Barracuda.BurstBLAS")]
|
||||
|
||||
namespace Unity.Barracuda
|
||||
{
|
||||
[Preserve]
|
||||
internal class CSharpBLAS : BLASPlugin
|
||||
{
|
||||
public bool IsNative()
|
||||
{
|
||||
return false; // reference implementation
|
||||
}
|
||||
|
||||
public bool IsCurrentPlatformSupported()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
public unsafe void SGEMM(float* Ap, int AM, int AN, float* Bp, int BM, int BN, float* Cp, int CM, int CN, int bs,
|
||||
bool transposeA = false, bool transposeB = false)
|
||||
{
|
||||
MatrixUtils.MultiplyBlockUnrollHx8ParallelWithPadding(Ap, AM, AN, Bp, BM, BN, Cp, CM, CN, bs,
|
||||
transposeA, transposeB);
|
||||
}
|
||||
|
||||
public unsafe JobHandle ScheduleSGEMM(JobHandle dependsOn,
|
||||
float* Ap, int AM, int AN, float* Bp, int BM, int BN, float* Cp, int CM, int CN,
|
||||
int bs,
|
||||
bool transposeA = false, bool transposeB = false)
|
||||
{
|
||||
var job = new SGEMMJob();
|
||||
job.Ap = Ap; job.AM = AM; job.AN = AN;
|
||||
job.Bp = Bp; job.BM = BM; job.BN = BN;
|
||||
job.Cp = Cp; job.CM = CM; job.CN = CN;
|
||||
job.transposeA = transposeA;
|
||||
job.transposeB = transposeB;
|
||||
job.bs = bs;
|
||||
return job.Schedule(dependsOn);
|
||||
}
|
||||
|
||||
unsafe struct SGEMMJob : IJob
|
||||
{
|
||||
[NativeDisableUnsafePtrRestriction][ReadOnly] public unsafe float* Ap;
|
||||
public int AM, AN;
|
||||
[NativeDisableUnsafePtrRestriction][ReadOnly] public unsafe float* Bp;
|
||||
public int BM, BN;
|
||||
[NativeDisableUnsafePtrRestriction] public unsafe float* Cp;
|
||||
public int CM, CN;
|
||||
public int bs;
|
||||
public bool transposeA;
|
||||
public bool transposeB;
|
||||
|
||||
public void Execute()
|
||||
{
|
||||
MatrixUtils.MultiplyBlockUnrollHx8ParallelWithPadding(
|
||||
Ap, AM, AN,
|
||||
Bp, BM, BN,
|
||||
Cp, CM, CN, bs,
|
||||
transposeA, transposeB);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
internal class MatrixUtils
|
||||
{
|
||||
public static unsafe void CopyBlockWithPadding(float* matrixIn, int row, int M, int col, int N, float[] blockOut, int bs, bool transpose = false)
|
||||
{
|
||||
Array.Clear(blockOut, 0, bs * bs);
|
||||
|
||||
var rowFinal = Math.Min(row + bs, M);
|
||||
var count = Math.Min(col + bs, N) - col;
|
||||
|
||||
// @TODO: measure which one is better - sequential access over matrix memory or blockOut cache
|
||||
if (transpose)
|
||||
{
|
||||
// sequential access over blockOut, strided over matrixIn
|
||||
//for (var i = row; i < rowFinal; i++)
|
||||
// for (var j = 0; j < count; ++j)
|
||||
// blockOut[(i - row) * bs + j] = matrixIn[i + (col + j) * N];
|
||||
|
||||
// sequential access over matrixIn, strided over blockOut
|
||||
for (var j = 0; j < count; ++j)
|
||||
for (var i = row; i < rowFinal; i++)
|
||||
blockOut[(i - row) * bs + j] = matrixIn[i + (col + j) * M];
|
||||
}
|
||||
else
|
||||
for (var i = row; i < rowFinal; i++)
|
||||
{
|
||||
//D.Log(string.Format("Copy[{3}] {0} -> {1} {2}", i * M + col, (i - row) * bs, count, i));
|
||||
Marshal.Copy((IntPtr)(matrixIn + i * N + col), blockOut, (i - row) * bs, count);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static unsafe void ClearFloatArray(float* arr, float val, int count)
|
||||
{
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
arr[i] = val;
|
||||
}
|
||||
}
|
||||
|
||||
public static unsafe void CopyFloatArray(float* from, float* to, int count)
|
||||
{
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
to[i] = from[i];
|
||||
}
|
||||
}
|
||||
|
||||
public static unsafe void CopyBlockWithPadding(float* matrixIn, int row, int M, int col, int N, float* blockOut, int bs, bool transpose = false)
|
||||
{
|
||||
ClearFloatArray(blockOut, 0, bs * bs);
|
||||
|
||||
var rowFinal = Math.Min(row + bs, M);
|
||||
var count = Math.Min(col + bs, N) - col;
|
||||
|
||||
// @TODO: measure which one is better - sequential access over matrix memory or blockOut cache
|
||||
if (transpose)
|
||||
{
|
||||
// sequential access over blockOut, strided over matrixIn
|
||||
//for (var i = row; i < rowFinal; i++)
|
||||
// for (var j = 0; j < count; ++j)
|
||||
// blockOut[(i - row) * bs + j] = matrixIn[i + (col + j) * N];
|
||||
|
||||
// sequential access over matrixIn, strided over blockOut
|
||||
for (var j = 0; j < count; ++j)
|
||||
for (var i = row; i < rowFinal; i++)
|
||||
blockOut[(i - row) * bs + j] = matrixIn[i + (col + j) * M];
|
||||
}
|
||||
else
|
||||
for (var i = row; i < rowFinal; i++)
|
||||
{
|
||||
//D.Log(string.Format("Copy[{3}] {0} -> {1} {2}", i * M + col, (i - row) * bs, count, i));
|
||||
CopyFloatArray(matrixIn + i * N + col, blockOut + (i - row) * bs, count);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static unsafe void CopyBlockWithPadding(float[] blockOut, float* matrixIn, int row, int M, int col, int N, int bs)
|
||||
{
|
||||
var rowFinal = Math.Min(row + bs, M);
|
||||
var count = Math.Min(col + bs, N) - col;
|
||||
|
||||
for (var i = row; i < rowFinal; i++)
|
||||
Marshal.Copy(blockOut, (i - row) * bs, (IntPtr)(matrixIn + i * N + col), count);
|
||||
}
|
||||
|
||||
public static unsafe void CopyBlockWithPadding(float* blockOut, float* matrixIn, int row, int M, int col, int N, int bs)
|
||||
{
|
||||
var rowFinal = Math.Min(row + bs, M);
|
||||
var count = Math.Min(col + bs, N) - col;
|
||||
|
||||
for (var i = row; i < rowFinal; i++)
|
||||
CopyFloatArray(blockOut + (i - row) * bs, matrixIn + i * N + col, count);
|
||||
}
|
||||
|
||||
public static unsafe void MultiplyBlockUnrollHx8Padded(float* Ap,
|
||||
float* Bp,
|
||||
float* Cp, int bs)
|
||||
{
|
||||
for (int i = 0; i < bs; i++)
|
||||
{
|
||||
for (int j = 0; j < bs; j += 8)
|
||||
{
|
||||
int baseC = i * bs + j;
|
||||
float sum0 = *(Cp + baseC);
|
||||
float sum1 = *(Cp + baseC + 1);
|
||||
float sum2 = *(Cp + baseC + 2);
|
||||
float sum3 = *(Cp + baseC + 3);
|
||||
float sum4 = *(Cp + baseC + 4);
|
||||
float sum5 = *(Cp + baseC + 5);
|
||||
float sum6 = *(Cp + baseC + 6);
|
||||
float sum7 = *(Cp + baseC + 7);
|
||||
|
||||
for (int l = 0; l < bs; l++)
|
||||
{
|
||||
float A = Ap[i * bs + l];
|
||||
int baseB = l * bs + j;
|
||||
|
||||
sum0 += A * *(Bp + baseB);
|
||||
sum1 += A * *(Bp + baseB + 1);
|
||||
sum2 += A * *(Bp + baseB + 2);
|
||||
sum3 += A * *(Bp + baseB + 3);
|
||||
sum4 += A * *(Bp + baseB + 4);
|
||||
sum5 += A * *(Bp + baseB + 5);
|
||||
sum6 += A * *(Bp + baseB + 6);
|
||||
sum7 += A * *(Bp + baseB + 7);
|
||||
}
|
||||
|
||||
*(Cp + baseC) = sum0;
|
||||
*(Cp + baseC + 1) = sum1;
|
||||
*(Cp + baseC + 2) = sum2;
|
||||
*(Cp + baseC + 3) = sum3;
|
||||
*(Cp + baseC + 4) = sum4;
|
||||
*(Cp + baseC + 5) = sum5;
|
||||
*(Cp + baseC + 6) = sum6;
|
||||
*(Cp + baseC + 7) = sum7;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static unsafe void MultiplyBlockUnrollHx8ParallelWithPadding(float* Ap, int AM, int AN,
|
||||
float* Bp, int BM, int BN,
|
||||
float* Cp, int CM, int CN, int bs,
|
||||
bool transposeA = false, bool transposeB = false)
|
||||
{
|
||||
if (transposeA)
|
||||
{
|
||||
var tmp = AM; AM = AN; AN = tmp;
|
||||
}
|
||||
if (transposeB)
|
||||
{
|
||||
var tmp = BM; BM = BN; BN = tmp;
|
||||
}
|
||||
|
||||
int N = AM;
|
||||
{
|
||||
Assert.IsTrue(bs >= 8, "Matrix Mul block size should be >= 8");
|
||||
|
||||
Parallel.For(0, (BN / bs) + (BN % bs > 0 ? 1 : 0), colB =>
|
||||
{
|
||||
float[] blockA = new float[bs * bs];
|
||||
float[] blockB = new float[bs * bs];
|
||||
float[] blockC = new float[bs * bs];
|
||||
|
||||
for (int rowA = 0; rowA < N; rowA += bs)
|
||||
{
|
||||
for (int l = 0; l < AN; l += bs)
|
||||
{
|
||||
|
||||
CopyBlockWithPadding(Ap, rowA, AM, l, AN, blockA, bs, transposeA);
|
||||
CopyBlockWithPadding(Bp, l, BM, colB * bs, BN, blockB, bs, transposeB);
|
||||
CopyBlockWithPadding(Cp, rowA, CM, colB * bs, CN, blockC, bs);
|
||||
|
||||
fixed (float* blockAp = blockA, blockBp = blockB, blockCp = blockC)
|
||||
{
|
||||
MultiplyBlockUnrollHx8Padded(blockAp, blockBp, blockCp, bs);
|
||||
}
|
||||
|
||||
CopyBlockWithPadding(blockC, Cp, rowA, CM, colB * bs, CN, bs);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: bf04fe6d135714369af8cab2915b2735
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,985 +0,0 @@
|
||||
#if ENABLE_BARRACUDA_STATS
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using UnityEngine.Assertions;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
internal static class MemoryAndExecutionReportHelper
|
||||
{
|
||||
public static void GenerateStringReport(StringBuilder stringBuilder, ModelExecutionReport modelExecutionReport,
|
||||
bool spreadSheetFormat)
|
||||
{
|
||||
stringBuilder.Append($"Number of completed layers : {modelExecutionReport.CompletedLayerExecutionReports.Count}\n");
|
||||
if (modelExecutionReport.CurrentLayerExecutionReport != null)
|
||||
stringBuilder.Append("Warning: last layer was not completed. It will be logged, but it's information might be incomplete or erroneous.\n");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
List<LayerExecutionReport> allLayerReports = new List<LayerExecutionReport>();
|
||||
allLayerReports.AddRange(modelExecutionReport.CompletedLayerExecutionReports);
|
||||
if (modelExecutionReport.CurrentLayerExecutionReport != null)
|
||||
allLayerReports.Add(modelExecutionReport.CurrentLayerExecutionReport);
|
||||
|
||||
var layerExecutionViews = GenerateExecutionViews(allLayerReports, modelExecutionReport.CompletedLayerExecutionReports.Count);
|
||||
GenerateReportForViews(stringBuilder, layerExecutionViews, spreadSheetFormat, "", false);
|
||||
}
|
||||
|
||||
public static MemoryPeakSummary GenerateStringReport(StringBuilder stringBuilder, List<MemorySnapshotReport> memorySnapshots,
|
||||
bool spreadSheetFormat)
|
||||
{
|
||||
CollectAllAsFirstSeen(in memorySnapshots,
|
||||
out var allTensorAsFirstSeen,
|
||||
out var allAllocatorAsFirstSeen,
|
||||
out var allTensorDataAsFirstSeen,
|
||||
out var allTempMemoriesAsFirstSeen);
|
||||
|
||||
var summaryViews = GenerateSummaryViews(memorySnapshots, allTensorAsFirstSeen, allTensorDataAsFirstSeen, allTempMemoriesAsFirstSeen, out var memoryPeakSummary);
|
||||
GenerateHeaderForSummaryViews(stringBuilder, summaryViews, spreadSheetFormat);
|
||||
GenerateReportForViews(stringBuilder, summaryViews, spreadSheetFormat, "Tensors allocation and deallocation (diff from previous snapshot):", isSummaryView:true);
|
||||
stringBuilder.Append("\n");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
var tensorViews = GenerateTensorsViews(memorySnapshots, allTensorAsFirstSeen);
|
||||
GenerateHeaderForTensorViews(stringBuilder, tensorViews, spreadSheetFormat);
|
||||
GenerateReportForViews(stringBuilder, tensorViews, spreadSheetFormat, "All Tensors:", isSummaryView:false);
|
||||
stringBuilder.Append("\n");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
var allocatorViews = GenerateAllocatorViews(memorySnapshots, allAllocatorAsFirstSeen);
|
||||
GenerateHeaderForAllocatorsViews(stringBuilder, allocatorViews, spreadSheetFormat);
|
||||
GenerateReportForViews(stringBuilder, allocatorViews, spreadSheetFormat, "All Allocators:", isSummaryView:false);
|
||||
stringBuilder.Append("\n");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
var tensorDatasViews = GenerateTensorDatasViews(memorySnapshots, allTensorDataAsFirstSeen);
|
||||
GenerateHeaderForTensorDatasViews(stringBuilder, tensorDatasViews, spreadSheetFormat);
|
||||
GenerateReportForViews(stringBuilder, tensorDatasViews, spreadSheetFormat, "All TensorDatas:", isSummaryView:false);
|
||||
stringBuilder.Append("\n");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
var tempMemoriesDatasViews = GenerateTempMemoriesDatasViews(memorySnapshots, allTempMemoriesAsFirstSeen);
|
||||
GenerateHeaderForTempMemoriesViews(stringBuilder, tempMemoriesDatasViews, spreadSheetFormat);
|
||||
GenerateReportForViews(stringBuilder, tempMemoriesDatasViews, spreadSheetFormat, "All worker temporary memories:", isSummaryView:false);
|
||||
stringBuilder.Append("\n");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
return memoryPeakSummary;
|
||||
}
|
||||
|
||||
#region `Internal data format` declaration
|
||||
private class SnapshotFields
|
||||
{
|
||||
public readonly string[] Titles;
|
||||
public readonly Dictionary<string, string> Items;
|
||||
|
||||
public SnapshotFields(string[] titles)
|
||||
{
|
||||
Titles = titles;
|
||||
Items = new Dictionary<string, string>();
|
||||
foreach (var title in titles)
|
||||
{
|
||||
Items[title] = "";
|
||||
}
|
||||
}
|
||||
|
||||
public string this[string title]
|
||||
{
|
||||
set {
|
||||
Assert.IsTrue(Items.ContainsKey(title));
|
||||
Assert.IsTrue(Items[title] == "");
|
||||
Items[title] = value;
|
||||
}
|
||||
get => Items[title];
|
||||
}
|
||||
|
||||
public void AddTitlesToReport(StringBuilder stringBuilder, string separator)
|
||||
{
|
||||
foreach (var title in Titles)
|
||||
{
|
||||
stringBuilder.Append(title);
|
||||
stringBuilder.Append(separator);
|
||||
}
|
||||
}
|
||||
|
||||
public void AddValuesToReport(StringBuilder stringBuilder, string separator)
|
||||
{
|
||||
foreach (var title in Titles)
|
||||
{
|
||||
stringBuilder.Append(Items[title]);
|
||||
stringBuilder.Append(separator);
|
||||
}
|
||||
}
|
||||
|
||||
public void AddAllToReport(StringBuilder stringBuilder, string suffix, string prefix="")
|
||||
{
|
||||
bool first = true;
|
||||
foreach (var title in Titles)
|
||||
{
|
||||
if (!first)
|
||||
stringBuilder.Append(suffix);
|
||||
|
||||
stringBuilder.Append(prefix);
|
||||
stringBuilder.Append(title);
|
||||
stringBuilder.Append(": ");
|
||||
stringBuilder.Append(Items[title]);
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class SnapshotFieldsWithContexts
|
||||
{
|
||||
public readonly string[] FieldTitles;
|
||||
public readonly string[] ContextTitles;
|
||||
public SortedDictionary<int, SnapshotFields> Fields { get; }
|
||||
public SortedDictionary<int, SnapshotFields> Contexts { get; }
|
||||
|
||||
public SnapshotFieldsWithContexts(string[] fieldsTitles, string[] contextTitles)
|
||||
{
|
||||
FieldTitles = fieldsTitles;
|
||||
ContextTitles = contextTitles;
|
||||
Contexts = new SortedDictionary<int, SnapshotFields>();
|
||||
Fields = new SortedDictionary<int, SnapshotFields>();
|
||||
}
|
||||
|
||||
public void AddContext(int uniqueId)
|
||||
{
|
||||
Assert.IsFalse(Contexts.ContainsKey(uniqueId));
|
||||
Contexts[uniqueId] = new SnapshotFields(ContextTitles);
|
||||
Fields[uniqueId] = new SnapshotFields(FieldTitles);
|
||||
}
|
||||
|
||||
public void SetContext(int uniqueId, string title, string value)
|
||||
{
|
||||
Assert.IsTrue(Contexts.ContainsKey(uniqueId));
|
||||
Contexts[uniqueId][title] = value;
|
||||
}
|
||||
|
||||
public string this[int uniqueId, string title]
|
||||
{
|
||||
set
|
||||
{
|
||||
Assert.IsTrue(Fields.ContainsKey(uniqueId));
|
||||
Fields[uniqueId][title] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class SnapshotView
|
||||
{
|
||||
public SnapshotFields context;
|
||||
public SnapshotFields summary;
|
||||
public SnapshotFieldsWithContexts sections;
|
||||
|
||||
public SnapshotView(int snapShotIndex, MemorySnapshotReport report)
|
||||
{
|
||||
context = new SnapshotFields( new [] {"Snapshot index", "Type", "Name"} );
|
||||
context["Snapshot index"] = snapShotIndex.ToString();
|
||||
context["Type"] = report.ContextType;
|
||||
context["Name"] = report.ContextName;
|
||||
}
|
||||
|
||||
public SnapshotView(int snapShotIndex, LayerExecutionReport report)
|
||||
{
|
||||
context = new SnapshotFields( new [] {"Layer index", "Type", "Name"} );
|
||||
context["Layer index"] = snapShotIndex.ToString();
|
||||
context["Type"] = report.LayerType;
|
||||
context["Name"] = report.LayerName;
|
||||
}
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region Helpers to find information in Reports
|
||||
|
||||
private static TempMemoryInfo FindTempMemoryInSnapshot(MemorySnapshotReport memorySnapshot, int tempMemoryId)
|
||||
{
|
||||
return memorySnapshot.TempMemoriesInfo.Find(memoryInfo => memoryInfo.UniqueId == tempMemoryId);
|
||||
}
|
||||
|
||||
private static AllocatorMemoryInfo FindAllocatorInSnapshot(MemorySnapshotReport memorySnapshot, int allocatorId)
|
||||
{
|
||||
return memorySnapshot.AllocatorsMemoryInfo.Find(memoryInfo => memoryInfo.UniqueId == allocatorId);
|
||||
}
|
||||
|
||||
|
||||
private static string FindTensorDataAllocatorInSnapshot(MemorySnapshotReport memorySnapshot, int tensorDataId)
|
||||
{
|
||||
foreach (var allocatorMemoryInfo in memorySnapshot.AllocatorsMemoryInfo)
|
||||
{
|
||||
var foundTensorData = allocatorMemoryInfo.TensorDatasMemoryInfo.Find(memoryInfo => memoryInfo.UniqueId == tensorDataId);
|
||||
if (foundTensorData != null)
|
||||
return $"{allocatorMemoryInfo.Name} / Id: {allocatorMemoryInfo.UniqueId}";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
private static TensorDataMemoryInfo FindTensorDataInSnapshot(MemorySnapshotReport memorySnapshot, int tensorDataId)
|
||||
{
|
||||
bool MatchTensorDataGuidForTensor(TensorMemoryInfo memoryInfo) =>
|
||||
memoryInfo.tensorDataMemoryInfo != null && memoryInfo.tensorDataMemoryInfo.UniqueId == tensorDataId;
|
||||
|
||||
var foundTensor = memorySnapshot.TensorsMemoryInfo.Find(MatchTensorDataGuidForTensor);
|
||||
if (foundTensor != null)
|
||||
return foundTensor.tensorDataMemoryInfo;
|
||||
|
||||
foreach (var allocatorMemoryInfo in memorySnapshot.AllocatorsMemoryInfo)
|
||||
{
|
||||
var foundTensorData = allocatorMemoryInfo.TensorDatasMemoryInfo.Find(memoryInfo => memoryInfo.UniqueId == tensorDataId);
|
||||
if (foundTensorData != null)
|
||||
return foundTensorData;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static IEnumerable<TensorMemoryInfo> FindAllTensorsInSnapshotUsingTensorDataId(MemorySnapshotReport memorySnapshot, int tensorDataId)
|
||||
{
|
||||
SortedSet<TensorMemoryInfo> tensors = new SortedSet<TensorMemoryInfo>( Comparer<TensorMemoryInfo>.Create((a, b) => a.UniqueId.CompareTo(b.UniqueId)));
|
||||
|
||||
var foundTensors = memorySnapshot.TensorsMemoryInfo.FindAll(memoryInfo => memoryInfo.tensorDataMemoryInfo != null && memoryInfo.tensorDataMemoryInfo.UniqueId == tensorDataId);
|
||||
tensors.UnionWith(foundTensors);
|
||||
|
||||
foreach (var allocatorMemoryInfo in memorySnapshot.AllocatorsMemoryInfo)
|
||||
{
|
||||
var allocatorFoundTensor = allocatorMemoryInfo.TensorsMemoryInfo.FindAll(memoryInfo => memoryInfo.tensorDataMemoryInfo != null && memoryInfo.tensorDataMemoryInfo.UniqueId == tensorDataId);
|
||||
tensors.UnionWith(allocatorFoundTensor);
|
||||
}
|
||||
|
||||
return tensors;
|
||||
}
|
||||
|
||||
private static TensorMemoryInfo FindTensorInSnapshot(MemorySnapshotReport memorySnapshot, int tensorId)
|
||||
{
|
||||
var foundTensor = memorySnapshot.TensorsMemoryInfo.Find(memoryInfo => memoryInfo.UniqueId == tensorId);
|
||||
if (foundTensor != null)
|
||||
return foundTensor;
|
||||
|
||||
foreach (var allocatorMemoryInfo in memorySnapshot.AllocatorsMemoryInfo)
|
||||
{
|
||||
foundTensor = allocatorMemoryInfo.TensorsMemoryInfo.Find(memoryInfo => memoryInfo.UniqueId == tensorId);
|
||||
if (foundTensor != null)
|
||||
return foundTensor;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static void CollectAllAsFirstSeen(in List<MemorySnapshotReport> memorySnapshots,
|
||||
out SortedDictionary<int,TensorMemoryInfo> tensors,
|
||||
out SortedDictionary<int,AllocatorMemoryInfo> allocators,
|
||||
out SortedDictionary<int,TensorDataMemoryInfo> tensorDatas,
|
||||
out SortedDictionary<int,TempMemoryInfo> tempMemories)
|
||||
{
|
||||
tensors = new SortedDictionary<int, TensorMemoryInfo>();
|
||||
allocators = new SortedDictionary<int, AllocatorMemoryInfo>();
|
||||
tensorDatas = new SortedDictionary<int, TensorDataMemoryInfo>();
|
||||
tempMemories = new SortedDictionary<int, TempMemoryInfo>();
|
||||
|
||||
//Collect all unique tensors, tensors and allocator
|
||||
foreach (var snapshot in memorySnapshots)
|
||||
{
|
||||
//From Vars
|
||||
foreach (var tensor in snapshot.TensorsMemoryInfo)
|
||||
{
|
||||
tensors[tensor.UniqueId] = tensor;
|
||||
if (tensor.tensorDataMemoryInfo != null)
|
||||
tensorDatas[tensor.tensorDataMemoryInfo.UniqueId] = tensor.tensorDataMemoryInfo;
|
||||
}
|
||||
|
||||
//From allocators
|
||||
foreach (var allocator in snapshot.AllocatorsMemoryInfo)
|
||||
{
|
||||
allocators[allocator.UniqueId] = allocator;
|
||||
foreach (var tensor in allocator.TensorsMemoryInfo)
|
||||
{
|
||||
tensors[tensor.UniqueId] = tensor;
|
||||
if (tensor.tensorDataMemoryInfo != null)
|
||||
tensorDatas[tensor.tensorDataMemoryInfo.UniqueId] = tensor.tensorDataMemoryInfo;
|
||||
}
|
||||
|
||||
foreach (var tensorData in allocator.TensorDatasMemoryInfo)
|
||||
{
|
||||
tensorDatas[tensorData.UniqueId] = tensorData;
|
||||
}
|
||||
}
|
||||
|
||||
//From temp memories
|
||||
foreach (var tempMemoryInfo in snapshot.TempMemoriesInfo)
|
||||
{
|
||||
tempMemories[tempMemoryInfo.UniqueId] = tempMemoryInfo;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region Reports -> internal data format
|
||||
|
||||
private static List<SnapshotView> GenerateTempMemoriesDatasViews(List<MemorySnapshotReport> memorySnapshots,
|
||||
SortedDictionary<int, TempMemoryInfo> allTempMemoryInfosAsFirstSeen)
|
||||
{
|
||||
List<SnapshotView> views = new List<SnapshotView>();
|
||||
for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
|
||||
{
|
||||
long allTotal = 0L;
|
||||
var snapshot = memorySnapshots[memorySnapshotIndex];
|
||||
|
||||
//Titles and contexts
|
||||
SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
|
||||
view.sections = new SnapshotFieldsWithContexts(
|
||||
fieldsTitles: new[]
|
||||
{
|
||||
"Allocated (bytes)",
|
||||
"On GPU"
|
||||
},
|
||||
contextTitles: new[] {"Name", "Id"});
|
||||
foreach (var tempMemoryInfo in allTempMemoryInfosAsFirstSeen)
|
||||
{
|
||||
var id = tempMemoryInfo.Key;
|
||||
view.sections.AddContext(id);
|
||||
view.sections.SetContext(id, "Name", tempMemoryInfo.Value.Name);
|
||||
view.sections.SetContext(id, "Id", id.ToString());
|
||||
}
|
||||
view.summary = new SnapshotFields(new[]
|
||||
{
|
||||
"Memory pressure in bytes (sum of all temp memory capacities)"
|
||||
});
|
||||
|
||||
//Details
|
||||
foreach (var alloc in allTempMemoryInfosAsFirstSeen)
|
||||
{
|
||||
var tempMemory = FindTempMemoryInSnapshot(snapshot, alloc.Key);
|
||||
if (tempMemory != null)
|
||||
{
|
||||
allTotal += tempMemory.TotalBytes;
|
||||
view.sections[tempMemory.UniqueId, "Allocated (bytes)"] = tempMemory.TotalBytes.ToString();
|
||||
view.sections[tempMemory.UniqueId, "On GPU"] = tempMemory.IsGPUMem ? "GPU" : "CPU";
|
||||
}
|
||||
}
|
||||
|
||||
//Summary
|
||||
view.summary["Memory pressure in bytes (sum of all temp memory capacities)"] = allTotal.ToString();
|
||||
views.Add(view);
|
||||
}
|
||||
|
||||
return views;
|
||||
}
|
||||
|
||||
private static List<SnapshotView> GenerateAllocatorViews(List<MemorySnapshotReport> memorySnapshots,
|
||||
SortedDictionary<int, AllocatorMemoryInfo> allAllocatorAsFirstSeen)
|
||||
{
|
||||
List<SnapshotView> views = new List<SnapshotView>();
|
||||
for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
|
||||
{
|
||||
long allTotal = 0L;
|
||||
long allBusy = 0L;
|
||||
long allUsed = 0L;
|
||||
long allFragmented = 0L;
|
||||
long allFree = 0L;
|
||||
var snapshot = memorySnapshots[memorySnapshotIndex];
|
||||
|
||||
//Titles and contexts
|
||||
SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
|
||||
view.sections = new SnapshotFieldsWithContexts(
|
||||
fieldsTitles: new[]
|
||||
{
|
||||
"Memory pressure in bytes (sum of allocated tensorDatas capacities)",
|
||||
"Busy bytes, for all allocators (sum of 'in use' tensorDatas capacities)",
|
||||
"Needed bytes, for all allocators (sum of sizes of the part of the tensorDatas used by Tensors)",
|
||||
"Unusable bytes, for all allocators (sum of the part of tensorData lost because of allocator fragmentation)",
|
||||
"Ready bytes, for all allocators (sum of capacities of tensorData not used but allocated)"
|
||||
},
|
||||
contextTitles: new[] {"Name", "Id"});
|
||||
foreach (var allocatorMemoryInfo in allAllocatorAsFirstSeen)
|
||||
{
|
||||
var id = allocatorMemoryInfo.Key;
|
||||
view.sections.AddContext(id);
|
||||
view.sections.SetContext(id, "Name", allocatorMemoryInfo.Value.Name);
|
||||
view.sections.SetContext(id, "Id", id.ToString());
|
||||
}
|
||||
view.summary = new SnapshotFields(new[]
|
||||
{
|
||||
"Memory pressure in bytes, for all allocators (sum of allocated tensorDatas capacities)",
|
||||
"Busy bytes, for all allocators (sum of 'in use' tensorDatas capacities)",
|
||||
"Needed bytes, for all allocators (sum of sizes of the part of the tensorDatas used by Tensors)",
|
||||
"Unusable bytes, for all allocators (sum of the part of tensorData lost because of allocator fragmentation)",
|
||||
"Ready bytes, for all allocators (sum of capacities of tensorData not used but allocated)"
|
||||
});
|
||||
|
||||
//Details
|
||||
foreach (var alloc in allAllocatorAsFirstSeen)
|
||||
{
|
||||
var allocator = FindAllocatorInSnapshot(snapshot, alloc.Key);
|
||||
if (allocator != null)
|
||||
{
|
||||
allTotal += allocator.TotalBytes;
|
||||
allBusy += allocator.BusyBytes;
|
||||
allUsed += allocator.UsedBytes;
|
||||
allFragmented += allocator.BusyBytes-allocator.UsedBytes;
|
||||
allFree += allocator.FreeBytes;
|
||||
view.sections[allocator.UniqueId, "Memory pressure in bytes (sum of allocated tensorDatas capacities)"] = allocator.TotalBytes.ToString();
|
||||
view.sections[allocator.UniqueId, "Busy bytes, for all allocators (sum of 'in use' tensorDatas capacities)"] = allocator.BusyBytes.ToString();
|
||||
view.sections[allocator.UniqueId, "Needed bytes, for all allocators (sum of sizes of the part of the tensorDatas used by Tensors)"] = allocator.UsedBytes.ToString();
|
||||
view.sections[allocator.UniqueId, "Unusable bytes, for all allocators (sum of the part of tensorData lost because of allocator fragmentation)"] = allocator.BytesLostToFragmentation.ToString();
|
||||
view.sections[allocator.UniqueId, "Ready bytes, for all allocators (sum of capacities of tensorData not used but allocated)"] = allocator.FreeBytes.ToString();
|
||||
}
|
||||
}
|
||||
|
||||
//Summary
|
||||
view.summary["Memory pressure in bytes, for all allocators (sum of allocated tensorDatas capacities)"] = allTotal.ToString();
|
||||
view.summary["Busy bytes, for all allocators (sum of 'in use' tensorDatas capacities)"] = allBusy.ToString();
|
||||
view.summary["Needed bytes, for all allocators (sum of sizes of the part of the tensorDatas used by Tensors)"] = allUsed.ToString();
|
||||
view.summary["Unusable bytes, for all allocators (sum of the part of tensorData lost because of allocator fragmentation)"] = allFragmented.ToString();
|
||||
view.summary["Ready bytes, for all allocators (sum of capacities of tensorData not used but allocated)"] = allFree.ToString();
|
||||
views.Add(view);
|
||||
}
|
||||
|
||||
return views;
|
||||
}
|
||||
|
||||
private static List<SnapshotView> GenerateTensorDatasViews(List<MemorySnapshotReport> memorySnapshots,
|
||||
SortedDictionary<int,TensorDataMemoryInfo> allTensorDataAsFirstSeen)
|
||||
{
|
||||
List<SnapshotView> views = new List<SnapshotView>();
|
||||
for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
|
||||
{
|
||||
long allGPUInBytes = 0L;
|
||||
long allCPUInBytes = 0L;
|
||||
long allUsedGPUInBytes = 0L;
|
||||
long allUsedCPUInBytes = 0L;
|
||||
long allFragmentedMemGPUInBytes = 0L;
|
||||
long allFragmentedMemCPUInBytes = 0L;
|
||||
|
||||
var snapshot = memorySnapshots[memorySnapshotIndex];
|
||||
|
||||
//Titles and contexts
|
||||
SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
|
||||
view.sections = new SnapshotFieldsWithContexts(
|
||||
fieldsTitles: new[]
|
||||
{
|
||||
"In use", "Capacity (bytes)", "On GPU", "Allocator",
|
||||
"Tensor(s) Id(s)", "Tensor(s) max bytes", "Fragmented bytes"
|
||||
},
|
||||
contextTitles: new[] {"Id"});
|
||||
foreach (var tensorData in allTensorDataAsFirstSeen)
|
||||
{
|
||||
var id = tensorData.Key;
|
||||
view.sections.AddContext(id);
|
||||
view.sections.SetContext(id, "Id", id.ToString());
|
||||
}
|
||||
view.summary = new SnapshotFields(new[]
|
||||
{
|
||||
"GPU sum of all allocated tensorData capacities (bytes)",
|
||||
"CPU sum of all allocated tensorData capacities (bytes)",
|
||||
"GPU sum of all 'in use' tensorData (bytes)",
|
||||
"CPU sum of all 'in use' tensorData (bytes)",
|
||||
"GPU sum of all 'fragmented' tensorData mem ('in use' but not by large enough tensors) (bytes)",
|
||||
"CPU sum of all 'fragmented' tensorData mem ('in use' but not by large enough tensors) (bytes)",
|
||||
});
|
||||
|
||||
foreach (var tData in allTensorDataAsFirstSeen)
|
||||
{
|
||||
TensorDataMemoryInfo tensorData = FindTensorDataInSnapshot(snapshot, tData.Key);
|
||||
if (tensorData != null)
|
||||
{
|
||||
var associatedTensors = FindAllTensorsInSnapshotUsingTensorDataId(snapshot, tensorData.UniqueId);
|
||||
string tensorNamesandIds = "";
|
||||
int tensorBytes = 0;
|
||||
bool first = true;
|
||||
foreach (var tensor in associatedTensors)
|
||||
{
|
||||
if (!first)
|
||||
tensorNamesandIds += " / ";
|
||||
tensorNamesandIds += tensor.Name + " Id:" + tensor.UniqueId;
|
||||
first = false;
|
||||
tensorBytes = Math.Max(tensorBytes, tensor.Shape.length * sizeof(float));
|
||||
}
|
||||
int fragmentedTensorDataBytes = (tensorData.InUse) ? tensorData.MaxBytes - tensorBytes : 0;
|
||||
|
||||
if (tensorData.IsGPUMem)
|
||||
{
|
||||
allGPUInBytes += tensorData.MaxBytes;
|
||||
if (tensorData.InUse)
|
||||
{
|
||||
allFragmentedMemGPUInBytes += fragmentedTensorDataBytes;
|
||||
allUsedGPUInBytes += tensorData.MaxBytes;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
allCPUInBytes += tensorData.MaxBytes;
|
||||
if (tensorData.InUse)
|
||||
{
|
||||
allFragmentedMemCPUInBytes += fragmentedTensorDataBytes;
|
||||
allUsedCPUInBytes += tensorData.MaxBytes;
|
||||
}
|
||||
}
|
||||
|
||||
view.sections[tensorData.UniqueId, "In use"] = tensorData.InUse ? "Yes" : "";
|
||||
view.sections[tensorData.UniqueId, "Capacity (bytes)"] = tensorData.MaxBytes.ToString();
|
||||
view.sections[tensorData.UniqueId, "On GPU"] = tensorData.IsGPUMem ? "GPU" : "CPU";
|
||||
view.sections[tensorData.UniqueId, "Allocator"] = FindTensorDataAllocatorInSnapshot(snapshot, tensorData.UniqueId);
|
||||
view.sections[tensorData.UniqueId, "Tensor(s) Id(s)"] = tensorNamesandIds;
|
||||
view.sections[tensorData.UniqueId, "Tensor(s) max bytes"] = tensorBytes.ToString();
|
||||
view.sections[tensorData.UniqueId, "Fragmented bytes"] = fragmentedTensorDataBytes.ToString();
|
||||
}
|
||||
}
|
||||
|
||||
//Summary
|
||||
view.summary["GPU sum of all allocated tensorData capacities (bytes)"] = allGPUInBytes.ToString();
|
||||
view.summary["CPU sum of all allocated tensorData capacities (bytes)"] = allCPUInBytes.ToString();
|
||||
view.summary["GPU sum of all 'in use' tensorData (bytes)"] = allUsedGPUInBytes.ToString();
|
||||
view.summary["CPU sum of all 'in use' tensorData (bytes)"] = allUsedCPUInBytes.ToString();
|
||||
view.summary["GPU sum of all 'fragmented' tensorData mem ('in use' but not by large enough tensors) (bytes)"] = allFragmentedMemGPUInBytes.ToString();
|
||||
view.summary["CPU sum of all 'fragmented' tensorData mem ('in use' but not by large enough tensors) (bytes)"] = allFragmentedMemCPUInBytes.ToString();
|
||||
views.Add(view);
|
||||
}
|
||||
|
||||
return views;
|
||||
}
|
||||
|
||||
private static List<SnapshotView> GenerateTensorsViews(List<MemorySnapshotReport> memorySnapshots,
|
||||
SortedDictionary<int, TensorMemoryInfo> allTensorAsFirstSeen)
|
||||
{
|
||||
List<SnapshotView> views = new List<SnapshotView>();
|
||||
for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
|
||||
{
|
||||
var snapshot = memorySnapshots[memorySnapshotIndex];
|
||||
|
||||
//Titles and contexts
|
||||
SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
|
||||
view.sections = new SnapshotFieldsWithContexts(
|
||||
fieldsTitles: new[] {"Allocated (bytes)", "Name", "Shape", "Cache size (bytes)", "TensorData Id", "TensorData Capacity (bytes)"},
|
||||
contextTitles: new[] {"Id"});
|
||||
foreach (var tensorMemoryInfo in allTensorAsFirstSeen)
|
||||
{
|
||||
var id = tensorMemoryInfo.Key;
|
||||
view.sections.AddContext(id);
|
||||
view.sections.SetContext(id, "Id", id.ToString());
|
||||
}
|
||||
view.summary = new SnapshotFields(new[]
|
||||
{
|
||||
"Tensor memory on GPU (in bytes)",
|
||||
"Tensor memory on CPU (in bytes)",
|
||||
"On CPU tensor cache (in bytes)"
|
||||
});
|
||||
|
||||
//Details
|
||||
long cacheMemInBytes = 0L;
|
||||
long gpuMem = 0L;
|
||||
long cpuMem = 0L;
|
||||
foreach (var tensorFromDict in allTensorAsFirstSeen)
|
||||
{
|
||||
var tensor = FindTensorInSnapshot(snapshot, tensorFromDict.Key);
|
||||
if (tensor != null)
|
||||
{
|
||||
cacheMemInBytes += tensor.CacheBytes;
|
||||
var dataBytes = tensor.Shape.length * sizeof(float);
|
||||
|
||||
string allocatedStr = "Yes";
|
||||
if (tensor.tensorDataMemoryInfo != null)
|
||||
{
|
||||
allocatedStr += $" ({(tensor.Shape.length * sizeof(float)).ToString()})";
|
||||
view.sections[tensor.UniqueId, "TensorData Id"] = tensor.tensorDataMemoryInfo.UniqueId.ToString();
|
||||
view.sections[tensor.UniqueId, "TensorData Capacity (bytes)"] = tensor.tensorDataMemoryInfo.MaxBytes.ToString();
|
||||
if (tensor.tensorDataMemoryInfo.IsGPUMem)
|
||||
gpuMem += dataBytes;
|
||||
else
|
||||
cpuMem += dataBytes;
|
||||
}
|
||||
else
|
||||
{
|
||||
allocatedStr += " (0)";
|
||||
}
|
||||
view.sections[tensor.UniqueId, "Name"] = tensor.Name;
|
||||
view.sections[tensor.UniqueId, "Shape"] = tensor.Shape.ToString();
|
||||
view.sections[tensor.UniqueId, "Cache size (bytes)"] = tensor.CacheBytes.ToString();
|
||||
view.sections[tensor.UniqueId, "Allocated (bytes)"] = allocatedStr;
|
||||
}
|
||||
}
|
||||
|
||||
//Summary
|
||||
view.summary["Tensor memory on GPU (in bytes)"] = gpuMem.ToString();
|
||||
view.summary["Tensor memory on CPU (in bytes)"] = cpuMem.ToString();
|
||||
view.summary["On CPU tensor cache (in bytes)"] = cacheMemInBytes.ToString();
|
||||
views.Add(view);
|
||||
}
|
||||
|
||||
return views;
|
||||
}
|
||||
|
||||
private static List<SnapshotView> GenerateExecutionViews(List<LayerExecutionReport> layerReports, int numCompletedLayer)
|
||||
{
|
||||
List<SnapshotView> views = new List<SnapshotView>();
|
||||
for (var layerIndex = 0; layerIndex < layerReports.Count; layerIndex++)
|
||||
{
|
||||
var report = layerReports[layerIndex];
|
||||
|
||||
//Titles
|
||||
SnapshotView view = new SnapshotView(layerIndex, report);
|
||||
view.sections = new SnapshotFieldsWithContexts(null, null);
|
||||
view.summary = new SnapshotFields(new[]
|
||||
{
|
||||
"Summary",
|
||||
"Compute Kernels(workItems:X,Y,Z)",
|
||||
"Theoretical ALU count",
|
||||
"Theoretical Bandwidth (bytes)",
|
||||
"Note"
|
||||
});
|
||||
|
||||
//Summary
|
||||
view.summary["Summary"] = report.Summary==""?"NA":report.Summary;
|
||||
view.summary["Compute Kernels(workItems:X,Y,Z)"] = report.DispatchInfos;
|
||||
view.summary["Theoretical ALU count"] = report.NumAlu.ToString();
|
||||
view.summary["Theoretical Bandwidth (bytes)"] = report.NumBytes.ToString();
|
||||
if (layerIndex >= numCompletedLayer)
|
||||
view.summary["Note"] = "UNCOMPLETED LAYER";
|
||||
views.Add(view);
|
||||
}
|
||||
|
||||
return views;
|
||||
}
|
||||
|
||||
private static List<SnapshotView> GenerateSummaryViews(List<MemorySnapshotReport> memorySnapshots,
|
||||
SortedDictionary<int, TensorMemoryInfo> allTensorsAsFirstSeen,
|
||||
SortedDictionary<int, TensorDataMemoryInfo> allTensorDatasAsFirstSeen,
|
||||
SortedDictionary<int, TempMemoryInfo> allTempMemoriesAsFirstSeen,
|
||||
out MemoryPeakSummary memoryPeakSummary)
|
||||
{
|
||||
HashSet<int> previousSnapshotTensorIds = new HashSet<int>();
|
||||
List<SnapshotView> views = new List<SnapshotView>();
|
||||
|
||||
long peakMemoryUsageGPU = 0;
|
||||
long peakMemoryUsageCPU = 0;
|
||||
long peakMemoryUsageGPUAndCPU = 0;
|
||||
|
||||
for (var memorySnapshotIndex = 0; memorySnapshotIndex < memorySnapshots.Count; memorySnapshotIndex++)
|
||||
{
|
||||
var snapshot = memorySnapshots[memorySnapshotIndex];
|
||||
|
||||
//Titles and contexts
|
||||
SnapshotView view = new SnapshotView(memorySnapshotIndex, snapshot);
|
||||
view.sections = new SnapshotFieldsWithContexts(
|
||||
fieldsTitles: new[] {"Allocated", "Released"},
|
||||
contextTitles: new[] {"Type" });
|
||||
view.sections.AddContext(0);
|
||||
view.sections.SetContext(0, "Type", "Tensor");
|
||||
view.summary = new SnapshotFields(new[]
|
||||
{
|
||||
"Total memory pressure on GPU (in bytes)",
|
||||
"Total memory pressure on CPU (in bytes)",
|
||||
"On CPU tensor cache (in bytes)"
|
||||
});
|
||||
|
||||
//Summary
|
||||
HashSet<int> currentSnapshotTensorIds = new HashSet<int>();
|
||||
long cacheMemInBytes = 0L;
|
||||
foreach (var tensor in snapshot.TensorsMemoryInfo)
|
||||
{
|
||||
cacheMemInBytes += tensor.CacheBytes;
|
||||
currentSnapshotTensorIds.Add(tensor.UniqueId);
|
||||
}
|
||||
long gpuMem = 0L;
|
||||
long cpuMem = 0L;
|
||||
foreach (var tData in allTensorDatasAsFirstSeen)
|
||||
{
|
||||
TensorDataMemoryInfo tensorData = FindTensorDataInSnapshot(snapshot, tData.Key);
|
||||
if (tensorData != null)
|
||||
{
|
||||
if (tensorData.IsGPUMem)
|
||||
gpuMem += tensorData.MaxBytes;
|
||||
else
|
||||
cpuMem += tensorData.MaxBytes;
|
||||
}
|
||||
}
|
||||
foreach (var mData in allTempMemoriesAsFirstSeen)
|
||||
{
|
||||
TempMemoryInfo tempMemoryInfo = FindTempMemoryInSnapshot(snapshot, mData.Key);
|
||||
if (tempMemoryInfo != null)
|
||||
{
|
||||
if (tempMemoryInfo.IsGPUMem)
|
||||
gpuMem += tempMemoryInfo.TotalBytes;
|
||||
else
|
||||
cpuMem += tempMemoryInfo.TotalBytes;
|
||||
}
|
||||
}
|
||||
view.summary["Total memory pressure on GPU (in bytes)"] = gpuMem.ToString();
|
||||
view.summary["Total memory pressure on CPU (in bytes)"] = cpuMem.ToString();
|
||||
view.summary["On CPU tensor cache (in bytes)"] = cacheMemInBytes.ToString();
|
||||
|
||||
peakMemoryUsageGPU = Math.Max(peakMemoryUsageGPU, gpuMem);
|
||||
peakMemoryUsageCPU = Math.Max(peakMemoryUsageCPU, cpuMem);
|
||||
peakMemoryUsageGPUAndCPU = Math.Max(peakMemoryUsageGPUAndCPU, gpuMem+cpuMem);
|
||||
|
||||
if (memorySnapshotIndex != 0)
|
||||
{
|
||||
//Tensor allocated and freed (diff from snapshot to snapshot)
|
||||
var allocatedTensorsId = currentSnapshotTensorIds.Except(previousSnapshotTensorIds);
|
||||
var releasedTensorsId = previousSnapshotTensorIds.Except(currentSnapshotTensorIds);
|
||||
StringBuilder tensorDiff = new StringBuilder();
|
||||
bool first = true;
|
||||
foreach (var tensorId in allocatedTensorsId)
|
||||
{
|
||||
var tensor = FindTensorInSnapshot(snapshot, tensorId);
|
||||
string tensorDataInfo = "none";
|
||||
if (tensor.tensorDataMemoryInfo != null)
|
||||
{
|
||||
var data = tensor.tensorDataMemoryInfo;
|
||||
var memType = data.IsGPUMem ? "GPU" : "CPU";
|
||||
tensorDataInfo = $"id:{data.UniqueId} bytes:{data.MaxBytes} on:{memType}";
|
||||
}
|
||||
if (!first) tensorDiff.Append(" / ");
|
||||
first = false;
|
||||
tensorDiff.Append($"{tensor.Name} {tensor.Shape} id:{tensor.UniqueId} tensorData:[{tensorDataInfo}]");
|
||||
|
||||
}
|
||||
view.sections[0, "Allocated"] = tensorDiff.ToString();
|
||||
tensorDiff.Clear();
|
||||
|
||||
first = true;
|
||||
foreach (var tensorId in releasedTensorsId)
|
||||
{
|
||||
var tensor = allTensorsAsFirstSeen[tensorId];
|
||||
if (!first) tensorDiff.Append(" / ");
|
||||
first = false;
|
||||
tensorDiff.Append($"{tensor.Name} {tensor.Shape} id:{tensor.UniqueId}");
|
||||
}
|
||||
view.sections[0, "Released"] = tensorDiff.ToString();
|
||||
}
|
||||
|
||||
views.Add(view);
|
||||
previousSnapshotTensorIds = currentSnapshotTensorIds;
|
||||
}
|
||||
|
||||
memoryPeakSummary = new MemoryPeakSummary(peakMemoryUsageGPU, peakMemoryUsageCPU, peakMemoryUsageGPUAndCPU);
|
||||
return views;
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Internal data format -> text
|
||||
|
||||
private static void Append(this StringBuilder sb, string str, int repeatCount)
|
||||
{
|
||||
for (int i = 0; i < repeatCount; ++i)
|
||||
sb.Append(str);
|
||||
}
|
||||
|
||||
private static void Append(this StringBuilder sb, string str, string separator)
|
||||
{
|
||||
sb.Append(str);
|
||||
sb.Append(separator);
|
||||
}
|
||||
|
||||
private static void GenerateReportForViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat, string sectionTitle, bool isSummaryView)
|
||||
{
|
||||
if (spreadSheetFormat)
|
||||
{
|
||||
//Columns Titles
|
||||
views[0].context.AddTitlesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
views[0].summary.AddTitlesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
foreach (var tensorFields in views[0].sections.Fields)
|
||||
{
|
||||
tensorFields.Value.AddTitlesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
}
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
//All snapshots
|
||||
foreach (var view in views)
|
||||
{
|
||||
view.context.AddValuesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
view.summary.AddValuesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
foreach (var tensorFields in view.sections.Fields)
|
||||
{
|
||||
tensorFields.Value.AddValuesToReport(stringBuilder, ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
}
|
||||
stringBuilder.Append("\n");
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
string doubleIndentation = ModelExecutionsReporter.TextIndentation + ModelExecutionsReporter.TextIndentation;
|
||||
|
||||
foreach (var view in views)
|
||||
{
|
||||
view.context.AddAllToReport(stringBuilder, ModelExecutionsReporter.TextFormatFieldSeparator);
|
||||
stringBuilder.Append("\n");
|
||||
view.summary.AddAllToReport(stringBuilder, suffix:"\n", prefix: ModelExecutionsReporter.TextIndentation);
|
||||
stringBuilder.Append("\n"+ModelExecutionsReporter.TextIndentation + sectionTitle +"\n");
|
||||
|
||||
foreach (var context in view.sections.Contexts)
|
||||
{
|
||||
stringBuilder.Append(doubleIndentation);
|
||||
if (isSummaryView)
|
||||
{
|
||||
view.sections.Fields[context.Key].AddAllToReport(stringBuilder, "\n"+doubleIndentation);
|
||||
}
|
||||
else
|
||||
{
|
||||
context.Value.AddAllToReport(stringBuilder, ModelExecutionsReporter.TextFormatFieldSeparator);
|
||||
stringBuilder.Append("\n"+doubleIndentation +"=> ");
|
||||
view.sections.Fields[context.Key].AddAllToReport(stringBuilder, ModelExecutionsReporter.TextFormatFieldSeparator);
|
||||
stringBuilder.Append("\n");
|
||||
}
|
||||
}
|
||||
stringBuilder.Append("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void GenerateHeaderForSummaryViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat)
|
||||
{
|
||||
if (views.Count == 0)
|
||||
{
|
||||
stringBuilder.Append("<******** Summary info ********> NONE!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!spreadSheetFormat)
|
||||
{
|
||||
stringBuilder.Append("<******** Summary info ********>\n");
|
||||
return;
|
||||
}
|
||||
|
||||
//Columns names
|
||||
int ctxFieldCount = views[0].context.Titles.Length + views[0].summary.Titles.Length;
|
||||
int sectionFieldCount = views[0].sections.FieldTitles.Length;
|
||||
|
||||
stringBuilder.Append("<******** Summary info ********>");
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
foreach (var context in views[0].sections.Contexts)
|
||||
{
|
||||
stringBuilder.Append(context.Value["Type"], ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, sectionFieldCount-1);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
}
|
||||
stringBuilder.Append("\n");
|
||||
}
|
||||
|
||||
private static void GenerateHeaderForTensorViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat)
|
||||
{
|
||||
GenerateHeaderForViewsByID(stringBuilder, views, spreadSheetFormat, "Tensors");
|
||||
}
|
||||
|
||||
private static void GenerateHeaderForTensorDatasViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat)
|
||||
{
|
||||
GenerateHeaderForViewsByID(stringBuilder, views, spreadSheetFormat, "TensorDatas");
|
||||
}
|
||||
|
||||
private static void GenerateHeaderForViewsByID(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat, string dataType)
|
||||
{
|
||||
if (views.Count == 0)
|
||||
{
|
||||
stringBuilder.Append($"<******** {dataType} info ********> NONE!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!spreadSheetFormat)
|
||||
{
|
||||
stringBuilder.Append($"<******** {dataType} info ********>\n");
|
||||
return;
|
||||
}
|
||||
|
||||
//Columns names
|
||||
int ctxFieldCount = views[0].context.Titles.Length + views[0].summary.Titles.Length;
|
||||
int sectionFieldCount = views[0].sections.FieldTitles.Length;
|
||||
|
||||
stringBuilder.Append($"<******** {dataType} info ********>");
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
foreach (var context in views[0].sections.Contexts)
|
||||
{
|
||||
stringBuilder.Append("Id: ");
|
||||
stringBuilder.Append(context.Value["Id"], ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, sectionFieldCount-1);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
}
|
||||
stringBuilder.Append("\n");
|
||||
}
|
||||
|
||||
private static void GenerateHeaderForTempMemoriesViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat)
|
||||
{
|
||||
if (views.Count == 0)
|
||||
{
|
||||
stringBuilder.Append("<******** Worker temporary memories info ********> NONE!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!spreadSheetFormat)
|
||||
{
|
||||
stringBuilder.Append("<******** Worker temporary memories info ********>\n");
|
||||
return;
|
||||
}
|
||||
|
||||
//Columns names
|
||||
int ctxFieldCount = views[0].context.Titles.Length + views[0].summary.Titles.Length;
|
||||
int sectionFieldCount = views[0].sections.FieldTitles.Length;
|
||||
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append("Temp memories names and ids:");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
stringBuilder.Append("<******** Worker temporary memories info ********>");
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
foreach (var context in views[0].sections.Contexts)
|
||||
{
|
||||
stringBuilder.Append(context.Value["Name"], " / Id: ");
|
||||
stringBuilder.Append(context.Value["Id"], ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, sectionFieldCount-1);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
}
|
||||
stringBuilder.Append("\n");
|
||||
}
|
||||
|
||||
private static void GenerateHeaderForAllocatorsViews(StringBuilder stringBuilder, List<SnapshotView> views, bool spreadSheetFormat)
|
||||
{
|
||||
if (views.Count == 0)
|
||||
{
|
||||
stringBuilder.Append("<******** Allocators info ********> NONE!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!spreadSheetFormat)
|
||||
{
|
||||
stringBuilder.Append("<******** Allocators info ********>\n");
|
||||
return;
|
||||
}
|
||||
|
||||
//Columns names
|
||||
int ctxFieldCount = views[0].context.Titles.Length + views[0].summary.Titles.Length;
|
||||
int sectionFieldCount = views[0].sections.FieldTitles.Length;
|
||||
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append("Allocators names and shapes:");
|
||||
stringBuilder.Append("\n");
|
||||
|
||||
stringBuilder.Append("<******** Allocators info ********>");
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, ctxFieldCount);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
foreach (var context in views[0].sections.Contexts)
|
||||
{
|
||||
stringBuilder.Append(context.Value["Name"], " / Id: ");
|
||||
stringBuilder.Append(context.Value["Id"], ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
stringBuilder.Append(ModelExecutionsReporter.SpreadSheetFieldSeparator, sectionFieldCount-1);
|
||||
stringBuilder.Append("|", ModelExecutionsReporter.SpreadSheetFieldSeparator);
|
||||
}
|
||||
stringBuilder.Append("\n");
|
||||
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
|
||||
#endif //ENABLE_BARRACUDA_STATS
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 5b125a79bdbfb1b41adba78ef255dd80
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,196 +0,0 @@
|
||||
#if ENABLE_BARRACUDA_STATS
|
||||
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
public class TensorDataMemoryInfo
|
||||
{
|
||||
public int UniqueId { get; }
|
||||
public int MaxBytes { get; }
|
||||
public bool InUse { get; }
|
||||
public bool IsGPUMem { get; }
|
||||
|
||||
internal TensorDataMemoryInfo(ITensorDataStatistics tensorDataStatistics)
|
||||
{
|
||||
UniqueId = tensorDataStatistics.uniqueId;
|
||||
MaxBytes = tensorDataStatistics.maxCapacity * sizeof(float);
|
||||
InUse = tensorDataStatistics.inUse;
|
||||
IsGPUMem = tensorDataStatistics.isGPUMem;
|
||||
}
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
return $"TensorData of maxBytes {MaxBytes}, inUse:{InUse}, onGPU:{IsGPUMem}, uniqueId:{UniqueId}";
|
||||
}
|
||||
}
|
||||
|
||||
public class TempMemoryInfo
|
||||
{
|
||||
public int UniqueId { get; }
|
||||
public string Name { get; }
|
||||
public long TotalBytes { get; }
|
||||
public bool IsGPUMem { get; }
|
||||
|
||||
internal TempMemoryInfo(TempMemoryStatistics tempMemoryStatistics)
|
||||
{
|
||||
UniqueId = tempMemoryStatistics.uniqueId;
|
||||
Name = tempMemoryStatistics.name;
|
||||
TotalBytes = tempMemoryStatistics.size;
|
||||
IsGPUMem = tempMemoryStatistics.isGPUMem;
|
||||
}
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
return $"Temp memory '{Name}' of totalBytes {TotalBytes}";
|
||||
}
|
||||
}
|
||||
|
||||
public class AllocatorMemoryInfo
|
||||
{
|
||||
public int UniqueId { get; }
|
||||
public string Name { get; }
|
||||
public long UsedBytes { get; }
|
||||
public long BusyBytes { get; }
|
||||
public long FreeBytes { get; }
|
||||
public long TotalBytes { get; }
|
||||
public List<TensorDataMemoryInfo> TensorDatasMemoryInfo { get; }
|
||||
public List<TensorMemoryInfo> TensorsMemoryInfo { get; }
|
||||
public long BytesLostToFragmentation => BusyBytes - UsedBytes;
|
||||
|
||||
internal AllocatorMemoryInfo(IAllocatorStatistics allocatorStatistics)
|
||||
{
|
||||
UniqueId = allocatorStatistics.uniqueId;
|
||||
Name = allocatorStatistics.name;
|
||||
UsedBytes = allocatorStatistics.usedBytes;
|
||||
BusyBytes = allocatorStatistics.busyBytes;
|
||||
FreeBytes = allocatorStatistics.freeBytes;
|
||||
TotalBytes = allocatorStatistics.totalBytes;
|
||||
TensorDatasMemoryInfo = new List<TensorDataMemoryInfo>();
|
||||
foreach (var tensorDataStatistics in allocatorStatistics.GetTensorDatasStatistics())
|
||||
{
|
||||
TensorDatasMemoryInfo.Add(new TensorDataMemoryInfo(tensorDataStatistics));
|
||||
}
|
||||
TensorsMemoryInfo = new List<TensorMemoryInfo>();
|
||||
foreach (var tensorStatistics in allocatorStatistics.GetTensorsStatistics())
|
||||
{
|
||||
TensorsMemoryInfo.Add(new TensorMemoryInfo(tensorStatistics));
|
||||
}
|
||||
}
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
return $"Allocator '{Name}' of totalBytes {TotalBytes}, usedBytes:{UsedBytes}, lostToFragmentation:{BytesLostToFragmentation}, free:{FreeBytes}";
|
||||
}
|
||||
}
|
||||
|
||||
public class TensorMemoryInfo
|
||||
{
|
||||
public int UniqueId { get; }
|
||||
public string Name { get; }
|
||||
public TensorShape Shape { get; }
|
||||
public int CacheBytes { get; }
|
||||
public TensorDataMemoryInfo tensorDataMemoryInfo { get; }
|
||||
|
||||
internal TensorMemoryInfo(ITensorStatistics tensorStatistics)
|
||||
{
|
||||
UniqueId = tensorStatistics.uniqueId;
|
||||
Name = tensorStatistics.name;
|
||||
Shape = tensorStatistics.shape;
|
||||
CacheBytes = tensorStatistics.cacheBytes;
|
||||
var tensorDataStats = tensorStatistics.GetTensorDataStatistics();
|
||||
if (tensorDataStats != null)
|
||||
tensorDataMemoryInfo = new TensorDataMemoryInfo(tensorDataStats);
|
||||
}
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
var tensorDataStr = (tensorDataMemoryInfo != null) ? tensorDataMemoryInfo.ToString() : "";
|
||||
return $"Tensor: {Name} of shape {Shape.ToString()}, cacheBytes: {CacheBytes} (data: {tensorDataStr})";
|
||||
}
|
||||
}
|
||||
|
||||
public class MemorySnapshotReport
|
||||
{
|
||||
public string ContextType { get; }
|
||||
public string ContextName { get; }
|
||||
public List<TensorMemoryInfo> TensorsMemoryInfo { get; }
|
||||
public List<AllocatorMemoryInfo> AllocatorsMemoryInfo { get; }
|
||||
public List<TempMemoryInfo> TempMemoriesInfo { get; }
|
||||
|
||||
internal MemorySnapshotReport(IOps ops, IVarsStatistics vars, string context, Layer layer)
|
||||
{
|
||||
ContextType = context;
|
||||
ContextName = "";
|
||||
if (layer != null)
|
||||
{
|
||||
ContextType += ": " + layer.type + ((layer.type == Layer.Type.Activation) ? ("." + layer.activation) : "");
|
||||
ContextName += layer.name;
|
||||
}
|
||||
|
||||
TensorsMemoryInfo = new List<TensorMemoryInfo>();
|
||||
AllocatorsMemoryInfo = new List<AllocatorMemoryInfo>();
|
||||
TempMemoriesInfo = new List<TempMemoryInfo>();
|
||||
|
||||
foreach (var allocatorsStatistic in vars.GetAllocatorsStatistics())
|
||||
{
|
||||
AllocatorsMemoryInfo.Add(new AllocatorMemoryInfo(allocatorsStatistic));
|
||||
}
|
||||
|
||||
foreach (var tensorStatistic in vars.GetTensorsStatistics())
|
||||
{
|
||||
TensorsMemoryInfo.Add(new TensorMemoryInfo(tensorStatistic));
|
||||
}
|
||||
|
||||
foreach (var tempMemoryStatistic in ops.GetTempMemoryStatistics())
|
||||
{
|
||||
TempMemoriesInfo.Add(new TempMemoryInfo(tempMemoryStatistic));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public class MemorySnapshotsReport
|
||||
{
|
||||
public List<MemorySnapshotReport> MemorySnapshotsReports { get; private set; }
|
||||
|
||||
public MemorySnapshotsReport()
|
||||
{
|
||||
Reset();
|
||||
}
|
||||
|
||||
public void Reset()
|
||||
{
|
||||
MemorySnapshotsReports = new List<MemorySnapshotReport>();
|
||||
}
|
||||
|
||||
public void TakeMemorySnapshot(IOps ops, IVars vars, string context, Layer layer)
|
||||
{
|
||||
var varsWithStatistics = vars as IVarsStatistics;
|
||||
if (varsWithStatistics == null)
|
||||
return;
|
||||
|
||||
MemorySnapshotsReports.Add(new MemorySnapshotReport(ops, varsWithStatistics, context, layer));
|
||||
}
|
||||
|
||||
public MemoryPeakSummary GenerateStringReport(StringBuilder stringBuilder, bool spreadSheetFormat)
|
||||
{
|
||||
stringBuilder.Append("**************** MEMORY SNAPSHOTS REPORTS - START ****************\n");
|
||||
stringBuilder.Append($"Number of snapshots : {MemorySnapshotsReports.Count}\n\n");
|
||||
|
||||
var memoryPeakSummary = MemoryAndExecutionReportHelper.GenerateStringReport(stringBuilder, MemorySnapshotsReports, spreadSheetFormat);
|
||||
stringBuilder.Append("**************** MEMORY SNAPSHOTS REPORTS - STOP ****************\n");
|
||||
return memoryPeakSummary;
|
||||
}
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
var stringBuilder = new StringBuilder(10000);
|
||||
GenerateStringReport(stringBuilder, spreadSheetFormat:false);
|
||||
return stringBuilder.ToString();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
|
||||
#endif //ENABLE_BARRACUDA_STATS
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 0e26059fb46b5a345a0a59a9fe3eafae
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,922 +0,0 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
using UnityEngine;
|
||||
using UnityEngine.Assertions;
|
||||
using UnityEngine.Profiling;
|
||||
|
||||
[assembly: InternalsVisibleTo("Unity.Barracuda.ONNX")]
|
||||
[assembly: InternalsVisibleTo("Unity.Barracuda.Editor")]
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
|
||||
internal class ModelAnalyzer
|
||||
{
|
||||
public static string GetDefaultInputName(Model model)
|
||||
{
|
||||
bool modelHasOnlyOneInput = model.inputs.Count == 1;
|
||||
if (modelHasOnlyOneInput)
|
||||
return model.inputs[0].name;
|
||||
|
||||
var memories = new HashSet<string>();
|
||||
foreach (var m in model.memories)
|
||||
memories.Add(m.input);
|
||||
|
||||
// find the first unconnected input as a default model input
|
||||
var previousLayerNames = new HashSet<string>();
|
||||
foreach (var l in model.layers)
|
||||
{
|
||||
previousLayerNames.Add(l.name);
|
||||
|
||||
bool layerDoesNotNeedInput = (l.type == Layer.Type.Load);
|
||||
|
||||
if (layerDoesNotNeedInput)
|
||||
continue;
|
||||
|
||||
foreach (var inputName in l.inputs)
|
||||
{
|
||||
bool inputIsUnconnected = !previousLayerNames.Contains(inputName);
|
||||
bool inputIsNotPartOfMemory = !memories.Contains(inputName);
|
||||
|
||||
if (inputIsUnconnected && inputIsNotPartOfMemory)
|
||||
return inputName;
|
||||
}
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
static public string GetDefaultOutputName(Model model)
|
||||
{
|
||||
if (model.outputs.Count == 1)
|
||||
return model.outputs[0];
|
||||
|
||||
if (model.layers.Count > 0)
|
||||
{
|
||||
var lastLayer = model.layers[model.layers.Count - 1];
|
||||
return lastLayer.name;
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
public static TensorShape?[] ListTemporaryTensorShapes(Model model, IDictionary<string, TensorShape> inputShapes)
|
||||
{
|
||||
IDictionary<string, TensorShape?> shapesByName;
|
||||
return ListTemporaryTensorShapes(model, inputShapes, out shapesByName);
|
||||
}
|
||||
|
||||
public static TensorShape?[] ListTemporaryTensorShapes(Model model, IDictionary<string, TensorShape> inputShapes,
|
||||
out IDictionary<string, TensorShape?> shapesByName)
|
||||
{
|
||||
Profiler.BeginSample ("Barracuda.ListTemporaryTensorShapes");
|
||||
var shapes = new List<TensorShape?>();
|
||||
shapesByName = new Dictionary<string, TensorShape?>();
|
||||
foreach (var entry in inputShapes)
|
||||
shapesByName.Add(entry.Key, entry.Value);
|
||||
|
||||
TensorShape? Xn;
|
||||
shapesByName.TryGetValue(GetDefaultInputName(model), out Xn); // default input
|
||||
TensorShape? O = Xn;
|
||||
|
||||
foreach (var l in model.layers)
|
||||
{
|
||||
if (l.inputs.Length > 0 && shapesByName.TryGetValue(l.inputs[0], out TensorShape? xShape))
|
||||
Xn = xShape;
|
||||
else
|
||||
Xn = O; // previous output is used, if-and-only-if layer has no explicit inputs
|
||||
|
||||
if (Xn == null)
|
||||
{
|
||||
shapes.Add(Xn);
|
||||
shapesByName.Add(l.name, Xn);
|
||||
continue;
|
||||
}
|
||||
|
||||
TensorShape X = Xn.Value;
|
||||
|
||||
if (l.type == Layer.Type.Dense)
|
||||
{
|
||||
Assert.IsNotNull(l.datasets);
|
||||
var W = l.datasets[0].shape;
|
||||
O = new TensorShape(X.flatHeight, W.flatWidth);
|
||||
}
|
||||
else if (l.type == Layer.Type.Dense3)
|
||||
{
|
||||
Assert.IsNotNull(l.datasets);
|
||||
var W = l.datasets[0].shape;
|
||||
O = new TensorShape(X.batch, 1, W.channels, X.channels);
|
||||
}
|
||||
else if (l.type == Layer.Type.MatMul)
|
||||
{
|
||||
if (!shapesByName.ContainsKey(l.inputs[1]) || shapesByName[l.inputs[1]] == null)
|
||||
{
|
||||
O = null;
|
||||
break;
|
||||
}
|
||||
|
||||
var Y = shapesByName[l.inputs[1]].Value;
|
||||
|
||||
int rankX;
|
||||
int rankY;
|
||||
List<int> onnxXshape;
|
||||
List<int> onnxYshape;
|
||||
|
||||
if (l.pool == null || l.pool.Length == 0)
|
||||
{
|
||||
LegacyGetXYRanks(X, Y, out rankX, out rankY);
|
||||
}
|
||||
else
|
||||
{
|
||||
rankX = l.pool[0];
|
||||
rankY = l.pool[1];
|
||||
}
|
||||
|
||||
onnxXshape = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaShapeToOnnxLayout(X, rankX);
|
||||
onnxYshape = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaShapeToOnnxLayout(Y, rankY);
|
||||
|
||||
int rankO = Math.Max(rankX, rankY);
|
||||
|
||||
// pad 1 on front of shape to both be rankO shape
|
||||
for (int i = 0; i < (rankX - rankY); i++)
|
||||
onnxYshape.Insert(0, 1);
|
||||
|
||||
for (int i = 0; i < (rankY - rankX); i++)
|
||||
onnxXshape.Insert(0, 1);
|
||||
|
||||
if (rankO == 2)
|
||||
O = new TensorShape(onnxXshape[0], 1, 1, onnxYshape[1]);
|
||||
else if (rankO == 3)
|
||||
O = new TensorShape(Math.Max(onnxXshape[0], onnxYshape[0]), 1, onnxYshape[2], onnxXshape[1]);
|
||||
else
|
||||
O = new TensorShape(Math.Max(onnxXshape[0], onnxYshape[0]), onnxXshape[2], onnxYshape[3], Math.Max(onnxXshape[1], onnxYshape[1]));
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Conv2D ||
|
||||
l.type == Layer.Type.Conv3D ||
|
||||
l.type == Layer.Type.DepthwiseConv2D)
|
||||
{
|
||||
var K = l.datasets[0].shape;
|
||||
|
||||
Assert.IsNotNull(l.stride);
|
||||
Assert.IsNotNull(l.pad);
|
||||
var pad = X.AdjustPadToKernel(K, l.stride, l.pad);
|
||||
|
||||
O = X.ApplyKernel(K, l.stride, pad);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Conv2DTrans)
|
||||
{
|
||||
var K = l.datasets[0].shape;
|
||||
Assert.IsNotNull(l.stride);
|
||||
Assert.IsNotNull(l.pad);
|
||||
// pool size is treated as output_adjustment aka output_padding here
|
||||
var outputAdjustment = l.pool;
|
||||
var pad = X.AdjustPadToKernel(K, l.stride, l.pad);
|
||||
O = X.ApplyKernelInverse(K, l.stride, pad, outputAdjustment);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Upsample2D)
|
||||
{
|
||||
if(l.pool.Length != 2)
|
||||
{
|
||||
O = null;
|
||||
}
|
||||
else
|
||||
{
|
||||
// pool size is treated as upsample coefficient here
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.AreEqual(l.pool.Length, 2);
|
||||
O = new TensorShape(X.batch, X.height * l.pool[1], X.width * l.pool[0], X.channels);
|
||||
}
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Upsample3D)
|
||||
{
|
||||
if(l.pool.Length != 2)
|
||||
{
|
||||
O = null;
|
||||
}
|
||||
else
|
||||
{
|
||||
// pool size is treated as upsample coefficient here
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.AreEqual(l.pool.Length, 3);
|
||||
O = new TensorShape(1,1,X.batch, 1, X.depth * l.pool[2], X.height * l.pool[1], X.width * l.pool[0], X.channels);
|
||||
}
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Resample2D)
|
||||
{
|
||||
if(l.pool.Length != 2)
|
||||
{
|
||||
O = null;
|
||||
}
|
||||
else
|
||||
{
|
||||
// pool is treated as resample size here
|
||||
var size = l.pool;
|
||||
Assert.IsNotNull(size);
|
||||
Assert.AreEqual(size.Length, 2);
|
||||
O = new TensorShape(X.batch, size[1], size[0], X.channels);
|
||||
}
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.DepthToSpace)
|
||||
{
|
||||
// pool size is treated as blocksize here
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.AreEqual(l.pool.Length, 2);
|
||||
Assert.AreEqual(X.channels % (l.pool[0] * l.pool[1]), 0);
|
||||
O = new TensorShape(X.batch, X.height * l.pool[1], X.width * l.pool[0], X.channels / (l.pool[0] * l.pool[1]));
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.SpaceToDepth)
|
||||
{
|
||||
// pool size is treated as blocksize here
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.AreEqual(l.pool.Length, 2);
|
||||
O = new TensorShape(X.batch, X.height / l.pool[1], X.width / l.pool[0], X.channels * (l.pool[0] * l.pool[1]));
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.MaxPool2D ||
|
||||
l.type == Layer.Type.AvgPool2D)
|
||||
{
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.IsNotNull(l.stride);
|
||||
Assert.IsNotNull(l.pad);
|
||||
var pad = X.AdjustPadToPool(l.pool, l.stride, l.pad);
|
||||
O = X.ApplyPool(l.pool, l.stride, pad);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.GlobalMaxPool2D ||
|
||||
l.type == Layer.Type.GlobalAvgPool2D)
|
||||
{
|
||||
O = new TensorShape(X.batch, 1, 1, X.channels);
|
||||
}
|
||||
else if (l.type == Layer.Type.Border3D)
|
||||
{
|
||||
Assert.IsNotNull(l.pad);
|
||||
// legacy support
|
||||
if (l.pad.Length == 6)
|
||||
X = X.ApplyBorder(new[] { l.pad[0], l.pad[1], l.pad[2], 0, l.pad[3], l.pad[4], l.pad[5], 0 });
|
||||
else
|
||||
O = X.ApplyBorder(l.pad);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Border2D ||
|
||||
l.type == Layer.Type.Pad2DReflect ||
|
||||
l.type == Layer.Type.Pad2DSymmetric ||
|
||||
l.type == Layer.Type.Pad2DEdge)
|
||||
{
|
||||
Assert.IsNotNull(l.pad);
|
||||
// legacy support
|
||||
if (l.pad.Length == 4)
|
||||
X = X.ApplyBorder(new[] { l.pad[0], l.pad[1], 0, l.pad[2], l.pad[3], 0 });
|
||||
else
|
||||
O = X.ApplyBorder(l.pad);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Conv3D ||
|
||||
l.type == Layer.Type.Conv3DTrans ||
|
||||
l.type == Layer.Type.Upsample3D ||
|
||||
l.type == Layer.Type.MaxPool3D ||
|
||||
l.type == Layer.Type.AvgPool3D ||
|
||||
l.type == Layer.Type.GlobalMaxPool3D ||
|
||||
l.type == Layer.Type.GlobalAvgPool3D ||
|
||||
l.type == Layer.Type.Border3D)
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.RandomNormal ||
|
||||
l.type == Layer.Type.RandomUniform)
|
||||
{
|
||||
Assert.IsNotNull(l.pool);
|
||||
// pool size is treated as shape constant, if not empty
|
||||
// otherwise shape of the previous tensor is used
|
||||
if (l.pool.Length > 0)
|
||||
O = new TensorShape(l.pool);
|
||||
else
|
||||
O = X;
|
||||
}
|
||||
else if (l.type == Layer.Type.ConstantOfShape)
|
||||
{
|
||||
if(l.axis != 1)
|
||||
O = null;
|
||||
else
|
||||
O = X;
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Multinomial)
|
||||
{
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.AreEqual(l.pool.Length, 1);
|
||||
O = new TensorShape(X.batch, l.pool[0]);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.OneHot)
|
||||
{
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.AreEqual(l.pool.Length, 1);
|
||||
int depth = l.pool[0];
|
||||
int inputRank = l.axis;
|
||||
inputRank = inputRank < 0 ? X.dimensions : inputRank;
|
||||
|
||||
if (inputRank == 1)
|
||||
O = new TensorShape(X.flatHeight, depth);
|
||||
else if (inputRank == 2)
|
||||
O = new TensorShape(X.flatHeight, 1, depth, X.flatWidth);
|
||||
else
|
||||
O = new TensorShape(X.batch, X.height, depth, X.channels);
|
||||
}
|
||||
else if (l.type == Layer.Type.RoiAlign)
|
||||
{
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.AreEqual(l.pool.Length, 2);
|
||||
|
||||
if (shapesByName.TryGetValue(l.inputs[1], out TensorShape? shape) && shape != null)
|
||||
{
|
||||
int batches = shape.Value.flatHeight;
|
||||
O = new TensorShape(batches, l.pool[0], l.pool[1], X.channels);
|
||||
}
|
||||
else
|
||||
O = null;
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Add ||
|
||||
l.type == Layer.Type.Sub ||
|
||||
l.type == Layer.Type.Mul ||
|
||||
l.type == Layer.Type.Div ||
|
||||
l.type == Layer.Type.Pow ||
|
||||
l.type == Layer.Type.Min ||
|
||||
l.type == Layer.Type.Max ||
|
||||
l.type == Layer.Type.Mean||
|
||||
l.type == Layer.Type.Greater ||
|
||||
l.type == Layer.Type.GreaterEqual ||
|
||||
l.type == Layer.Type.Less ||
|
||||
l.type == Layer.Type.LessEqual ||
|
||||
l.type == Layer.Type.Equal ||
|
||||
l.type == Layer.Type.LogicalOr ||
|
||||
l.type == Layer.Type.LogicalAnd ||
|
||||
l.type == Layer.Type.LogicalXor ||
|
||||
l.type == Layer.Type.Where)
|
||||
{
|
||||
// gather shapes by names
|
||||
var list = new List<TensorShape>(l.inputs.Length);
|
||||
bool allShapesKnown = true;
|
||||
foreach (var i in l.inputs)
|
||||
{
|
||||
if (shapesByName.TryGetValue(i, out TensorShape? shape) && shape != null)
|
||||
list.Add(shape.Value);
|
||||
else
|
||||
allShapesKnown = false;
|
||||
}
|
||||
|
||||
O = allShapesKnown ? TensorExtensions.Max(list.ToArray()) : default(TensorShape?);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.ReduceL1 ||
|
||||
l.type == Layer.Type.ReduceL2 ||
|
||||
l.type == Layer.Type.ReduceLogSum ||
|
||||
l.type == Layer.Type.ReduceLogSumExp ||
|
||||
l.type == Layer.Type.ReduceMax ||
|
||||
l.type == Layer.Type.ReduceMean ||
|
||||
l.type == Layer.Type.ReduceMin ||
|
||||
l.type == Layer.Type.ReduceProd ||
|
||||
l.type == Layer.Type.ReduceSum ||
|
||||
l.type == Layer.Type.ReduceSumSquare ||
|
||||
l.type == Layer.Type.ArgMax ||
|
||||
l.type == Layer.Type.ArgMin)
|
||||
{
|
||||
O = X.Reduce(l.axis);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Flatten)
|
||||
{
|
||||
O = X.Flatten();
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Reshape)
|
||||
{
|
||||
// pool size is treated as the shape, if not empty
|
||||
var size = l.pool;
|
||||
|
||||
Assert.IsNotNull(size);
|
||||
|
||||
if (size.Length == 0 && l.inputs.Length > 1)
|
||||
{
|
||||
switch (l.axis)
|
||||
{
|
||||
// Legacy - use the shape of the input tensor as the shape
|
||||
case -1:
|
||||
if (shapesByName.TryGetValue(l.inputs[1], out TensorShape? shape))
|
||||
size = shape.Value.ToArray();
|
||||
break;
|
||||
|
||||
// Use the tensor values as the shape; Calculated at runtime
|
||||
case 1:
|
||||
O = null;
|
||||
break;
|
||||
}
|
||||
|
||||
if (O == null)
|
||||
break;
|
||||
}
|
||||
|
||||
Assert.IsTrue( (size.Length == 4) || (size.Length == 8));
|
||||
O = X.Reshape(size);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Expand)
|
||||
{
|
||||
// pool size is treated as new shape
|
||||
var newShape = l.pool;
|
||||
|
||||
Assert.IsNotNull(newShape);
|
||||
Assert.IsTrue(newShape.Length == 8 || newShape.Length == 4);
|
||||
|
||||
O = new TensorShape(newShape);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Transpose)
|
||||
{
|
||||
var permutations = l.pool;
|
||||
if (permutations == null)
|
||||
O = new TensorShape(X.flatWidth, X.flatHeight);
|
||||
else
|
||||
{
|
||||
Assert.IsTrue(permutations.Length == 8 || permutations.Length == 4);
|
||||
O = X.Permute(permutations);
|
||||
}
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Gather)
|
||||
{
|
||||
if (!shapesByName.TryGetValue(l.inputs[0], out TensorShape? input0Shape) || input0Shape == null
|
||||
|| !shapesByName.TryGetValue(l.inputs[1], out TensorShape? input1Shape) || input1Shape == null)
|
||||
{
|
||||
O = null;
|
||||
break;
|
||||
}
|
||||
|
||||
int[] shape = input0Shape.Value.ToArray();
|
||||
shape[l.axis] = input1Shape.Value.length;
|
||||
|
||||
O = new TensorShape(shape);
|
||||
|
||||
if (l.pool != null && l.pool.Length == 2 && l.pool[1] > 1)
|
||||
{
|
||||
int xRank = l.pool[0];
|
||||
int indicesRank = l.pool[1];
|
||||
var oShape = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaShapeToList(O.Value, xRank);
|
||||
var indicesShape = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaShapeToList(input1Shape.Value, indicesRank);
|
||||
|
||||
int axis = Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaAxisToTensor(l.axis, xRank);
|
||||
oShape.InsertRange(axis, indicesShape);
|
||||
oShape.RemoveAt(axis + indicesShape.Count);
|
||||
|
||||
O = (O.Value).Reshape(Compiler.IRShapeInferenceHelper.ShapeInference.BarracudaLayoutToTensorShapeLayout(oShape.ToArray()));
|
||||
|
||||
// rank 2 -> 3
|
||||
if (xRank == 2 && oShape.Count == 3)
|
||||
O = (O.Value).Permute(new int[] { 0, 1, 3, 2 });
|
||||
}
|
||||
|
||||
}
|
||||
else if (l.type == Layer.Type.ScatterND)
|
||||
{
|
||||
O = X;
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Squeeze ||
|
||||
l.type == Layer.Type.Unsqueeze)
|
||||
{
|
||||
O = X;
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Concat)
|
||||
{
|
||||
// gather shapes by names
|
||||
var list = new List<TensorShape>(l.inputs.Length);
|
||||
bool allShapesKnown = true;
|
||||
foreach (var i in l.inputs)
|
||||
{
|
||||
if (!shapesByName.TryGetValue(i, out var shape) || shape == null)
|
||||
{
|
||||
allShapesKnown = false;
|
||||
continue;
|
||||
}
|
||||
list.Add(shape.Value);
|
||||
}
|
||||
|
||||
O = allShapesKnown ? TensorExtensions.Concat(list.ToArray(), l.axis) : default(TensorShape?);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.StridedSlice)
|
||||
{
|
||||
Assert.IsNotNull(l.pad);
|
||||
Assert.IsNotNull(l.pool);
|
||||
Assert.IsNotNull(l.stride);
|
||||
O = X.ApplyStridedSlice(l.pad, l.pool, l.stride);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Tile)
|
||||
{
|
||||
// pool size is treated as tiling coefficient here
|
||||
Assert.IsNotNull(l.pool);
|
||||
var scale = l.pool;
|
||||
O = X.Scale(scale);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Load)
|
||||
{
|
||||
O = l.datasets[0].shape;
|
||||
}
|
||||
else if (// elementwise operations
|
||||
l.type == Layer.Type.Nop ||
|
||||
l.type == Layer.Type.Activation ||
|
||||
l.type == Layer.Type.ScaleBias ||
|
||||
l.type == Layer.Type.Normalization ||
|
||||
l.type == Layer.Type.LRN ||
|
||||
l.type == Layer.Type.Dropout ||
|
||||
l.type == Layer.Type.LogicalNot ||
|
||||
l.type == Layer.Type.Sign)
|
||||
{
|
||||
// works in place, keeps the same shape size
|
||||
O = X;
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.TopKIndices ||
|
||||
l.type == Layer.Type.TopKValues ||
|
||||
l.type == Layer.Type.NonMaxSuppression ||
|
||||
l.type == Layer.Type.LSTM ||
|
||||
l.type == Layer.Type.NonZero)
|
||||
{
|
||||
// Calculated at runtime
|
||||
O = null;
|
||||
}
|
||||
else if (l.type == Layer.Type.Shape)
|
||||
{
|
||||
int shapeRank = l.axis > 0 ? 1 : X.length;
|
||||
O = new TensorShape(shapeRank, 1, 1, 1);
|
||||
}
|
||||
else if (
|
||||
l.type == Layer.Type.Conv3D ||
|
||||
l.type == Layer.Type.Conv3DTrans ||
|
||||
l.type == Layer.Type.Upsample3D ||
|
||||
l.type == Layer.Type.MaxPool3D ||
|
||||
l.type == Layer.Type.AvgPool3D ||
|
||||
l.type == Layer.Type.GlobalMaxPool3D ||
|
||||
l.type == Layer.Type.GlobalAvgPool3D ||
|
||||
l.type == Layer.Type.Border3D)
|
||||
{
|
||||
throw new NotImplementedException("3D operations are not implemented yet!");
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new NotImplementedException($"Layer type {l.type} needs to be explicitly handled");
|
||||
}
|
||||
|
||||
shapes.Add(O);
|
||||
shapesByName.Add(l.name, O);
|
||||
}
|
||||
|
||||
Profiler.EndSample();
|
||||
return shapes.ToArray();
|
||||
}
|
||||
|
||||
// TODO: Remove when the legacy importer / code path is no longer needed (i.e. when pool is always set)
|
||||
public static void LegacyGetXYRanks(TensorShape X, TensorShape Y, out int rankX, out int rankY)
|
||||
{
|
||||
// ONNX rank 2 : N,C => N,1,1,C
|
||||
// rank 3 : one must be N C W, (batches = N) => N, 1, W, C
|
||||
// rank 4 : one must be N C H W, (batches = N * C) => N H W C
|
||||
// X and Y can be different ranks
|
||||
var onnxXshape = new List<int> { X.batch, X.channels, X.height, X.width };
|
||||
if (X.height == 1) onnxXshape = new List<int> { X.batch, X.channels, X.width, 1 };
|
||||
var onnxYshape = new List<int> { Y.batch, Y.channels, Y.height, Y.width };
|
||||
if (Y.height == 1) onnxYshape = new List<int> { Y.batch, Y.channels, Y.width, 1 };
|
||||
|
||||
rankX = 0;
|
||||
for (int i = 3; i >= 0; i--)
|
||||
{
|
||||
if (onnxXshape[i] != 1)
|
||||
{
|
||||
rankX = i + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
rankY = 0;
|
||||
for (int i = 3; i >= 0; i--)
|
||||
{
|
||||
if (onnxYshape[i] != 1)
|
||||
{
|
||||
rankY = i + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static bool TryGetOutputTensorShape(Model model, IDictionary<string, TensorShape> inputShapes, string output, out TensorShape shape)
|
||||
{
|
||||
shape = new TensorShape();
|
||||
IDictionary<string, TensorShape?> shapesByName;
|
||||
ListTemporaryTensorShapes(model, inputShapes, out shapesByName);
|
||||
|
||||
TensorShape? dynamicShape;
|
||||
bool found = shapesByName.TryGetValue(output, out dynamicShape) && dynamicShape != null;
|
||||
if (found)
|
||||
shape = dynamicShape.Value;
|
||||
return found;
|
||||
}
|
||||
|
||||
public static bool TryGetOutputTensorShape(Model model, string output, out TensorShape shape)
|
||||
{
|
||||
var inputShapes = new Dictionary<string, TensorShape>();
|
||||
foreach (var i in model.inputs)
|
||||
inputShapes.Add(i.name, new TensorShape(i.shape));
|
||||
return TryGetOutputTensorShape(model, inputShapes, output, out shape);
|
||||
}
|
||||
|
||||
public static bool FindLayerByName(Model model, string name, out Layer layer)
|
||||
{
|
||||
layer = new Layer("",Layer.Type.Nop);
|
||||
foreach (var l in model.layers)
|
||||
{
|
||||
if (l.name == name)
|
||||
{
|
||||
layer = l;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public static HashSet<Layer> FindLayersThatRequireStorage(Model model)
|
||||
{
|
||||
var allInputsExceptFromPreviousLayer = new HashSet<string>();
|
||||
Layer prevLayer = null;
|
||||
foreach (var layer in model.layers)
|
||||
{
|
||||
foreach (var input in layer.inputs)
|
||||
if (prevLayer != null && input != prevLayer.name)
|
||||
allInputsExceptFromPreviousLayer.Add(input);
|
||||
prevLayer = layer;
|
||||
}
|
||||
|
||||
var allOutputs = new HashSet<string>();
|
||||
foreach (var output in model.outputs)
|
||||
allOutputs.Add(output);
|
||||
foreach (var memory in model.memories)
|
||||
allOutputs.Add(memory.output);
|
||||
allOutputs.Add(GetDefaultOutputName(model));
|
||||
|
||||
var requireStorage = new HashSet<Layer>();
|
||||
foreach (var layer in model.layers)
|
||||
{
|
||||
// loading constant tensor requires storage
|
||||
if (layer.type == Layer.Type.Load)
|
||||
requireStorage.Add(layer);
|
||||
|
||||
// @TBD: implement safety check that ensures Nop never has input
|
||||
// otherwise it has to be treated as Load operation
|
||||
if (layer.type == Layer.Type.Nop)
|
||||
requireStorage.Add(layer);
|
||||
|
||||
if (allInputsExceptFromPreviousLayer.Contains(layer.name) ||
|
||||
allOutputs.Contains(layer.name))
|
||||
requireStorage.Add(layer);
|
||||
}
|
||||
|
||||
return requireStorage;
|
||||
}
|
||||
|
||||
public static HashSet<Layer> FindUpstreamLayers(Model model, string[] outputs)
|
||||
{
|
||||
// TODO: replace with var layersByName = model.layers.ToDictionary(i => i.name, i => i);
|
||||
var layersByName = new Dictionary<string, Layer>();
|
||||
foreach (var l in model.layers)
|
||||
layersByName.Add(l.name, l);
|
||||
|
||||
var connected = new HashSet<Layer>();
|
||||
var layersToVisit = new HashSet<Layer>();
|
||||
foreach (var o in outputs)
|
||||
if (layersByName.ContainsKey(o))
|
||||
{
|
||||
layersToVisit.Add(layersByName[o]);
|
||||
connected.Add(layersByName[o]);
|
||||
}
|
||||
|
||||
while (layersToVisit.Count > 0)
|
||||
{
|
||||
var visitNext = new HashSet<Layer>();
|
||||
foreach (var l in layersToVisit)
|
||||
foreach (var i in l.inputs)
|
||||
if (layersByName.ContainsKey(i))
|
||||
{
|
||||
visitNext.Add(layersByName[i]);
|
||||
connected.Add(layersByName[i]);
|
||||
}
|
||||
|
||||
layersToVisit = visitNext;
|
||||
}
|
||||
return connected;
|
||||
}
|
||||
|
||||
public static TensorShape FindLargestNecessaryTensorShape(Model model, IDictionary<string, TensorShape> inputShapes)
|
||||
{
|
||||
Profiler.BeginSample ("Barracuda.FindLargestNecessaryTensorShape");
|
||||
|
||||
var shapes = ListTemporaryTensorShapes(model, inputShapes);
|
||||
|
||||
var maxTensorShape = new TensorShape(1,1,1,1);
|
||||
foreach (var X in shapes)
|
||||
if (X?.length > maxTensorShape.length)
|
||||
maxTensorShape = X.Value;
|
||||
|
||||
Profiler.EndSample ();
|
||||
|
||||
return maxTensorShape;
|
||||
}
|
||||
|
||||
public static TensorShape FindLargestArgumentTensorShape(Model model)
|
||||
{
|
||||
TensorShape maxTensorShape = new TensorShape(1,1,1,1);
|
||||
foreach (var layer in model.layers)
|
||||
foreach (var arg in layer.datasets)
|
||||
if (arg.shape.length > maxTensorShape.length)
|
||||
maxTensorShape = arg.shape;
|
||||
|
||||
return maxTensorShape;
|
||||
}
|
||||
|
||||
public static string[] FindUnusedLayers(Model model)
|
||||
{
|
||||
var layerUsageByName = model.layers.ToDictionary(i => i.name, i => false);
|
||||
foreach (var layer in model.layers)
|
||||
{
|
||||
if (layer.flags.HasFlag(Layer.Flags.Preserve))
|
||||
layerUsageByName[layer.name] = true;
|
||||
|
||||
foreach (var i in layer.inputs)
|
||||
{
|
||||
layerUsageByName[i] = true;
|
||||
}
|
||||
}
|
||||
|
||||
foreach (var o in model.outputs)
|
||||
{
|
||||
layerUsageByName[o] = true;
|
||||
}
|
||||
|
||||
foreach (var mem in model.memories)
|
||||
{
|
||||
layerUsageByName[mem.output] = true;
|
||||
}
|
||||
|
||||
return layerUsageByName.Where(keyValue => !keyValue.Value).Select(keyValue => keyValue.Key).ToArray();
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns the subset of <paramref name="links"/> that does not resolve to any
/// layer name, global input or memory input of the model.
/// </summary>
/// <param name="model">Model providing the set of resolvable names.</param>
/// <param name="links">Candidate link names to check.</param>
/// <returns>The link names that point at nothing in the model.</returns>
private static string[] FindBrokenLinks(Model model, HashSet<string> links)
{
    // Every name a link may legitimately point at: layer outputs, global inputs,
    // and memory inputs.
    var resolvable = new HashSet<string>(model.layers.Select(l => l.name));
    resolvable.UnionWith(model.inputs.Select(i => i.name));
    resolvable.UnionWith(model.memories.Select(m => m.input));

    // BUG FIX: the original aliased `links` and called ExceptWith on it directly,
    // silently mutating the caller's set as a side effect. Work on a copy instead.
    var broken = new HashSet<string>(links);
    broken.ExceptWith(resolvable);
    return broken.ToArray();
}
|
||||
|
||||
/// <summary>
/// Convenience overload: checks an array of link names for broken links.
/// </summary>
private static string[] FindBrokenLinks(Model model, string[] links) =>
    FindBrokenLinks(model, new HashSet<string>(links));
|
||||
|
||||
/// <summary>
/// Returns every name referenced anywhere in the model (global outputs and all
/// layer inputs) that does not resolve to an actual layer, input or memory.
/// </summary>
/// <param name="model">Model to validate.</param>
/// <returns>Names of dangling references.</returns>
public static string[] FindBrokenLinks(Model model)
{
    // Collect every referenced name: global outputs first...
    var referenced = new HashSet<string>(model.outputs);

    // ...then the inputs of every layer.
    foreach (var layer in model.layers)
        referenced.UnionWith(layer.inputs);

    return FindBrokenLinks(model, referenced);
}
|
||||
|
||||
/// <summary>
/// Returns the model inputs that nothing consumes: not referenced by any layer
/// nor exposed directly as a global output.
/// </summary>
/// <param name="model">Model to inspect.</param>
/// <returns>Names of inputs with no consumers.</returns>
public static string[] FindUnconnectedInputs(Model model)
{
    // ToDictionary deliberately throws on duplicate input names, matching the
    // strictness of the rest of the analyzer.
    var unconnected = model.inputs.ToDictionary(i => i.name, i => true);

    // An input exposed directly as a global output is connected.
    foreach (var outputName in model.outputs)
        unconnected.Remove(outputName);

    // An input consumed by any layer is connected.
    foreach (var layer in model.layers)
    {
        foreach (var inputName in layer.inputs)
            unconnected.Remove(inputName);
    }

    return unconnected.Keys.ToArray();
}
|
||||
|
||||
/// <summary>
/// Returns everything that consumes the output of <paramref name="layerName"/>:
/// the layers taking it as an input, plus the global model outputs it feeds.
/// </summary>
/// <param name="model">Model to inspect.</param>
/// <param name="layerName">Name of the producing layer.</param>
/// <returns>Names of consumers of the layer's output (deduplicated).</returns>
public static string[] FindLayerOutputs(Model model, string layerName)
{
    // Layers that consume `layerName` as one of their inputs.
    var consumers = model.layers
        .Where(l => l.inputs.Contains(layerName))
        .Select(l => l.name);

    // Global model outputs fed directly by `layerName`.
    var globalOutputs = model.outputs.Where(o => o == layerName);

    // BUG FIX: Enumerable.Union returns a new sequence; the original discarded
    // its result (`allVariables.Union(globalOutputs);`), so global outputs were
    // never part of the returned array. Use the union result.
    return consumers.Union(globalOutputs).ToArray();
}
|
||||
|
||||
/// <summary>
/// Returns the global outputs that do not resolve to any layer, input or memory.
/// </summary>
public static string[] FindUnconnectedOutputs(Model model) =>
    FindBrokenLinks(model, model.outputs.ToArray());
|
||||
|
||||
/// <summary>
/// Whether the layer is an element-wise/concat operation that supports
/// shape broadcasting of its inputs.
/// </summary>
/// <remarks>
/// NOTE: the method name carries a long-standing typo ("Broacastable");
/// it is kept unchanged because external callers may reference it.
/// </remarks>
public static bool IsLayerBroacastable(Layer layer)
{
    switch (layer.type)
    {
        case Layer.Type.Add:
        case Layer.Type.Sub:
        case Layer.Type.Mul:
        case Layer.Type.Div:
        case Layer.Type.Pow:
        case Layer.Type.Min:
        case Layer.Type.Max:
        case Layer.Type.Mean:
        case Layer.Type.Greater:
        case Layer.Type.GreaterEqual:
        case Layer.Type.Less:
        case Layer.Type.LessEqual:
        case Layer.Type.Equal:
        case Layer.Type.LogicalOr:
        case Layer.Type.LogicalAnd:
        case Layer.Type.LogicalXor:
        case Layer.Type.Where:
        case Layer.Type.Concat:
            return true;
        default:
            return false;
    }
}
|
||||
/// <summary>
/// Whether broadcasting handling can be skipped for this layer.
/// Only ConstantOfShape with a dynamic shape (axis != 1) qualifies.
/// </summary>
public static bool IsLayerBroadcastSkippable(Layer layer)
{
    // Equivalent to the original if/else ladder: true only for dynamic-shape
    // ConstantOfShape layers; false for everything else.
    return layer.type == Layer.Type.ConstantOfShape && layer.axis != 1;
}
|
||||
|
||||
// Allow some unknown input dimension for shape inference pass
|
||||
// for now batch does not yield problematic shape inference, so allow for unkown batch
|
||||
/// <summary>
/// Whether an input's shape is known well enough for the shape inference pass.
/// Batch (N) may remain unknown; every other dimension must be a known,
/// positive size.
/// </summary>
public static bool IsInputShapeAcceptablyKnowForShapeInference(Model.Input input) // acceptable unknown shape : N
{
    for (int d = 0; d < input.shape.Length; d++)
    {
        bool dimensionUnknown = input.shape[d] <= 0;
        if (dimensionUnknown && d != TensorShape.DataBatch)
            return false;
    }

    return true;
}
|
||||
|
||||
/// <summary>
/// Compares the ordering of "active" dimensions (size != 1) before and after
/// applying <paramref name="permutations"/> to <paramref name="shape"/>.
/// </summary>
/// <remarks>
/// NOTE(review): this returns true when the active-dimension layouts are EQUAL,
/// which reads as the opposite of what the method name suggests — confirm the
/// intended polarity against callers before relying on the name.
/// </remarks>
public static bool DoesTransposeChangeTensorLayout(TensorShape shape, int[] permutations)
{
    // Axis indices of the active (non-unit) dimensions before the transpose.
    var layoutBefore = new List<int>();
    for (int d = 0; d < 8; d++)
    {
        if (shape[d] != 1)
            layoutBefore.Add(d);
    }

    // Normalize 4D (NHWC) permutations into the internal 8D representation.
    if (permutations.Length == 4)
        permutations = TensorExtensions.Get8DPermutationsForNHWCPermutationsAndShape(shape, permutations);

    // For each post-transpose position, which original axis landed there.
    var axisMapping = TensorExtensions.Permute(new[] { 0, 1, 2, 3, 4, 5, 6, 7 }, permutations);
    var shapeAfter = shape.Permute(permutations);

    // Original axis index of each active dimension after the transpose.
    var layoutAfter = new List<int>();
    for (int d = 0; d < 8; d++)
    {
        if (shapeAfter[d] != 1)
            layoutAfter.Add(axisMapping[d]);
    }

    return layoutBefore.SequenceEqual(layoutAfter);
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
@@ -1,11 +0,0 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 58838262534854657974303d5782ea38
|
||||
MonoImporter:
|
||||
externalObjects: {}
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -1,253 +0,0 @@
|
||||
#if ENABLE_BARRACUDA_STATS
|
||||
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using UnityEngine;
|
||||
using UnityEngine.Assertions;
|
||||
|
||||
namespace Unity.Barracuda {
|
||||
|
||||
/// <summary>
/// Immutable description of a single compute dispatch: which backend and kernel
/// ran, and the work-item counts along each axis.
/// </summary>
public readonly struct DispatchInfo
{
    public readonly string backend;
    public readonly string kernel;
    public readonly int workItemsX;
    public readonly int workItemsY;
    public readonly int workItemsZ;

    /// <summary>Creates a dispatch record for the given backend/kernel and work size.</summary>
    public DispatchInfo(string backend, string kernel, int workItemsX, int workItemsY, int workItemsZ)
    {
        this.backend = backend;
        this.kernel = kernel;
        this.workItemsX = workItemsX;
        this.workItemsY = workItemsY;
        this.workItemsZ = workItemsZ;
    }

    /// <summary>Formats as "backend:kernel(x,y,z)" for reports.</summary>
    public override string ToString() => $"{backend}:{kernel}({workItemsX},{workItemsY},{workItemsZ})";

    /// <summary>Builds a DispatchInfo from a compute function and its dispatch size.</summary>
    internal static DispatchInfo CreateFromComputeFunc(ComputeFunc computeFunc, int x, int y, int z)
    {
        // Reference-context shaders are tagged "REF", optimized ones "OPT".
        var backendTag = computeFunc.computeShaderContext == ComputeShaderContext.Reference ? "REF" : "OPT";
        return new DispatchInfo(backendTag, computeFunc.kernelName, x, y, z);
    }
}
|
||||
|
||||
/// <summary>
/// Accumulated execution statistics for a single layer: type, name, a summary
/// line, ALU/memory counters, and a textual list of the dispatches it issued.
/// </summary>
public class LayerExecutionReport
{
    public string LayerType { get; }
    public string LayerName { get; }
    public string DispatchInfos { get; private set; }
    public string Summary { get; private set; }
    public long NumAlu { get; private set; }
    public long NumBytes { get; private set; }

    internal LayerExecutionReport(Layer l)
    {
        // Activation layers get the activation kind appended so the report can
        // tell them apart (e.g. "Activation.Relu").
        LayerType = l.type + ((l.type == Layer.Type.Activation) ? ("." + l.activation) : "");
        LayerName = l.name;
        Summary = "";
        DispatchInfos = "";
        NumAlu = 0;
        NumBytes = 0;
    }

    /// <summary>Replaces the one-line summary for this layer.</summary>
    internal void SetSummary(string message) => Summary = message;

    /// <summary>Records the ALU-operation and byte-traffic counters.</summary>
    internal void SetALUAndMemStats(long alu, long bytes)
    {
        NumAlu = alu;
        NumBytes = bytes;
    }

    /// <summary>Appends a dispatch to the " / "-separated dispatch list.</summary>
    internal void AddDispatch(DispatchInfo dispatchInfo)
    {
        if (DispatchInfos.Length != 0)
            DispatchInfos += " / ";
        DispatchInfos += dispatchInfo;
    }
}
|
||||
|
||||
/// <summary>
/// Per-execution report for one model run: a list of completed layer reports
/// plus the report of the layer currently executing (if any).
/// </summary>
public class ModelExecutionReport
{
    public List<LayerExecutionReport> CompletedLayerExecutionReports { get; }
    public LayerExecutionReport CurrentLayerExecutionReport { get; private set; }

    internal ModelExecutionReport()
    {
        CompletedLayerExecutionReports = new List<LayerExecutionReport>();
        CurrentLayerExecutionReport = null;
    }

    /// <summary>Opens a report for <paramref name="layer"/>; only one may be open at a time.</summary>
    internal void LayerExecutionStarted(Layer layer)
    {
        Assert.IsNull(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport = new LayerExecutionReport(layer);
    }

    /// <summary>Moves the open layer report into the completed list.</summary>
    internal void LayerExecutionCompleted()
    {
        CompletedLayerExecutionReports.Add(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport = null;
    }

    /// <summary>Sets the summary on the currently open layer report.</summary>
    internal void SetLayerSummary(string message)
    {
        Assert.IsNotNull(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport.SetSummary(message);
    }

    /// <summary>Sets ALU/memory counters on the currently open layer report.</summary>
    internal void SetLayerALUAndMemStats(long alu, long bytes)
    {
        Assert.IsNotNull(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport.SetALUAndMemStats(alu, bytes);
    }

    /// <summary>Adds a dispatch record to the currently open layer report.</summary>
    internal void AddLayerDispatch(DispatchInfo dispatchInfo)
    {
        Assert.IsNotNull(CurrentLayerExecutionReport);
        CurrentLayerExecutionReport.AddDispatch(dispatchInfo);
    }
}
|
||||
|
||||
/// <summary>
/// Collects execution reports across multiple model runs and renders them
/// (plus memory snapshots) as a text or spreadsheet-friendly report.
/// </summary>
public class ModelExecutionsReporter : IModelExecutionsReporter
{
    //Tabs separator make importing into spreadsheet software easy.
    public static readonly string SpreadSheetFieldSeparator = "\t";
    public static readonly string TextFormatFieldSeparator = " / ";
    public static readonly string TextIndentation = " ";

    public List<ModelExecutionReport> CompletedModelExecutionReports { get; private set; }
    public ModelExecutionReport CurrentModelExecutionReport { get; private set; }
    public MemorySnapshotsReport MemorySnapshotsReport { get; private set; }

    public ModelExecutionsReporter()
    {
        Reset();
    }

    /// <summary>Discards all collected reports and snapshots.</summary>
    public void Reset()
    {
        CompletedModelExecutionReports = new List<ModelExecutionReport>();
        CurrentModelExecutionReport = null;
        MemorySnapshotsReport = new MemorySnapshotsReport();
    }

    /// <summary>Records a memory snapshot for the given context/layer.</summary>
    public void TakeMemorySnapshot(IOps ops, IVars vars, string context, Layer layer)
        => MemorySnapshotsReport.TakeMemorySnapshot(ops, vars, context, layer);

    /// <summary>Opens a new model-execution report; only one may be open at a time.</summary>
    public void ModelExecutionStarted()
    {
        Assert.IsNull(CurrentModelExecutionReport);
        CurrentModelExecutionReport = new ModelExecutionReport();
    }

    /// <summary>Moves the open model-execution report into the completed list.</summary>
    public void ModelExecutionCompleted()
    {
        CompletedModelExecutionReports.Add(CurrentModelExecutionReport);
        CurrentModelExecutionReport = null;
    }

    /// <summary>Forwards layer start to the open model-execution report.</summary>
    public void LayerExecutionStarted(Layer layer)
    {
        Assert.IsNotNull(CurrentModelExecutionReport);
        CurrentModelExecutionReport.LayerExecutionStarted(layer);
    }

    /// <summary>Forwards layer completion to the open model-execution report.</summary>
    public void LayerExecutionCompleted()
    {
        Assert.IsNotNull(CurrentModelExecutionReport);
        CurrentModelExecutionReport.LayerExecutionCompleted();
    }

    /// <summary>Forwards a layer summary to the open model-execution report.</summary>
    public void SetLayerSummary(string message)
    {
        Assert.IsNotNull(CurrentModelExecutionReport);
        CurrentModelExecutionReport.SetLayerSummary(message);
    }

    /// <summary>Forwards ALU/memory counters to the open model-execution report.</summary>
    public void SetLayerALUAndMemStats(long alu, long bytes)
    {
        Assert.IsNotNull(CurrentModelExecutionReport);
        CurrentModelExecutionReport.SetLayerALUAndMemStats(alu, bytes);
    }

    /// <summary>Forwards a dispatch record to the open model-execution report.</summary>
    public void AddLayerDispatch(DispatchInfo dispatchInfo)
    {
        Assert.IsNotNull(CurrentModelExecutionReport);
        CurrentModelExecutionReport.AddLayerDispatch(dispatchInfo);
    }

    /// <inheritdoc/>
    public override string ToString() => GenerateStringReport(out _, false);

    /// <summary>
    /// Renders all executions and memory snapshots into one string.
    /// </summary>
    /// <param name="memoryPeakSummary">Peak-memory summary produced by the snapshot report.</param>
    /// <param name="spreadsheetFormat">True for tab-separated output, false for plain text.</param>
    public string GenerateStringReport(out MemoryPeakSummary memoryPeakSummary, bool spreadsheetFormat)
    {
        var sb = new StringBuilder(1000);

        //**************** MODEL EXECUTIONS REPORT - START ****************
        sb.Append($"**************** MODEL EXECUTIONS REPORT - START ****************\n");
        sb.Append($"Number of completed executions : {CompletedModelExecutionReports.Count}\n");
        if (CurrentModelExecutionReport != null)
            sb.Append("Warning: last model execution was not completed. It will be logged, but information might be incomplete.\n");
        sb.Append("\n");

        for (int i = 0; i < CompletedModelExecutionReports.Count; ++i)
        {
            sb.Append($"--------- Execution index : {i} - START ---------\n");
            MemoryAndExecutionReportHelper.GenerateStringReport(sb, CompletedModelExecutionReports[i], spreadsheetFormat);
            sb.Append($"--------- Execution index : {i} - STOP ---------\n");
            sb.Append("\n");
        }

        // The in-flight execution (if any) is reported too, marked as uncompleted.
        if (CurrentModelExecutionReport != null)
        {
            sb.Append($"--------- Uncompleted execution - START ---------\n");
            MemoryAndExecutionReportHelper.GenerateStringReport(sb, CurrentModelExecutionReport, spreadsheetFormat);
            sb.Append($"--------- Uncompleted execution - STOP ---------\n");
            sb.Append("\n");
        }
        sb.Append($"**************** MODEL EXECUTION REPORT - STOP ****************\n");
        sb.Append("\n");
        //**************** MODEL EXECUTIONS REPORT - STOP ****************

        //**************** MEMORY SNAPSHOTS REPORTS - START ****************
        memoryPeakSummary = MemorySnapshotsReport.GenerateStringReport(sb, spreadsheetFormat);
        //**************** MEMORY SNAPSHOTS REPORTS - STOP ****************

        return sb.ToString();
    }

#if UNITY_EDITOR
    /// <summary>
    /// Writes the report to a text file in the temporary cache path and returns
    /// the full path written to.
    /// </summary>
    public static string ToTextFile(IModelExecutionsReporter report, bool spreadsheetFormat, out MemoryPeakSummary memoryPeakSummary, string filename = null)
    {
        string stringToSave = report.GenerateStringReport(out memoryPeakSummary, spreadsheetFormat);

        string fullPath = Application.temporaryCachePath;
        if (filename == null)
        {
            // Default file name: ModelExecutionReport.txt.
            fullPath = Path.Combine(fullPath, "ModelExecutionReport");
            fullPath = Path.ChangeExtension(fullPath, "txt");
        }
        else
        {
            fullPath = Path.Combine(fullPath, filename);
        }
        File.WriteAllText(fullPath, stringToSave);
        return fullPath;
    }
#endif
}
|
||||
|
||||
} // namespace Unity.Barracuda
|
||||
|
||||
#endif //ENABLE_BARRACUDA_STATS
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user