// #define DEBUG_TIMING
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using UnityEngine;
using UnityEngine.Assertions;
using UnityEngine.Profiling;

[assembly: InternalsVisibleTo("Unity.Barracuda.Tests")]

namespace Unity.Barracuda
{
    /// <summary>
    /// Barracuda `Model` loader
    /// </summary>
    public static class ModelLoader
    {
        /// <summary>
        /// Return an object oriented representation (aka: `Model`) of a neural network from a binary representation of type `NNModel`.
        /// By default details are not logged to the console, set `verbose` to true to see loading details.
        /// </summary>
        /// <param name="model">model</param>
        /// <param name="verbose">verbose</param>
        /// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
        /// <returns>loaded Model</returns>
        public static Model Load(NNModel model, bool verbose = false, bool skipWeights = false)
        {
            return Load(model.modelData.Value, verbose, skipWeights);
        }

        /// <summary>
        /// Return an object oriented representation (aka: `Model`) of a neural network from a binary representation of type `NNModel`.
        /// By default details are not logged to the console, set `verbose` to true to see loading details.
        /// Nothing is read until the returned enumerator is advanced; it yields `null` after every step of the loader
        /// (each elapsed time slice, and once more when loading has completed).
        /// </summary>
        /// <param name="nnModel">binary representation of model</param>
        /// <param name="model">object-oriented representation of model (must initialize before calling method)</param>
        /// <param name="verbose">verbose</param>
        /// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
        /// <param name="maxTimePerYield">the maximum amount of time to spend in computation before yielding</param>
        /// <returns>IEnumerator (use with StartCoroutine)</returns>
        public static IEnumerator LoadAsync(NNModel nnModel, Model model, bool verbose = false, bool skipWeights = false, float maxTimePerYield = 0.01f)
        {
            return LoadDeferred(() => Open(nnModel.modelData.Value), model, verbose, skipWeights, maxTimePerYield);
        }

        /// <summary>
        /// Return an object oriented representation (aka: `Model`) of a neural network from a `.bc` file from the streaming asset folder.
        /// By default details are not logged to the console, set `verbose` to true to see loading details.
        /// </summary>
        /// <param name="filename">file name</param>
        /// <param name="verbose">verbose</param>
        /// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
        /// <returns>loaded Model</returns>
        public static Model LoadFromStreamingAssets(string filename, bool verbose = false, bool skipWeights = false)
        {
            return Load(Path.Combine(Application.streamingAssetsPath, filename), verbose, skipWeights);
        }

        /// <summary>
        /// Return an object oriented representation (aka: `Model`) of a neural network from a `.bc` file from the streaming asset folder.
        /// By default details are not logged to the console, set `verbose` to true to see loading details.
        /// Nothing is read until the returned enumerator is advanced; it yields `null` after every step of the loader
        /// (each elapsed time slice, and once more when loading has completed).
        /// </summary>
        /// <param name="filename">file name</param>
        /// <param name="model">object-oriented representation of model (must initialize before calling method)</param>
        /// <param name="verbose">verbose</param>
        /// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
        /// <param name="maxTimePerYield">the maximum amount of time to spend in computation before yielding</param>
        /// <returns>IEnumerator (use with StartCoroutine)</returns>
        public static IEnumerator LoadAsyncFromStreamingAssets(string filename, Model model, bool verbose = false, bool skipWeights = false, float maxTimePerYield = 0.01f)
        {
            return LoadDeferred(() => Open(Path.Combine(Application.streamingAssetsPath, filename)), model, verbose, skipWeights, maxTimePerYield);
        }

        /// <summary>
        /// Return an object oriented representation (aka: `Model`) of a neural network from a `.bc` file.
        /// By default details are not logged to the console, set `verbose` to true to see loading details.
        /// </summary>
        /// <param name="filepath">file name</param>
        /// <param name="verbose">verbose</param>
        /// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
        /// <returns>loaded Model</returns>
        public static Model Load(string filepath, bool verbose = false, bool skipWeights = false)
        {
            return Load(Open(filepath), verbose, true, skipWeights);
        }

        /// <summary>
        /// Return an object oriented representation (aka: `Model`) of a neural network from a `.bc` file.
        /// By default details are not logged to the console, set `verbose` to true to see loading details.
        /// Nothing is read until the returned enumerator is advanced; it yields `null` after every step of the loader
        /// (each elapsed time slice, and once more when loading has completed).
        /// </summary>
        /// <param name="filepath">file name</param>
        /// <param name="model">object-oriented representation of model (must initialize before calling method)</param>
        /// <param name="verbose">verbose</param>
        /// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
        /// <param name="maxTimePerYield">the maximum amount of time to spend in computation before yielding</param>
        /// <returns>IEnumerator (use with StartCoroutine)</returns>
        public static IEnumerator LoadAsync(string filepath, Model model, bool verbose = false, bool skipWeights = false, float maxTimePerYield = 0.01f)
        {
            return LoadDeferred(() => Open(filepath), model, verbose, skipWeights, maxTimePerYield);
        }

        /// <summary>
        /// Return an object oriented representation (aka: `Model`) of a neural network from a byte[] array.
        /// By default details are not logged to the console, set `verbose` to true to see loading details.
        /// </summary>
        /// <param name="stream">binary representation of model as a byte array</param>
        /// <param name="verbose">verbose</param>
        /// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
        /// <returns>loaded Model</returns>
        public static Model Load(byte[] stream, bool verbose = false, bool skipWeights = false)
        {
            return Load(Open(stream), verbose, true, skipWeights);
        }

        /// <summary>
        /// Return an object oriented representation (aka: `Model`) of a neural network from a byte[] array.
        /// By default details are not logged to the console, set `verbose` to true to see loading details.
        /// Nothing is read until the returned enumerator is advanced; it yields `null` after every step of the loader
        /// (each elapsed time slice, and once more when loading has completed).
        /// </summary>
        /// <param name="stream">binary representation of model as a byte array</param>
        /// <param name="model">object-oriented representation of model (must initialize before calling method)</param>
        /// <param name="verbose">verbose</param>
        /// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
        /// <param name="maxTimePerYield">the maximum amount of time to spend in computation before yielding</param>
        /// <returns>IEnumerator (use with StartCoroutine)</returns>
        public static IEnumerator LoadAsync(byte[] stream, Model model, bool verbose = false, bool skipWeights = false, float maxTimePerYield = 0.01f)
        {
            return LoadDeferred(() => Open(stream), model, verbose, skipWeights, maxTimePerYield);
        }

        #region Private and internal

        /// <summary>
        /// Shared driver behind the public `LoadAsync` overloads.
        /// Being an iterator, none of this body (the assert, opening the reader) runs before the first `MoveNext`.
        /// </summary>
        /// <param name="openReader">factory creating the reader to load from; invoked on the first `MoveNext`</param>
        /// <param name="model">model instance to fill in (must not be null)</param>
        /// <param name="verbose">log loading details</param>
        /// <param name="skipWeights">skip loading weights</param>
        /// <param name="maxTimePerYield">time budget forwarded to the internal loader</param>
        /// <returns>enumerator yielding `null` once per step of the internal loader</returns>
        private static IEnumerator LoadDeferred(Func<BinaryReader> openReader, Model model, bool verbose, bool skipWeights, float maxTimePerYield)
        {
            Assert.IsNotNull(model);
            var loader = LoadAsync(openReader(), model, verbose, true, skipWeights, maxTimePerYield);
            try
            {
                // The internal loader yields null each time its time budget is exceeded and the model when done.
                // Forward every one of those steps so the caller actually gets control back between time slices.
                while (loader.MoveNext())
                    yield return null;
            }
            finally
            {
                // If this enumerator is disposed before completion, make sure the loader's `using` block
                // gets a chance to release the underlying reader.
                (loader as IDisposable)?.Dispose();
            }
        }

        /// <summary>
        /// Load a model from a byte array, with control over layer patching.
        /// </summary>
        /// <param name="stream">binary representation of model as a byte array</param>
        /// <param name="verbose">log loading details</param>
        /// <param name="applyPatching">run `PatchLayer` on every layer read</param>
        /// <param name="skipWeights">skip loading weights</param>
        /// <returns>loaded Model</returns>
        internal static Model Load(byte[] stream, bool verbose = true, bool applyPatching = true, bool skipWeights = false)
        {
            return Load(Open(stream), verbose, applyPatching, skipWeights);
        }

        /// <summary>
        /// Convert a serialized layer axis to the 8D convention when the file predates 8D support.
        /// </summary>
        /// <param name="axis">axis value as read from the file</param>
        /// <param name="version">format version of the file being read</param>
        /// <param name="layerType">type of the layer owning the axis</param>
        /// <returns>the axis, converted for Reduce*, Gather and Concat layers of older files; unchanged otherwise</returns>
        private static int ConvertLayerAxisFor8DShapeSupportIfNeeded(int axis, long version, Layer.Type layerType)
        {
            if (version > Model.LastVersionWithout8DSupport)
                return axis;

            //Prior to version 17, 8D tensors were not supported thus axis was expressed in NCHW format for Gather, Concat and Reduce layers.
            if (layerType == Layer.Type.ReduceL2 ||
                layerType == Layer.Type.ReduceLogSum ||
                layerType == Layer.Type.ReduceLogSumExp ||
                layerType == Layer.Type.ReduceMax ||
                layerType == Layer.Type.ReduceMean ||
                layerType == Layer.Type.ReduceMin ||
                layerType == Layer.Type.ReduceProd ||
                layerType == Layer.Type.ReduceSum ||
                layerType == Layer.Type.ReduceSumSquare ||
                layerType == Layer.Type.Gather ||
                layerType == Layer.Type.Concat)
                axis = TensorExtensions.Convert4DTo8DAxis(axis);

            return axis;
        }

        /// <summary>
        /// Synchronously load a model by driving the internal loader until it produces the model.
        /// The reader is always released before returning.
        /// </summary>
        /// <param name="fileReader">reader positioned at the start of the serialized model; ownership is taken</param>
        /// <param name="verbose">log loading details</param>
        /// <param name="applyPatching">run `PatchLayer` on every layer read</param>
        /// <param name="skipWeights">skip loading weights</param>
        /// <returns>loaded Model, or null if the loader finished without producing one</returns>
        private static Model Load(BinaryReader fileReader, bool verbose = true, bool applyPatching = true, bool skipWeights = false)
        {
            Model model = null;
            var enumerator = LoadAsync(fileReader, null, verbose, applyPatching, skipWeights);
            try
            {
                while (enumerator.MoveNext())
                {
                    model = (Model)enumerator.Current;
                    if (model != null)
                        break;
                }
            }
            finally
            {
                // The loader yields the model from inside its `using` block. Leaving the loop at that point
                // keeps the iterator suspended there, so it must be disposed explicitly to close the reader.
                (enumerator as IDisposable)?.Dispose();
            }
            return model;
        }

        /// <summary>
        /// Core loader. Reads header, inputs, outputs, memories, layers, weights and importer metadata from the reader.
        /// Yields `null` whenever `maxTimePerYield` is positive and that much time has elapsed since the last yield,
        /// and finally yields the populated model. The reader is disposed when the enumerator completes or is disposed.
        /// </summary>
        /// <param name="fileReader">reader positioned at the start of the serialized model; ownership is taken</param>
        /// <param name="model">model to fill in, a new one is created when null</param>
        /// <param name="verbose">log every layer and tensor read</param>
        /// <param name="applyPatching">run `PatchLayer` on every layer read</param>
        /// <param name="skipWeights">seek over the weights instead of reading them</param>
        /// <param name="maxTimePerYield">time budget between yields; values of 0 or less disable intermediate yields</param>
        /// <returns>enumerator yielding `null` while in progress and the model as its last element</returns>
        /// <exception cref="NotSupportedException">the format version read from the file is not one of the supported versions</exception>
        private static IEnumerator LoadAsync(BinaryReader fileReader, Model model, bool verbose = true, bool applyPatching = true, bool skipWeights = false, float maxTimePerYield = 0f)
        {
            using (BinaryReader file = fileReader)
            {
                Profiler.BeginSample("Barracuda.LoadLayers");
                float timeStart = Time.realtimeSinceStartup;

                if (model == null)
                    model = new Model();
                List<Layer> layers = new List<Layer>();

                long version = file.ReadInt64() % 0xff; // magic
                if (version != Model.Version && version != Model.LastVersionWithout8DSupport && version != Model.LastVersionWithoutWeightsAlignmentSupport)
                    throw new NotSupportedException($"Format version not supported: {version}");

                var count = file.ReadInt32();
                model.inputs = new List<Model.Input>(count);
                for (var i = 0; i < count; ++i)
                {
                    model.inputs.Add(new Model.Input {name = ReadString(file), shape = ReadInt32Array(file)});

                    if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
                    {
#if DEBUG_TIMING
                        UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart);
#endif
                        yield return null;
                        timeStart = Time.realtimeSinceStartup;
                    }
                }

                model.outputs = ReadStringArray(file).ToList();

                count = file.ReadInt32();
                model.memories = new List<Model.Memory>(count);
                for (var m = 0; m < count; ++m)
                {
                    model.memories.Add(new Model.Memory
                    {
                        shape = new TensorShape(ReadInt32Array(file)),
                        input = ReadString(file),
                        output = ReadString(file)
                    });

                    if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
                    {
#if DEBUG_TIMING
                        UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart);
#endif
                        yield return null;
                        timeStart = Time.realtimeSinceStartup;
                    }
                }

                int numberOfLayers = file.ReadInt32();
                for (var l = 0; l < numberOfLayers; ++l)
                {
                    var name = ReadString(file);
                    var layerType = (Layer.Type)file.ReadInt32();
                    var activation = (Layer.Activation)file.ReadInt32();
                    Layer layer = new Layer(name, layerType, activation);

                    ReadInt32Array(file); // dummy
                    ReadInt32Array(file); // dummy
                    layer.pad = ReadInt32Array(file);
                    layer.stride = ReadInt32Array(file);
                    layer.pool = ReadInt32Array(file);
                    layer.axis = ConvertLayerAxisFor8DShapeSupportIfNeeded(file.ReadInt32(), version, layerType);
                    layer.alpha = file.ReadSingle();
                    layer.beta = file.ReadSingle();
                    ReadInt32Array(file); // dummy

                    layer.inputs = ReadStringArray(file);

                    if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
                    {
#if DEBUG_TIMING
                        UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart);
#endif
                        yield return null;
                        timeStart = Time.realtimeSinceStartup;
                    }

                    layer.datasets = new Layer.DataSet[file.ReadInt32()];
                    for (var i = 0; i < layer.datasets.Length; ++i)
                    {
                        if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
                        {
#if DEBUG_TIMING
                            UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart);
#endif
                            yield return null;
                            timeStart = Time.realtimeSinceStartup;
                        }

                        layer.datasets[i].name = ReadString(file);
                        layer.datasets[i].shape = new TensorShape(ReadInt32Array(file));
                        layer.datasets[i].offset = file.ReadInt64();
                        layer.datasets[i].itemSizeInBytes = file.ReadInt32();
                        layer.datasets[i].length = file.ReadInt32();
                    }

                    layers.Add(layer);

                    if (verbose)
                        D.Log(
                            $"layer {l}, {layer.name} type: {layer.type} " +
                            $"{((layer.activation != Layer.Activation.None) ? $"activation {layer.activation} " : "")}" +
                            $"tensors: {layer.datasets.Length} inputs: {String.Join(",", layer.inputs)}");

                    if (verbose)
                        foreach (var t in layer.datasets)
                            D.Log($" Tensor: {t.shape} offset: {t.offset} len: {t.length}");

                    // Patching may rename/retype `layer` and append extra layers after it.
                    if (applyPatching)
                        PatchLayer(layers, layer);

                    if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
                    {
#if DEBUG_TIMING
                        UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart + ": " + l);
#endif
                        yield return null;
                        timeStart = Time.realtimeSinceStartup;
                    }
                }
                model.layers = layers;

                // Total number of weight items across every dataset of every layer.
                Int64 numWeightsToRead = 0;
                for (var l = 0; l < model.layers.Count; ++l)
                {
                    for (var d = 0; d < model.layers[l].datasets.Length; ++d)
                    {
                        numWeightsToRead += model.layers[l].datasets[d].length;

                        if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
                        {
#if DEBUG_TIMING
                            UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart);
#endif
                            yield return null;
                            timeStart = Time.realtimeSinceStartup;
                        }
                    }
                }

                Profiler.EndSample();

                DataType weightsDataType = DataType.Float;
                if (version >= 20)
                {
                    //Version 20 introduce weights type but full model need to be in the same type. Per layer no supported yet.
                    weightsDataType = (DataType)file.ReadInt32();
                }

                if (version >= 19)
                {
                    //Padding so weights are aligned on Model.WeightsAlignment bytes
                    // NOTE(review): when the position is already aligned this skips a full Model.WeightsAlignment bytes;
                    // presumably this mirrors what the writer emits - confirm against the exporter before changing.
                    long streamCurrentPosition = file.BaseStream.Position;
                    long paddingForAlignment = Model.WeightsAlignment - (streamCurrentPosition % Model.WeightsAlignment);
                    file.BaseStream.Seek(paddingForAlignment, SeekOrigin.Current);
                }

                if (skipWeights)
                    SkipLargeByteArray(file, numWeightsToRead * BarracudaArray.DataItemSize(weightsDataType));
                else
                {
                    if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
                    {
#if DEBUG_TIMING
                        UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart);
#endif
                        yield return null;
                        timeStart = Time.realtimeSinceStartup;
                    }

                    // Every layer references the same weights array.
                    var sharedWeightsArray = ReadLargeWeightArray(file, numWeightsToRead, weightsDataType);
                    Assert.AreEqual(weightsDataType, sharedWeightsArray.Type);

                    for (var l = 0; l < model.layers.Count; ++l)
                    {
                        model.layers[l].weights = sharedWeightsArray;

                        if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
                        {
#if DEBUG_TIMING
                            UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart);
#endif
                            yield return null;
                            timeStart = Time.realtimeSinceStartup;
                        }
                    }
                }

                // Importer Reporting
                try
                {
                    model.IrSource = ReadString(file);
                    model.IrVersion = ReadString(file);
                    model.ProducerName = ReadString(file);
                    int numWarnings = file.ReadInt32();
                    for (var i = 0; i < numWarnings; ++i)
                    {
                        model.Warnings.Add(new Model.ImporterWarning(ReadString(file), ReadString(file)));
                    }

                    if (version >= 18)
                    {
                        int numMetadataProps = file.ReadInt32();
                        for (var i = 0; i < numMetadataProps; ++i)
                        {
                            model.Metadata.Add(ReadString(file), ReadString(file));
                        }
                    }
                }
                catch (EndOfStreamException)
                {
                    //Do nothing Importer Reporting data might not be present for backward compatibility reasons
                }

                yield return model;
            }
        }

        /// <summary>
        /// Post-process a freshly read layer: split multi-tensor Load layers, split non-fusable activations
        /// into their own layer, and neutralize Dropout.
        /// </summary>
        /// <param name="layers">layers read so far; `layer` is expected to be the last element, new layers are appended</param>
        /// <param name="layer">the layer to patch in place</param>
        private static void PatchLayer(List<Layer> layers, Layer layer)
        {
            // Split Load so that each constant tensor gets its own layer
            // for the sake of simplicity of the execution code
            if (layer.type == Layer.Type.Load &&
                layer.datasets.Length > 1)
            {
                foreach (var t in layer.datasets)
                {
                    Layer layerC = new Layer(t.name, Layer.Type.Load); // load using tensor name
                    layerC.inputs = layer.inputs;
                    layerC.datasets = new[] { t };

                    layers.Add(layerC);
                }

                // patch original layer
                layer.name = layer.name + "_nop";
                layer.type = Layer.Type.Nop;
                layer.datasets = new Layer.DataSet[] {};
            }

            // Split activation part into separate layer when activation fusing is not supported.
            // NOTE: Keras specific. Only Keras exporter packs both Dense/Conv and Activation into the same layer.
            // @TODO: move layer split directly into Keras exporter
            if (layer.type != Layer.Type.Activation &&
                layer.activation != Layer.Activation.None &&
                (!ModelOptimizer.IsLayerSupportingActivationFusing(layer.type) || !ModelOptimizer.IsActivationFusable(layer.activation)))
            {
                var affineOutput = layer.name + "_tmp";

                Layer layerA = new Layer(layer.name, layer.activation);// take the original layer name
                layerA.inputs = new[] { affineOutput };

                // patch original layer
                layer.name = affineOutput;
                layer.activation = Layer.Activation.None;
                Assert.AreEqual(layers[layers.Count-1].name, layer.name);
                Assert.AreEqual(layers[layers.Count-1].activation, layer.activation);

                layers.Add(layerA);
            }

            // @TODO: Enable Dropout
            // @TEMP: disabled runtime Dropout noise to get more predictable results for auto testing
            if (layer.type == Layer.Type.Dropout)
            {
                layer.type = Layer.Type.Activation;
                layer.activation = Layer.Activation.None;
            }
        }

        /// <summary>
        /// Advance the reader's underlying stream by `count` bytes without reading them.
        /// </summary>
        /// <param name="file">reader whose base stream is moved</param>
        /// <param name="count">number of bytes to skip</param>
        private static void SkipLargeByteArray(BinaryReader file, Int64 count)
        {
            file.BaseStream.Seek(count, SeekOrigin.Current);
        }

        /// <summary>
        /// Read `count` weight items of type `dataType` from the reader.
        /// First tries to wrap the buffer of an underlying MemoryStream without copying; otherwise allocates
        /// a new array and fills it in 4Kb chunks.
        /// </summary>
        /// <param name="file">reader positioned at the first weight byte</param>
        /// <param name="count">number of items (not bytes) to read</param>
        /// <param name="dataType">item type of the weights</param>
        /// <returns>array holding the weights</returns>
        /// <exception cref="OverflowException">the total byte size does not fit in an Int32</exception>
        /// <exception cref="IOException">the stream ended before all weights were read</exception>
        private static BarracudaArray ReadLargeWeightArray(BinaryReader file, Int64 count, DataType dataType)
        {
            int bytesToRead;
            Int64 bytesToReadInt64 = count * BarracudaArray.DataItemSize(dataType);
            try
            {
                bytesToRead = Convert.ToInt32(bytesToReadInt64); // throws OverflowException
            }
            catch (OverflowException)
            {
                throw new OverflowException($"Files larger than 2GB currently are not supported. Attempt to read {bytesToReadInt64} bytes.");
            }

            //1-Try to remap byte[] stream to avoid allocation
            Profiler.BeginSample("Barracuda.RemapWeights");
            BarracudaArray remappedWeights = null;
            try
            {
                Stream stream = file.BaseStream;
                var memoryStream = stream as MemoryStream;
                var sourceBuffer = memoryStream?.GetBuffer();
                // When the stream is not a MemoryStream the nullable position is null and this cast throws
                // InvalidOperationException, which is what routes non-memory streams to the copy path below.
                int currentPosition = (int)memoryStream?.Position;
                remappedWeights = new BarracudaArrayFromManagedArray(sourceBuffer, currentPosition, dataType, (int) count);
            }
#if UNITY_EDITOR
            catch (InvalidOperationException e)
            {
                UnityEngine.Debug.Log("ModelLoader: Can't remap memory stream to underlying data type, allocation and copy will occurs. Exception: " + e);
            }
#else
            catch (InvalidOperationException) {}
#endif
            if (remappedWeights != null)
            {
                //We remapped memory. Need to advance stream position to be consistent with read behavior.
                file.BaseStream.Position += bytesToRead;
                Profiler.EndSample();
                return remappedWeights;
            }
            Profiler.EndSample();

            //2-Can't remap will copy from managed memory to native
            Profiler.BeginSample("Barracuda.AllocWeights");
            BarracudaArray loadedWeights = new BarracudaArray((int)count, dataType);
            Profiler.EndSample();

            Profiler.BeginSample("Barracuda.LoadWeights");
            try
            {
                var readBuffer = new byte[4096]; // 4Kb is close to optimal read size.
                // See for measurements: https://www.jacksondunstan.com/articles/3568
                // Read size vs relative read-time:
                // 64b: x10, 128b: x6, 256b: x4, 1Kb: x3, 4Kb: x3
                int writeOffset = 0;
                while (writeOffset < bytesToRead)
                {
                    var bytesLeftToRead = bytesToRead - writeOffset;
                    var readSizeInBytes = Math.Min(readBuffer.Length, bytesLeftToRead);

                    Assert.IsTrue(readSizeInBytes > 0);
                    Assert.IsTrue(readSizeInBytes <= readBuffer.Length);
                    // Stream.Read may return fewer bytes than requested; use what was actually read.
                    readSizeInBytes = file.BaseStream.Read(readBuffer, offset:0, count:readSizeInBytes);
                    if (readSizeInBytes == 0)
                        throw new IOException($"Unexpected EOF reached. Read {writeOffset / sizeof(float)} out of expected {count} floats before reaching end of file.");

                    BarracudaArray.BlockCopy(
                        sourceArray:readBuffer,
                        sourceByteOffset:0,
                        destinationArray:loadedWeights,
                        destinationByteOffset:writeOffset,
                        lengthInBytes:readSizeInBytes);
                    writeOffset += readSizeInBytes;
                }
                Assert.AreEqual(writeOffset, bytesToRead);
            }
            finally
            {
                Profiler.EndSample();
            }

            return loadedWeights;
        }

        /// <summary>
        /// Read an Int32 element count followed by that many Int32 values.
        /// </summary>
        /// <param name="file">reader to read from</param>
        /// <returns>the values read</returns>
        /// <exception cref="EndOfStreamException">the stream ended before all values could be read</exception>
        private static Int32[] ReadInt32Array(BinaryReader file)
        {
            var arr = new Int32[file.ReadInt32()];
            int byteCount = arr.Length * sizeof(Int32);
            byte[] bytes = file.ReadBytes(byteCount);
            // ReadBytes returns a shorter array at end of stream; do not hand back a partly zero-filled result.
            if (bytes.Length != byteCount)
                throw new EndOfStreamException($"Unexpected end of stream: read {bytes.Length} of {byteCount} bytes of an Int32 array.");
            Buffer.BlockCopy(bytes, 0, arr, 0, bytes.Length);
            return arr;
        }

        /// <summary>
        /// Read an Int32 character count followed by that many characters.
        /// </summary>
        /// <param name="file">reader to read from</param>
        /// <returns>the string read</returns>
        private static string ReadString(BinaryReader file)
        {
            var chars = file.ReadChars(file.ReadInt32());
            return new string(chars);
        }

        /// <summary>
        /// Read an Int32 element count followed by that many strings (see `ReadString`).
        /// </summary>
        /// <param name="file">reader to read from</param>
        /// <returns>the strings read</returns>
        private static string[] ReadStringArray(BinaryReader file)
        {
            var arr = new string[file.ReadInt32()];
            for (var i = 0; i < arr.Length; ++i)
                arr[i] = ReadString(file);
            return arr;
        }

        /// <summary>
        /// Open an existing file for reading and wrap it in a BinaryReader.
        /// </summary>
        /// <param name="filename">path of the file to open</param>
        /// <returns>reader over the file; the caller is responsible for disposing it</returns>
        private static BinaryReader Open(string filename)
        {
            return new BinaryReader(new FileStream(filename, FileMode.Open, FileAccess.Read));
        }

        /// <summary>
        /// Wrap a byte array in a BinaryReader over a read-only MemoryStream.
        /// The stream is created with a publicly visible buffer so `ReadLargeWeightArray` can call `GetBuffer` on it.
        /// </summary>
        /// <param name="bytes">serialized model</param>
        /// <returns>reader over the bytes; the caller is responsible for disposing it</returns>
        private static BinaryReader Open(byte[] bytes)
        {
            return new BinaryReader(new MemoryStream(bytes, 0, bytes.Length, false, true));
        }

        #endregion
    }
} // namespace Unity.Barracuda