Files
unity-application/Packages/com.unity.barracuda/Runtime/Core/ModelLoader.cs
2023-03-18 19:53:17 +00:00

607 lines
26 KiB
C#

// #define DEBUG_TIMING
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using UnityEngine;
using UnityEngine.Assertions;
using UnityEngine.Profiling;
[assembly: InternalsVisibleTo("Unity.Barracuda.Tests")]
namespace Unity.Barracuda {
/// <summary>
/// Barracuda `Model` loader
/// </summary>
public static class ModelLoader
{
/// <summary>
/// Builds the object oriented representation (aka: `Model`) of a neural network from the data held by an `NNModel`.
/// Loading details are only logged to the console when `verbose` is true.
/// </summary>
/// <param name="model">`NNModel` whose `modelData.Value` is handed to the matching `Load` overload</param>
/// <param name="verbose">log loading details to the console</param>
/// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
/// <returns>loaded Model</returns>
public static Model Load(NNModel model, bool verbose = false, bool skipWeights = false)
{
    var serializedModel = model.modelData.Value;
    return Load(serializedModel, verbose, skipWeights);
}
/// <summary>
/// Coroutine counterpart of `Load` for an `NNModel`: populates the supplied `Model` from the data held by `nnModel`.
/// Loading details are only logged to the console when `verbose` is true.
/// </summary>
/// <param name="nnModel">binary representation of model</param>
/// <param name="model">object-oriented representation of model; must be a non-null instance created by the caller, it is populated in place</param>
/// <param name="verbose">log loading details to the console</param>
/// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
/// <param name="maxTimePerYield">time budget forwarded to the internal loader, which pauses once the budget has been exceeded</param>
/// <returns>IEnumerator (use with StartCoroutine)</returns>
public static IEnumerator LoadAsync(NNModel nnModel, Model model, bool verbose = false, bool skipWeights = false, float maxTimePerYield = 0.01f)
{
    Assert.IsNotNull(model);
    IEnumerator loading = LoadAsync(Open(nnModel.modelData.Value), model, verbose, true, skipWeights, maxTimePerYield);
    for (bool advanced = loading.MoveNext(); advanced; advanced = loading.MoveNext())
    {
        // The internal loader yields null at its time-slice checkpoints and the model itself once loading is done.
        // NOTE(review): only the final (non-null) step is forwarded to the caller, so this coroutine yields
        // a single time, after the model has been produced - confirm that swallowing the intermediate pauses is intended.
        var produced = (Model)loading.Current;
        if (produced != null)
            yield return null;
    }
}
/// <summary>
/// Builds the object oriented representation (aka: `Model`) of a neural network from a `.bc` file located in the streaming assets folder.
/// Loading details are only logged to the console when `verbose` is true.
/// </summary>
/// <param name="filename">file name, combined with `Application.streamingAssetsPath` to form the path that is opened</param>
/// <param name="verbose">log loading details to the console</param>
/// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
/// <returns>loaded Model</returns>
public static Model LoadFromStreamingAssets(string filename, bool verbose = false, bool skipWeights = false)
{
    string fullPath = Path.Combine(Application.streamingAssetsPath, filename);
    return Load(fullPath, verbose, skipWeights);
}
/// <summary>
/// Coroutine counterpart of `LoadFromStreamingAssets`: populates the supplied `Model` from a `.bc` file located in the streaming assets folder.
/// Loading details are only logged to the console when `verbose` is true.
/// </summary>
/// <param name="filename">file name, combined with `Application.streamingAssetsPath` to form the path that is opened</param>
/// <param name="model">object-oriented representation of model; must be a non-null instance created by the caller, it is populated in place</param>
/// <param name="verbose">log loading details to the console</param>
/// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
/// <param name="maxTimePerYield">time budget forwarded to the internal loader, which pauses once the budget has been exceeded</param>
/// <returns>IEnumerator (use with StartCoroutine)</returns>
public static IEnumerator LoadAsyncFromStreamingAssets(string filename, Model model, bool verbose = false, bool skipWeights = false, float maxTimePerYield = 0.01f)
{
    Assert.IsNotNull(model);
    var enumerator = LoadAsync(Open(Path.Combine(Application.streamingAssetsPath, filename)), model, verbose, true, skipWeights, maxTimePerYield);
    // Advance before reading: IEnumerator.Current is undefined until MoveNext() has been called once.
    // This is the same loop shape the other LoadAsync overloads use.
    while (enumerator.MoveNext())
    {
        // The internal loader yields null at its time-slice checkpoints and the model itself once loading is done.
        // NOTE(review): only the final (non-null) step is forwarded to the caller, so this coroutine yields
        // a single time, after the model has been produced - confirm that swallowing the intermediate pauses is intended.
        model = (Model)enumerator.Current;
        if (model != null)
            yield return null;
    }
}
/// <summary>
/// Builds the object oriented representation (aka: `Model`) of a neural network from a `.bc` file.
/// Loading details are only logged to the console when `verbose` is true.
/// </summary>
/// <param name="filepath">path of the file to open for reading</param>
/// <param name="verbose">log loading details to the console</param>
/// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
/// <returns>loaded Model</returns>
public static Model Load(string filepath, bool verbose = false, bool skipWeights = false)
{
    BinaryReader reader = Open(filepath);
    // Layer patching is always enabled for the public entry points.
    return Load(reader, verbose, true, skipWeights);
}
/// <summary>
/// Coroutine counterpart of `Load` for a `.bc` file: populates the supplied `Model` from the file at `filepath`.
/// Loading details are only logged to the console when `verbose` is true.
/// </summary>
/// <param name="filepath">path of the file to open for reading</param>
/// <param name="model">object-oriented representation of model; must be a non-null instance created by the caller, it is populated in place</param>
/// <param name="verbose">log loading details to the console</param>
/// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
/// <param name="maxTimePerYield">time budget forwarded to the internal loader, which pauses once the budget has been exceeded</param>
/// <returns>IEnumerator (use with StartCoroutine)</returns>
public static IEnumerator LoadAsync(string filepath, Model model, bool verbose = false, bool skipWeights = false, float maxTimePerYield = 0.01f)
{
    Assert.IsNotNull(model);
    IEnumerator loading = LoadAsync(Open(filepath), model, verbose, true, skipWeights, maxTimePerYield);
    for (bool advanced = loading.MoveNext(); advanced; advanced = loading.MoveNext())
    {
        // The internal loader yields null at its time-slice checkpoints and the model itself once loading is done.
        // NOTE(review): only the final (non-null) step is forwarded to the caller, so this coroutine yields
        // a single time, after the model has been produced - confirm that swallowing the intermediate pauses is intended.
        var produced = (Model)loading.Current;
        if (produced != null)
            yield return null;
    }
}
/// <summary>
/// Builds the object oriented representation (aka: `Model`) of a neural network from a byte[] array.
/// Loading details are only logged to the console when `verbose` is true.
/// </summary>
/// <param name="stream">binary representation of model as a byte array</param>
/// <param name="verbose">log loading details to the console</param>
/// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
/// <returns>loaded Model</returns>
public static Model Load(byte[] stream, bool verbose = false, bool skipWeights = false)
{
    BinaryReader reader = Open(stream);
    // Layer patching is always enabled for the public entry points.
    return Load(reader, verbose, true, skipWeights);
}
/// <summary>
/// Coroutine counterpart of `Load` for a byte[] array: populates the supplied `Model` from the bytes in `stream`.
/// Loading details are only logged to the console when `verbose` is true.
/// </summary>
/// <param name="stream">binary representation of model as a byte array</param>
/// <param name="model">object-oriented representation of model; must be a non-null instance created by the caller, it is populated in place</param>
/// <param name="verbose">log loading details to the console</param>
/// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
/// <param name="maxTimePerYield">time budget forwarded to the internal loader, which pauses once the budget has been exceeded</param>
/// <returns>IEnumerator (use with StartCoroutine)</returns>
public static IEnumerator LoadAsync(byte[] stream, Model model, bool verbose = false, bool skipWeights = false, float maxTimePerYield = 0.01f)
{
    Assert.IsNotNull(model);
    IEnumerator loading = LoadAsync(Open(stream), model, verbose, true, skipWeights, maxTimePerYield);
    for (bool advanced = loading.MoveNext(); advanced; advanced = loading.MoveNext())
    {
        // The internal loader yields null at its time-slice checkpoints and the model itself once loading is done.
        // NOTE(review): only the final (non-null) step is forwarded to the caller, so this coroutine yields
        // a single time, after the model has been produced - confirm that swallowing the intermediate pauses is intended.
        var produced = (Model)loading.Current;
        if (produced != null)
            yield return null;
    }
}
#region Private and internal
/// <summary>
/// Internal byte[] entry point that, unlike the public overload, lets the caller decide whether layers are patched after being read.
/// </summary>
/// <param name="stream">binary representation of model as a byte array</param>
/// <param name="verbose">log loading details to the console</param>
/// <param name="applyPatching">run `PatchLayer` on every layer as it is read</param>
/// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
/// <returns>loaded Model</returns>
internal static Model Load(byte[] stream, bool verbose = true, bool applyPatching = true, bool skipWeights = false)
{
    BinaryReader reader = Open(stream);
    return Load(reader, verbose, applyPatching, skipWeights);
}
/// <summary>
/// Translates a layer axis read from an older file into the 8D axis convention when the layer type requires it.
/// </summary>
/// <param name="axis">axis value as stored in the file</param>
/// <param name="version">format version of the file being read</param>
/// <param name="layerType">type of the layer the axis belongs to</param>
/// <returns>`axis` unchanged for files newer than `Model.LastVersionWithout8DSupport` or for unaffected layer types, otherwise the converted axis</returns>
private static int ConvertLayerAxisFor8DShapeSupportIfNeeded(int axis, long version, Layer.Type layerType)
{
    // Files newer than the last pre-8D version need no conversion.
    if (version > Model.LastVersionWithout8DSupport)
        return axis;

    // Before 8D tensors were supported, the axis of Gather, Concat and Reduce layers was expressed
    // in the 4D layout, so only those layer types are converted.
    switch (layerType)
    {
        case Layer.Type.ReduceL2:
        case Layer.Type.ReduceLogSum:
        case Layer.Type.ReduceLogSumExp:
        case Layer.Type.ReduceMax:
        case Layer.Type.ReduceMean:
        case Layer.Type.ReduceMin:
        case Layer.Type.ReduceProd:
        case Layer.Type.ReduceSum:
        case Layer.Type.ReduceSumSquare:
        case Layer.Type.Gather:
        case Layer.Type.Concat:
            return TensorExtensions.Convert4DTo8DAxis(axis);
        default:
            return axis;
    }
}
/// <summary>
/// Synchronously loads a `Model` by driving the coroutine-style loader until it produces the model.
/// </summary>
/// <param name="fileReader">reader over the serialized model; it is disposed before this method returns</param>
/// <param name="verbose">log loading details to the console</param>
/// <param name="applyPatching">run `PatchLayer` on every layer as it is read</param>
/// <param name="skipWeights">skip loading weights (fast loading, metadata only)</param>
/// <returns>loaded Model, or null if the loader finished without producing one</returns>
static Model Load(BinaryReader fileReader, bool verbose = true, bool applyPatching = true, bool skipWeights = false)
{
    Model model = null;
    var enumerator = LoadAsync(fileReader, null, verbose, applyPatching, skipWeights);
    try
    {
        while (enumerator.MoveNext())
        {
            model = (Model)enumerator.Current;
            if (model != null)
                break;
        }
    }
    finally
    {
        // The loop stops as soon as the model is yielded, at which point the loader is still suspended
        // inside its `using (BinaryReader ...)` block. Disposing the iterator runs that pending cleanup,
        // so the reader (and any file handle behind it) is closed instead of being left open.
        (enumerator as IDisposable)?.Dispose();
    }
    return model;
}
/// <summary>
/// Incrementally parses a serialized model from `fileReader` into `model`.
/// At a number of checkpoints the iterator yields null when `maxTimePerYield` is greater than 0 and more than
/// `maxTimePerYield` (compared against `Time.realtimeSinceStartup`) has elapsed since the previous pause.
/// The last value yielded is the populated `Model`.
/// </summary>
/// <param name="fileReader">reader over the serialized model; disposed when the iterator leaves its `using` block</param>
/// <param name="model">instance to populate; a new `Model` is created when null</param>
/// <param name="verbose">log each layer and its tensors while reading</param>
/// <param name="applyPatching">call `PatchLayer` on every layer right after it has been read</param>
/// <param name="skipWeights">seek past the weight data instead of reading it</param>
/// <param name="maxTimePerYield">time budget between pauses; 0 (or less) disables the intermediate pauses</param>
/// <returns>iterator yielding null for intermediate pauses and finally the loaded `Model`</returns>
/// <exception cref="NotSupportedException">the format version in the header is not one of the supported versions</exception>
static IEnumerator LoadAsync(BinaryReader fileReader, Model model, bool verbose = true, bool applyPatching = true, bool skipWeights = false, float maxTimePerYield = 0f)
{
using (BinaryReader file = fileReader)
{
Profiler.BeginSample("Barracuda.LoadLayers");
float timeStart = Time.realtimeSinceStartup;
if (model == null)
model = new Model();
List<Layer> layers = new List<Layer>();
// --- Header: format version ---
// NOTE(review): `% 0xff` is a modulo by 255, not a low-byte mask (`& 0xff`) - confirm which one was intended.
long version = file.ReadInt64() % 0xff; // magic
if (version != Model.Version && version != Model.LastVersionWithout8DSupport && version != Model.LastVersionWithoutWeightsAlignmentSupport)
throw new NotSupportedException($"Format version not supported: {version}");
// --- Inputs: count, then a name and a shape for each ---
var count = file.ReadInt32();
model.inputs = new List<Model.Input>(count);
for (var i = 0; i < count; ++i)
{
model.inputs.Add(new Model.Input {name = ReadString(file), shape = ReadInt32Array(file)});
if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
{
#if DEBUG_TIMING
UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart);
#endif
yield return null;
timeStart = Time.realtimeSinceStartup;
}
}
// --- Outputs: a string array of names ---
model.outputs = ReadStringArray(file).ToList();
// --- Memories: count, then shape, input name and output name for each ---
count = file.ReadInt32();
model.memories = new List<Model.Memory>(count);
for (var m = 0; m < count; ++m)
{
model.memories.Add(new Model.Memory
{
shape = new TensorShape(ReadInt32Array(file)),
input = ReadString(file),
output = ReadString(file)
});
if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
{
#if DEBUG_TIMING
UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart);
#endif
yield return null;
timeStart = Time.realtimeSinceStartup;
}
}
// --- Layers: count, then one record per layer ---
int numberOfLayers = file.ReadInt32();
for (var l = 0; l < numberOfLayers; ++l)
{
var name = ReadString(file);
var layerType = (Layer.Type)file.ReadInt32();
var activation = (Layer.Activation)file.ReadInt32();
Layer layer = new Layer(name, layerType, activation);
// The arrays marked "dummy" are part of the record but their content is read and discarded.
ReadInt32Array(file); // dummy
ReadInt32Array(file); // dummy
layer.pad = ReadInt32Array(file);
layer.stride = ReadInt32Array(file);
layer.pool = ReadInt32Array(file);
// The stored axis may need converting for files that predate 8D tensor support.
layer.axis = ConvertLayerAxisFor8DShapeSupportIfNeeded(file.ReadInt32(), version, layerType);
layer.alpha = file.ReadSingle();
layer.beta = file.ReadSingle();
ReadInt32Array(file); // dummy
layer.inputs = ReadStringArray(file);
if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
{
#if DEBUG_TIMING
UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart);
#endif
yield return null;
timeStart = Time.realtimeSinceStartup;
}
// Dataset descriptors only: name, shape, offset, item size and length. The weight data itself is read later.
layer.datasets = new Layer.DataSet[file.ReadInt32()];
for (var i = 0; i < layer.datasets.Length; ++i)
{
if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
{
#if DEBUG_TIMING
UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart);
#endif
yield return null;
timeStart = Time.realtimeSinceStartup;
}
layer.datasets[i].name = ReadString(file);
layer.datasets[i].shape = new TensorShape(ReadInt32Array(file));
layer.datasets[i].offset = file.ReadInt64();
layer.datasets[i].itemSizeInBytes = file.ReadInt32();
layer.datasets[i].length = file.ReadInt32();
}
layers.Add(layer);
if (verbose)
D.Log(
$"layer {l}, {layer.name} type: {layer.type} " +
$"{((layer.activation != Layer.Activation.None) ? $"activation {layer.activation} " : "")}" +
$"tensors: {layer.datasets.Length} inputs: {String.Join(",", layer.inputs)}");
if (verbose)
foreach (var t in layer.datasets)
D.Log($" Tensor: {t.shape} offset: {t.offset} len: {t.length}");
// Patching runs after the layer was appended and may append further layers to the list.
if (applyPatching)
PatchLayer(layers, layer);
if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
{
#if DEBUG_TIMING
UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart + ": " + l);
#endif
yield return null;
timeStart = Time.realtimeSinceStartup;
}
}
model.layers = layers;
// Total number of weight items is the sum of the `length` of every dataset of every layer.
Int64 numWeightsToRead = 0;
for (var l = 0; l < model.layers.Count; ++l)
{
for (var d = 0; d < model.layers[l].datasets.Length; ++d)
{
numWeightsToRead += model.layers[l].datasets[d].length;
if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
{
#if DEBUG_TIMING
UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart);
#endif
yield return null;
timeStart = Time.realtimeSinceStartup;
}
}
}
Profiler.EndSample();
// --- Weights ---
// Files older than version 20 carry no data type field; their weights are treated as Float.
DataType weightsDataType = DataType.Float;
if (version >= 20)
{
//Version 20 introduce weights type but full model need to be in the same type. Per layer no supported yet.
weightsDataType = (DataType)file.ReadInt32();
}
if (version >= 19)
{
//Padding so weights are aligned on Model.WeightsAlignment bytes
// When the position is already a multiple of Model.WeightsAlignment this skips a full
// Model.WeightsAlignment bytes rather than zero - presumably mirrors what the writer emits; verify before changing.
long streamCurrentPosition = file.BaseStream.Position;
long paddingForAlignment = Model.WeightsAlignment - (streamCurrentPosition % Model.WeightsAlignment);
file.BaseStream.Seek(paddingForAlignment, SeekOrigin.Current);
}
if (skipWeights)
SkipLargeByteArray(file, numWeightsToRead * BarracudaArray.DataItemSize(weightsDataType));
else
{
if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
{
#if DEBUG_TIMING
UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart);
#endif
yield return null;
timeStart = Time.realtimeSinceStartup;
}
// All weights are read as one array; every layer receives a reference to that same array.
var sharedWeightsArray = ReadLargeWeightArray(file, numWeightsToRead, weightsDataType);
Assert.AreEqual(weightsDataType, sharedWeightsArray.Type);
for (var l = 0; l < model.layers.Count; ++l)
{
model.layers[l].weights = sharedWeightsArray;
if (maxTimePerYield > 0 && Time.realtimeSinceStartup - timeStart > maxTimePerYield)
{
#if DEBUG_TIMING
UnityEngine.Debug.Log(Time.realtimeSinceStartup - timeStart);
#endif
yield return null;
timeStart = Time.realtimeSinceStartup;
}
}
}
// Importer Reporting
// Optional trailer: source, version, producer, warnings and (from version 18) metadata key/value pairs.
// Reaching the end of the stream here is tolerated; values read before that point stay on the model.
try
{
model.IrSource = ReadString(file);
model.IrVersion = ReadString(file);
model.ProducerName = ReadString(file);
int numWarnings = file.ReadInt32();
for (var i = 0; i < numWarnings; ++i)
{
model.Warnings.Add(new Model.ImporterWarning(ReadString(file), ReadString(file)));
}
if (version >= 18)
{
int numMetadataProps = file.ReadInt32();
for (var i = 0; i < numMetadataProps; ++i)
{
model.Metadata.Add(ReadString(file), ReadString(file));
}
}
}
catch (EndOfStreamException)
{
//Do nothing Importer Reporting data might not be present for backward compatibility reasons
}
// Final step: hand the populated model to whoever drives the iterator.
yield return model;
}
}
/// <summary>
/// Rewrites a freshly read layer, and appends helper layers where needed, so that the layer list
/// is in the form the execution code expects. The three steps run in order on the same layer.
/// </summary>
/// <param name="layers">layer list being built; new layers are appended to it</param>
/// <param name="layer">layer to patch in place</param>
private static void PatchLayer(List<Layer> layers, Layer layer)
{
    // Step 1: a Load layer holding several constant tensors is split so that each tensor gets a layer of its own,
    // which keeps the execution code simple.
    bool isMultiTensorLoad = layer.type == Layer.Type.Load && layer.datasets.Length > 1;
    if (isMultiTensorLoad)
    {
        var tensors = layer.datasets;
        for (var i = 0; i < tensors.Length; ++i)
        {
            var tensor = tensors[i];
            // The new layer is named after the tensor it loads.
            var constantLayer = new Layer(tensor.name, Layer.Type.Load)
            {
                inputs = layer.inputs,
                datasets = new[] { tensor }
            };
            layers.Add(constantLayer);
        }

        // The original layer is kept as an empty no-op.
        layer.name += "_nop";
        layer.type = Layer.Type.Nop;
        layer.datasets = new Layer.DataSet[0];
    }

    // Step 2: when the activation cannot be fused, move it into a separate layer.
    // NOTE: Keras specific. Only Keras exporter packs both Dense/Conv and Activation into the same layer.
    // @TODO: move layer split directly into Keras exporter
    bool needsSeparateActivation =
        layer.type != Layer.Type.Activation &&
        layer.activation != Layer.Activation.None &&
        !(ModelOptimizer.IsLayerSupportingActivationFusing(layer.type) && ModelOptimizer.IsActivationFusable(layer.activation));
    if (needsSeparateActivation)
    {
        var originalName = layer.name;
        var affineOutput = originalName + "_tmp";

        // The activation layer takes over the original name and consumes the renamed layer's output.
        var activationLayer = new Layer(originalName, layer.activation)
        {
            inputs = new[] { affineOutput }
        };

        layer.name = affineOutput;
        layer.activation = Layer.Activation.None;

        // The patched layer is expected to be the most recently appended one.
        var lastLayer = layers[layers.Count - 1];
        Assert.AreEqual(lastLayer.name, layer.name);
        Assert.AreEqual(lastLayer.activation, layer.activation);
        layers.Add(activationLayer);
    }

    // Step 3: Dropout becomes a pass-through activation.
    // @TODO: Enable Dropout
    // @TEMP: disabled runtime Dropout noise to get more predictable results for auto testing
    if (layer.type != Layer.Type.Dropout)
        return;

    layer.type = Layer.Type.Activation;
    layer.activation = Layer.Activation.None;
}
/// <summary>
/// Moves the reader's underlying stream forward by `count` bytes without reading them.
/// </summary>
/// <param name="file">reader whose base stream is repositioned</param>
/// <param name="count">number of bytes to skip, relative to the current position</param>
private static void SkipLargeByteArray(BinaryReader file, Int64 count)
{
    Stream underlyingStream = file.BaseStream;
    underlyingStream.Seek(count, SeekOrigin.Current);
}
/// <summary>
/// Reads `count` weight items of type `dataType` from the reader's current position.
/// First tries to wrap the buffer of an underlying `MemoryStream` without copying; if that is not possible,
/// allocates a new `BarracudaArray` and fills it by reading the stream in chunks.
/// In both cases the stream ends up positioned after the weight data.
/// </summary>
/// <param name="file">reader positioned at the first weight byte</param>
/// <param name="count">number of weight items (not bytes) to read</param>
/// <param name="dataType">item type; `BarracudaArray.DataItemSize(dataType)` gives the size of one item in bytes</param>
/// <returns>array holding the weights</returns>
/// <exception cref="OverflowException">the total byte count does not fit in an Int32</exception>
/// <exception cref="IOException">the stream ended before all bytes were read (copy path only)</exception>
private static BarracudaArray ReadLargeWeightArray(BinaryReader file, Int64 count, DataType dataType)
{
int bytesToRead;
Int64 bytesToReadInt64 = count * BarracudaArray.DataItemSize(dataType);
// Convert.ToInt32 range-checks the value; the catch only replaces the message with a more helpful one.
try
{
bytesToRead = Convert.ToInt32(bytesToReadInt64); // throws OverflowException
}
catch (OverflowException)
{
throw new OverflowException($"Files larger than 2GB currently are not supported. Attempt to read {bytesToReadInt64} bytes.");
}
//1-Try to remap byte[] stream to avoid allocation
Profiler.BeginSample("Barracuda.RemapWeights");
BarracudaArray remappedWeights = null;
try
{
Stream stream = file.BaseStream;
var memoryStream = stream as MemoryStream;
var sourceBuffer = memoryStream?.GetBuffer();
// When the stream is not a MemoryStream, `memoryStream` is null and casting the null `long?` to int
// throws InvalidOperationException; the catch below uses that as the signal to fall back to the copy path.
int currentPosition = (int)memoryStream?.Position;
// NOTE(review): presumably this constructor can also throw InvalidOperationException when the buffer cannot be remapped - verify.
remappedWeights = new BarracudaArrayFromManagedArray(sourceBuffer, currentPosition, dataType, (int) count);
}
#if UNITY_EDITOR
catch (InvalidOperationException e)
{
UnityEngine.Debug.Log("ModelLoader: Can't remap memory stream to underlying data type, allocation and copy will occurs. Exception: " + e);
}
#else
catch (InvalidOperationException) {}
#endif
if (remappedWeights != null)
{
//We remapped memory. Need to advance stream position to be consistent with read behavior.
file.BaseStream.Position += bytesToRead;
Profiler.EndSample();
return remappedWeights;
}
Profiler.EndSample();
//2-Can't remap will copy from managed memory to native
Profiler.BeginSample("Barracuda.AllocWeights");
BarracudaArray loadedWeights = new BarracudaArray((int)count, dataType);
Profiler.EndSample();
Profiler.BeginSample("Barracuda.LoadWeights");
try
{
var readBuffer = new byte[4096]; // 4Kb is close to optimal read size.
// See for measurements: https://www.jacksondunstan.com/articles/3568
// Read size vs relative read-time:
// 64b: x10, 128b: x6, 256b: x4, 1Kb: x3, 4Kb: x3
int writeOffset = 0;
while (writeOffset < bytesToRead)
{
var bytesLeftToRead = bytesToRead - writeOffset;
var readSizeInBytes = Math.Min(readBuffer.Length, bytesLeftToRead);
Assert.IsTrue(readSizeInBytes > 0);
Assert.IsTrue(readSizeInBytes <= readBuffer.Length);
// Stream.Read may deliver fewer bytes than requested; the offset advances by what was actually read.
readSizeInBytes = file.BaseStream.Read(readBuffer, offset:0, count:readSizeInBytes);
// A read of 0 bytes means the stream ended early.
// NOTE(review): the message divides by sizeof(float) and says "floats" regardless of `dataType`.
if (readSizeInBytes == 0)
throw new IOException($"Unexpected EOF reached. Read {writeOffset / sizeof(float)} out of expected {count} floats before reaching end of file.");
BarracudaArray.BlockCopy(
sourceArray:readBuffer, sourceByteOffset:0,
destinationArray:loadedWeights, destinationByteOffset:writeOffset,
lengthInBytes:readSizeInBytes);
writeOffset += readSizeInBytes;
}
Assert.AreEqual(writeOffset, bytesToRead);
}
finally
{
// Close the "Barracuda.LoadWeights" sample even when reading fails.
Profiler.EndSample();
}
return loadedWeights;
}
/// <summary>
/// Reads a length-prefixed Int32 array: an Int32 element count followed by that many 4-byte values,
/// copied byte-for-byte into the result.
/// </summary>
/// <param name="file">reader positioned at the element count</param>
/// <returns>the array that was read (empty when the stored count is 0)</returns>
/// <exception cref="EndOfStreamException">the stream ends before all announced elements could be read</exception>
/// <exception cref="OverflowException">the announced element count is negative or its byte size does not fit in an Int32</exception>
private static Int32[] ReadInt32Array(BinaryReader file)
{
    var arr = new Int32[file.ReadInt32()];
    // checked: a huge count must fail loudly instead of wrapping around to a small or negative byte count.
    int expectedByteCount = checked(arr.Length * sizeof(Int32));
    byte[] bytes = file.ReadBytes(expectedByteCount);
    // ReadBytes returns a shorter array at end of stream; without this check a truncated file
    // would silently produce an array whose tail is zero-filled.
    if (bytes.Length != expectedByteCount)
        throw new EndOfStreamException($"Unexpected end of stream while reading an Int32 array: expected {expectedByteCount} bytes, got {bytes.Length}.");
    Buffer.BlockCopy(bytes, 0, arr, 0, bytes.Length);
    return arr;
}
/// <summary>
/// Reads a length-prefixed string: an Int32 character count followed by that many characters.
/// </summary>
/// <param name="file">reader positioned at the character count</param>
/// <returns>string built from the characters that were read</returns>
private static string ReadString(BinaryReader file)
{
    int charCount = file.ReadInt32();
    char[] characters = file.ReadChars(charCount);
    return new string(characters);
}
/// <summary>
/// Reads a length-prefixed array of strings: an Int32 element count followed by that many strings,
/// each in the format understood by `ReadString`.
/// </summary>
/// <param name="file">reader positioned at the element count</param>
/// <returns>the strings in the order they were stored</returns>
private static string[] ReadStringArray(BinaryReader file)
{
    int stringCount = file.ReadInt32();
    var strings = new string[stringCount];
    int index = 0;
    while (index < stringCount)
    {
        strings[index] = ReadString(file);
        index++;
    }
    return strings;
}
/// <summary>
/// Opens an existing file for reading and wraps it in a `BinaryReader`.
/// </summary>
/// <param name="filename">path of the file to open</param>
/// <returns>reader over the opened file stream</returns>
private static BinaryReader Open(string filename)
{
    var fileStream = new FileStream(filename, FileMode.Open, FileAccess.Read);
    return new BinaryReader(fileStream);
}
/// <summary>
/// Wraps a byte array in a `BinaryReader` backed by a read-only `MemoryStream`.
/// The stream is created with its buffer publicly visible so that `GetBuffer()` can be called on it.
/// </summary>
/// <param name="bytes">data to read from; the whole array is exposed</param>
/// <returns>reader over the memory stream</returns>
private static BinaryReader Open(byte[] bytes)
{
    var memory = new MemoryStream(bytes, 0, bytes.Length, writable: false, publiclyVisible: true);
    return new BinaryReader(memory);
}
#endregion
}
} // namespace Unity.Barracuda