1188 lines
50 KiB
C#
1188 lines
50 KiB
C#
// This is auto-generated -- do not modify directly
|
|
using UnityEngine;
|
|
using System;
|
|
using Unity.Burst;
|
|
using Unity.Burst.Intrinsics;
|
|
using Unity.Collections;
|
|
using Unity.Jobs;
|
|
using Unity.Mathematics;
|
|
using static Unity.Burst.Intrinsics.X86.Avx;
|
|
using static Unity.Burst.Intrinsics.X86.Fma;
|
|
using Unity.Collections.LowLevel.Unsafe;
|
|
using Unity.Jobs.LowLevel.Unsafe;
|
|
using FencingHelperMode = Unity.Barracuda.BurstSchedulingHelper.FencingHelperMode;
|
|
|
|
namespace Unity.Barracuda {
|
|
public partial class BurstCPUOps
|
|
{
|
|
#region Other jobs declaration for mode: _Full_Float
|
|
|
|
internal partial struct CopyJobHelper
|
|
{
|
|
public JobHandle ScheduleXO(Tensor X, Tensor O, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
var pinX = Pin(X);
|
|
var pinO = Pin(O, uploadCache: false);
|
|
bool AHalf = pinX.array.Type == DataType.Half;
|
|
bool OHalf = pinO.array.Type == DataType.Half;
|
|
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
|
if (AHalf)
|
|
{
|
|
var job = new CopyJob_Full_Half();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, fencingMode);
|
|
}
|
|
else
|
|
{
|
|
var job = new CopyJob_Full_Float();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, fencingMode);
|
|
}
|
|
}
|
|
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct CopyJob_Full_Float : IJob, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
|
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
|
public CopyJobHelper data;
|
|
|
|
public void Execute()
|
|
{
|
|
UnsafeUtility.MemCpy(destination: Optr, source: Xptr, size: data.length * sizeof(float));
|
|
}
|
|
}
|
|
|
|
internal partial struct CopyStrideJobHelper
|
|
{
|
|
public JobHandle ScheduleXO(BurstTensorData pinX, int offsetX, BurstTensorData pinO, int offsetY, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
bool AHalf = pinX.array.Type == DataType.Half;
|
|
bool OHalf = pinO.array.Type == DataType.Half;
|
|
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
|
if (AHalf)
|
|
{
|
|
var job = new CopyStrideJob_Full_Half();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, offsetX, pinO, offsetY, fencingMode);
|
|
}
|
|
else
|
|
{
|
|
var job = new CopyStrideJob_Full_Float();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, offsetX, pinO, offsetY, fencingMode);
|
|
}
|
|
}
|
|
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct CopyStrideJob_Full_Float : IJob, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
|
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
|
public CopyStrideJobHelper data;
|
|
|
|
public void Execute()
|
|
{
|
|
UnsafeUtility.MemCpyStride(destination: Optr, destinationStride: data.OStride * sizeof(float),
|
|
source: Xptr, sourceStride: data.XStride * sizeof(float),
|
|
elementSize: data.length * sizeof(float), count: data.count);
|
|
}
|
|
}
|
|
|
|
internal partial struct GenericSliceJobHelper
|
|
{
|
|
public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
var pinX = Pin(X);
|
|
var pinO = Pin(O, uploadCache: false);
|
|
return ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
bool AHalf = pinX.array.Type == DataType.Half;
|
|
bool OHalf = pinO.array.Type == DataType.Half;
|
|
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
|
if (AHalf)
|
|
{
|
|
var job = new GenericSliceJob_Full_Half();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
else
|
|
{
|
|
var job = new GenericSliceJob_Full_Float();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
}
|
|
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct GenericSliceJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
|
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
|
public GenericSliceJobHelper data;
|
|
|
|
public void Execute(int threadIndex)
|
|
{
|
|
int indexO = threadIndex * data.shapeO.channels;
|
|
int s = 0, r = 0, n = 0, t = 0;
|
|
int d = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(indexO, ref s, ref r, ref n, ref t, ref d, ref h, ref w, ref c);
|
|
s = data.startS + s * data.strideS;
|
|
r = data.startR + r * data.strideR;
|
|
n = data.startN + n * data.strideN;
|
|
t = data.startT + t * data.strideT;
|
|
d = data.startD + d * data.strideD;
|
|
h = data.startH + h * data.strideH;
|
|
w = data.startW + w * data.strideW;
|
|
c = data.startC + c * data.strideC;
|
|
int indexX = data.shapeX.Index(s, r, n, t, d, h, w, c);
|
|
UnsafeUtility.MemCpy(destination: Optr+indexO, source: Xptr+indexX, size: data.shapeO.channels * sizeof(float));
|
|
}
|
|
}
|
|
|
|
internal partial struct GenericStridedSliceJobHelper
|
|
{
|
|
public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
var pinX = Pin(X);
|
|
var pinO = Pin(O, uploadCache: false);
|
|
return ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
bool AHalf = pinX.array.Type == DataType.Half;
|
|
bool OHalf = pinO.array.Type == DataType.Half;
|
|
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
|
if (AHalf)
|
|
{
|
|
var job = new GenericStridedSliceJob_Full_Half();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
else
|
|
{
|
|
var job = new GenericStridedSliceJob_Full_Float();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
}
|
|
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct GenericStridedSliceJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
|
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
|
public GenericStridedSliceJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int s = 0, r = 0, n = 0, t = 0;
|
|
int d = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(i, ref s, ref r, ref n, ref t, ref d, ref h, ref w, ref c);
|
|
s = data.startS + s * data.strideS;
|
|
r = data.startR + r * data.strideR;
|
|
n = data.startN + n * data.strideN;
|
|
t = data.startT + t * data.strideT;
|
|
d = data.startD + d * data.strideD;
|
|
h = data.startH + h * data.strideH;
|
|
w = data.startW + w * data.strideW;
|
|
c = data.startC + c * data.strideC;
|
|
Optr[i] = (float)(Xptr[data.shapeX.Index(s, r, n, t, d, h, w, c)]);
|
|
}
|
|
}
|
|
|
|
internal partial struct Border2DJobHelper
|
|
{
|
|
public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
var pinX = Pin(X);
|
|
var pinO = Pin(O, uploadCache: false);
|
|
return ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
bool AHalf = pinX.array.Type == DataType.Half;
|
|
bool OHalf = pinO.array.Type == DataType.Half;
|
|
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
|
if (AHalf)
|
|
{
|
|
var job = new Border2DJob_Full_Half();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
else
|
|
{
|
|
var job = new Border2DJob_Full_Float();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
}
|
|
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct Border2DJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
|
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
|
public Border2DJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int n = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(i, ref n, ref h, ref w, ref c);
|
|
|
|
int readX = w - data.PadWidth;
|
|
int readY = h - data.PadHeight;
|
|
int readC = c - data.PadChannels;
|
|
|
|
float v;
|
|
if (readX < 0 || readX >= data.CroppedWidth ||
|
|
readY < 0 || readY >= data.CroppedHeight ||
|
|
readC < 0 || readC >= data.CroppedChannels)
|
|
{
|
|
v = data.Beta;
|
|
}
|
|
else
|
|
{
|
|
v = Xptr[data.shapeX.Index(n, readY, readX, readC)];
|
|
}
|
|
|
|
Optr[i] = (float)(v);
|
|
}
|
|
}
|
|
|
|
internal partial struct TransposeJobHelper
|
|
{
|
|
public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
var pinX = Pin(X);
|
|
var pinO = Pin(O, uploadCache: false);
|
|
return ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
bool AHalf = pinX.array.Type == DataType.Half;
|
|
bool OHalf = pinO.array.Type == DataType.Half;
|
|
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
|
if (AHalf)
|
|
{
|
|
var job = new TransposeJob_Full_Half();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
else
|
|
{
|
|
var job = new TransposeJob_Full_Float();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
}
|
|
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct TransposeJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
|
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
|
public TransposeJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int s = 0, r = 0, n = 0, t = 0, d = 0, h = 0, w = 0, c = 0;
|
|
data.shapeX.GetPositionsFromIndex(i, ref s, ref r, ref n, ref t, ref d, ref h, ref w, ref c);
|
|
|
|
int* index = stackalloc int[8];
|
|
index[0] = s; index[1] = r; index[2] = n; index[3] = t; index[4] = d; index[5] = h; index[6] = w; index[7] = c;
|
|
|
|
int indexO = data.shapeO.Index(index[data.permutations[0]],
|
|
index[data.permutations[1]],
|
|
index[data.permutations[2]],
|
|
index[data.permutations[3]],
|
|
index[data.permutations[4]],
|
|
index[data.permutations[5]],
|
|
index[data.permutations[6]],
|
|
index[data.permutations[7]]);
|
|
Optr[indexO] = (float)(Xptr[i]);
|
|
}
|
|
}
|
|
|
|
internal partial struct Pad2DEdgeJobHelper
|
|
{
|
|
public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
var pinX = Pin(X);
|
|
var pinO = Pin(O, uploadCache: false);
|
|
return ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
bool AHalf = pinX.array.Type == DataType.Half;
|
|
bool OHalf = pinO.array.Type == DataType.Half;
|
|
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
|
if (AHalf)
|
|
{
|
|
var job = new Pad2DEdgeJob_Full_Half();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
else
|
|
{
|
|
var job = new Pad2DEdgeJob_Full_Float();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
}
|
|
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct Pad2DEdgeJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
|
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
|
public Pad2DEdgeJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int n = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(i, ref n, ref h, ref w, ref c);
|
|
|
|
int readX = w - data.PadWidth;
|
|
int readY = h - data.PadHeight;
|
|
int readC = c - data.PadChannels;
|
|
|
|
readX = math.max(readX, 0);
|
|
readY = math.max(readY, 0);
|
|
readC = math.max(readC, 0);
|
|
readX = math.min(readX, data.shapeX.width - 1);
|
|
readY = math.min(readY, data.shapeX.height - 1);
|
|
readC = math.min(readC, data.shapeX.channels- 1);
|
|
|
|
Optr[i] = (float)(Xptr[data.shapeX.Index(n, readY, readX, readC)]);
|
|
}
|
|
}
|
|
|
|
internal partial struct Pad2DReflectJobHelper
|
|
{
|
|
public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
var pinX = Pin(X);
|
|
var pinO = Pin(O, uploadCache: false);
|
|
return ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
bool AHalf = pinX.array.Type == DataType.Half;
|
|
bool OHalf = pinO.array.Type == DataType.Half;
|
|
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
|
if (AHalf)
|
|
{
|
|
var job = new Pad2DReflectJob_Full_Half();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
else
|
|
{
|
|
var job = new Pad2DReflectJob_Full_Float();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
}
|
|
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct Pad2DReflectJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
|
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
|
public Pad2DReflectJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int n = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(i, ref n, ref h, ref w, ref c);
|
|
|
|
int readX = w - data.PadWidth;
|
|
int readY = h - data.PadHeight;
|
|
int readC = c - data.PadChannels;
|
|
|
|
int lastXIndex = data.shapeX.width - 1;
|
|
int lastYIndex = data.shapeX.height - 1;
|
|
int lastCIndex = data.shapeX.channels - 1;
|
|
|
|
//x reflect indexing
|
|
if (readX < 0)
|
|
readX = -readX;
|
|
else if (readX > lastXIndex)
|
|
readX = lastXIndex - (readX - lastXIndex);
|
|
|
|
//y reflect indexing
|
|
if (readY < 0)
|
|
readY = -readY;
|
|
else if (readY > lastYIndex)
|
|
readY = lastYIndex - (readY - lastYIndex);
|
|
|
|
//c reflect indexing
|
|
if (readC < 0)
|
|
readC = -readC;
|
|
else if (readC > lastCIndex)
|
|
readC = lastCIndex - (readC - lastCIndex);
|
|
|
|
readX = math.max(readX, 0);
|
|
readY = math.max(readY, 0);
|
|
readC = math.max(readC, 0);
|
|
readX = math.min(readX, data.shapeX.width - 1);
|
|
readY = math.min(readY, data.shapeX.height - 1);
|
|
readC = math.min(readC, data.shapeX.channels- 1);
|
|
|
|
Optr[i] = Xptr[data.shapeX.Index(n, readY, readX, readC)];
|
|
}
|
|
}
|
|
|
|
internal partial struct Pad2DSymmetricJobHelper
|
|
{
|
|
public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
var pinX = Pin(X);
|
|
var pinO = Pin(O, uploadCache: false);
|
|
return ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
bool AHalf = pinX.array.Type == DataType.Half;
|
|
bool OHalf = pinO.array.Type == DataType.Half;
|
|
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
|
if (AHalf)
|
|
{
|
|
var job = new Pad2DSymmetricJob_Full_Half();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
else
|
|
{
|
|
var job = new Pad2DSymmetricJob_Full_Float();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
}
|
|
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct Pad2DSymmetricJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
|
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
|
public Pad2DSymmetricJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int n = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(i, ref n, ref h, ref w, ref c);
|
|
|
|
int readX = w - data.PadWidth;
|
|
int readY = h - data.PadHeight;
|
|
int readC = c - data.PadChannels;
|
|
|
|
int lastXIndex = data.shapeX.width - 1;
|
|
int lastYIndex = data.shapeX.height - 1;
|
|
int lastCIndex = data.shapeX.channels - 1;
|
|
|
|
//x symmetric indexing
|
|
if (readX < 0)
|
|
readX = -readX - 1;
|
|
else if (readX > lastXIndex)
|
|
readX = lastXIndex - (readX - lastXIndex) + 1;
|
|
|
|
//y symmetric indexing
|
|
if (readY < 0)
|
|
readY = -readY - 1;
|
|
else if (readY > lastYIndex)
|
|
readY = lastYIndex - (readY - lastYIndex) + 1;
|
|
|
|
//c symmetric indexing
|
|
if (readC < 0)
|
|
readC = -readC - 1;
|
|
else if (readC > lastCIndex)
|
|
readC = lastCIndex - (readC - lastCIndex) + 1;
|
|
|
|
readX = math.max(readX, 0);
|
|
readY = math.max(readY, 0);
|
|
readC = math.max(readC, 0);
|
|
readX = math.min(readX, data.shapeX.width - 1);
|
|
readY = math.min(readY, data.shapeX.height - 1);
|
|
readC = math.min(readC, data.shapeX.channels- 1);
|
|
|
|
Optr[i] = (float)(Xptr[data.shapeX.Index(n, readY, readX, readC)]);
|
|
}
|
|
}
|
|
|
|
internal partial struct TileJobHelper
|
|
{
|
|
public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
var pinX = Pin(X);
|
|
var pinO = Pin(O, uploadCache: false);
|
|
return ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
bool AHalf = pinX.array.Type == DataType.Half;
|
|
bool OHalf = pinO.array.Type == DataType.Half;
|
|
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
|
if (AHalf)
|
|
{
|
|
var job = new TileJob_Full_Half();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
else
|
|
{
|
|
var job = new TileJob_Full_Float();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
}
|
|
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct TileJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
|
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
|
public TileJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int s = 0, r = 0, n = 0, t = 0, d = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(i, ref s, ref r, ref n, ref t, ref d, ref h, ref w, ref c);
|
|
|
|
s = s % data.shapeX[0];
|
|
r = r % data.shapeX[1];
|
|
n = n % data.shapeX[2];
|
|
t = t % data.shapeX[3];
|
|
d = d % data.shapeX[4];
|
|
h = h % data.shapeX[5];
|
|
w = w % data.shapeX[6];
|
|
c = c % data.shapeX[7];
|
|
|
|
float x = Xptr[data.shapeX.Index(s, r, n, t, d, h, w, c)];
|
|
Optr[i] = (float)(x);
|
|
}
|
|
}
|
|
|
|
internal partial struct GatherJobHelper
|
|
{
|
|
public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
var pinX = Pin(X);
|
|
var pinB = Pin(B);
|
|
var pinO = Pin(O, uploadCache: false);
|
|
bool AHalf = pinX.array.Type == DataType.Half;
|
|
bool WHalf = pinB.array.Type == DataType.Half;
|
|
bool OHalf = pinO.array.Type == DataType.Half;
|
|
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
|
UnityEngine.Assertions.Assert.AreEqual(AHalf, WHalf);
|
|
if (AHalf)
|
|
{
|
|
var job = new GatherJob_Full_Half();
|
|
job.data = this;
|
|
return job.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
else //if (!AHalf)
|
|
{
|
|
var job = new GatherJob_Full_Float();
|
|
job.data = this;
|
|
return job.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
}
|
|
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct GatherJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
|
public ReadOnlyMemResource B { get; set; } float* Bptr => B.ptrfloat;//Always use activation type
|
|
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
|
public GatherJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int s = 0, r = 0, n = 0, t = 0, d = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(i, ref s, ref r, ref n, ref t, ref d, ref h, ref w, ref c);
|
|
|
|
int d0 = (data.axis == 0) ? (int) Bptr[s] : s;
|
|
int d1 = (data.axis == 1) ? (int) Bptr[r] : r;
|
|
int d2 = (data.axis == 2) ? (int) Bptr[n] : n;
|
|
int d3 = (data.axis == 3) ? (int) Bptr[t] : t;
|
|
int d4 = (data.axis == 4) ? (int) Bptr[d] : d;
|
|
int d5 = (data.axis == 5) ? (int) Bptr[h] : h;
|
|
int d6 = (data.axis == 6) ? (int) Bptr[w] : w;
|
|
int d7 = (data.axis == 7) ? (int) Bptr[c] : c;
|
|
|
|
Optr[i] = (float)(Xptr[data.shapeX.Index(d0, d1, d2, d3, d4, d5, d6, d7)]);
|
|
}
|
|
}
|
|
|
|
internal partial struct OneHotJobHelper
|
|
{
|
|
public JobHandle ScheduleXO(Tensor X, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
var pinX = Pin(X);
|
|
var pinO = Pin(O, uploadCache: false);
|
|
return ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
public JobHandle ScheduleXO(BurstTensorData pinX, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
bool AHalf = pinX.array.Type == DataType.Half;
|
|
bool OHalf = pinO.array.Type == DataType.Half;
|
|
UnityEngine.Assertions.Assert.AreEqual(AHalf, OHalf);
|
|
if (AHalf)
|
|
{
|
|
var job = new OneHotJob_Full_Half();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
else
|
|
{
|
|
var job = new OneHotJob_Full_Float();
|
|
job.data = this;
|
|
return job.ScheduleXO(pinX, pinO, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
}
|
|
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct OneHotJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } float* Xptr => X.ptrfloat;
|
|
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
|
public OneHotJobHelper data;
|
|
|
|
public void Execute(int idx)
|
|
{
|
|
// rank1: X = n,_,_,_
|
|
// rank2: X = n,_,_,c
|
|
// rank3: X = n,_,w,c
|
|
|
|
if (data.inputRank == 1) // TensorShape(X.flatHeight, depth)
|
|
{
|
|
int j = idx % data.depth;
|
|
int n = (idx / data.depth) % data.shapeX.flatHeight;
|
|
|
|
int index = (int)Xptr[n];
|
|
float v = (j == index) ? data.onValue: data.offValue;
|
|
Optr[idx] = (float)(v);
|
|
}
|
|
else if (data.inputRank == 2) // TensorShape(X.flatHeight, 1, depth, X.channels));
|
|
{
|
|
int i = idx % data.shapeX.channels;
|
|
int j = (idx / data.shapeX.channels) % data.depth;
|
|
int n = ((idx / data.shapeX.channels) / data.depth) % data.shapeX.flatHeight;
|
|
|
|
int index = (int)Xptr[data.shapeX.Index(n, i)];
|
|
float v = (j == index) ? data.onValue: data.offValue;
|
|
Optr[idx] = (float)(v);
|
|
}
|
|
else // TensorShape(X.batch, X.width, depth, X.channels))
|
|
{
|
|
int i = idx % data.shapeX.channels;
|
|
int j = (idx / data.shapeX.channels) % data.depth;
|
|
int k = ((idx / data.shapeX.channels) / data.depth) % data.shapeX.width;
|
|
int n = (((idx / data.shapeX.channels) / data.depth) / data.shapeX.width) % data.shapeX.batch;
|
|
|
|
int index = (int)Xptr[data.shapeX.Index(n, 0, k, i)];
|
|
float v = (j == index) ? data.onValue: data.offValue;
|
|
Optr[idx] = (float)(v);
|
|
}
|
|
}
|
|
}
|
|
|
|
internal partial struct RandomNormalJobHelper
|
|
{
|
|
public JobHandle ScheduleO(BurstTensorData pinO, int offset, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
bool OHalf = pinO.array.Type == DataType.Half;
|
|
if (OHalf)
|
|
{
|
|
var job = new RandomNormalJob_Full_Half();
|
|
job.data = this;
|
|
return job.ScheduleO(pinO, offset, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
else
|
|
{
|
|
var job = new RandomNormalJob_Full_Float();
|
|
job.data = this;
|
|
return job.ScheduleO(pinO, offset, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
}
|
|
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct RandomNormalJob_Full_Float : IJobParallelFor, IJobResourceDeclarationO
|
|
{
|
|
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
|
public RandomNormalJobHelper data;
|
|
|
|
float Gaussian(float mean, float stdDev)
|
|
{
|
|
float u, v, s;
|
|
do {
|
|
u = data.rng.NextFloat() * 2 - 1;
|
|
v = data.rng.NextFloat() * 2 - 1;
|
|
s = u * u + v * v;
|
|
} while (s >= 1 || s == 0);
|
|
float mul = Mathf.Sqrt(-2.0f * Mathf.Log(s) / s);
|
|
return mean + stdDev * u * mul;
|
|
}
|
|
|
|
public void Execute(int i)
|
|
{
|
|
Optr[i] = (float)(Gaussian(data.mean, data.scale));
|
|
}
|
|
}
|
|
|
|
internal partial struct RandomUniformJobHelper
|
|
{
|
|
public JobHandle ScheduleO(BurstTensorData pinO, int offset, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
|
|
{
|
|
bool OHalf = pinO.array.Type == DataType.Half;
|
|
if (OHalf)
|
|
{
|
|
var job = new RandomUniformJob_Full_Half();
|
|
job.data = this;
|
|
return job.ScheduleO(pinO, offset, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
else
|
|
{
|
|
var job = new RandomUniformJob_Full_Float();
|
|
job.data = this;
|
|
return job.ScheduleO(pinO, offset, arrayLength, innerBatchCount, fencingMode);
|
|
}
|
|
}
|
|
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct RandomUniformJob_Full_Float : IJobParallelFor, IJobResourceDeclarationO
|
|
{
|
|
public ReadWriteMemResource O { get; set; } float* Optr => O.ptrfloat;
|
|
public RandomUniformJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
float v = data.mean + data.scale * data.rng.NextFloat();
|
|
Optr[i] = (float)(v);
|
|
}
|
|
}
|
|
|
|
#endregion
|
|
#region Other jobs declaration for mode: _ActAsFloat_WeightAsHalf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#endregion
|
|
#region Other jobs declaration for mode: _Full_Half
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct CopyJob_Full_Half : IJob, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
|
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
|
public CopyJobHelper data;
|
|
|
|
public void Execute()
|
|
{
|
|
UnsafeUtility.MemCpy(destination: Optr, source: Xptr, size: data.length * sizeof(half));
|
|
}
|
|
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct CopyStrideJob_Full_Half : IJob, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
|
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
|
public CopyStrideJobHelper data;
|
|
|
|
public void Execute()
|
|
{
|
|
UnsafeUtility.MemCpyStride(destination: Optr, destinationStride: data.OStride * sizeof(half),
|
|
source: Xptr, sourceStride: data.XStride * sizeof(half),
|
|
elementSize: data.length * sizeof(half), count: data.count);
|
|
}
|
|
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct GenericSliceJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
|
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
|
public GenericSliceJobHelper data;
|
|
|
|
public void Execute(int threadIndex)
|
|
{
|
|
int indexO = threadIndex * data.shapeO.channels;
|
|
int s = 0, r = 0, n = 0, t = 0;
|
|
int d = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(indexO, ref s, ref r, ref n, ref t, ref d, ref h, ref w, ref c);
|
|
s = data.startS + s * data.strideS;
|
|
r = data.startR + r * data.strideR;
|
|
n = data.startN + n * data.strideN;
|
|
t = data.startT + t * data.strideT;
|
|
d = data.startD + d * data.strideD;
|
|
h = data.startH + h * data.strideH;
|
|
w = data.startW + w * data.strideW;
|
|
c = data.startC + c * data.strideC;
|
|
int indexX = data.shapeX.Index(s, r, n, t, d, h, w, c);
|
|
UnsafeUtility.MemCpy(destination: Optr+indexO, source: Xptr+indexX, size: data.shapeO.channels * sizeof(half));
|
|
}
|
|
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct GenericStridedSliceJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
|
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
|
public GenericStridedSliceJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int s = 0, r = 0, n = 0, t = 0;
|
|
int d = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(i, ref s, ref r, ref n, ref t, ref d, ref h, ref w, ref c);
|
|
s = data.startS + s * data.strideS;
|
|
r = data.startR + r * data.strideR;
|
|
n = data.startN + n * data.strideN;
|
|
t = data.startT + t * data.strideT;
|
|
d = data.startD + d * data.strideD;
|
|
h = data.startH + h * data.strideH;
|
|
w = data.startW + w * data.strideW;
|
|
c = data.startC + c * data.strideC;
|
|
Optr[i] = (half)(Xptr[data.shapeX.Index(s, r, n, t, d, h, w, c)]);
|
|
}
|
|
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct Border2DJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
|
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
|
public Border2DJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int n = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(i, ref n, ref h, ref w, ref c);
|
|
|
|
int readX = w - data.PadWidth;
|
|
int readY = h - data.PadHeight;
|
|
int readC = c - data.PadChannels;
|
|
|
|
float v;
|
|
if (readX < 0 || readX >= data.CroppedWidth ||
|
|
readY < 0 || readY >= data.CroppedHeight ||
|
|
readC < 0 || readC >= data.CroppedChannels)
|
|
{
|
|
v = data.Beta;
|
|
}
|
|
else
|
|
{
|
|
v = Xptr[data.shapeX.Index(n, readY, readX, readC)];
|
|
}
|
|
|
|
Optr[i] = (half)(v);
|
|
}
|
|
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct TransposeJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
|
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
|
public TransposeJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int s = 0, r = 0, n = 0, t = 0, d = 0, h = 0, w = 0, c = 0;
|
|
data.shapeX.GetPositionsFromIndex(i, ref s, ref r, ref n, ref t, ref d, ref h, ref w, ref c);
|
|
|
|
int* index = stackalloc int[8];
|
|
index[0] = s; index[1] = r; index[2] = n; index[3] = t; index[4] = d; index[5] = h; index[6] = w; index[7] = c;
|
|
|
|
int indexO = data.shapeO.Index(index[data.permutations[0]],
|
|
index[data.permutations[1]],
|
|
index[data.permutations[2]],
|
|
index[data.permutations[3]],
|
|
index[data.permutations[4]],
|
|
index[data.permutations[5]],
|
|
index[data.permutations[6]],
|
|
index[data.permutations[7]]);
|
|
Optr[indexO] = (half)(Xptr[i]);
|
|
}
|
|
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct Pad2DEdgeJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
|
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
|
public Pad2DEdgeJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int n = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(i, ref n, ref h, ref w, ref c);
|
|
|
|
int readX = w - data.PadWidth;
|
|
int readY = h - data.PadHeight;
|
|
int readC = c - data.PadChannels;
|
|
|
|
readX = math.max(readX, 0);
|
|
readY = math.max(readY, 0);
|
|
readC = math.max(readC, 0);
|
|
readX = math.min(readX, data.shapeX.width - 1);
|
|
readY = math.min(readY, data.shapeX.height - 1);
|
|
readC = math.min(readC, data.shapeX.channels- 1);
|
|
|
|
Optr[i] = (half)(Xptr[data.shapeX.Index(n, readY, readX, readC)]);
|
|
}
|
|
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct Pad2DReflectJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
|
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
|
public Pad2DReflectJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int n = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(i, ref n, ref h, ref w, ref c);
|
|
|
|
int readX = w - data.PadWidth;
|
|
int readY = h - data.PadHeight;
|
|
int readC = c - data.PadChannels;
|
|
|
|
int lastXIndex = data.shapeX.width - 1;
|
|
int lastYIndex = data.shapeX.height - 1;
|
|
int lastCIndex = data.shapeX.channels - 1;
|
|
|
|
//x reflect indexing
|
|
if (readX < 0)
|
|
readX = -readX;
|
|
else if (readX > lastXIndex)
|
|
readX = lastXIndex - (readX - lastXIndex);
|
|
|
|
//y reflect indexing
|
|
if (readY < 0)
|
|
readY = -readY;
|
|
else if (readY > lastYIndex)
|
|
readY = lastYIndex - (readY - lastYIndex);
|
|
|
|
//c reflect indexing
|
|
if (readC < 0)
|
|
readC = -readC;
|
|
else if (readC > lastCIndex)
|
|
readC = lastCIndex - (readC - lastCIndex);
|
|
|
|
readX = math.max(readX, 0);
|
|
readY = math.max(readY, 0);
|
|
readC = math.max(readC, 0);
|
|
readX = math.min(readX, data.shapeX.width - 1);
|
|
readY = math.min(readY, data.shapeX.height - 1);
|
|
readC = math.min(readC, data.shapeX.channels- 1);
|
|
|
|
Optr[i] = Xptr[data.shapeX.Index(n, readY, readX, readC)];
|
|
}
|
|
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct Pad2DSymmetricJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
|
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
|
public Pad2DSymmetricJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int n = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(i, ref n, ref h, ref w, ref c);
|
|
|
|
int readX = w - data.PadWidth;
|
|
int readY = h - data.PadHeight;
|
|
int readC = c - data.PadChannels;
|
|
|
|
int lastXIndex = data.shapeX.width - 1;
|
|
int lastYIndex = data.shapeX.height - 1;
|
|
int lastCIndex = data.shapeX.channels - 1;
|
|
|
|
//x symmetric indexing
|
|
if (readX < 0)
|
|
readX = -readX - 1;
|
|
else if (readX > lastXIndex)
|
|
readX = lastXIndex - (readX - lastXIndex) + 1;
|
|
|
|
//y symmetric indexing
|
|
if (readY < 0)
|
|
readY = -readY - 1;
|
|
else if (readY > lastYIndex)
|
|
readY = lastYIndex - (readY - lastYIndex) + 1;
|
|
|
|
//c symmetric indexing
|
|
if (readC < 0)
|
|
readC = -readC - 1;
|
|
else if (readC > lastCIndex)
|
|
readC = lastCIndex - (readC - lastCIndex) + 1;
|
|
|
|
readX = math.max(readX, 0);
|
|
readY = math.max(readY, 0);
|
|
readC = math.max(readC, 0);
|
|
readX = math.min(readX, data.shapeX.width - 1);
|
|
readY = math.min(readY, data.shapeX.height - 1);
|
|
readC = math.min(readC, data.shapeX.channels- 1);
|
|
|
|
Optr[i] = (half)(Xptr[data.shapeX.Index(n, readY, readX, readC)]);
|
|
}
|
|
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct TileJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
|
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
|
public TileJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int s = 0, r = 0, n = 0, t = 0, d = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(i, ref s, ref r, ref n, ref t, ref d, ref h, ref w, ref c);
|
|
|
|
s = s % data.shapeX[0];
|
|
r = r % data.shapeX[1];
|
|
n = n % data.shapeX[2];
|
|
t = t % data.shapeX[3];
|
|
d = d % data.shapeX[4];
|
|
h = h % data.shapeX[5];
|
|
w = w % data.shapeX[6];
|
|
c = c % data.shapeX[7];
|
|
|
|
float x = Xptr[data.shapeX.Index(s, r, n, t, d, h, w, c)];
|
|
Optr[i] = (half)(x);
|
|
}
|
|
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct GatherJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
|
public ReadOnlyMemResource B { get; set; } half* Bptr => B.ptrhalf;//Always use activation type
|
|
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
|
public GatherJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
int s = 0, r = 0, n = 0, t = 0, d = 0, h = 0, w = 0, c = 0;
|
|
data.shapeO.GetPositionsFromIndex(i, ref s, ref r, ref n, ref t, ref d, ref h, ref w, ref c);
|
|
|
|
int d0 = (data.axis == 0) ? (int) Bptr[s] : s;
|
|
int d1 = (data.axis == 1) ? (int) Bptr[r] : r;
|
|
int d2 = (data.axis == 2) ? (int) Bptr[n] : n;
|
|
int d3 = (data.axis == 3) ? (int) Bptr[t] : t;
|
|
int d4 = (data.axis == 4) ? (int) Bptr[d] : d;
|
|
int d5 = (data.axis == 5) ? (int) Bptr[h] : h;
|
|
int d6 = (data.axis == 6) ? (int) Bptr[w] : w;
|
|
int d7 = (data.axis == 7) ? (int) Bptr[c] : c;
|
|
|
|
Optr[i] = (half)(Xptr[data.shapeX.Index(d0, d1, d2, d3, d4, d5, d6, d7)]);
|
|
}
|
|
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct OneHotJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXO
|
|
{
|
|
public ReadOnlyMemResource X { get; set; } half* Xptr => X.ptrhalf;
|
|
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
|
public OneHotJobHelper data;
|
|
|
|
public void Execute(int idx)
|
|
{
|
|
// rank1: X = n,_,_,_
|
|
// rank2: X = n,_,_,c
|
|
// rank3: X = n,_,w,c
|
|
|
|
if (data.inputRank == 1) // TensorShape(X.flatHeight, depth)
|
|
{
|
|
int j = idx % data.depth;
|
|
int n = (idx / data.depth) % data.shapeX.flatHeight;
|
|
|
|
int index = (int)Xptr[n];
|
|
float v = (j == index) ? data.onValue: data.offValue;
|
|
Optr[idx] = (half)(v);
|
|
}
|
|
else if (data.inputRank == 2) // TensorShape(X.flatHeight, 1, depth, X.channels));
|
|
{
|
|
int i = idx % data.shapeX.channels;
|
|
int j = (idx / data.shapeX.channels) % data.depth;
|
|
int n = ((idx / data.shapeX.channels) / data.depth) % data.shapeX.flatHeight;
|
|
|
|
int index = (int)Xptr[data.shapeX.Index(n, i)];
|
|
float v = (j == index) ? data.onValue: data.offValue;
|
|
Optr[idx] = (half)(v);
|
|
}
|
|
else // TensorShape(X.batch, X.width, depth, X.channels))
|
|
{
|
|
int i = idx % data.shapeX.channels;
|
|
int j = (idx / data.shapeX.channels) % data.depth;
|
|
int k = ((idx / data.shapeX.channels) / data.depth) % data.shapeX.width;
|
|
int n = (((idx / data.shapeX.channels) / data.depth) / data.shapeX.width) % data.shapeX.batch;
|
|
|
|
int index = (int)Xptr[data.shapeX.Index(n, 0, k, i)];
|
|
float v = (j == index) ? data.onValue: data.offValue;
|
|
Optr[idx] = (half)(v);
|
|
}
|
|
}
|
|
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct RandomNormalJob_Full_Half : IJobParallelFor, IJobResourceDeclarationO
|
|
{
|
|
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
|
public RandomNormalJobHelper data;
|
|
|
|
float Gaussian(float mean, float stdDev)
|
|
{
|
|
float u, v, s;
|
|
do {
|
|
u = data.rng.NextFloat() * 2 - 1;
|
|
v = data.rng.NextFloat() * 2 - 1;
|
|
s = u * u + v * v;
|
|
} while (s >= 1 || s == 0);
|
|
float mul = Mathf.Sqrt(-2.0f * Mathf.Log(s) / s);
|
|
return mean + stdDev * u * mul;
|
|
}
|
|
|
|
public void Execute(int i)
|
|
{
|
|
Optr[i] = (half)(Gaussian(data.mean, data.scale));
|
|
}
|
|
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
|
|
unsafe struct RandomUniformJob_Full_Half : IJobParallelFor, IJobResourceDeclarationO
|
|
{
|
|
public ReadWriteMemResource O { get; set; } half* Optr => O.ptrhalf;
|
|
public RandomUniformJobHelper data;
|
|
|
|
public void Execute(int i)
|
|
{
|
|
float v = data.mean + data.scale * data.rng.NextFloat();
|
|
Optr[i] = (half)(v);
|
|
}
|
|
}
|
|
|
|
#endregion
|
|
}
|
|
}
|