1236 lines
60 KiB
C#
1236 lines
60 KiB
C#
// This is auto-generated -- do not modify directly
|
|
using UnityEngine;
|
|
using System;
|
|
using Unity.Burst;
|
|
using Unity.Burst.Intrinsics;
|
|
using Unity.Collections;
|
|
using Unity.Jobs;
|
|
using Unity.Mathematics;
|
|
using static Unity.Burst.Intrinsics.X86.Avx;
|
|
using static Unity.Burst.Intrinsics.X86.Fma;
|
|
using Unity.Collections.LowLevel.Unsafe;
|
|
using Unity.Jobs.LowLevel.Unsafe;
|
|
using FencingHelperMode = Unity.Barracuda.BurstSchedulingHelper.FencingHelperMode;
|
|
|
|
namespace Unity.Barracuda {
|
|
public partial class BurstCPUOps
|
|
{
|
|
#region Broadcast Jobs declaration for mode: _Full_Float
|
|
|
|
// Scheduling front-end for the VectorBroadcastScaleBias jobs: inspects whether the pinned
// buffers are stored as DataType.Half and schedules the matching job variant.
internal partial struct VectorBroadcastScaleBiasJobHelper
{
    // Pins X, S, B and O (O is pinned with uploadCache: false) and forwards to the
    // BurstTensorData overload.
    public JobHandle ScheduleXSBO(Tensor X, Tensor S, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var sData = Pin(S);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);
        return ScheduleXSBO(xData, sData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }

    // Picks the job variant from the half/float storage of the pinned buffers.
    // Asserts that X and O agree on half-ness, and that S and B agree on half-ness.
    // Half activations combined with float weights are not supported: that case raises an
    // assertion failure and returns a default JobHandle.
    public JobHandle ScheduleXSBO(BurstTensorData pinX, BurstTensorData pinS, BurstTensorData pinB, BurstTensorData pinO, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        bool activationIsHalf = pinX.array.Type == DataType.Half;
        bool scaleIsHalf = pinS.array.Type == DataType.Half;
        bool biasIsHalf = pinB.array.Type == DataType.Half;
        bool outputIsHalf = pinO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(activationIsHalf, outputIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(scaleIsHalf, biasIsHalf);

        if (scaleIsHalf)
        {
            if (activationIsHalf)
            {
                var halfJob = new VectorBroadcastScaleBiasJob_Full_Half { data = this };
                return halfJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
            }

            var mixedJob = new VectorBroadcastScaleBiasJob_ActAsFloat_WeightAsHalf { data = this };
            return mixedJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }

        if (!activationIsHalf)
        {
            var floatJob = new VectorBroadcastScaleBiasJob_Full_Float { data = this };
            return floatJob.ScheduleXSBO(pinX, pinS, pinB, pinO, arrayLength, innerBatchCount, fencingMode);
        }

        // Remaining combination: half activations with float weights.
        UnityEngine.Assertions.Assert.IsTrue(false, "VectorBroadcastScaleBiasJob does not support activation as half while weights are floats.");
        return new JobHandle();
    }
}
|
|
// Float job: for work item i, walks data.inOutChannels consecutive elements of X and O
// (starting at i * data.inOutChannels) and writes O = X * S + B * alpha, with S and B
// read from their first element onwards.
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct VectorBroadcastScaleBiasJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXSBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource S { get; set; }
    float* Sptr { get { return S.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } }
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public VectorBroadcastScaleBiasJobHelper data;

    // Number of elements handled per iteration of the outer (unrolled) loop.
    const int unrollSize = 32;

    public void Execute(int i)
    {
        float* input = Xptr + i * data.inOutChannels;
        float* output = Optr + i * data.inOutChannels;
        float* scale = Sptr;
        float* bias = Bptr;

        int channel = 0;

        // Main part: full groups of unrollSize channels.
        for (; channel < data.inOutChannels - unrollSize + 1; channel += unrollSize)
        {
            for (int lane = 0; lane < unrollSize; lane++, input++, output++, scale++, bias++)
            {
                *output = (float)((*input) * (*scale) + (*bias) * data.alpha);
            }
        }

        // Tail: channels left over after the last full group.
        for (; channel < data.inOutChannels; channel++, input++, output++, scale++, bias++)
        {
            *output = (float)((*input) * (*scale) + (*bias) * data.alpha);
        }
    }
}
|
|
|
|
// Scheduling front-end for the ScalarBroadcastAdd jobs.
internal partial struct ScalarBroadcastAddJobHelper
{
    // Factor applied to B[0] by ScalarBroadcastAddJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new ScalarBroadcastAddJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new ScalarBroadcastAddJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = B[0] * alpha + X[i]. Only the first element of B is read.
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ScalarBroadcastAddJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public ScalarBroadcastAddJobHelper data;

    public void Execute(int i)
    {
        Optr[i] = Bptr[0] * data.alpha + Xptr[i];
    }
}
|
|
// Scheduling front-end for the BroadcastAdd jobs.
internal partial struct BroadcastAddJobHelper
{
    // Factor applied to B[i] by BroadcastAddJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new BroadcastAddJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new BroadcastAddJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = B[i] * alpha + X[i].
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct BroadcastAddJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public BroadcastAddJobHelper data;

    public void Execute(int i)
    {
        Optr[i] = Bptr[i] * data.alpha + Xptr[i];
    }
}
|
|
// Scheduling front-end for the ScalarBroadcastMul jobs.
internal partial struct ScalarBroadcastMulJobHelper
{
    // Not read by ScalarBroadcastMulJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new ScalarBroadcastMulJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new ScalarBroadcastMulJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = X[i] * B[0]. Only the first element of B is read.
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ScalarBroadcastMulJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public ScalarBroadcastMulJobHelper data;

    public void Execute(int i)
    {
        float lhs = Xptr[i];
        float rhs = Bptr[0];
        Optr[i] = lhs * rhs;
    }
}
|
|
// Scheduling front-end for the BroadcastMul jobs.
internal partial struct BroadcastMulJobHelper
{
    // Not read by BroadcastMulJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new BroadcastMulJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new BroadcastMulJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = X[i] * B[i].
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct BroadcastMulJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public BroadcastMulJobHelper data;

    public void Execute(int i)
    {
        float lhs = Xptr[i];
        float rhs = Bptr[i];
        Optr[i] = lhs * rhs;
    }
}
|
|
// Scheduling front-end for the ScalarBroadcastDiv jobs.
internal partial struct ScalarBroadcastDivJobHelper
{
    // Not read by ScalarBroadcastDivJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new ScalarBroadcastDivJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new ScalarBroadcastDivJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = X[i] / B[0]. Only the first element of B is read.
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ScalarBroadcastDivJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public ScalarBroadcastDivJobHelper data;

    public void Execute(int i)
    {
        float numerator = Xptr[i];
        float denominator = Bptr[0];
        Optr[i] = numerator / denominator;
    }
}
|
|
// Scheduling front-end for the BroadcastDiv jobs.
internal partial struct BroadcastDivJobHelper
{
    // Not read by BroadcastDivJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new BroadcastDivJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new BroadcastDivJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = X[i] / B[i].
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct BroadcastDivJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public BroadcastDivJobHelper data;

    public void Execute(int i)
    {
        float numerator = Xptr[i];
        float denominator = Bptr[i];
        Optr[i] = numerator / denominator;
    }
}
|
|
// Scheduling front-end for the ScalarBroadcastMin jobs.
internal partial struct ScalarBroadcastMinJobHelper
{
    // Not read by ScalarBroadcastMinJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new ScalarBroadcastMinJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new ScalarBroadcastMinJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = math.min(X[i], B[0]). Only the first element of B is read.
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ScalarBroadcastMinJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public ScalarBroadcastMinJobHelper data;

    public void Execute(int i)
    {
        float lhs = Xptr[i];
        float rhs = Bptr[0];
        Optr[i] = math.min(lhs, rhs);
    }
}
|
|
// Scheduling front-end for the BroadcastMin jobs.
internal partial struct BroadcastMinJobHelper
{
    // Not read by BroadcastMinJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new BroadcastMinJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new BroadcastMinJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = math.min(X[i], B[i]).
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct BroadcastMinJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public BroadcastMinJobHelper data;

    public void Execute(int i)
    {
        float lhs = Xptr[i];
        float rhs = Bptr[i];
        Optr[i] = math.min(lhs, rhs);
    }
}
|
|
// Scheduling front-end for the ScalarBroadcastMax jobs.
internal partial struct ScalarBroadcastMaxJobHelper
{
    // Not read by ScalarBroadcastMaxJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new ScalarBroadcastMaxJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new ScalarBroadcastMaxJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = math.max(X[i], B[0]). Only the first element of B is read.
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ScalarBroadcastMaxJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public ScalarBroadcastMaxJobHelper data;

    public void Execute(int i)
    {
        float lhs = Xptr[i];
        float rhs = Bptr[0];
        Optr[i] = math.max(lhs, rhs);
    }
}
|
|
// Scheduling front-end for the BroadcastMax jobs.
internal partial struct BroadcastMaxJobHelper
{
    // Not read by BroadcastMaxJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new BroadcastMaxJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new BroadcastMaxJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = math.max(X[i], B[i]).
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct BroadcastMaxJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public BroadcastMaxJobHelper data;

    public void Execute(int i)
    {
        float lhs = Xptr[i];
        float rhs = Bptr[i];
        Optr[i] = math.max(lhs, rhs);
    }
}
|
|
// Scheduling front-end for the ScalarBroadcastPow jobs.
internal partial struct ScalarBroadcastPowJobHelper
{
    // Not read by ScalarBroadcastPowJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new ScalarBroadcastPowJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new ScalarBroadcastPowJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = math.pow(X[i], B[0]). Only the first element of B is read.
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ScalarBroadcastPowJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public ScalarBroadcastPowJobHelper data;

    public void Execute(int i)
    {
        float baseValue = Xptr[i];
        float exponent = Bptr[0];
        Optr[i] = math.pow(baseValue, exponent);
    }
}
|
|
// Scheduling front-end for the BroadcastPow jobs.
internal partial struct BroadcastPowJobHelper
{
    // Not read by BroadcastPowJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new BroadcastPowJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new BroadcastPowJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = math.pow(X[i], B[i]).
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct BroadcastPowJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public BroadcastPowJobHelper data;

    public void Execute(int i)
    {
        float baseValue = Xptr[i];
        float exponent = Bptr[i];
        Optr[i] = math.pow(baseValue, exponent);
    }
}
|
|
|
|
// Scheduling front-end and parameter block for the ElementwiseAdd jobs.
internal unsafe struct ElementwiseAddJobHelper
{
    // Shape used by the job to turn a flat output index into eight coordinates.
    [ReadOnly] public TensorShape shapeO;
    // Per-coordinate multipliers used by the job to index into X.
    [ReadOnly] public fixed int stridesX[8];
    // Per-coordinate multipliers used by the job to index into B.
    [ReadOnly] public fixed int stridesY[8];
    // Factor applied to the B operand by ElementwiseAddJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new ElementwiseAddJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new ElementwiseAddJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = alpha * B[offsetY] + X[offsetX], where the operand offsets are the
// coordinates of output index i weighted by data.stridesY / data.stridesX.
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ElementwiseAddJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public ElementwiseAddJobHelper data;

    public void Execute(int i)
    {
        // Split the flat output index into its eight coordinates.
        int s = 0, r = 0, n = 0, t = 0, d = 0, h = 0, w = 0, c = 0;
        data.shapeO.GetPositionsFromIndex(i, ref s, ref r, ref n, ref t, ref d, ref h, ref w, ref c);

        int offsetX = data.stridesX[0] * s + data.stridesX[1] * r + data.stridesX[2] * n + data.stridesX[3] * t
                    + data.stridesX[4] * d + data.stridesX[5] * h + data.stridesX[6] * w + data.stridesX[7] * c;
        int offsetY = data.stridesY[0] * s + data.stridesY[1] * r + data.stridesY[2] * n + data.stridesY[3] * t
                    + data.stridesY[4] * d + data.stridesY[5] * h + data.stridesY[6] * w + data.stridesY[7] * c;

        float lhs = Xptr[offsetX];
        float rhs = Bptr[offsetY];
        Optr[i] = data.alpha * rhs + lhs;
    }
}
|
|
// Scheduling front-end and parameter block for the ElementwiseMul jobs.
internal unsafe struct ElementwiseMulJobHelper
{
    // Shape used by the job to turn a flat output index into eight coordinates.
    [ReadOnly] public TensorShape shapeO;
    // Per-coordinate multipliers used by the job to index into X.
    [ReadOnly] public fixed int stridesX[8];
    // Per-coordinate multipliers used by the job to index into B.
    [ReadOnly] public fixed int stridesY[8];
    // Not read by ElementwiseMulJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new ElementwiseMulJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new ElementwiseMulJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = X[offsetX] * B[offsetY], where the operand offsets are the
// coordinates of output index i weighted by data.stridesX / data.stridesY.
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ElementwiseMulJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public ElementwiseMulJobHelper data;

    public void Execute(int i)
    {
        // Split the flat output index into its eight coordinates.
        int s = 0, r = 0, n = 0, t = 0, d = 0, h = 0, w = 0, c = 0;
        data.shapeO.GetPositionsFromIndex(i, ref s, ref r, ref n, ref t, ref d, ref h, ref w, ref c);

        int offsetX = data.stridesX[0] * s + data.stridesX[1] * r + data.stridesX[2] * n + data.stridesX[3] * t
                    + data.stridesX[4] * d + data.stridesX[5] * h + data.stridesX[6] * w + data.stridesX[7] * c;
        int offsetY = data.stridesY[0] * s + data.stridesY[1] * r + data.stridesY[2] * n + data.stridesY[3] * t
                    + data.stridesY[4] * d + data.stridesY[5] * h + data.stridesY[6] * w + data.stridesY[7] * c;

        float lhs = Xptr[offsetX];
        float rhs = Bptr[offsetY];
        Optr[i] = lhs * rhs;
    }
}
|
|
// Scheduling front-end and parameter block for the ElementwiseDiv jobs.
internal unsafe struct ElementwiseDivJobHelper
{
    // Shape used by the job to turn a flat output index into eight coordinates.
    [ReadOnly] public TensorShape shapeO;
    // Per-coordinate multipliers used by the job to index into X.
    [ReadOnly] public fixed int stridesX[8];
    // Per-coordinate multipliers used by the job to index into B.
    [ReadOnly] public fixed int stridesY[8];
    // Not read by ElementwiseDivJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new ElementwiseDivJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new ElementwiseDivJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
// Float job: O[i] = X[offsetX] / B[offsetY], where the operand offsets are the
// coordinates of output index i weighted by data.stridesX / data.stridesY.
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ElementwiseDivJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    public ReadOnlyMemResource X { get; set; }
    float* Xptr { get { return X.ptrfloat; } }
    public ReadOnlyMemResource B { get; set; }
    float* Bptr { get { return B.ptrfloat; } } //Always use activation type
    public ReadWriteMemResource O { get; set; }
    float* Optr { get { return O.ptrfloat; } }
    public ElementwiseDivJobHelper data;

    public void Execute(int i)
    {
        // Split the flat output index into its eight coordinates.
        int s = 0, r = 0, n = 0, t = 0, d = 0, h = 0, w = 0, c = 0;
        data.shapeO.GetPositionsFromIndex(i, ref s, ref r, ref n, ref t, ref d, ref h, ref w, ref c);

        int offsetX = data.stridesX[0] * s + data.stridesX[1] * r + data.stridesX[2] * n + data.stridesX[3] * t
                    + data.stridesX[4] * d + data.stridesX[5] * h + data.stridesX[6] * w + data.stridesX[7] * c;
        int offsetY = data.stridesY[0] * s + data.stridesY[1] * r + data.stridesY[2] * n + data.stridesY[3] * t
                    + data.stridesY[4] * d + data.stridesY[5] * h + data.stridesY[6] * w + data.stridesY[7] * c;

        float numerator = Xptr[offsetX];
        float denominator = Bptr[offsetY];
        Optr[i] = numerator / denominator;
    }
}
|
|
// Scheduling front-end and parameter block for the ElementwiseMin jobs.
internal unsafe struct ElementwiseMinJobHelper
{
    // Shape used by the job to turn a flat output index into eight coordinates.
    [ReadOnly] public TensorShape shapeO;
    // Per-coordinate multipliers used by the job to index into X.
    [ReadOnly] public fixed int stridesX[8];
    // Per-coordinate multipliers used by the job to index into B.
    [ReadOnly] public fixed int stridesY[8];
    // Not read by ElementwiseMinJob_Full_Float.Execute.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three agree on being
    // DataType.Half or not, then schedules the half or float job variant accordingly.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var xData = Pin(X);
        var bData = Pin(B);
        var oData = Pin(O, uploadCache: false);

        bool xIsHalf = xData.array.Type == DataType.Half;
        bool bIsHalf = bData.array.Type == DataType.Half;
        bool oIsHalf = oData.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (xIsHalf)
        {
            var halfJob = new ElementwiseMinJob_Full_Half { data = this };
            return halfJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
        }

        var floatJob = new ElementwiseMinJob_Full_Float { data = this };
        return floatJob.ScheduleXBO(xData, bData, oData, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ElementwiseMinJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X, accessed through a float pointer.
    public ReadOnlyMemResource X { get; set; }
    float* Xptr => X.ptrfloat;

    // Input B, accessed with the same element type as the activations (float).
    public ReadOnlyMemResource B { get; set; }
    float* Bptr => B.ptrfloat;

    // Output O, accessed through a float pointer.
    public ReadWriteMemResource O { get; set; }
    float* Optr => O.ptrfloat;

    // Supplies the output shape and the per-axis strides of X and B.
    public ElementwiseMinJobHelper data;

    // Writes O[i] = min(X[offsetX], B[offsetB]). The flat output index i is expanded into
    // 8 coordinates, which are then combined with each input's strides to find its element.
    public void Execute(int i)
    {
        int posS = 0, posR = 0, posN = 0, posT = 0, posD = 0, posH = 0, posW = 0, posC = 0;
        data.shapeO.GetPositionsFromIndex(i, ref posS, ref posR, ref posN, ref posT, ref posD, ref posH, ref posW, ref posC);

        int offsetX = data.stridesX[0] * posS
                    + data.stridesX[1] * posR
                    + data.stridesX[2] * posN
                    + data.stridesX[3] * posT
                    + data.stridesX[4] * posD
                    + data.stridesX[5] * posH
                    + data.stridesX[6] * posW
                    + data.stridesX[7] * posC;
        int offsetB = data.stridesY[0] * posS
                    + data.stridesY[1] * posR
                    + data.stridesY[2] * posN
                    + data.stridesY[3] * posT
                    + data.stridesY[4] * posD
                    + data.stridesY[5] * posH
                    + data.stridesY[6] * posW
                    + data.stridesY[7] * posC;

        float lhs = Xptr[offsetX];
        float rhs = Bptr[offsetB];

        Optr[i] = math.min(lhs, rhs);
    }
}
|
|
internal unsafe struct ElementwiseMaxJobHelper
{
    // Output tensor shape; the Max kernels use it to expand a flat output index into 8 coordinates.
    [ReadOnly] public TensorShape shapeO;
    // Per-axis multipliers the kernels apply to those coordinates to locate the X element.
    [ReadOnly] public fixed int stridesX[8];
    // Per-axis multipliers the kernels apply to those coordinates to locate the B element.
    [ReadOnly] public fixed int stridesY[8];
    // Scalar parameter; ElementwiseMaxJob_Full_Float / ElementwiseMaxJob_Full_Half do not read it.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three use the same
    // element type, then schedules the half or float Max kernel with a copy of this helper.
    // Returns the handle produced by the kernel's ScheduleXBO call.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var pinnedX = Pin(X);
        var pinnedB = Pin(B);
        var pinnedO = Pin(O, uploadCache: false);

        bool xIsHalf = pinnedX.array.Type == DataType.Half;
        bool bIsHalf = pinnedB.array.Type == DataType.Half;
        bool oIsHalf = pinnedO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (!xIsHalf)
        {
            var floatJob = new ElementwiseMaxJob_Full_Float { data = this };
            return floatJob.ScheduleXBO(pinnedX, pinnedB, pinnedO, arrayLength, innerBatchCount, fencingMode);
        }

        var halfJob = new ElementwiseMaxJob_Full_Half { data = this };
        return halfJob.ScheduleXBO(pinnedX, pinnedB, pinnedO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ElementwiseMaxJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X, accessed through a float pointer.
    public ReadOnlyMemResource X { get; set; }
    float* Xptr => X.ptrfloat;

    // Input B, accessed with the same element type as the activations (float).
    public ReadOnlyMemResource B { get; set; }
    float* Bptr => B.ptrfloat;

    // Output O, accessed through a float pointer.
    public ReadWriteMemResource O { get; set; }
    float* Optr => O.ptrfloat;

    // Supplies the output shape and the per-axis strides of X and B.
    public ElementwiseMaxJobHelper data;

    // Writes O[i] = max(X[offsetX], B[offsetB]). The flat output index i is expanded into
    // 8 coordinates, which are then combined with each input's strides to find its element.
    public void Execute(int i)
    {
        int posS = 0, posR = 0, posN = 0, posT = 0, posD = 0, posH = 0, posW = 0, posC = 0;
        data.shapeO.GetPositionsFromIndex(i, ref posS, ref posR, ref posN, ref posT, ref posD, ref posH, ref posW, ref posC);

        int offsetX = data.stridesX[0] * posS
                    + data.stridesX[1] * posR
                    + data.stridesX[2] * posN
                    + data.stridesX[3] * posT
                    + data.stridesX[4] * posD
                    + data.stridesX[5] * posH
                    + data.stridesX[6] * posW
                    + data.stridesX[7] * posC;
        int offsetB = data.stridesY[0] * posS
                    + data.stridesY[1] * posR
                    + data.stridesY[2] * posN
                    + data.stridesY[3] * posT
                    + data.stridesY[4] * posD
                    + data.stridesY[5] * posH
                    + data.stridesY[6] * posW
                    + data.stridesY[7] * posC;

        float lhs = Xptr[offsetX];
        float rhs = Bptr[offsetB];

        Optr[i] = math.max(lhs, rhs);
    }
}
|
|
internal unsafe struct ElementwisePowJobHelper
{
    // Output tensor shape; the Pow kernels use it to expand a flat output index into 8 coordinates.
    [ReadOnly] public TensorShape shapeO;
    // Per-axis multipliers the kernels apply to those coordinates to locate the X element.
    [ReadOnly] public fixed int stridesX[8];
    // Per-axis multipliers the kernels apply to those coordinates to locate the B element.
    [ReadOnly] public fixed int stridesY[8];
    // Scalar parameter; ElementwisePowJob_Full_Float / ElementwisePowJob_Full_Half do not read it.
    [ReadOnly] public float alpha;

    // Pins X, B and O (O with uploadCache: false), asserts that all three use the same
    // element type, then schedules the half or float Pow kernel with a copy of this helper.
    // Returns the handle produced by the kernel's ScheduleXBO call.
    public JobHandle ScheduleXBO(Tensor X, Tensor B, Tensor O, int arrayLength, int innerBatchCount, FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
    {
        var pinnedX = Pin(X);
        var pinnedB = Pin(B);
        var pinnedO = Pin(O, uploadCache: false);

        bool xIsHalf = pinnedX.array.Type == DataType.Half;
        bool bIsHalf = pinnedB.array.Type == DataType.Half;
        bool oIsHalf = pinnedO.array.Type == DataType.Half;
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, oIsHalf);
        UnityEngine.Assertions.Assert.AreEqual(xIsHalf, bIsHalf);

        if (!xIsHalf)
        {
            var floatJob = new ElementwisePowJob_Full_Float { data = this };
            return floatJob.ScheduleXBO(pinnedX, pinnedB, pinnedO, arrayLength, innerBatchCount, fencingMode);
        }

        var halfJob = new ElementwisePowJob_Full_Half { data = this };
        return halfJob.ScheduleXBO(pinnedX, pinnedB, pinnedO, arrayLength, innerBatchCount, fencingMode);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ElementwisePowJob_Full_Float : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X (the base), accessed through a float pointer.
    public ReadOnlyMemResource X { get; set; }
    float* Xptr => X.ptrfloat;

    // Input B (the exponent), accessed with the same element type as the activations (float).
    public ReadOnlyMemResource B { get; set; }
    float* Bptr => B.ptrfloat;

    // Output O, accessed through a float pointer.
    public ReadWriteMemResource O { get; set; }
    float* Optr => O.ptrfloat;

    // Supplies the output shape and the per-axis strides of X and B.
    public ElementwisePowJobHelper data;

    // Writes O[i] = pow(X[offsetX], B[offsetB]). The flat output index i is expanded into
    // 8 coordinates, which are then combined with each input's strides to find its element.
    public void Execute(int i)
    {
        int posS = 0, posR = 0, posN = 0, posT = 0, posD = 0, posH = 0, posW = 0, posC = 0;
        data.shapeO.GetPositionsFromIndex(i, ref posS, ref posR, ref posN, ref posT, ref posD, ref posH, ref posW, ref posC);

        int offsetX = data.stridesX[0] * posS
                    + data.stridesX[1] * posR
                    + data.stridesX[2] * posN
                    + data.stridesX[3] * posT
                    + data.stridesX[4] * posD
                    + data.stridesX[5] * posH
                    + data.stridesX[6] * posW
                    + data.stridesX[7] * posC;
        int offsetB = data.stridesY[0] * posS
                    + data.stridesY[1] * posR
                    + data.stridesY[2] * posN
                    + data.stridesY[3] * posT
                    + data.stridesY[4] * posD
                    + data.stridesY[5] * posH
                    + data.stridesY[6] * posW
                    + data.stridesY[7] * posC;

        float baseValue = Xptr[offsetX];
        float exponent = Bptr[offsetB];

        Optr[i] = math.pow(baseValue, exponent);
    }
}
|
|
|
|
#endregion
|
|
#region Broadcast Jobs declaration for mode: _ActAsFloat_WeightAsHalf
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct VectorBroadcastScaleBiasJob_ActAsFloat_WeightAsHalf : IJobParallelFor, IJobResourceDeclarationXSBO
{
    // Activations X, accessed through a float pointer.
    public ReadOnlyMemResource X { get; set; }
    float* Xptr => X.ptrfloat;

    // Scale vector S, accessed through a half pointer.
    public ReadOnlyMemResource S { get; set; }
    half* Sptr => S.ptrhalf;

    // Bias vector B, accessed through a half pointer.
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a float pointer.
    public ReadWriteMemResource O { get; set; }
    float* Optr => O.ptrfloat;

    // Supplies inOutChannels (row length) and alpha (bias multiplier).
    public VectorBroadcastScaleBiasJobHelper data;

    const int unrollSize = 32;

    // Processes row i of inOutChannels elements: O[row][k] = X[row][k] * S[k] + B[k] * alpha.
    // S and B are indexed from their start for every row.
    public void Execute(int i)
    {
        int channels = data.inOutChannels;
        float* rowIn = Xptr + i * channels;
        float* rowOut = Optr + i * channels;
        half* scale = Sptr;
        half* bias = Bptr;

        // Main part: whole chunks of unrollSize elements, with a fixed-trip-count inner loop.
        int k = 0;
        int unrolledLimit = channels - unrollSize + 1;
        for (; k < unrolledLimit; k += unrollSize)
        {
            for (int lane = 0; lane < unrollSize; lane++)
            {
                int idx = k + lane;
                rowOut[idx] = (float)(rowIn[idx] * scale[idx] + bias[idx] * data.alpha);
            }
        }

        // Tail: the elements left over after the last whole chunk.
        for (; k < channels; k++)
        {
            rowOut[k] = (float)(rowIn[k] * scale[k] + bias[k] * data.alpha);
        }
    }
}
|
|
|
|
|
|
|
|
#endregion
|
|
#region Broadcast Jobs declaration for mode: _Full_Half
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
unsafe struct VectorBroadcastScaleBiasJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXSBO
{
    // Activations X, accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Scale vector S, accessed through a half pointer.
    public ReadOnlyMemResource S { get; set; }
    half* Sptr => S.ptrhalf;

    // Bias vector B, accessed through a half pointer.
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Supplies inOutChannels (row length) and alpha (bias multiplier).
    public VectorBroadcastScaleBiasJobHelper data;

    const int unrollSize = 32;

    // Processes row i of inOutChannels elements: O[row][k] = X[row][k] * S[k] + B[k] * alpha,
    // converting the result back to half. S and B are indexed from their start for every row.
    public void Execute(int i)
    {
        int channels = data.inOutChannels;
        half* rowIn = Xptr + i * channels;
        half* rowOut = Optr + i * channels;
        half* scale = Sptr;
        half* bias = Bptr;

        // Main part: whole chunks of unrollSize elements, with a fixed-trip-count inner loop.
        int k = 0;
        int unrolledLimit = channels - unrollSize + 1;
        for (; k < unrolledLimit; k += unrollSize)
        {
            for (int lane = 0; lane < unrollSize; lane++)
            {
                int idx = k + lane;
                rowOut[idx] = (half)(rowIn[idx] * scale[idx] + bias[idx] * data.alpha);
            }
        }

        // Tail: the elements left over after the last whole chunk.
        for (; k < channels; k++)
        {
            rowOut[k] = (half)(rowIn[k] * scale[k] + bias[k] * data.alpha);
        }
    }
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ScalarBroadcastAddJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X, accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B, accessed with the same element type as the activations (half); only element 0 is read.
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Supplies alpha, the multiplier applied to B.
    public ScalarBroadcastAddJobHelper data;

    // Writes O[i] = B[0] * alpha + X[i], evaluated in float and stored as half.
    public void Execute(int i)
    {
        float scalar = Bptr[0];
        float x = Xptr[i];

        Optr[i] = (half)(scalar * data.alpha + x);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct BroadcastAddJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X, accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B, accessed with the same element type as the activations (half).
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Supplies alpha, the multiplier applied to B.
    public BroadcastAddJobHelper data;

    // Writes O[i] = B[i] * alpha + X[i], evaluated in float and stored as half.
    public void Execute(int i)
    {
        float b = Bptr[i];
        float x = Xptr[i];

        Optr[i] = (half)(b * data.alpha + x);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ScalarBroadcastMulJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X, accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B, accessed with the same element type as the activations (half); only element 0 is read.
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Helper parameters; Execute does not read them.
    public ScalarBroadcastMulJobHelper data;

    // Writes O[i] = X[i] * B[0], evaluated in float and stored as half.
    public void Execute(int i)
    {
        float x = Xptr[i];
        float scalar = Bptr[0];

        Optr[i] = (half)(x * scalar);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct BroadcastMulJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X, accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B, accessed with the same element type as the activations (half).
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Helper parameters; Execute does not read them.
    public BroadcastMulJobHelper data;

    // Writes O[i] = X[i] * B[i], evaluated in float and stored as half.
    public void Execute(int i)
    {
        float x = Xptr[i];
        float b = Bptr[i];

        Optr[i] = (half)(x * b);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ScalarBroadcastDivJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X (the dividend), accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B (the divisor), accessed with the same element type as the activations (half); only element 0 is read.
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Helper parameters; Execute does not read them.
    public ScalarBroadcastDivJobHelper data;

    // Writes O[i] = X[i] / B[0], evaluated in float and stored as half.
    public void Execute(int i)
    {
        float numerator = Xptr[i];
        float denominator = Bptr[0];

        Optr[i] = (half)(numerator / denominator);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct BroadcastDivJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X (the dividend), accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B (the divisor), accessed with the same element type as the activations (half).
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Helper parameters; Execute does not read them.
    public BroadcastDivJobHelper data;

    // Writes O[i] = X[i] / B[i], evaluated in float and stored as half.
    public void Execute(int i)
    {
        float numerator = Xptr[i];
        float denominator = Bptr[i];

        Optr[i] = (half)(numerator / denominator);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ScalarBroadcastMinJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X, accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B, accessed with the same element type as the activations (half); only element 0 is read.
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Helper parameters; Execute does not read them.
    public ScalarBroadcastMinJobHelper data;

    // Writes O[i] = min(X[i], B[0]), evaluated in float and stored as half.
    public void Execute(int i)
    {
        float x = Xptr[i];
        float scalar = Bptr[0];

        Optr[i] = (half)math.min(x, scalar);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct BroadcastMinJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X, accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B, accessed with the same element type as the activations (half).
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Helper parameters; Execute does not read them.
    public BroadcastMinJobHelper data;

    // Writes O[i] = min(X[i], B[i]), evaluated in float and stored as half.
    public void Execute(int i)
    {
        float x = Xptr[i];
        float b = Bptr[i];

        Optr[i] = (half)math.min(x, b);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ScalarBroadcastMaxJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X, accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B, accessed with the same element type as the activations (half); only element 0 is read.
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Helper parameters; Execute does not read them.
    public ScalarBroadcastMaxJobHelper data;

    // Writes O[i] = max(X[i], B[0]), evaluated in float and stored as half.
    public void Execute(int i)
    {
        float x = Xptr[i];
        float scalar = Bptr[0];

        Optr[i] = (half)math.max(x, scalar);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct BroadcastMaxJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X, accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B, accessed with the same element type as the activations (half).
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Helper parameters; Execute does not read them.
    public BroadcastMaxJobHelper data;

    // Writes O[i] = max(X[i], B[i]), evaluated in float and stored as half.
    public void Execute(int i)
    {
        float x = Xptr[i];
        float b = Bptr[i];

        Optr[i] = (half)math.max(x, b);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ScalarBroadcastPowJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X (the base), accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B (the exponent), accessed with the same element type as the activations (half); only element 0 is read.
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Helper parameters; Execute does not read them.
    public ScalarBroadcastPowJobHelper data;

    // Writes O[i] = pow(X[i], B[0]), evaluated in float and stored as half.
    public void Execute(int i)
    {
        float baseValue = Xptr[i];
        float exponent = Bptr[0];

        Optr[i] = (half)math.pow(baseValue, exponent);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct BroadcastPowJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X (the base), accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B (the exponent), accessed with the same element type as the activations (half).
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Helper parameters; Execute does not read them.
    public BroadcastPowJobHelper data;

    // Writes O[i] = pow(X[i], B[i]), evaluated in float and stored as half.
    public void Execute(int i)
    {
        float baseValue = Xptr[i];
        float exponent = Bptr[i];

        Optr[i] = (half)math.pow(baseValue, exponent);
    }
}
|
|
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ElementwiseAddJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X, accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B, accessed with the same element type as the activations (half).
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Supplies the output shape, the per-axis strides of X and B, and alpha (multiplier on B).
    public ElementwiseAddJobHelper data;

    // Writes O[i] = alpha * B[offsetB] + X[offsetX], evaluated in float and stored as half.
    // The flat output index i is expanded into 8 coordinates, which are then combined
    // with each input's strides to find its element.
    public void Execute(int i)
    {
        int posS = 0, posR = 0, posN = 0, posT = 0, posD = 0, posH = 0, posW = 0, posC = 0;
        data.shapeO.GetPositionsFromIndex(i, ref posS, ref posR, ref posN, ref posT, ref posD, ref posH, ref posW, ref posC);

        int offsetX = data.stridesX[0] * posS
                    + data.stridesX[1] * posR
                    + data.stridesX[2] * posN
                    + data.stridesX[3] * posT
                    + data.stridesX[4] * posD
                    + data.stridesX[5] * posH
                    + data.stridesX[6] * posW
                    + data.stridesX[7] * posC;
        int offsetB = data.stridesY[0] * posS
                    + data.stridesY[1] * posR
                    + data.stridesY[2] * posN
                    + data.stridesY[3] * posT
                    + data.stridesY[4] * posD
                    + data.stridesY[5] * posH
                    + data.stridesY[6] * posW
                    + data.stridesY[7] * posC;

        float x = Xptr[offsetX];
        float b = Bptr[offsetB];

        Optr[i] = (half)(data.alpha * b + x);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ElementwiseMulJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X, accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B, accessed with the same element type as the activations (half).
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Supplies the output shape and the per-axis strides of X and B.
    public ElementwiseMulJobHelper data;

    // Writes O[i] = X[offsetX] * B[offsetB], evaluated in float and stored as half.
    // The flat output index i is expanded into 8 coordinates, which are then combined
    // with each input's strides to find its element.
    public void Execute(int i)
    {
        int posS = 0, posR = 0, posN = 0, posT = 0, posD = 0, posH = 0, posW = 0, posC = 0;
        data.shapeO.GetPositionsFromIndex(i, ref posS, ref posR, ref posN, ref posT, ref posD, ref posH, ref posW, ref posC);

        int offsetX = data.stridesX[0] * posS
                    + data.stridesX[1] * posR
                    + data.stridesX[2] * posN
                    + data.stridesX[3] * posT
                    + data.stridesX[4] * posD
                    + data.stridesX[5] * posH
                    + data.stridesX[6] * posW
                    + data.stridesX[7] * posC;
        int offsetB = data.stridesY[0] * posS
                    + data.stridesY[1] * posR
                    + data.stridesY[2] * posN
                    + data.stridesY[3] * posT
                    + data.stridesY[4] * posD
                    + data.stridesY[5] * posH
                    + data.stridesY[6] * posW
                    + data.stridesY[7] * posC;

        float x = Xptr[offsetX];
        float b = Bptr[offsetB];

        Optr[i] = (half)(x * b);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ElementwiseDivJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X (the dividend), accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B (the divisor), accessed with the same element type as the activations (half).
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Supplies the output shape and the per-axis strides of X and B.
    public ElementwiseDivJobHelper data;

    // Writes O[i] = X[offsetX] / B[offsetB], evaluated in float and stored as half.
    // The flat output index i is expanded into 8 coordinates, which are then combined
    // with each input's strides to find its element.
    public void Execute(int i)
    {
        int posS = 0, posR = 0, posN = 0, posT = 0, posD = 0, posH = 0, posW = 0, posC = 0;
        data.shapeO.GetPositionsFromIndex(i, ref posS, ref posR, ref posN, ref posT, ref posD, ref posH, ref posW, ref posC);

        int offsetX = data.stridesX[0] * posS
                    + data.stridesX[1] * posR
                    + data.stridesX[2] * posN
                    + data.stridesX[3] * posT
                    + data.stridesX[4] * posD
                    + data.stridesX[5] * posH
                    + data.stridesX[6] * posW
                    + data.stridesX[7] * posC;
        int offsetB = data.stridesY[0] * posS
                    + data.stridesY[1] * posR
                    + data.stridesY[2] * posN
                    + data.stridesY[3] * posT
                    + data.stridesY[4] * posD
                    + data.stridesY[5] * posH
                    + data.stridesY[6] * posW
                    + data.stridesY[7] * posC;

        float numerator = Xptr[offsetX];
        float denominator = Bptr[offsetB];

        Optr[i] = (half)(numerator / denominator);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ElementwiseMinJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X, accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B, accessed with the same element type as the activations (half).
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Supplies the output shape and the per-axis strides of X and B.
    public ElementwiseMinJobHelper data;

    // Writes O[i] = min(X[offsetX], B[offsetB]), evaluated in float and stored as half.
    // The flat output index i is expanded into 8 coordinates, which are then combined
    // with each input's strides to find its element.
    public void Execute(int i)
    {
        int posS = 0, posR = 0, posN = 0, posT = 0, posD = 0, posH = 0, posW = 0, posC = 0;
        data.shapeO.GetPositionsFromIndex(i, ref posS, ref posR, ref posN, ref posT, ref posD, ref posH, ref posW, ref posC);

        int offsetX = data.stridesX[0] * posS
                    + data.stridesX[1] * posR
                    + data.stridesX[2] * posN
                    + data.stridesX[3] * posT
                    + data.stridesX[4] * posD
                    + data.stridesX[5] * posH
                    + data.stridesX[6] * posW
                    + data.stridesX[7] * posC;
        int offsetB = data.stridesY[0] * posS
                    + data.stridesY[1] * posR
                    + data.stridesY[2] * posN
                    + data.stridesY[3] * posT
                    + data.stridesY[4] * posD
                    + data.stridesY[5] * posH
                    + data.stridesY[6] * posW
                    + data.stridesY[7] * posC;

        float lhs = Xptr[offsetX];
        float rhs = Bptr[offsetB];

        Optr[i] = (half)math.min(lhs, rhs);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ElementwiseMaxJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X, accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B, accessed with the same element type as the activations (half).
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Supplies the output shape and the per-axis strides of X and B.
    public ElementwiseMaxJobHelper data;

    // Writes O[i] = max(X[offsetX], B[offsetB]), evaluated in float and stored as half.
    // The flat output index i is expanded into 8 coordinates, which are then combined
    // with each input's strides to find its element.
    public void Execute(int i)
    {
        int posS = 0, posR = 0, posN = 0, posT = 0, posD = 0, posH = 0, posW = 0, posC = 0;
        data.shapeO.GetPositionsFromIndex(i, ref posS, ref posR, ref posN, ref posT, ref posD, ref posH, ref posW, ref posC);

        int offsetX = data.stridesX[0] * posS
                    + data.stridesX[1] * posR
                    + data.stridesX[2] * posN
                    + data.stridesX[3] * posT
                    + data.stridesX[4] * posD
                    + data.stridesX[5] * posH
                    + data.stridesX[6] * posW
                    + data.stridesX[7] * posC;
        int offsetB = data.stridesY[0] * posS
                    + data.stridesY[1] * posR
                    + data.stridesY[2] * posN
                    + data.stridesY[3] * posT
                    + data.stridesY[4] * posD
                    + data.stridesY[5] * posH
                    + data.stridesY[6] * posW
                    + data.stridesY[7] * posC;

        float lhs = Xptr[offsetX];
        float rhs = Bptr[offsetB];

        Optr[i] = (half)math.max(lhs, rhs);
    }
}
|
|
[BurstCompile(OptimizeFor = OptimizeFor.Performance, FloatMode = FloatMode.Default, FloatPrecision = FloatPrecision.Standard)]
unsafe struct ElementwisePowJob_Full_Half : IJobParallelFor, IJobResourceDeclarationXBO
{
    // Input X (the base), accessed through a half pointer.
    public ReadOnlyMemResource X { get; set; }
    half* Xptr => X.ptrhalf;

    // Input B (the exponent), accessed with the same element type as the activations (half).
    public ReadOnlyMemResource B { get; set; }
    half* Bptr => B.ptrhalf;

    // Output O, accessed through a half pointer.
    public ReadWriteMemResource O { get; set; }
    half* Optr => O.ptrhalf;

    // Supplies the output shape and the per-axis strides of X and B.
    public ElementwisePowJobHelper data;

    // Writes O[i] = pow(X[offsetX], B[offsetB]), evaluated in float and stored as half.
    // The flat output index i is expanded into 8 coordinates, which are then combined
    // with each input's strides to find its element.
    public void Execute(int i)
    {
        int posS = 0, posR = 0, posN = 0, posT = 0, posD = 0, posH = 0, posW = 0, posC = 0;
        data.shapeO.GetPositionsFromIndex(i, ref posS, ref posR, ref posN, ref posT, ref posD, ref posH, ref posW, ref posC);

        int offsetX = data.stridesX[0] * posS
                    + data.stridesX[1] * posR
                    + data.stridesX[2] * posN
                    + data.stridesX[3] * posT
                    + data.stridesX[4] * posD
                    + data.stridesX[5] * posH
                    + data.stridesX[6] * posW
                    + data.stridesX[7] * posC;
        int offsetB = data.stridesY[0] * posS
                    + data.stridesY[1] * posR
                    + data.stridesY[2] * posN
                    + data.stridesY[3] * posT
                    + data.stridesY[4] * posD
                    + data.stridesY[5] * posH
                    + data.stridesY[6] * posW
                    + data.stridesY[7] * posC;

        float baseValue = Xptr[offsetX];
        float exponent = Bptr[offsetB];

        Optr[i] = (half)math.pow(baseValue, exponent);
    }
}
|
|
|
|
#endregion
|
|
}
|
|
}
|