using UnityEngine;
using UnityEngine.Assertions;
using System;
using System.Collections.Generic;
using Unity.Collections;
using Unity.Collections.LowLevel.Unsafe;
using Unity.Jobs;
using Unity.Mathematics;

namespace Unity.Barracuda {

//#region Job output context helper

/// <summary>
/// Extension methods that bind memory resources to a Burst job, schedule it behind the
/// fences of those resources and (optionally) publish the resulting job handle back onto
/// the resources.
/// Resource naming follows the job declarations: X, S and B are read-only inputs,
/// O is the read/write output.
/// </summary>
internal static class BurstSchedulingHelper
{
    #region Private scheduling helpers with pointer aliasing verification

    /// <summary>
    /// Binds the X, S, B and O pointers to a copy of <paramref name="jobData"/> and schedules
    /// it as a parallel-for job that starts after <paramref name="fenceBeforeJobStart"/>.
    /// </summary>
    private static unsafe JobHandle ScheduleXSBOInternal<T>(T jobData,
        JobHandle fenceBeforeJobStart,
        void* ptrX,
        void* ptrS,
        void* ptrB,
        void* ptrO,
        int arrayLength, int innerloopBatchCount)
        where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXSBO
    {
        T jobDataInternalCopy = jobData;
        jobDataInternalCopy.X = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrX};
        jobDataInternalCopy.S = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrS};
        jobDataInternalCopy.B = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrB};
        jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
        return jobDataInternalCopy.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
    }

    /// <summary>
    /// Binds the X, B and O pointers to a copy of <paramref name="jobData"/> and schedules
    /// it as a parallel-for job that starts after <paramref name="fenceBeforeJobStart"/>.
    /// </summary>
    private static unsafe JobHandle ScheduleXBOInternal<T>(T jobData,
        JobHandle fenceBeforeJobStart,
        void* ptrX,
        void* ptrB,
        void* ptrO,
        int arrayLength, int innerloopBatchCount)
        where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXBO
    {
        T jobDataInternalCopy = jobData;
        jobDataInternalCopy.X = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrX};
        jobDataInternalCopy.B = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrB};
        jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
        return jobDataInternalCopy.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
    }

    /// <summary>
    /// Binds the X and O pointers to a copy of <paramref name="jobData"/> and schedules
    /// it as a parallel-for job that starts after <paramref name="fenceBeforeJobStart"/>.
    /// No aliasing check is performed here, unlike the single-job overload.
    /// </summary>
    private static unsafe JobHandle ScheduleXOInternal<T>(T jobData,
        JobHandle fenceBeforeJobStart,
        void* ptrX,
        void* ptrO,
        int arrayLength, int innerloopBatchCount)
        where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXO
    {
        T jobDataInternalCopy = jobData;
        jobDataInternalCopy.X = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrX};
        jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
        return jobDataInternalCopy.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
    }

    /// <summary>
    /// Binds the X and O pointers to a copy of <paramref name="jobData"/> and schedules
    /// it as a single job that starts after <paramref name="fenceBeforeJobStart"/>.
    /// Asserts that input and output do not start at the same address.
    /// </summary>
    private static unsafe JobHandle ScheduleXOInternal<T>(T jobData,
        JobHandle fenceBeforeJobStart,
        void* ptrX,
        void* ptrO)
        where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationXO
    {
        Assert.IsTrue(ptrO != ptrX);

        T jobDataInternalCopy = jobData;
        jobDataInternalCopy.X = new BurstCPUOps.ReadOnlyMemResource() {ptr = ptrX};
        jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
        return jobDataInternalCopy.Schedule(fenceBeforeJobStart);
    }

    /// <summary>
    /// Binds the O pointer to a copy of <paramref name="jobData"/> and schedules it as a
    /// single job that starts after <paramref name="fenceBeforeJobStart"/>.
    /// </summary>
    private static unsafe JobHandle ScheduleOInternal<T>(T jobData,
        JobHandle fenceBeforeJobStart,
        void* ptrO)
        where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationO
    {
        T jobDataInternalCopy = jobData;
        jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
        return jobDataInternalCopy.Schedule(fenceBeforeJobStart);
    }

    /// <summary>
    /// Binds the O pointer to a copy of <paramref name="jobData"/> and schedules it as a
    /// parallel-for job that starts after <paramref name="fenceBeforeJobStart"/>.
    /// </summary>
    private static unsafe JobHandle ScheduleOInternal<T>(T jobData,
        JobHandle fenceBeforeJobStart,
        void* ptrO,
        int arrayLength, int innerloopBatchCount)
        where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationO
    {
        T jobDataInternalCopy = jobData;
        jobDataInternalCopy.O = new BurstCPUOps.ReadWriteMemResource() {ptr = ptrO};
        return jobDataInternalCopy.Schedule(arrayLength, innerloopBatchCount, fenceBeforeJobStart);
    }

    #endregion

    #region Private fencing helper for readability

    // A job reading X/S/B must wait for their `fence`; a job writing O must wait for O's `reuse`.

    /// <summary>Dependency a job using X, S, B as inputs and O as output has to wait on.</summary>
    private static JobHandle GetFenceBeforeJobStartXSBO(
        IDependableMemoryResource pinX,
        IDependableMemoryResource pinS,
        IDependableMemoryResource pinB,
        IDependableMemoryResource pinO)
    {
        return BurstCPUOps.Dependencies(pinX.fence, pinS.fence, pinB.fence, pinO.reuse);
    }

    /// <summary>Dependency a job using X, B as inputs and O as output has to wait on.</summary>
    private static JobHandle GetFenceBeforeJobStartXBO(
        IDependableMemoryResource pinX,
        IDependableMemoryResource pinB,
        IDependableMemoryResource pinO)
    {
        return BurstCPUOps.Dependencies(pinX.fence, pinB.fence, pinO.reuse);
    }

    /// <summary>Dependency a job using X as input and O as output has to wait on.</summary>
    private static JobHandle GetFenceBeforeJobStartXO(
        IDependableMemoryResource pinX,
        IDependableMemoryResource pinO)
    {
        return BurstCPUOps.Dependencies(pinX.fence, pinO.reuse);
    }

    /// <summary>
    /// Publishes <paramref name="jobFence"/> on the resources of an XSBO job:
    /// as <c>reuse</c> on the inputs and as <c>fence</c> on the output.
    /// </summary>
    private static void SetXSBOFences(this JobHandle jobFence,
        IDependableMemoryResource pinX,
        IDependableMemoryResource pinS,
        IDependableMemoryResource pinB,
        IDependableMemoryResource pinO)
    {
        pinX.reuse = jobFence;
        pinS.reuse = jobFence;
        pinB.reuse = jobFence;
        pinO.fence = jobFence;
    }

    /// <summary>
    /// Publishes <paramref name="jobFence"/> on the resources of an XBO job:
    /// as <c>reuse</c> on the inputs and as <c>fence</c> on the output.
    /// </summary>
    private static void SetXBOFences(this JobHandle jobFence,
        IDependableMemoryResource pinX,
        IDependableMemoryResource pinB,
        IDependableMemoryResource pinO)
    {
        pinX.reuse = jobFence;
        pinB.reuse = jobFence;
        pinO.fence = jobFence;
    }

    /// <summary>
    /// Publishes <paramref name="jobFence"/> on the resources of an XO job:
    /// as <c>reuse</c> on the input and as <c>fence</c> on the output.
    /// </summary>
    private static void SetXOFences(this JobHandle jobFence,
        IDependableMemoryResource pinX,
        IDependableMemoryResource pinO)
    {
        pinX.reuse = jobFence;
        pinO.fence = jobFence;
    }

    #endregion

    #region Immediate scheduling helper

    /// <summary>Selects who updates the resource fences once a job has been scheduled.</summary>
    internal enum FencingHelperMode
    {
        /// <summary>The scheduling helper assigns the job handle to the resources itself.</summary>
        UpdateResourcesFencesOnScheduling,
        /// <summary>The helper leaves the resources untouched; the caller handles the fences.</summary>
        CustomResourcesFencesHandling,
    }

    /// <summary>
    /// Schedules a parallel-for job reading <paramref name="rX"/>, <paramref name="rS"/>,
    /// <paramref name="rB"/> and writing <paramref name="rO"/>.
    /// </summary>
    /// <returns>Handle of the scheduled job.</returns>
    internal static unsafe JobHandle ScheduleXSBO<T>(this T jobData,
        IDependableMemoryResource rX,
        IDependableMemoryResource rS,
        IDependableMemoryResource rB,
        IDependableMemoryResource rO,
        int arrayLength, int innerloopBatchCount,
        FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
        where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXSBO
    {
        var fenceBeforeJobStart = GetFenceBeforeJobStartXSBO(rX, rS, rB, rO);

        JobHandle jobFence = ScheduleXSBOInternal(jobData, fenceBeforeJobStart,
            rX.rawPtr, rS.rawPtr, rB.rawPtr, rO.rawPtr, arrayLength, innerloopBatchCount);

        if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
        {
            jobFence.SetXSBOFences(rX, rS, rB, rO);
        }

        return jobFence;
    }

    /// <summary>
    /// Schedules a parallel-for job reading <paramref name="X"/>, <paramref name="B"/>
    /// and writing <paramref name="O"/>.
    /// </summary>
    /// <returns>Handle of the scheduled job.</returns>
    internal static unsafe JobHandle ScheduleXBO<T>(this T jobData,
        IDependableMemoryResource X,
        IDependableMemoryResource B,
        IDependableMemoryResource O,
        int arrayLength, int innerloopBatchCount,
        FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
        where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXBO
    {
        var fenceBeforeJobStart = GetFenceBeforeJobStartXBO(X, B, O);

        JobHandle jobFence = ScheduleXBOInternal(jobData, fenceBeforeJobStart,
            X.rawPtr, B.rawPtr, O.rawPtr, arrayLength, innerloopBatchCount);

        if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
        {
            jobFence.SetXBOFences(X, B, O);
        }

        return jobFence;
    }

    /// <summary>Schedules a single job writing <paramref name="O"/>.</summary>
    /// <returns>Handle of the scheduled job.</returns>
    internal static unsafe JobHandle ScheduleO<T>(this T jobData,
        IDependableMemoryResource O,
        FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
        where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationO
    {
        // Output only: the job just has to wait until O may be written again.
        var fenceBeforeJobStart = O.reuse;

        JobHandle jobFence = ScheduleOInternal(jobData, fenceBeforeJobStart, O.rawPtr);

        if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
        {
            O.fence = jobFence;
        }

        return jobFence;
    }

    /// <summary>
    /// Schedules a parallel-for job reading <paramref name="X"/> and writing <paramref name="O"/>.
    /// </summary>
    /// <returns>Handle of the scheduled job.</returns>
    internal static unsafe JobHandle ScheduleXO<T>(this T jobData,
        IDependableMemoryResource X,
        IDependableMemoryResource O,
        int arrayLength, int innerloopBatchCount,
        FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
        where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXO
    {
        var fenceBeforeJobStart = GetFenceBeforeJobStartXO(X, O);

        JobHandle jobFence = ScheduleXOInternal(jobData, fenceBeforeJobStart,
            X.rawPtr, O.rawPtr, arrayLength, innerloopBatchCount);

        if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
        {
            jobFence.SetXOFences(X, O);
        }

        return jobFence;
    }

    /// <summary>
    /// Schedules a parallel-for job writing into <paramref name="pinO"/>, starting
    /// <paramref name="offsetO"/> elements after the tensor data's own offset.
    /// </summary>
    /// <returns>Handle of the scheduled job.</returns>
    internal static unsafe JobHandle ScheduleO<T>(this T jobData,
        BurstTensorData pinO,
        int offsetO,
        int arrayLength, int innerloopBatchCount,
        FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
        where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationO
    {
        var fenceBeforeJobStart = pinO.reuse;

        void* ptrO = pinO.array.RawAddressAt(pinO.offset+offsetO);
        JobHandle jobFence = ScheduleOInternal(jobData, fenceBeforeJobStart, ptrO, arrayLength, innerloopBatchCount);

        if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
        {
            pinO.fence = jobFence;
        }

        return jobFence;
    }

    /// <summary>
    /// Schedules a single job reading <paramref name="pinX"/> and writing <paramref name="pinO"/>,
    /// each addressed at its own offset plus the given extra offset.
    /// Input and output start addresses must differ (asserted).
    /// </summary>
    /// <returns>Handle of the scheduled job.</returns>
    internal static unsafe JobHandle ScheduleXO<T>(this T jobData,
        BurstTensorData pinX,
        int offsetX,
        BurstTensorData pinO,
        int offsetO,
        FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
        where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationXO
    {
        var fenceBeforeJobStart = GetFenceBeforeJobStartXO(pinX, pinO);

        void* ptrX = pinX.array.RawAddressAt(pinX.offset+offsetX);
        void* ptrO = pinO.array.RawAddressAt(pinO.offset+offsetO);
        JobHandle jobFence = ScheduleXOInternal(jobData, fenceBeforeJobStart, ptrX, ptrO);

        if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
        {
            jobFence.SetXOFences(pinX, pinO);
        }

        return jobFence;
    }

    /// <summary>
    /// Schedules a single job reading <paramref name="X"/> and writing <paramref name="O"/>.
    /// Input and output start addresses must differ (asserted).
    /// </summary>
    /// <returns>Handle of the scheduled job.</returns>
    internal static unsafe JobHandle ScheduleXO<T>(this T jobData,
        IDependableMemoryResource X,
        IDependableMemoryResource O,
        FencingHelperMode fencingMode=FencingHelperMode.UpdateResourcesFencesOnScheduling)
        where T : struct, IJob, BurstCPUOps.IJobResourceDeclarationXO
    {
        var fenceBeforeJobStart = GetFenceBeforeJobStartXO(X, O);

        JobHandle jobFence = ScheduleXOInternal(jobData, fenceBeforeJobStart, X.rawPtr, O.rawPtr);

        if (fencingMode==FencingHelperMode.UpdateResourcesFencesOnScheduling)
        {
            jobFence.SetXOFences(X, O);
        }

        return jobFence;
    }

    #endregion
}

#region Scheduling helper for parallel jobs

/// <summary>
/// Schedules several jobs that all write to the same output resource. Jobs are scheduled with
/// <see cref="BurstSchedulingHelper.FencingHelperMode.CustomResourcesFencesHandling"/>; the
/// combined fences are only assigned to the resources in <see cref="Dispose"/>.
/// Read dependencies are tracked in a static dictionary shared by all instances, which the
/// constructor asserts to be empty.
/// </summary>
internal struct ParallelJobsContext : IDisposable
{
    // Input resource -> combined handle of every job of this context that reads from it.
    internal static Dictionary<IDependableMemoryResource, JobHandle> s_ReadDependencyTracker = new Dictionary<IDependableMemoryResource, JobHandle>(100);

    private readonly IDependableMemoryResource outputResource;
    private JobHandle combinedJobFence;

    /// <summary>Creates a context whose jobs all write to <paramref name="output"/>.</summary>
    public ParallelJobsContext(IDependableMemoryResource output)
    {
        outputResource = output;
        combinedJobFence = new JobHandle();
        Assert.AreEqual(0, s_ReadDependencyTracker.Count,
            "s_ReadDependencyTracker should be empty meaning ParrallelJobs was not disposed properly.");
    }

    //For now only CopyStrideJobHelper and tests need ParallelJobsContext. If this code needs to be duplicated for more cases in the future:
    //- Maybe add a generic version by having CopyStrideJobHelper and other helper structs implement an interface (but beware of GC).
    //- Or make ParallelJobsContext partial and have the code generated by the jobs template.

    /// <summary>
    /// Schedules a stride copy from <paramref name="pinX"/> to <paramref name="pinO"/> and records
    /// its handle for the deferred fence update. <paramref name="pinO"/> must be the output
    /// resource of this context (asserted).
    /// </summary>
    public JobHandle ScheduleXO(
        BurstCPUOps.CopyStrideJobHelper jobData,//See comment above.
        BurstTensorData pinX, int offsetX,
        BurstTensorData pinO, int offsetO)
    {
        Assert.IsTrue(pinO == outputResource);
        var jobFence = jobData.ScheduleXO(pinX, offsetX, pinO, offsetO, BurstSchedulingHelper.FencingHelperMode.CustomResourcesFencesHandling);
        TrackJobReadDependencies(pinX, jobFence);
        AddJobDependencyToOutputFence(jobFence);
        return jobFence;
    }

    /// <summary>
    /// Schedules a parallel-for job reading <paramref name="pinX"/> and writing
    /// <paramref name="pinO"/>, and records its handle for the deferred fence update.
    /// <paramref name="pinO"/> must be the output resource of this context (asserted).
    /// </summary>
    public JobHandle ScheduleXO<T>(
        T jobData,
        BurstTensorData pinX,
        BurstTensorData pinO,
        int arrayLength, int innerloopBatchCount)
        where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXO
    {
        Assert.IsTrue(pinO == outputResource);
        var jobFence = jobData.ScheduleXO(pinX, pinO, arrayLength, innerloopBatchCount, BurstSchedulingHelper.FencingHelperMode.CustomResourcesFencesHandling);
        TrackJobReadDependencies(pinX, jobFence);
        AddJobDependencyToOutputFence(jobFence);
        return jobFence;
    }

    /// <summary>
    /// Schedules a parallel-for job reading <paramref name="pinX"/> and <paramref name="pinB"/>
    /// and writing <paramref name="pinO"/>, and records its handle for the deferred fence update.
    /// <paramref name="pinO"/> must be the output resource of this context (asserted).
    /// </summary>
    public JobHandle ScheduleXBO<T>(
        T jobData,
        BurstTensorData pinX,
        BurstTensorData pinB,
        BurstTensorData pinO,
        int arrayLength, int innerloopBatchCount)
        where T : struct, IJobParallelFor, BurstCPUOps.IJobResourceDeclarationXBO
    {
        Assert.IsTrue(pinO == outputResource);
        var jobFence = jobData.ScheduleXBO(pinX, pinB, pinO, arrayLength, innerloopBatchCount, BurstSchedulingHelper.FencingHelperMode.CustomResourcesFencesHandling);
        TrackJobReadDependencies(pinX, jobFence);
        TrackJobReadDependencies(pinB, jobFence);
        AddJobDependencyToOutputFence(jobFence);
        return jobFence;
    }

    /// <summary>Adds <paramref name="jobFence"/> to the combined fence of the output resource.</summary>
    internal void AddJobDependencyToOutputFence(JobHandle jobFence)
    {
        //Once all jobs writing to O are done, further jobs will be able to read from O.
        //We combine the job fences of all jobs writing to O here and assign the result to O.fence in Dispose().
        combinedJobFence = JobHandle.CombineDependencies(combinedJobFence, jobFence);
    }

    /// <summary>
    /// Records that the job behind <paramref name="jobFence"/> reads from resource
    /// <paramref name="T"/>. A null resource is ignored.
    /// </summary>
    internal void TrackJobReadDependencies(IDependableMemoryResource T, JobHandle jobFence)
    {
        //Once all jobs reading from T are done, further jobs will be able to write to T.
        //We combine the job fences of all jobs reading from T here and assign the result to T.reuse in Dispose().
        if (T == null)
            return;

        // Single lookup: merge with the handle already tracked for T, if any.
        if (s_ReadDependencyTracker.TryGetValue(T, out JobHandle trackedFence))
            s_ReadDependencyTracker[T] = JobHandle.CombineDependencies(trackedFence, jobFence);
        else
            s_ReadDependencyTracker[T] = jobFence;
    }

    /// <summary>
    /// Assigns the combined read fences to <c>reuse</c> of every tracked input, the combined
    /// job fence to <c>fence</c> of the output resource, then clears the shared tracker.
    /// </summary>
    public void Dispose()
    {
        // The dictionary itself is not modified while enumerating; only the resources are.
        foreach (var readDependency in s_ReadDependencyTracker)
        {
            readDependency.Key.reuse = readDependency.Value;
        }
        outputResource.fence = combinedJobFence;
        s_ReadDependencyTracker.Clear();
    }
}

#endregion

#region Memory allocation wrapper usable by job fencing helpers

/// <summary>
/// Raw unmanaged allocation that carries the fences expected by the job fencing helpers.
/// This class only allocates; it exposes no method that frees the memory, and
/// <see cref="ClearState"/> merely resets the bookkeeping (see the assert in <see cref="Allocate"/>).
/// </summary>
internal unsafe class FencedMemoryAlloc : IDependableMemoryResource
{
    private JobHandle m_ReadFence;
    private JobHandle m_WriteFence;
    private void* data;

    /// <summary>Untyped pointer to the allocation (null when nothing is allocated).</summary>
    public void* rawPtr => data;

    /// <summary>Allocation viewed as half values; asserts that <see cref="type"/> is Half.</summary>
    public half* halfdata { get { Assert.AreEqual(DataType.Half, type); return (half*) data; } }

    /// <summary>Allocation viewed as float values; asserts that <see cref="type"/> is Float.</summary>
    public float* floatdata { get { Assert.AreEqual(DataType.Float, type); return (float*) data; } }

    public DataType type;
    public int elementCount;
    public int elementSize;

    /// <inheritdoc/>
    public JobHandle fence
    {
        get { return m_ReadFence; }
        // A new writer gates both subsequent reads and subsequent writes.
        set { m_ReadFence = value; m_WriteFence = value; }
    }

    /// <inheritdoc/>
    public JobHandle reuse
    {
        get { return m_WriteFence; }
        set { m_WriteFence = value; }
    }

    /// <summary>
    /// Allocates room for <paramref name="numElement"/> items of <paramref name="dataType"/>
    /// and resets both fences.
    /// </summary>
    /// <param name="numElement">Number of elements to allocate.</param>
    /// <param name="dataType">Element type; determines the element size.</param>
    /// <param name="alignment">Alignment in bytes; asserted to be a multiple of the element size.</param>
    /// <param name="allocator">Unity allocator used for the allocation.</param>
    public void Allocate(int numElement, DataType dataType, int alignment, Allocator allocator)
    {
        m_ReadFence = new JobHandle();
        m_WriteFence = new JobHandle();
        elementCount = numElement;
        elementSize = BarracudaArray.DataItemSize(dataType);
        type = dataType;
        Assert.IsTrue(data == null, "Please call ClearState() when freeing underlying memory.");
        Assert.IsTrue(alignment % elementSize == 0);

        // Compute the byte size in 64 bits: Malloc takes a long, and an int product
        // would silently wrap around for large buffers.
        long sizeInBytes = (long)elementCount * elementSize;
        data = UnsafeUtility.Malloc(sizeInBytes, alignment, allocator);
        Assert.IsTrue(data != null);
    }

    /// <summary>
    /// Resets fences, element bookkeeping and the data pointer. Does not free any memory.
    /// </summary>
    public void ClearState()
    {
        m_ReadFence = new JobHandle();
        m_WriteFence = new JobHandle();
        elementCount = 0;
        elementSize = 0;
        type = DataType.Float;
        data = null;
    }

    /// <summary>Creates an empty wrapper with no memory allocated.</summary>
    public FencedMemoryAlloc()
    {
        ClearState();
    }
}

#endregion

} // namespace Barracuda