Skip to content

Commit

Permalink
Optimized PTX IntrinsicMath implementation to use LibDevice.
Browse files Browse the repository at this point in the history
  • Loading branch information
MoFtZ committed Jan 17, 2024
1 parent 82fd218 commit 37af1f7
Show file tree
Hide file tree
Showing 7 changed files with 219 additions and 65 deletions.
4 changes: 2 additions & 2 deletions Src/ILGPU.Algorithms.Tests/XMathTests.Sqrt.tt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU Algorithms
// Copyright (c) 2020-2023 ILGPU Project
// Copyright (c) 2020-2024 ILGPU Project
// www.ilgpu.net
//
// File: XMathTests.Sqrt.tt/XMathTests.Sqrt.cs
Expand Down Expand Up @@ -32,7 +32,7 @@ using Xunit;

var rsqrtFunctions = new []
{
new XMathFunction("Rsqrt" , "float" , new Precision(15, 15, 7)),
new XMathFunction("Rsqrt" , "float" , new Precision(15, 6, 7)),
new XMathFunction("Rsqrt" , "double", new Precision(15, 15, 15)),
};
#>
Expand Down
66 changes: 24 additions & 42 deletions Src/ILGPU.Algorithms/PTX/PTXContext.Generated.tt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU Algorithms
// Copyright (c) 2019-2021 ILGPU Project
// Copyright (c) 2019-2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXContext.Generated.tt/PTXContext.Generated.cs
Expand All @@ -18,34 +18,34 @@
<#@ output extension=".cs" #>
<#
var hardwareMathFunctions =
new ValueTuple<ValueTuple<string, Type, string, string>, string>[]
new ValueTuple<string, Type, string, string>[]
{
( UnaryMathFunctions[10], null ), // IsInfinity
( UnaryMathFunctions[11], null ), // IsInfinity
( UnaryMathFunctions[12], null ), // IsNaN
( UnaryMathFunctions[13], null ), // IsNaN

( UnaryMathFunctions[18], null ), // Rcp
( UnaryMathFunctions[19], null ), // Rcp

( UnaryMathFunctions[20], null ), // Sqrt
( UnaryMathFunctions[21], null ), // Sqrt

( UnaryMathFunctions[24], null ), // Sin
( UnaryMathFunctions[30], null ), // Cos

( UnaryMathFunctions[16], null ), // Exp2

( UnaryMathFunctions[8], null ), // Log2

( UnaryMathFunctions[40], "SM_75" ), // TanH
UnaryMathFunctions[10], // IsInfinity
UnaryMathFunctions[11], // IsInfinity
UnaryMathFunctions[12], // IsNaN
UnaryMathFunctions[13], // IsNaN
UnaryMathFunctions[18], // Rcp
UnaryMathFunctions[19], // Rcp
UnaryMathFunctions[20], // Sqrt
UnaryMathFunctions[21], // Sqrt
UnaryMathFunctions[24], // Sin
UnaryMathFunctions[30], // Cos
UnaryMathFunctions[16], // Exp2
UnaryMathFunctions[8], // Log2
UnaryMathFunctions[40], // TanH
};
var unaryMathFunctions = UnaryMathFunctions.Where(t =>
!hardwareMathFunctions.Any(t2 => {
var functionName = t.Item1;
var dataType = t.Item2;
var hardwareFunctionName = t2.Item1.Item1;
var hardwareDataType = t2.Item1.Item2;
var hardwareFunctionName = t2.Item1;
var hardwareDataType = t2.Item2;
return functionName == hardwareFunctionName && dataType == hardwareDataType;
}));
var binaryMathFunctions = BinaryMathFunctions;
Expand Down Expand Up @@ -87,29 +87,11 @@ namespace ILGPU.Algorithms.PTX
typeof(<#= type #>)));
<# } #>

<# foreach (var ((name, type, kind, basicType), sm) in hardwareMathFunctions) { #>
<#
if (string.IsNullOrWhiteSpace(sm)) {
// Register hardware intrinsic
#>
<# foreach (var (name, type, kind, basicType) in hardwareMathFunctions) { #>
manager.RegisterUnaryArithmetic(
UnaryArithmeticKind.<#= kind #>,
BasicValueType.<#= basicType #>,
MathCodeGeneratorIntrinsic);
<#
} else {
// Register software fallback first, so that it gets replaced
// by the specialized hardware intrinsic.
#>
manager.RegisterUnaryArithmetic(
UnaryArithmeticKind.<#= kind #>,
BasicValueType.<#= basicType #>,
GetMathIntrinsic("<#= name #>", typeof(<#= type #>)));
manager.RegisterUnaryArithmetic(
UnaryArithmeticKind.<#= kind #>,
BasicValueType.<#= basicType #>,
GetMathCodeGeneratorIntrinsic(CudaArchitecture.<#= sm #>));
<# } #>
<# } #>

<# foreach (var functionName in xmathUnaryRedirects) { #>
Expand Down
25 changes: 9 additions & 16 deletions Src/ILGPU.Algorithms/PTX/PTXContext.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU Algorithms
// Copyright (c) 2019-2023 ILGPU Project
// Copyright (c) 2019-2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXContext.cs
Expand Down Expand Up @@ -46,7 +46,9 @@ static partial class PTXContext
private static readonly PTXIntrinsic MathCodeGeneratorIntrinsic =
new PTXIntrinsic(
MathCodeGenerator,
IntrinsicImplementationMode.GenerateCode)
IntrinsicImplementationMode.GenerateCode,
null,
maxArchitecture: PTXLibDevicePtx.MinArchtecture)
.ThrowIfNull();

/// <summary>
Expand All @@ -59,19 +61,6 @@ static partial class PTXContext
/// </summary>
internal static readonly Type PTXWarpExtensionsType = typeof(PTXWarpExtensions);

/// <summary>
/// Resolves a PTX code generator for the given math-function configuration.
/// </summary>
/// <param name="minArchitecture">The target/minimum architecture.</param>
/// <returns>The resolved intrinsic representation.</returns>
private static PTXIntrinsic GetMathCodeGeneratorIntrinsic(
CudaArchitecture minArchitecture) =>
new PTXIntrinsic(
PTXMathType,
nameof(PTXMath.GenerateMathIntrinsic),
IntrinsicImplementationMode.GenerateCode,
minArchitecture);

/// <summary>
/// Resolves a PTX intrinsic for the given math-function configuration.
/// </summary>
Expand All @@ -87,7 +76,11 @@ private static PTXIntrinsic GetMathIntrinsic(string name, params Type[] types)
types,
null)
.ThrowIfNull();
return new PTXIntrinsic(targetMethod, IntrinsicImplementationMode.Redirect);
return new PTXIntrinsic(
targetMethod,
IntrinsicImplementationMode.Redirect,
null,
maxArchitecture: PTXLibDevicePtx.MinArchtecture);
}

/// <summary>
Expand Down
42 changes: 41 additions & 1 deletion Src/ILGPU/Backends/PTX/PTXIntrinsic.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU
// Copyright (c) 2019-2021 ILGPU Project
// Copyright (c) 2019-2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXIntrinsic.cs
Expand Down Expand Up @@ -52,6 +52,46 @@ public PTXIntrinsic(MethodInfo targetMethod, IntrinsicImplementationMode mode)
mode)
{ }

/// <summary>
/// Creates a PTX intrinsic for <paramref name="targetMethod"/> that can handle
/// every architecture newer than or equal to <paramref name="minArchitecture"/>.
/// </summary>
/// <param name="targetMethod">The method that implements this intrinsic.</param>
/// <param name="mode">The code-generation mode forwarded to the base class.</param>
/// <param name="minArchitecture">
/// The target/minimum architecture stored in <see cref="MinArchitecture"/>.
/// </param>
public PTXIntrinsic(
    MethodInfo targetMethod,
    IntrinsicImplementationMode mode,
    CudaArchitecture minArchitecture)
    : base(BackendType.PTX, targetMethod, mode) =>
    MinArchitecture = minArchitecture;

/// <summary>
/// Creates a PTX intrinsic for <paramref name="targetMethod"/> with an optional
/// minimum and an optional maximum architecture.
/// </summary>
/// <param name="targetMethod">The method that implements this intrinsic.</param>
/// <param name="mode">The code-generation mode forwarded to the base class.</param>
/// <param name="minArchitecture">
/// The target/minimum architecture (may be null).
/// </param>
/// <param name="maxArchitecture">
/// The max architecture (exclusive, may be null).
/// </param>
public PTXIntrinsic(
    MethodInfo targetMethod,
    IntrinsicImplementationMode mode,
    CudaArchitecture? minArchitecture,
    CudaArchitecture? maxArchitecture)
    : base(BackendType.PTX, targetMethod, mode)
{
    // Store both bounds exactly as supplied; no validation happens here.
    (MinArchitecture, MaxArchitecture) = (minArchitecture, maxArchitecture);
}

/// <summary>
/// Constructs a new PTX intrinsic that can handle all architectures.
/// </summary>
Expand Down
95 changes: 94 additions & 1 deletion Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.tt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU
// Copyright (c) 2016-2021 ILGPU Project
// Copyright (c) 2016-2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXIntrinsics.Generated.tt/PTXIntrinsics.Generated.cs
Expand Down Expand Up @@ -35,6 +35,61 @@ var fp16Ops = new (string, string, string, string)[]

("Ternary", "MultiplyAdd", "FmaFP32", "SM_53"),
};

// Table of unary math operations consumed by RegisterMathFunctions.
// Each entry is (kind, methodName, type):
//   kind       - member name emitted after "UnaryArithmeticKind."
//   methodName - member name emitted inside "nameof(LibDevice.…)"
//   type       - type information used for BasicValueType and typeof(…)
// The same list of operations appears twice, once per floating-point type.
// NOTE(review): FloatTypes[2] / FloatTypes[1] are presumably the 64-bit and
// 32-bit float types (or vice versa) - confirm against the FloatTypes table.
var unaryMathFunctions = new (string, string, TypeInformation)[]
{
// Entries registered for FloatTypes[2].
("AcosF", "Acos", FloatTypes[2]),
("AsinF", "Asin", FloatTypes[2]),
("AtanF", "Atan", FloatTypes[2]),
("CeilingF", "Ceil", FloatTypes[2]),
("CosF", "Cos", FloatTypes[2]),
("CoshF", "Cosh", FloatTypes[2]),
("ExpF", "Exp", FloatTypes[2]),
("Exp2F", "Exp2", FloatTypes[2]),
("FloorF", "Floor", FloatTypes[2]),
("LogF", "Log", FloatTypes[2]),
("Log2F", "Log2", FloatTypes[2]),
("Log10F", "Log10", FloatTypes[2]),
("RsqrtF", "Rsqrt", FloatTypes[2]),
("SinF", "Sin", FloatTypes[2]),
("SinhF", "Sinh", FloatTypes[2]),
("SqrtF", "Sqrt", FloatTypes[2]),
("TanF", "Tan", FloatTypes[2]),
("TanhF", "Tanh", FloatTypes[2]),

// The same operations registered for FloatTypes[1].
("AcosF", "Acos", FloatTypes[1]),
("AsinF", "Asin", FloatTypes[1]),
("AtanF", "Atan", FloatTypes[1]),
("CeilingF", "Ceil", FloatTypes[1]),
("CosF", "Cos", FloatTypes[1]),
("CoshF", "Cosh", FloatTypes[1]),
("ExpF", "Exp", FloatTypes[1]),
("Exp2F", "Exp2", FloatTypes[1]),
("FloorF", "Floor", FloatTypes[1]),
("LogF", "Log", FloatTypes[1]),
("Log2F", "Log2", FloatTypes[1]),
("Log10F", "Log10", FloatTypes[1]),
("RsqrtF", "Rsqrt", FloatTypes[1]),
("SinF", "Sin", FloatTypes[1]),
("SinhF", "Sinh", FloatTypes[1]),
("SqrtF", "Sqrt", FloatTypes[1]),
("TanF", "Tan", FloatTypes[1]),
("TanhF", "Tanh", FloatTypes[1]),
};

// Table of binary math operations consumed by RegisterMathFunctions.
// Each entry is (kind, methodName, baseClass, type):
//   kind       - member name emitted after "BinaryArithmeticKind."
//   methodName - name of the implementing method
//   baseClass  - class that declares the method; null selects LibDevice
//   type       - type information used for BasicValueType and for both
//                typeof(…) parameter types
// NOTE(review): "Atan2F" resolves to a two-parameter LibDevice.Atan - confirm
// that such an overload exists.
var binaryMathFunctions = new (string, string, string, TypeInformation)[]
{
// Entries registered for FloatTypes[2].
("Atan2F", "Atan", null, FloatTypes[2]),
("BinaryLogF", "Log", "IntrinsicMath.BinaryLog", FloatTypes[2]),
("PowF", "Pow", null, FloatTypes[2]),
("Rem", "Fmod", null, FloatTypes[2]),

// The same operations registered for FloatTypes[1].
("Atan2F", "Atan", null, FloatTypes[1]),
("BinaryLogF", "Log", "IntrinsicMath.BinaryLog", FloatTypes[1]),
("PowF", "Pow", null, FloatTypes[1]),
("Rem", "Fmod", null, FloatTypes[1]),
};

#>
using ILGPU.IR.Intrinsics;
using ILGPU.IR.Values;
Expand Down Expand Up @@ -157,5 +212,43 @@ namespace ILGPU.Backends.PTX
}

#endregion

#region Math

/// <summary>
/// Registers all Math intrinsics with the given manager.
/// </summary>
/// <remarks>
/// The body is generated from the template tables: one unary registration per
/// entry of the unary table and one binary registration per entry of the binary
/// table. Unary entries are bound to a LibDevice method taking a single value of
/// the entry's type. Binary entries are bound to a method taking two values of
/// the entry's type, declared either on LibDevice or on the class named by the
/// table entry.
/// </remarks>
/// <param name="manager">The target implementation manager.</param>
private static void RegisterMathFunctions(IntrinsicImplementationManager manager)
{
<# foreach (var (kind, methodName, type) in unaryMathFunctions) { #>
manager.RegisterUnaryArithmetic(
UnaryArithmeticKind.<#= kind #>,
BasicValueType.<#= type.GetBasicValueType() #>,
CreateMathIntrinsic(
nameof(LibDevice.<#= methodName #>),
typeof(<#= type.Type #>)));
<# } #>

<# foreach (var (kind, methodName, baseClass, type) in binaryMathFunctions) { #>
manager.RegisterBinaryArithmetic(
BinaryArithmeticKind.<#= kind #>,
BasicValueType.<#= type.GetBasicValueType() #>,
<# if (baseClass == null) { #>
CreateMathIntrinsic(
nameof(LibDevice.<#= methodName #>),
typeof(<#= type.Type #>),
typeof(<#= type.Type #>)));
<# } else { #>
CreateMathIntrinsic(
typeof(<#= baseClass #>),
nameof(<#= baseClass #>.<#= methodName #>),
typeof(<#= type.Type #>),
typeof(<#= type.Type #>)));
<# } #>
<# } #>
}

#endregion
}
}
Loading

0 comments on commit 37af1f7

Please sign in to comment.