From 57f9d4bc4b2cbc6c3eb759de007a7cadaaad343f Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 12 Jan 2024 12:15:31 -0800 Subject: [PATCH 1/4] Adding vectorized implementations of Log to Vector64/128/256/512 --- .../System/Runtime/Intrinsics/Vector128.cs | 32 ++ .../System/Runtime/Intrinsics/Vector256.cs | 32 ++ .../System/Runtime/Intrinsics/Vector512.cs | 32 ++ .../src/System/Runtime/Intrinsics/Vector64.cs | 44 +++ .../System/Runtime/Intrinsics/VectorMath.cs | 284 +++++++++++++++++- .../tests/Vectors/Vector128Tests.cs | 16 + .../tests/Vectors/Vector256Tests.cs | 16 + .../tests/Vectors/Vector512Tests.cs | 16 + .../tests/Vectors/Vector64Tests.cs | 18 +- .../tests/Vectors/VectorTestMemberData.cs | 94 ++++++ 10 files changed, 581 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index ea8cf23dd06ecb..0342f1b144aac6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -1781,6 +1781,38 @@ internal static Vector128 LoadUnsafe(ref char source) => internal static Vector128 LoadUnsafe(ref char source, nuint elementOffset) => LoadUnsafe(ref Unsafe.As(ref source), elementOffset); + /// + public static Vector128 Log(Vector128 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.LogDouble, Vector128, Vector128>(vector); + } + else + { + return Create( + Vector64.Log(vector._lower), + Vector64.Log(vector._upper) + ); + } + } + + /// + public static Vector128 Log(Vector128 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.LogSingle, Vector128, Vector128>(vector); + } + else + { + return Create( + Vector64.Log(vector._lower), + Vector64.Log(vector._upper) + ); + } + } + /// public static Vector128 Log2(Vector128 vector) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index 7d70941d31b8a2..9a5422e2385187 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -1755,6 +1755,38 @@ internal static Vector256 LoadUnsafe(ref char source) => internal static Vector256 LoadUnsafe(ref char source, nuint elementOffset) => LoadUnsafe(ref Unsafe.As(ref source), elementOffset); + /// + public static Vector256 Log(Vector256 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.LogDouble, Vector256, Vector256>(vector); + } + else + { + return Create( + Vector128.Log(vector._lower), + Vector128.Log(vector._upper) + ); + } + } + + /// + public static Vector256 Log(Vector256 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.LogSingle, Vector256, Vector256>(vector); + } + else + { + return Create( + Vector128.Log(vector._lower), + Vector128.Log(vector._upper) + ); + } + } + /// public static Vector256 Log2(Vector256 vector) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index b926ff67edf662..42685879ebb05e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -1806,6 +1806,38 @@ internal static Vector512 LoadUnsafe(ref char source) => internal static Vector512 LoadUnsafe(ref char source, nuint elementOffset) => LoadUnsafe(ref Unsafe.As(ref source), elementOffset); + /// + public static Vector512 Log(Vector512 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.LogDouble, Vector512, Vector512>(vector); + } + else + { + return Create( + Vector256.Log(vector._lower), + Vector256.Log(vector._upper) + ); + } + } + + /// + public static Vector512 Log(Vector512 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.LogSingle, Vector512, Vector512>(vector); + } + else + { + return Create( + Vector256.Log(vector._lower), + Vector256.Log(vector._upper) + ); + } + } + /// public static Vector512 Log2(Vector512 vector) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 79de8e2ad2ef3b..e3e4f8b3b40b02 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -1571,6 +1571,50 @@ public static Vector64 LoadUnsafe(ref readonly T source, nuint elementOffs return Unsafe.ReadUnaligned>(in address); } + internal static Vector64 Log(Vector64 vector) + where T : ILogarithmicFunctions + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Vector64.Count; index++) + { + T value = T.Log(vector.GetElement(index)); + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// Computes the log of each element in a vector. + /// The vector that will have its log computed. + /// A vector whose elements are the log of the elements in . + public static Vector64 Log(Vector64 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.LogDouble, Vector64, Vector64>(vector); + } + else + { + return Log(vector); + } + } + + /// Computes the log of each element in a vector. + /// The vector that will have its log computed. + /// A vector whose elements are the log of the elements in . + public static Vector64 Log(Vector64 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.LogSingle, Vector64, Vector64>(vector); + } + else + { + return Log(vector); + } + } + internal static Vector64 Log2(Vector64 vector) where T : ILogarithmicFunctions { diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs index 8cdb1d91010bca..20e5cf83bc47a1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs @@ -7,6 +7,288 @@ namespace System.Runtime.Intrinsics { internal static class VectorMath { + public static TVectorDouble LogDouble(TVectorDouble x) + where TVectorDouble : unmanaged, ISimdVector + where TVectorInt64 : unmanaged, ISimdVector + where TVectorUInt64 : unmanaged, ISimdVector + { + // This code is based on `vrd2_log` from amd/aocl-libm-ose + // Copyright (C) 2018-2020 Advanced Micro Devices, Inc. All rights reserved. + // + // Licensed under the BSD 3-Clause "New" or "Revised" License + // See THIRD-PARTY-NOTICES.TXT for the full license text + + // Reduce x into the form: + // x = (-1)^s*2^n*m + // s will be always zero, as log is defined for positive numbers + // n is an integer known as the exponent + // m is mantissa + // + // x is reduced such that the mantissa, m lies in [2/3,4/3] + // x = 2^n*m where m is in [2/3,4/3] + // log(x) = log(2^n*m) We have log(a*b) = log(a)+log(b) + // = log(2^n) + log(m) We have log(a^n) = n*log(a) + // = n*log(2) + log(m) + // = n*log(2) + log(1+(m-1)) + // = n*log(2) + log(1+f) Where f = m-1 + // = n*log(2) + log1p(f) f lies in [-1/3,+1/3] + // + // Thus we have : + // log(x) = n*log(2) + log1p(f) + // In the above, the first term n*log(2), n can be calculated by using right shift operator and the value of log(2) + // is known and is stored as a constant + // The second term log1p(F) is approximated by using a polynomial + + const ulong V_MIN = double.SmallestNormalBits; + const ulong V_MAX = double.PositiveInfinityBits; + const ulong V_MSK = 0x000FFFFF_FFFFFFFF; // (1 << 52) - 1 + const ulong V_OFF = 0x3FE55555_55555555; // 2.0 / 3.0 + + const double LN2_HEAD = 0.693359375; + const double LN2_TAIL = -0.00021219444005469057; + + const double C02 = -0.499999999999999560; + const double C03 = +0.333333333333414750; + const double C04 = -0.250000000000297430; + const double C05 = +0.199999999975985220; + const double C06 = -0.166666666608919500; + const double C07 = +0.142857145600277100; + const double C08 = -0.125000005127831270; + const double C09 = +0.111110952357159440; + const double C10 = -0.099999750495501240; + const double C11 = +0.090914349823462390; + const double C12 = -0.083340600527551860; + const double C13 = +0.076817603328311300; + const double C14 = -0.071296718946287310; + const double C15 = +0.067963465211535730; + const double C16 = -0.063995035098960040; + const double C17 = +0.049370587082412105; + const double C18 = -0.045370170994891980; + const double C19 = +0.088970636003577750; + const double C20 = -0.086906174116908760; + + TVectorDouble specialResult = x; + + // x is zero, subnormal, infinity, or NaN + TVectorUInt64 specialMask = TVectorUInt64.GreaterThanOrEqual(Unsafe.BitCast(x) - TVectorUInt64.Create(V_MIN), TVectorUInt64.Create(V_MAX - V_MIN)); + + if (specialMask != TVectorUInt64.Zero) + { + TVectorInt64 xBits = Unsafe.BitCast(x); + + // (x < 0) ? float.NaN : x + TVectorDouble lessThanZeroMask = Unsafe.BitCast(TVectorInt64.LessThan(xBits, TVectorInt64.Zero)); + + specialResult = TVectorDouble.ConditionalSelect( + lessThanZeroMask, + TVectorDouble.Create(double.NaN), + specialResult + ); + + // double.IsZero(x) ? double.NegativeInfinity : x + TVectorDouble zeroMask = Unsafe.BitCast(TVectorInt64.Equals(xBits << 1, TVectorInt64.Zero)); + + specialResult = TVectorDouble.ConditionalSelect( + zeroMask, + TVectorDouble.Create(double.NegativeInfinity), + specialResult + ); + + // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x) + TVectorDouble temp = zeroMask + | lessThanZeroMask + | Unsafe.BitCast(TVectorInt64.GreaterThanOrEqual(xBits, TVectorInt64.Create(double.PositiveInfinityBits))); + + // subnormal + TVectorDouble subnormalMask = TVectorDouble.AndNot(Unsafe.BitCast(specialMask), temp); + + // multiply by 2^52, then normalize + x = TVectorDouble.ConditionalSelect( + subnormalMask, + Unsafe.BitCast(Unsafe.BitCast(x * 4503599627370496.0) - TVectorUInt64.Create(52ul << 52)), + x + ); + + specialMask = Unsafe.BitCast(temp); + } + + // Reduce the mantissa to [+2/3, +4/3] + TVectorUInt64 vx = Unsafe.BitCast(x) - TVectorUInt64.Create(V_OFF); + TVectorDouble n = ConvertToDouble(Unsafe.BitCast(vx) >> 52); + vx = (vx & TVectorUInt64.Create(V_MSK)) + TVectorUInt64.Create(V_OFF); + + // Adjust the mantissa to [-1/3, +1/3] + TVectorDouble r = Unsafe.BitCast(vx) - TVectorDouble.One; + + TVectorDouble r02 = r * r; + TVectorDouble r04 = r02 * r02; + TVectorDouble r08 = r04 * r04; + TVectorDouble r16 = r08 * r08; + + // Compute log(x + 1) using Polynomial approximation + // C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20) + + TVectorDouble poly = (((r04 * C20) + + ((((r * C19) + TVectorDouble.Create(C18)) * r02) + + ((r * C17) + TVectorDouble.Create(C16)))) * r16) + + (((((((r * C15) + TVectorDouble.Create(C14)) * r02) + + ((r * C13) + TVectorDouble.Create(C12))) * r04) + + ((((r * C11) + TVectorDouble.Create(C10)) * r02) + + ((r * C09) + TVectorDouble.Create(C08)))) * r08) + + (((((r * C07) + TVectorDouble.Create(C06)) * r02) + + ((r * C05) + TVectorDouble.Create(C04))) * r04) + + ((((r * C03) + TVectorDouble.Create(C02)) * r02) + r); + + return TVectorDouble.ConditionalSelect( + Unsafe.BitCast(specialMask), + specialResult, + (n * LN2_HEAD) + ((n * LN2_TAIL) + poly) + ); + } + + public static TVectorSingle LogSingle(TVectorSingle x) + where TVectorSingle : unmanaged, ISimdVector + where TVectorInt32 : unmanaged, ISimdVector + where TVectorUInt32 : unmanaged, ISimdVector + { + // This code is based on `vrs4_logf` from amd/aocl-libm-ose + // Copyright (C) 2018-2019 Advanced Micro Devices, Inc. All rights reserved. + // + // Licensed under the BSD 3-Clause "New" or "Revised" License + // See THIRD-PARTY-NOTICES.TXT for the full license text + + // Spec: + // logf(x) + // = logf(x) if x ∈ F and x > 0 + // = x if x = qNaN + // = 0 if x = 1 + // = -inf if x = (-0, 0} + // = NaN otherwise + // + // Assumptions/Expectations + // - ULP is derived to be << 4 (always) + // - Some FPU Exceptions may not be available + // - Performance is at least 3x + // + // Implementation Notes: + // 1. Range Reduction: + // x = 2^n*(1+f) .... (1) + // where n is exponent and is an integer + // (1+f) is mantissa ∈ [1,2). i.e., 1 ≤ 1+f < 2 .... (2) + // + // From (1), taking log on both sides + // log(x) = log(2^n * (1+f)) + // = log(2^n) + log(1+f) + // = n*log(2) + log(1+f) .... (3) + // + // let z = 1 + f + // log(z) = log(k) + log(z) - log(k) + // log(z) = log(kz) - log(k) + // + // From (2), range of z is [1, 2) + // by simply dividing range by 'k', z is in [1/k, 2/k) .... (4) + // Best choice of k is the one which gives equal and opposite values + // at extrema +- -+ + // 1 | 2 | + // --- - 1 = - |--- - 1 | + // k | k | .... (5) + // +- -+ + // + // Solving for k, k = 3/2, + // From (4), using 'k' value, range is therefore [-0.3333, 0.3333] + // + // 2. Polynomial Approximation: + // More information refer to tools/sollya/vrs4_logf.sollya + // + // 7th Deg - Error abs: 0x1.04c4ac98p-22 rel: 0x1.2216e6f8p-19 + // 6th Deg - Error abs: 0x1.179e97d8p-19 rel: 0x1.db676c1p-17 + + const uint V_MIN = 0x00800000; + const uint V_MAX = 0x7F800000; + const uint V_MASK = 0x007FFFFF; + const uint V_OFF = 0x3F2AAAAB; + + const float V_LN2 = 0.6931472f; + + const float C0 = 0.0f; + const float C1 = 1.0f; + const float C2 = -0.5000001f; + const float C3 = 0.33332965f; + const float C4 = -0.24999046f; + const float C5 = 0.20018855f; + const float C6 = -0.16700386f; + const float C7 = 0.13902695f; + const float C8 = -0.1197452f; + const float C9 = 0.14401625f; + const float C10 = -0.13657966f; + + TVectorSingle specialResult = x; + + // x is subnormal or infinity or NaN + TVectorUInt32 specialMask = TVectorUInt32.GreaterThanOrEqual(Unsafe.BitCast(x) - TVectorUInt32.Create(V_MIN), TVectorUInt32.Create(V_MAX - V_MIN)); + + if (specialMask != TVectorUInt32.Zero) + { + // float.IsZero(x) ? float.NegativeInfinity : x + TVectorSingle zeroMask = TVectorSingle.Equals(x, TVectorSingle.Zero); + + specialResult = TVectorSingle.ConditionalSelect( + zeroMask, + TVectorSingle.Create(float.NegativeInfinity), + specialResult + ); + + // (x < 0) ? float.NaN : x + TVectorSingle lessThanZeroMask = TVectorSingle.LessThan(x, TVectorSingle.Zero); + + specialResult = TVectorSingle.ConditionalSelect( + lessThanZeroMask, + TVectorSingle.Create(float.NaN), + specialResult + ); + + // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x) + TVectorSingle temp = zeroMask + | lessThanZeroMask + | ~TVectorSingle.Equals(x, x) + | TVectorSingle.Equals(x, TVectorSingle.Create(float.PositiveInfinity)); + + // subnormal + TVectorSingle subnormalMask = TVectorSingle.AndNot(Unsafe.BitCast(specialMask), temp); + + x = TVectorSingle.ConditionalSelect( + subnormalMask, + Unsafe.BitCast(Unsafe.BitCast(x * 8388608.0f) - TVectorUInt32.Create(23u << 23)), + x + ); + + specialMask = Unsafe.BitCast(temp); + } + + TVectorUInt32 vx = Unsafe.BitCast(x) - TVectorUInt32.Create(V_OFF); + TVectorSingle n = ConvertToSingle(Unsafe.BitCast(vx) >> 23); + + vx = (vx & TVectorUInt32.Create(V_MASK)) + TVectorUInt32.Create(V_OFF); + + TVectorSingle r = Unsafe.BitCast(vx) - TVectorSingle.Create(1.0f); + + TVectorSingle r2 = r * r; + TVectorSingle r4 = r2 * r2; + TVectorSingle r8 = r4 * r4; + + TVectorSingle q = (TVectorSingle.Create(C10) * r2 + (TVectorSingle.Create(C9) * r + TVectorSingle.Create(C8))) + * r8 + (((TVectorSingle.Create(C7) * r + TVectorSingle.Create(C6)) + * r2 + (TVectorSingle.Create(C5) * r + TVectorSingle.Create(C4))) + * r4 + ((TVectorSingle.Create(C3) * r + TVectorSingle.Create(C2)) + * r2 + (TVectorSingle.Create(C1) * r + TVectorSingle.Create(C0)))); + + return TVectorSingle.ConditionalSelect( + Unsafe.BitCast(specialMask), + specialResult, + n * TVectorSingle.Create(V_LN2) + q + ); + } + public static TVectorDouble Log2Double(TVectorDouble x) where TVectorDouble : unmanaged, ISimdVector where TVectorInt64 : unmanaged, ISimdVector @@ -40,7 +322,7 @@ public static TVectorDouble Log2Double() Assert.Equal((Vector128)methodInfo.Invoke(null, null), Vector128.Create(T.One)); } + [Theory] + [MemberData(nameof(VectorTestMemberData.LogDouble), MemberType = typeof(VectorTestMemberData))] + public void LogDoubleTest(double value, double expectedResult, double variance) + { + Vector128 actualResult = Vector128.Log(Vector128.Create(value)); + AssertEqual(Vector128.Create(expectedResult), actualResult, Vector128.Create(variance)); + } + + [Theory] + [MemberData(nameof(VectorTestMemberData.LogSingle), MemberType = typeof(VectorTestMemberData))] + public void LogSingleTest(float value, float expectedResult, float variance) + { + Vector128 actualResult = Vector128.Log(Vector128.Create(value)); + AssertEqual(Vector128.Create(expectedResult), actualResult, Vector128.Create(variance)); + } + [Theory] [MemberData(nameof(VectorTestMemberData.Log2Double), MemberType = typeof(VectorTestMemberData))] public void Log2DoubleTest(double value, double expectedResult, double variance) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs index 0f61b9d50b641b..a74bf79d65dea4 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs @@ -5702,6 +5702,22 @@ private static void TestGetOne() Assert.Equal((Vector256)methodInfo.Invoke(null, null), Vector256.Create(T.One)); } + [Theory] + [MemberData(nameof(VectorTestMemberData.LogDouble), MemberType = typeof(VectorTestMemberData))] + public void LogDoubleTest(double value, double expectedResult, double variance) + { + Vector256 actualResult = Vector256.Log(Vector256.Create(value)); + AssertEqual(Vector256.Create(expectedResult), actualResult, Vector256.Create(variance)); + } + + [Theory] + [MemberData(nameof(VectorTestMemberData.LogSingle), MemberType = typeof(VectorTestMemberData))] + public void LogSingleTest(float value, float expectedResult, float variance) + { + Vector256 actualResult = Vector256.Log(Vector256.Create(value)); + AssertEqual(Vector256.Create(expectedResult), actualResult, Vector256.Create(variance)); + } + [Theory] [MemberData(nameof(VectorTestMemberData.Log2Double), MemberType = typeof(VectorTestMemberData))] public void Log2DoubleTest(double value, double expectedResult, double variance) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs index 778bf2bbc3702a..01c42d277b8553 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs @@ -5134,6 +5134,22 @@ private static void TestIsNotSupported() Assert.False((bool)methodInfo.Invoke(null, null)); } + [Theory] + [MemberData(nameof(VectorTestMemberData.LogDouble), MemberType = typeof(VectorTestMemberData))] + public void LogDoubleTest(double value, double expectedResult, double variance) + { + Vector512 actualResult = Vector512.Log(Vector512.Create(value)); + AssertEqual(Vector512.Create(expectedResult), actualResult, Vector512.Create(variance)); + } + + [Theory] + [MemberData(nameof(VectorTestMemberData.LogSingle), MemberType = typeof(VectorTestMemberData))] + public void LogSingleTest(float value, float expectedResult, float variance) + { + Vector512 actualResult = Vector512.Log(Vector512.Create(value)); + AssertEqual(Vector512.Create(expectedResult), actualResult, Vector512.Create(variance)); + } + [Theory] [MemberData(nameof(VectorTestMemberData.Log2Double), MemberType = typeof(VectorTestMemberData))] public void Log2DoubleTest(double value, double expectedResult, double variance) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs index 7706630bec31d1..ec5a0e11118cec 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs @@ -4104,6 +4104,22 @@ private static void TestGetOne() Assert.Equal((Vector64)methodInfo.Invoke(null, null), Vector64.Create(T.One)); } + [Theory] + [MemberData(nameof(VectorTestMemberData.LogDouble), MemberType = typeof(VectorTestMemberData))] + public void LogDoubleTest(double value, double expectedResult, double variance) + { + Vector64 actualResult = Vector64.Log(Vector64.Create(value)); + AssertEqual(Vector64.Create(expectedResult), actualResult, Vector64.Create(variance)); + } + + [Theory] + [MemberData(nameof(VectorTestMemberData.LogSingle), MemberType = typeof(VectorTestMemberData))] + public void LogSingleTest(float value, float expectedResult, float variance) + { + Vector64 actualResult = Vector64.Log(Vector64.Create(value)); + AssertEqual(Vector64.Create(expectedResult), actualResult, Vector64.Create(variance)); + } + [Theory] [MemberData(nameof(VectorTestMemberData.Log2Double), MemberType = typeof(VectorTestMemberData))] public void Log2DoubleTest(double value, double expectedResult, double variance) @@ -4116,8 +4132,6 @@ public void Log2DoubleTest(double value, double expectedResult, double variance) [MemberData(nameof(VectorTestMemberData.Log2Single), MemberType = typeof(VectorTestMemberData))] public void Log2SingleTest(float value, float expectedResult, float variance) { - AssertExtensions.Equal(0.0f, 0.0f, 0.0f); - Vector64 actualResult = Vector64.Log2(Vector64.Create(value)); AssertEqual(Vector64.Create(expectedResult), actualResult, Vector64.Create(variance)); } diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs index c035baa8fffd3d..ec64c8849be846 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs @@ -35,6 +35,100 @@ internal static class VectorTestMemberData // use CrossPlatformMachineEpsilon * 10. private const float SingleCrossPlatformMachineEpsilon = 4.76837158e-07f; + public static IEnumerable LogDouble + { + get + { + yield return new object[] { double.NegativeInfinity, double.NaN, 0.0 }; + yield return new object[] { -3.1415926535897932, double.NaN, 0.0 }; // value: -(pi) + yield return new object[] { -2.7182818284590452, double.NaN, 0.0 }; // value: -(e) + yield return new object[] { -1.4142135623730950, double.NaN, 0.0 }; // value: -(sqrt(2)) + yield return new object[] { -1.0, double.NaN, 0.0 }; + yield return new object[] { -0.69314718055994531, double.NaN, 0.0 }; // value: -(ln(2)) + yield return new object[] { -0.43429448190325183, double.NaN, 0.0 }; // value: -(log10(e)) + yield return new object[] { -0.0, double.NegativeInfinity, 0.0 }; + yield return new object[] { double.NaN, double.NaN, 0.0 }; + yield return new object[] { 0.0, double.NegativeInfinity, 0.0 }; + yield return new object[] { 0.043213918263772250, -3.1415926535897932, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: -(pi) + yield return new object[] { 0.065988035845312537, -2.7182818284590452, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: -(e) + yield return new object[] { 0.1, -2.3025850929940457, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: -(ln(10)) + yield return new object[] { 0.20787957635076191, -1.5707963267948966, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: -(pi / 2) + yield return new object[] { 0.23629008834452270, -1.4426950408889634, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: -(log2(e)) + yield return new object[] { 0.24311673443421421, -1.4142135623730950, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: -(sqrt(2)) + yield return new object[] { 0.32355726390307110, -1.1283791670955126, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: -(2 / sqrt(pi)) + yield return new object[] { 0.36787944117144232, -1.0, 0.0f }; + yield return new object[] { 0.45593812776599624, -0.78539816339744831, DoubleCrossPlatformMachineEpsilon }; // expected: -(pi / 4) + yield return new object[] { 0.49306869139523979, -0.70710678118654752, DoubleCrossPlatformMachineEpsilon }; // expected: -(1 / sqrt(2)) + yield return new object[] { 0.5, -0.69314718055994531, DoubleCrossPlatformMachineEpsilon }; // expected: -(ln(2)) + yield return new object[] { 0.52907780826773535, -0.63661977236758134, DoubleCrossPlatformMachineEpsilon }; // expected: -(2 / pi) + yield return new object[] { 0.64772148514180065, -0.43429448190325183, DoubleCrossPlatformMachineEpsilon }; // expected: -(log10(e)) + yield return new object[] { 0.72737734929521647, -0.31830988618379067, DoubleCrossPlatformMachineEpsilon }; // expected: -(1 / pi) + yield return new object[] { 1.0, 0.0, 0.0 }; + yield return new object[] { 1.3748022274393586, 0.31830988618379067, DoubleCrossPlatformMachineEpsilon }; // expected: (1 / pi) + yield return new object[] { 1.5438734439711811, 0.43429448190325183, DoubleCrossPlatformMachineEpsilon }; // expected: (log10(e)) + yield return new object[] { 1.8900811645722220, 0.63661977236758134, DoubleCrossPlatformMachineEpsilon }; // expected: (2 / pi) + yield return new object[] { 2.0, 0.69314718055994531, DoubleCrossPlatformMachineEpsilon }; // expected: (ln(2)) + yield return new object[] { 2.0281149816474725, 0.70710678118654752, DoubleCrossPlatformMachineEpsilon }; // expected: (1 / sqrt(2)) + yield return new object[] { 2.1932800507380155, 0.78539816339744831, DoubleCrossPlatformMachineEpsilon }; // expected: (pi / 4) + yield return new object[] { 2.7182818284590452, 1.0, 0.0f }; // value: (e) + yield return new object[] { 3.0906430223107976, 1.1283791670955126, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: (2 / sqrt(pi)) + yield return new object[] { 4.1132503787829275, 1.4142135623730950, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: (sqrt(2)) + yield return new object[] { 4.2320861065570819, 1.4426950408889634, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: (log2(e)) + yield return new object[] { 4.8104773809653517, 1.5707963267948966, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: (pi / 2) + yield return new object[] { 10.0, 2.3025850929940457, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: (ln(10)) + yield return new object[] { 15.154262241479264, 2.7182818284590452, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: (e) + yield return new object[] { 23.140692632779269, 3.1415926535897932, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: (pi) + yield return new object[] { double.PositiveInfinity, double.PositiveInfinity, 0.0 }; + } + } + + public static IEnumerable LogSingle + { + get + { + yield return new object[] { float.NegativeInfinity, float.NaN, 0.0f }; + yield return new object[] { -3.14159265f, float.NaN, 0.0f }; // value: -(pi) + yield return new object[] { -2.71828183f, float.NaN, 0.0f }; // value: -(e) + yield return new object[] { -1.41421356f, float.NaN, 0.0f }; // value: -(sqrt(2)) + yield return new object[] { -1.0f, float.NaN, 0.0f }; + yield return new object[] { -0.693147181f, float.NaN, 0.0f }; // value: -(ln(2)) + yield return new object[] { -0.434294482f, float.NaN, 0.0f }; // value: -(log10(e)) + yield return new object[] { -0.0f, float.NegativeInfinity, 0.0f }; + yield return new object[] { float.NaN, float.NaN, 0.0f }; + yield return new object[] { 0.0f, float.NegativeInfinity, 0.0f }; + yield return new object[] { 0.0432139183f, -3.14159265f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: -(pi) + yield return new object[] { 0.0659880358f, -2.71828183f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: -(e) + yield return new object[] { 0.1f, -2.30258509f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: -(ln(10)) + yield return new object[] { 0.207879576f, -1.57079633f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: -(pi / 2) + yield return new object[] { 0.236290088f, -1.44269504f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: -(log2(e)) + yield return new object[] { 0.243116734f, -1.41421356f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: -(sqrt(2)) + yield return new object[] { 0.323557264f, -1.12837917f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: -(2 / sqrt(pi)) + yield return new object[] { 0.367879441f, -1.0f, 0.0f }; + yield return new object[] { 0.455938128f, -0.785398163f, SingleCrossPlatformMachineEpsilon }; // expected: -(pi / 4) + yield return new object[] { 0.493068691f, -0.707106781f, SingleCrossPlatformMachineEpsilon }; // expected: -(1 / sqrt(2)) + yield return new object[] { 0.5f, -0.693147181f, SingleCrossPlatformMachineEpsilon }; // expected: -(ln(2)) + yield return new object[] { 0.529077808f, -0.636619772f, SingleCrossPlatformMachineEpsilon }; // expected: -(2 / pi) + yield return new object[] { 0.647721485f, -0.434294482f, SingleCrossPlatformMachineEpsilon }; // expected: -(log10(e)) + yield return new object[] { 0.727377349f, -0.318309886f, SingleCrossPlatformMachineEpsilon }; // expected: -(1 / pi) + yield return new object[] { 1.0f, 0.0f, 0.0f }; + yield return new object[] { 1.37480223f, 0.318309886f, SingleCrossPlatformMachineEpsilon }; // expected: (1 / pi) + yield return new object[] { 1.54387344f, 0.434294482f, SingleCrossPlatformMachineEpsilon }; // expected: (log10(e)) + yield return new object[] { 1.89008116f, 0.636619772f, SingleCrossPlatformMachineEpsilon }; // expected: (2 / pi) + yield return new object[] { 2.0f, 0.693147181f, SingleCrossPlatformMachineEpsilon }; // expected: (ln(2)) + yield return new object[] { 2.02811498f, 0.707106781f, SingleCrossPlatformMachineEpsilon }; // expected: (1 / sqrt(2)) + yield return new object[] { 2.19328005f, 0.785398163f, SingleCrossPlatformMachineEpsilon }; // expected: (pi / 4) + yield return new object[] { 2.71828183f, 1.0f, 0.0f }; // value: (e) + yield return new object[] { 3.09064302f, 1.12837917f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: (2 / sqrt(pi)) + yield return new object[] { 4.11325038f, 1.41421356f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: (sqrt(2)) + yield return new object[] { 4.23208611f, 1.44269504f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: (log2(e)) + yield return new object[] { 4.81047738f, 1.57079633f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: (pi / 2) + yield return new object[] { 10.0f, 2.30258509f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: (ln(10)) + yield return new object[] { 15.1542622f, 2.71828183f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: (e) + yield return new object[] { 23.1406926f, 3.14159265f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: (pi) + yield return new object[] { float.PositiveInfinity, float.PositiveInfinity, 0.0f }; + } + } + public static IEnumerable Log2Double { get From 296124145427b786c9ebcd7978ee4a2fba491380 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 12 Jan 2024 12:26:33 -0800 Subject: [PATCH 2/4] Accelerate TensorPrimitives.Log for double --- .../netcore/TensorPrimitives.netcore.cs | 419 +++++++++++++++++- 1 file changed, 401 insertions(+), 18 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index 5d06bd523c6bfd..9c41234cef3f92 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -11470,6 +11470,397 @@ public static Vector512 Invoke(Vector512 t) /// T.Log(x) internal readonly struct LogOperator : IUnaryOperator where T : ILogarithmicFunctions + { + public static bool Vectorizable => (typeof(T) == typeof(double)) + || (typeof(T) == typeof(float)); + + public static T Invoke(T x) => T.Log(x); + + public static Vector128 Invoke(Vector128 x) + { +#if NET9_0_OR_GREATER + if (typeof(T) == typeof(double)) + { + return Vector128.Log(x.AsDouble()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(float)); + return Vector128.Log(x.AsSingle()).As(); + } +#else + if (typeof(T) == typeof(double)) + { + return LogOperatorDouble.Invoke(x.AsDouble()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(float)); + return LogOperatorSingle.Invoke(x.AsSingle()).As(); + } +#endif + } + + public static Vector256 Invoke(Vector256 x) + { +#if NET9_0_OR_GREATER + if (typeof(T) == typeof(double)) + { + return Vector256.Log(x.AsDouble()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(float)); + return Vector256.Log(x.AsSingle()).As(); + } +#else + if (typeof(T) == typeof(double)) + { + return LogOperatorDouble.Invoke(x.AsDouble()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(float)); + return LogOperatorSingle.Invoke(x.AsSingle()).As(); + } +#endif + } + + public static Vector512 Invoke(Vector512 x) + { +#if NET9_0_OR_GREATER + if (typeof(T) == typeof(double)) + { + return Vector512.Log(x.AsDouble()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(float)); + return Vector512.Log(x.AsSingle()).As(); + } +#else + if (typeof(T) == typeof(double)) + { + return LogOperatorDouble.Invoke(x.AsDouble()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(float)); + return LogOperatorSingle.Invoke(x.AsSingle()).As(); + } +#endif + } + } + +#if !NET9_0_OR_GREATER + /// double.Log(x) + internal readonly struct LogOperatorDouble : IUnaryOperator + { + // This code is based on `vrd2_log` from amd/aocl-libm-ose + // Copyright (C) 2018-2020 Advanced Micro Devices, Inc. All rights reserved. + // + // Licensed under the BSD 3-Clause "New" or "Revised" License + // See THIRD-PARTY-NOTICES.TXT for the full license text + + // Reduce x into the form: + // x = (-1)^s*2^n*m + // s will be always zero, as log is defined for positive numbers + // n is an integer known as the exponent + // m is mantissa + // + // x is reduced such that the mantissa, m lies in [2/3,4/3] + // x = 2^n*m where m is in [2/3,4/3] + // log(x) = log(2^n*m) We have log(a*b) = log(a)+log(b) + // = log(2^n) + log(m) We have log(a^n) = n*log(a) + // = n*log(2) + log(m) + // = n*log(2) + log(1+(m-1)) + // = n*log(2) + log(1+f) Where f = m-1 + // = n*log(2) + log1p(f) f lies in [-1/3,+1/3] + // + // Thus we have : + // log(x) = n*log(2) + log1p(f) + // In the above, the first term n*log(2), n can be calculated by using right shift operator and the value of log(2) + // is known and is stored as a constant + // The second term log1p(F) is approximated by using a polynomial + + private const ulong V_MIN = 0x00100000_00000000; // SmallestNormal + private const ulong V_MAX = 0x7FF00000_00000000; // +Infinity + private const ulong V_MSK = 0x000FFFFF_FFFFFFFF; // (1 << 52) - 1 + private const ulong V_OFF = 0x3FE55555_55555555; // 2.0 / 3.0 + + private const double LN2_HEAD = 0.693359375; + private const double LN2_TAIL = -0.00021219444005469057; + + private const double C02 = -0.499999999999999560; + private const double C03 = +0.333333333333414750; + private const double C04 = -0.250000000000297430; + private const double C05 = +0.199999999975985220; + private const double C06 = -0.166666666608919500; + private const double C07 = +0.142857145600277100; + private const double C08 = -0.125000005127831270; + private const double C09 = +0.111110952357159440; + private const double C10 = -0.099999750495501240; + private const double C11 = +0.090914349823462390; + private const double C12 = -0.083340600527551860; + private const double C13 = +0.076817603328311300; + private const double C14 = -0.071296718946287310; + private const double C15 = +0.067963465211535730; + private const double C16 = -0.063995035098960040; + private const double C17 = +0.049370587082412105; + private const double C18 = -0.045370170994891980; + private const double C19 = +0.088970636003577750; + private const double C20 = -0.086906174116908760; + + public static bool Vectorizable => true; + + public static double Invoke(double x) => double.Log(x); + + public static Vector128 Invoke(Vector128 x) + { + Vector128 specialResult = x; + + // x is zero, subnormal, infinity, or NaN + Vector128 specialMask = Vector128.GreaterThanOrEqual(x.AsUInt64() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN)); + + if (specialMask != Vector128.Zero) + { + Vector128 xBits = x.AsInt64(); + + // (x < 0) ? float.NaN : x + Vector128 lessThanZeroMask = Vector128.LessThan(xBits, Vector128.Zero).AsDouble(); + + specialResult = Vector128.ConditionalSelect( + lessThanZeroMask, + Vector128.Create(double.NaN), + specialResult + ); + + // double.IsZero(x) ? double.NegativeInfinity : x + Vector128 zeroMask = Vector128.Equals(xBits << 1, Vector128.Zero).AsDouble(); + + specialResult = Vector128.ConditionalSelect( + zeroMask, + Vector128.Create(double.NegativeInfinity), + specialResult + ); + + // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x) + Vector128 temp = zeroMask + | lessThanZeroMask + | Vector128.GreaterThanOrEqual(xBits, Vector128.Create(double.PositiveInfinity).AsInt64()).AsDouble(); + + // subnormal + Vector128 subnormalMask = Vector128.AndNot(specialMask.AsDouble(), temp); + + // multiply by 2^52, then normalize + x = Vector128.ConditionalSelect( + subnormalMask, + ((x * 4503599627370496.0).AsUInt64() - Vector128.Create(52ul << 52)).AsDouble(), + x + ); + + specialMask = temp.AsUInt64(); + } + + // Reduce the mantissa to [+2/3, +4/3] + Vector128 vx = x.AsUInt64() - Vector128.Create(V_OFF); + Vector128 n = Vector128.ConvertToDouble(vx.AsInt64() >> 52); + vx = (vx & Vector128.Create(V_MSK)) + Vector128.Create(V_OFF); + + // Adjust the mantissa to [-1/3, +1/3] + Vector128 r = vx.AsDouble() - Vector128.One; + + Vector128 r02 = r * r; + Vector128 r04 = r02 * r02; + Vector128 r08 = r04 * r04; + Vector128 r16 = r08 * r08; + + // Compute log(x + 1) using Polynomial approximation + // C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20) + + Vector128 poly = (((r04 * C20) + + ((((r * C19) + Vector128.Create(C18)) * r02) + + ((r * C17) + Vector128.Create(C16)))) * r16) + + (((((((r * C15) + Vector128.Create(C14)) * r02) + + ((r * C13) + Vector128.Create(C12))) * r04) + + ((((r * C11) + Vector128.Create(C10)) * r02) + + ((r * C09) + Vector128.Create(C08)))) * r08) + + (((((r * C07) + Vector128.Create(C06)) * r02) + + ((r * C05) + Vector128.Create(C04))) * r04) + + ((((r * C03) + Vector128.Create(C02)) * r02) + r); + + return Vector128.ConditionalSelect( + specialMask.AsDouble(), + specialResult, + (n * LN2_HEAD) + ((n * LN2_TAIL) + poly) + ); + } + + public static Vector256 Invoke(Vector256 x) + { + Vector256 specialResult = x; + + // x is zero, subnormal, infinity, or NaN + Vector256 specialMask = Vector256.GreaterThanOrEqual(x.AsUInt64() - Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN)); + + if (specialMask != Vector256.Zero) + { + Vector256 xBits = x.AsInt64(); + + // (x < 0) ? float.NaN : x + Vector256 lessThanZeroMask = Vector256.LessThan(xBits, Vector256.Zero).AsDouble(); + + specialResult = Vector256.ConditionalSelect( + lessThanZeroMask, + Vector256.Create(double.NaN), + specialResult + ); + + // double.IsZero(x) ? double.NegativeInfinity : x + Vector256 zeroMask = Vector256.Equals(xBits << 1, Vector256.Zero).AsDouble(); + + specialResult = Vector256.ConditionalSelect( + zeroMask, + Vector256.Create(double.NegativeInfinity), + specialResult + ); + + // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x) + Vector256 temp = zeroMask + | lessThanZeroMask + | Vector256.GreaterThanOrEqual(xBits, Vector256.Create(double.PositiveInfinity).AsInt64()).AsDouble(); + + // subnormal + Vector256 subnormalMask = Vector256.AndNot(specialMask.AsDouble(), temp); + + // multiply by 2^52, then normalize + x = Vector256.ConditionalSelect( + subnormalMask, + ((x * 4503599627370496.0).AsUInt64() - Vector256.Create(52ul << 52)).AsDouble(), + x + ); + + specialMask = temp.AsUInt64(); + } + + // Reduce the mantissa to [+2/3, +4/3] + Vector256 vx = x.AsUInt64() - Vector256.Create(V_OFF); + Vector256 n = Vector256.ConvertToDouble(vx.AsInt64() >> 52); + vx = (vx & Vector256.Create(V_MSK)) + Vector256.Create(V_OFF); + + // Adjust the mantissa to [-1/3, +1/3] + Vector256 r = vx.AsDouble() - Vector256.One; + + Vector256 r02 = r * r; + Vector256 r04 = r02 * r02; + Vector256 r08 = r04 * r04; + Vector256 r16 = r08 * r08; + + // Compute log(x + 1) using Polynomial approximation + // C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20) + + Vector256 poly = (((r04 * C20) + + ((((r * C19) + Vector256.Create(C18)) * r02) + + ((r * C17) + Vector256.Create(C16)))) * r16) + + (((((((r * C15) + Vector256.Create(C14)) * r02) + + ((r * C13) + Vector256.Create(C12))) * r04) + + ((((r * C11) + Vector256.Create(C10)) * r02) + + ((r * C09) + Vector256.Create(C08)))) * r08) + + (((((r * C07) + Vector256.Create(C06)) * r02) + + ((r * C05) + Vector256.Create(C04))) * r04) + + ((((r * C03) + Vector256.Create(C02)) * r02) + r); + + return Vector256.ConditionalSelect( + specialMask.AsDouble(), + specialResult, + (n * LN2_HEAD) + ((n * LN2_TAIL) + poly) + ); + } + + public static Vector512 Invoke(Vector512 x) + { + Vector512 specialResult = x; + + // x is zero, subnormal, infinity, or NaN + Vector512 specialMask = Vector512.GreaterThanOrEqual(x.AsUInt64() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN)); + + if (specialMask != Vector512.Zero) + { + Vector512 xBits = x.AsInt64(); + + // (x < 0) ? float.NaN : x + Vector512 lessThanZeroMask = Vector512.LessThan(xBits, Vector512.Zero).AsDouble(); + + specialResult = Vector512.ConditionalSelect( + lessThanZeroMask, + Vector512.Create(double.NaN), + specialResult + ); + + // double.IsZero(x) ? double.NegativeInfinity : x + Vector512 zeroMask = Vector512.Equals(xBits << 1, Vector512.Zero).AsDouble(); + + specialResult = Vector512.ConditionalSelect( + zeroMask, + Vector512.Create(double.NegativeInfinity), + specialResult + ); + + // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x) + Vector512 temp = zeroMask + | lessThanZeroMask + | Vector512.GreaterThanOrEqual(xBits, Vector512.Create(double.PositiveInfinity).AsInt64()).AsDouble(); + + // subnormal + Vector512 subnormalMask = Vector512.AndNot(specialMask.AsDouble(), temp); + + // multiply by 2^52, then normalize + x = Vector512.ConditionalSelect( + subnormalMask, + ((x * 4503599627370496.0).AsUInt64() - Vector512.Create(52ul << 52)).AsDouble(), + x + ); + + specialMask = temp.AsUInt64(); + } + + // Reduce the mantissa to [+2/3, +4/3] + Vector512 vx = x.AsUInt64() - Vector512.Create(V_OFF); + Vector512 n = Vector512.ConvertToDouble(vx.AsInt64() >> 52); + vx = (vx & Vector512.Create(V_MSK)) + Vector512.Create(V_OFF); + + // Adjust the mantissa to [-1/3, +1/3] + Vector512 r = vx.AsDouble() - Vector512.One; + + Vector512 r02 = r * r; + Vector512 r04 = r02 * r02; + Vector512 r08 = r04 * r04; + Vector512 r16 = r08 * r08; + + // Compute log(x + 1) using Polynomial approximation + // C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20) + + Vector512 poly = (((r04 * C20) + + ((((r * C19) + Vector512.Create(C18)) * r02) + + ((r * C17) + Vector512.Create(C16)))) * r16) + + (((((((r * C15) + Vector512.Create(C14)) * r02) + + ((r * C13) + Vector512.Create(C12))) * r04) + + ((((r * C11) + Vector512.Create(C10)) * r02) + + ((r * C09) + Vector512.Create(C08)))) * r08) + + (((((r * C07) + Vector512.Create(C06)) * r02) + + ((r * C05) + Vector512.Create(C04))) * r04) + + ((((r * C03) + Vector512.Create(C02)) * r02) + r); + + return Vector512.ConditionalSelect( + specialMask.AsDouble(), + specialResult, + (n * LN2_HEAD) + ((n * LN2_TAIL) + poly) + ); + } + } + + /// float.Log(x) + internal readonly struct LogOperatorSingle : IUnaryOperator { // This code is based on `vrs4_logf` from amd/aocl-libm-ose // Copyright (C) 2018-2019 Advanced Micro Devices, Inc. All rights reserved. @@ -11542,15 +11933,12 @@ public static Vector512 Invoke(Vector512 t) private const float C9 = 0.14401625f; private const float C10 = -0.13657966f; - public static bool Vectorizable => typeof(T) == typeof(float); + public static bool Vectorizable => true; - public static T Invoke(T x) => T.Log(x); + public static float Invoke(float x) => float.Log(x); - public static Vector128 Invoke(Vector128 t) + public static Vector128 Invoke(Vector128 x) { - Debug.Assert(typeof(T) == typeof(float)); - Vector128 x = t.AsSingle(); - Vector128 specialResult = x; // x is subnormal or infinity or NaN @@ -11615,14 +12003,11 @@ public static Vector128 Invoke(Vector128 t) specialMask.AsSingle(), specialResult, n * Vector128.Create(V_LN2) + q - ).As(); + ); } - public static Vector256 Invoke(Vector256 t) + public static Vector256 Invoke(Vector256 x) { - Debug.Assert(typeof(T) == typeof(float)); - Vector256 x = t.AsSingle(); - Vector256 specialResult = x; // x is subnormal or infinity or NaN @@ -11687,14 +12072,11 @@ public static Vector256 Invoke(Vector256 t) specialMask.AsSingle(), specialResult, n * Vector256.Create(V_LN2) + q - ).As(); + ); } - public static Vector512 Invoke(Vector512 t) + public static Vector512 Invoke(Vector512 x) { - Debug.Assert(typeof(T) == typeof(float)); - Vector512 x = t.AsSingle(); - Vector512 specialResult = x; // x is subnormal or infinity or NaN @@ -11759,9 +12141,10 @@ public static Vector512 Invoke(Vector512 t) specialMask.AsSingle(), specialResult, n * Vector512.Create(V_LN2) + q - ).As(); + ); } } +#endif /// T.Log2(x) internal readonly struct Log2Operator : IUnaryOperator @@ -11880,7 +12263,7 @@ public static Vector512 Invoke(Vector512 x) private const ulong V_MIN = 0x00100000_00000000; // SmallestNormal private const ulong V_MAX = 0x7FF00000_00000000; // +Infinity private const ulong V_MSK = 0x000FFFFF_FFFFFFFF; // (1 << 52) - 1 - private const ulong V_OFF = 0x3FE55555_55555555; // 2.0 / 3.0\ + private const ulong V_OFF = 0x3FE55555_55555555; // 2.0 / 3.0 private const double LN2_HEAD = 1.44269180297851562500E+00; private const double LN2_TAIL = 3.23791044778235969970E-06; From f16dc654812c11cfb4f961ca26bbde47381e6f88 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 12 Jan 2024 12:40:40 -0800 Subject: [PATCH 3/4] Ensure the ref assembly is updated to include the new Log method --- .../ref/System.Runtime.Intrinsics.cs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 2ce1c80ddd229a..1722583850be01 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -185,6 +185,8 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector128 vector, public static System.Runtime.Intrinsics.Vector128 LoadUnsafe(ref readonly T source) { throw null; } [System.CLSCompliantAttribute(false)] public static System.Runtime.Intrinsics.Vector128 LoadUnsafe(ref readonly T source, nuint elementOffset) { throw null; } + public static System.Runtime.Intrinsics.Vector128 Log(System.Runtime.Intrinsics.Vector128 vector) { throw null; } + public static System.Runtime.Intrinsics.Vector128 Log(System.Runtime.Intrinsics.Vector128 vector) { throw null; } public static System.Runtime.Intrinsics.Vector128 Log2(System.Runtime.Intrinsics.Vector128 vector) { throw null; } public static System.Runtime.Intrinsics.Vector128 Log2(System.Runtime.Intrinsics.Vector128 vector) { throw null; } public static System.Runtime.Intrinsics.Vector128 Max(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } @@ -514,6 +516,8 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector256 vector, public static System.Runtime.Intrinsics.Vector256 LoadUnsafe(ref readonly T source) { throw null; } [System.CLSCompliantAttribute(false)] public static System.Runtime.Intrinsics.Vector256 LoadUnsafe(ref readonly T source, nuint elementOffset) { throw null; } + public static System.Runtime.Intrinsics.Vector256 Log(System.Runtime.Intrinsics.Vector256 vector) { throw null; } + public static System.Runtime.Intrinsics.Vector256 Log(System.Runtime.Intrinsics.Vector256 vector) { throw null; } public static System.Runtime.Intrinsics.Vector256 Log2(System.Runtime.Intrinsics.Vector256 vector) { throw null; } public static System.Runtime.Intrinsics.Vector256 Log2(System.Runtime.Intrinsics.Vector256 vector) { throw null; } public static System.Runtime.Intrinsics.Vector256 Max(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } @@ -843,6 +847,8 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector512 vector, public static System.Runtime.Intrinsics.Vector512 LoadUnsafe(ref readonly T source) { throw null; } [System.CLSCompliantAttribute(false)] public static System.Runtime.Intrinsics.Vector512 LoadUnsafe(ref readonly T source, nuint elementOffset) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Log(System.Runtime.Intrinsics.Vector512 vector) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Log(System.Runtime.Intrinsics.Vector512 vector) { throw null; } public static System.Runtime.Intrinsics.Vector512 Log2(System.Runtime.Intrinsics.Vector512 vector) { throw null; } public static System.Runtime.Intrinsics.Vector512 Log2(System.Runtime.Intrinsics.Vector512 vector) { throw null; } public static System.Runtime.Intrinsics.Vector512 Max(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } @@ -1144,6 +1150,8 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector64 vector, public static System.Runtime.Intrinsics.Vector64 LoadUnsafe(ref readonly T source) { throw null; } [System.CLSCompliantAttribute(false)] public static System.Runtime.Intrinsics.Vector64 LoadUnsafe(ref readonly T source, nuint elementOffset) { throw null; } + public static System.Runtime.Intrinsics.Vector64 Log(System.Runtime.Intrinsics.Vector64 vector) { throw null; } + public static System.Runtime.Intrinsics.Vector64 Log(System.Runtime.Intrinsics.Vector64 vector) { throw null; } public static System.Runtime.Intrinsics.Vector64 Log2(System.Runtime.Intrinsics.Vector64 vector) { throw null; } public static System.Runtime.Intrinsics.Vector64 Log2(System.Runtime.Intrinsics.Vector64 vector) { throw null; } public static System.Runtime.Intrinsics.Vector64 Max(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } From 4514b8c98b62d892240249093099dabe5005aa83 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 12 Jan 2024 14:20:51 -0800 Subject: [PATCH 4/4] Fix the variance for one of the Log2 tests to account for the scalar fallback --- .../tests/Vectors/VectorTestMemberData.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs index ec64c8849be846..cd2a20baa374d7 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs @@ -70,7 +70,7 @@ public static IEnumerable LogDouble yield return new object[] { 2.0, 0.69314718055994531, DoubleCrossPlatformMachineEpsilon }; // expected: (ln(2)) yield return new object[] { 2.0281149816474725, 0.70710678118654752, DoubleCrossPlatformMachineEpsilon }; // expected: (1 / sqrt(2)) yield return new object[] { 2.1932800507380155, 0.78539816339744831, DoubleCrossPlatformMachineEpsilon }; // expected: (pi / 4) - yield return new object[] { 2.7182818284590452, 1.0, 0.0f }; // value: (e) + yield return new object[] { 2.7182818284590452, 1.0, DoubleCrossPlatformMachineEpsilon * 10 }; // value: (e) yield return new object[] { 3.0906430223107976, 1.1283791670955126, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: (2 / sqrt(pi)) yield return new object[] { 4.1132503787829275, 1.4142135623730950, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: (sqrt(2)) yield return new object[] { 4.2320861065570819, 1.4426950408889634, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: (log2(e)) @@ -117,7 +117,7 @@ public static IEnumerable LogSingle yield return new object[] { 2.0f, 0.693147181f, SingleCrossPlatformMachineEpsilon }; // expected: (ln(2)) yield return new object[] { 2.02811498f, 0.707106781f, SingleCrossPlatformMachineEpsilon }; // expected: (1 / sqrt(2)) yield return new object[] { 2.19328005f, 0.785398163f, SingleCrossPlatformMachineEpsilon }; // expected: (pi / 4) - yield return new object[] { 2.71828183f, 1.0f, 0.0f }; // value: (e) + yield return new object[] { 2.71828183f, 1.0f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (e) yield return new object[] { 3.09064302f, 1.12837917f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: (2 / sqrt(pi)) yield return new object[] { 4.11325038f, 1.41421356f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: (sqrt(2)) yield return new object[] { 4.23208611f, 1.44269504f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: (log2(e))