[vm/simarm] Fix VRECPS/VRSQRTSQS instruction implementation.

mraleph · commit-bot@chromium.org · commit 6e042efd95ed · 2019-10-15T20:27:12.000Z
This instruction handles 0.0 and infinity operands specially because otherwise it produces NaN where it should produce appropriate infinity or zero. Fixes #24399 Fixes #26675 Fixes #38844 This relands commit 3da9c34 Change-Id: I116cea3b6c27b5dc16741f9652fbbb9a3ec1194e Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/121705 Commit-Queue: Vyacheslav Egorov <vegorov@google.com> Reviewed-by: Alexander Markov <alexmarkov@google.com>
diff --git a/runtime/vm/compiler/assembler/assembler_arm_test.cc b/runtime/vm/compiler/assembler/assembler_arm_test.cc
@@ -14,6 +14,54 @@
 namespace dart {
 namespace compiler {
 
+TEST_CASE(ReciprocalOps) {
+  EXPECT_EQ(true, isinf(ReciprocalEstimate(-0.0f)));
+  EXPECT_EQ(true, signbit(ReciprocalEstimate(-0.0f)));
+  EXPECT_EQ(true, isinf(ReciprocalEstimate(0.0f)));
+  EXPECT_EQ(true, !signbit(ReciprocalEstimate(0.0f)));
+  EXPECT_EQ(true, isnan(ReciprocalEstimate(NAN)));
+  // Compare raw bit patterns so that +0.0 and -0.0 are told apart.
+#define AS_UINT32(v) (bit_cast<uint32_t, float>(v))
+#define EXPECT_BITWISE_EQ(a, b) EXPECT_EQ(AS_UINT32(a), AS_UINT32(b))
+
+  EXPECT_BITWISE_EQ(0.0f, ReciprocalEstimate(kPosInfinity));
+  EXPECT_BITWISE_EQ(-0.0f, ReciprocalEstimate(kNegInfinity));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(0.0f, kPosInfinity));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(0.0f, kNegInfinity));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(-0.0f, kPosInfinity));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(-0.0f, kNegInfinity));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kPosInfinity, 0.0f));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kNegInfinity, 0.0f));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kPosInfinity, -0.0f));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kNegInfinity, -0.0f));
+  EXPECT_EQ(true, isnan(ReciprocalStep(NAN, 1.0f)));
+  EXPECT_EQ(true, isnan(ReciprocalStep(1.0f, NAN)));
+
+  EXPECT_EQ(true, isnan(ReciprocalSqrtEstimate(-1.0f)));
+  EXPECT_EQ(true, isnan(ReciprocalSqrtEstimate(kNegInfinity)));
+  // Negative zero is not a negative operand: it yields -infinity, not NaN.
+  EXPECT_EQ(true, isinf(ReciprocalSqrtEstimate(-0.0f)));
+  EXPECT_EQ(true, signbit(ReciprocalSqrtEstimate(-0.0f)));
+  EXPECT_EQ(true, isinf(ReciprocalSqrtEstimate(0.0f)));
+  EXPECT_EQ(true, !signbit(ReciprocalSqrtEstimate(0.0f)));
+  EXPECT_EQ(true, isnan(ReciprocalSqrtEstimate(NAN)));
+  EXPECT_BITWISE_EQ(0.0f, ReciprocalSqrtEstimate(kPosInfinity));
+
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(0.0f, kPosInfinity));
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(0.0f, kNegInfinity));
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(-0.0f, kPosInfinity));
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(-0.0f, kNegInfinity));
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kPosInfinity, 0.0f));
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kNegInfinity, 0.0f));
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kPosInfinity, -0.0f));
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kNegInfinity, -0.0f));
+  EXPECT_EQ(true, isnan(ReciprocalSqrtStep(NAN, 1.0f)));
+  EXPECT_EQ(true, isnan(ReciprocalSqrtStep(1.0f, NAN)));
+
+#undef AS_UINT32
+#undef EXPECT_BITWISE_EQ
+}
+
 #define __ assembler->
 
 ASSEMBLER_TEST_GENERATE(Simple, assembler) {
@@ -3430,43 +3478,6 @@ ASSEMBLER_TEST_RUN(Vmaxqs, test) {
   }
 }
 
-// This is the same function as in the Simulator.
-static float arm_recip_estimate(float a) {
-  // From the ARM Architecture Reference Manual A2-85.
-  if (isinf(a) || (fabs(a) >= exp2f(126)))
-    return 0.0;
-  else if (a == 0.0)
-    return kPosInfinity;
-  else if (isnan(a))
-    return a;
-
-  uint32_t a_bits = bit_cast<uint32_t, float>(a);
-  // scaled = '0011 1111 1110' : a<22:0> : Zeros(29)
-  uint64_t scaled = (static_cast<uint64_t>(0x3fe) << 52) |
-                    ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
-  // result_exp = 253 - UInt(a<30:23>)
-  int32_t result_exp = 253 - ((a_bits >> 23) & 0xff);
-  ASSERT((result_exp >= 1) && (result_exp <= 252));
-
-  double scaled_d = bit_cast<double, uint64_t>(scaled);
-  ASSERT((scaled_d >= 0.5) && (scaled_d < 1.0));
-
-  // a in units of 1/512 rounded down.
-  int32_t q = static_cast<int32_t>(scaled_d * 512.0);
-  // reciprocal r.
-  double r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
-  // r in units of 1/256 rounded to nearest.
-  int32_t s = static_cast<int32_t>(256.0 * r + 0.5);
-  double estimate = static_cast<double>(s) / 256.0;
-  ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));
-
-  // result = sign : result_exp<7:0> : estimate<51:29>
-  int32_t result_bits =
-      (a_bits & 0x80000000) | ((result_exp & 0xff) << 23) |
-      ((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
-  return bit_cast<float, int32_t>(result_bits);
-}
-
 ASSEMBLER_TEST_GENERATE(Vrecpeqs, assembler) {
   if (TargetCPUFeatures::neon_supported()) {
     __ LoadSImmediate(S4, 147.0);
@@ -3483,7 +3494,7 @@ ASSEMBLER_TEST_RUN(Vrecpeqs, test) {
   if (TargetCPUFeatures::neon_supported()) {
     typedef float (*Vrecpeqs)() DART_UNUSED;
     float res = EXECUTE_TEST_CODE_FLOAT(Vrecpeqs, test->entry());
-    EXPECT_FLOAT_EQ(arm_recip_estimate(147.0), res, 0.0001f);
+    EXPECT_FLOAT_EQ(ReciprocalEstimate(147.0), res, 0.0001f);
   }
 }
 
@@ -3540,60 +3551,6 @@ ASSEMBLER_TEST_RUN(Reciprocal, test) {
   }
 }
 
-static float arm_reciprocal_sqrt_estimate(float a) {
-  // From the ARM Architecture Reference Manual A2-87.
-  if (isinf(a) || (fabs(a) >= exp2f(126)))
-    return 0.0;
-  else if (a == 0.0)
-    return kPosInfinity;
-  else if (isnan(a))
-    return a;
-
-  uint32_t a_bits = bit_cast<uint32_t, float>(a);
-  uint64_t scaled;
-  if (((a_bits >> 23) & 1) != 0) {
-    // scaled = '0 01111111101' : operand<22:0> : Zeros(29)
-    scaled = (static_cast<uint64_t>(0x3fd) << 52) |
-             ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
-  } else {
-    // scaled = '0 01111111110' : operand<22:0> : Zeros(29)
-    scaled = (static_cast<uint64_t>(0x3fe) << 52) |
-             ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
-  }
-  // result_exp = (380 - UInt(operand<30:23>) DIV 2;
-  int32_t result_exp = (380 - ((a_bits >> 23) & 0xff)) / 2;
-
-  double scaled_d = bit_cast<double, uint64_t>(scaled);
-  ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0));
-
-  double r;
-  if (scaled_d < 0.5) {
-    // range 0.25 <= a < 0.5
-
-    // a in units of 1/512 rounded down.
-    int32_t q0 = static_cast<int32_t>(scaled_d * 512.0);
-    // reciprocal root r.
-    r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
-  } else {
-    // range 0.5 <= a < 1.0
-
-    // a in units of 1/256 rounded down.
-    int32_t q1 = static_cast<int32_t>(scaled_d * 256.0);
-    // reciprocal root r.
-    r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
-  }
-  // r in units of 1/256 rounded to nearest.
-  int32_t s = static_cast<int>(256.0 * r + 0.5);
-  double estimate = static_cast<double>(s) / 256.0;
-  ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));
-
-  // result = 0 : result_exp<7:0> : estimate<51:29>
-  int32_t result_bits =
-      ((result_exp & 0xff) << 23) |
-      ((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
-  return bit_cast<float, int32_t>(result_bits);
-}
-
 ASSEMBLER_TEST_GENERATE(Vrsqrteqs, assembler) {
   if (TargetCPUFeatures::neon_supported()) {
     __ LoadSImmediate(S4, 147.0);
@@ -3611,7 +3568,7 @@ ASSEMBLER_TEST_RUN(Vrsqrteqs, test) {
   if (TargetCPUFeatures::neon_supported()) {
     typedef float (*Vrsqrteqs)() DART_UNUSED;
     float res = EXECUTE_TEST_CODE_FLOAT(Vrsqrteqs, test->entry());
-    EXPECT_FLOAT_EQ(arm_reciprocal_sqrt_estimate(147.0), res, 0.0001f);
+    EXPECT_FLOAT_EQ(ReciprocalSqrtEstimate(147.0), res, 0.0001f);
   }
 }
 
diff --git a/runtime/vm/constants_arm.cc b/runtime/vm/constants_arm.cc
@@ -7,6 +7,8 @@
 
 namespace arch_arm {
 
+using dart::bit_cast;
+
 const char* cpu_reg_names[kNumberOfCpuRegisters] = {
     "r0", "r1",  "r2", "r3", "r4", "r5", "r6", "r7",
     "r8", "ctx", "pp", "fp", "ip", "sp", "lr", "pc",
@@ -25,4 +27,116 @@ const Register CallingConventions::ArgumentRegisters[] = {R0, R1, R2, R3};
 // one element to appease MSVC.
 const FpuRegister CallingConventions::FpuArgumentRegisters[] = {Q0};
 
+float ReciprocalEstimate(float a) {
+  // FPRecipEstimate from the ARM Architecture Reference Manual A2-85.
+  if (isinf(a) || (fabs(a) >= exp2f(126)))
+    return a >= 0.0f ? 0.0f : -0.0f;  // Reciprocal underflows to signed zero.
+  else if (fabs(a) < exp2f(-126))
+    // Zero or denormal (which is flushed to zero): signed infinity.
+    return copysignf(INFINITY, a);
+  else if (isnan(a))
+    return a;
+
+  uint32_t a_bits = bit_cast<uint32_t, float>(a);
+  // scaled = '0011 1111 1110' : a<22:0> : Zeros(29)
+  uint64_t scaled = (static_cast<uint64_t>(0x3fe) << 52) |
+                    ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
+  // result_exp = 253 - UInt(a<30:23>); the guards above keep it in [1, 252].
+  int32_t result_exp = 253 - ((a_bits >> 23) & 0xff);
+  ASSERT((result_exp >= 1) && (result_exp <= 252));
+
+  double scaled_d = bit_cast<double, uint64_t>(scaled);
+  ASSERT((scaled_d >= 0.5) && (scaled_d < 1.0));
+
+  // a in units of 1/512 rounded down.
+  int32_t q = static_cast<int32_t>(scaled_d * 512.0);
+  // reciprocal r.
+  double r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
+  // r in units of 1/256 rounded to nearest.
+  int32_t s = static_cast<int32_t>(256.0 * r + 0.5);
+  double estimate = static_cast<double>(s) / 256.0;
+  ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));
+
+  // result = sign : result_exp<7:0> : estimate<51:29>
+  int32_t result_bits =
+      (a_bits & 0x80000000) | ((result_exp & 0xff) << 23) |
+      ((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
+  return bit_cast<float, int32_t>(result_bits);
+}
+
+float ReciprocalStep(float op1, float op2) {
+  // Computes 2 - op1 * op2. A zero multiplied by an infinity (in either
+  // operand order) is taken to be zero instead of NaN, so the result is 2.
+  const bool zero_times_infinity =
+      (op1 == 0.0f && isinf(op2)) || (isinf(op1) && op2 == 0.0f);
+  const float product = zero_times_infinity ? 0.0f : op1 * op2;
+
+  return 2.0f - product;
+}
+
+float ReciprocalSqrtEstimate(float a) {
+  // FPRSqrtEstimate from the ARM Architecture Reference Manual A2-87.
+  if (isnan(a))
+    return a;
+  else if (fabs(a) < exp2f(-126))  // Zero or denormal: signed infinity.
+    return copysignf(INFINITY, a);
+  else if (a < 0.0f)  // Negative operands have no real square root.
+    return NAN;
+  else if (isinf(a))  // Only +infinity maps to zero; finite values do not.
+    return 0.0f;
+
+  uint32_t a_bits = bit_cast<uint32_t, float>(a);
+  uint64_t scaled;
+  if (((a_bits >> 23) & 1) != 0) {
+    // scaled = '0 01111111101' : operand<22:0> : Zeros(29)
+    scaled = (static_cast<uint64_t>(0x3fd) << 52) |
+             ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
+  } else {
+    // scaled = '0 01111111110' : operand<22:0> : Zeros(29)
+    scaled = (static_cast<uint64_t>(0x3fe) << 52) |
+             ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
+  }
+  // result_exp = (380 - UInt(operand<30:23>)) DIV 2;
+  int32_t result_exp = (380 - ((a_bits >> 23) & 0xff)) / 2;
+
+  double scaled_d = bit_cast<double, uint64_t>(scaled);
+  ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0));
+
+  double r;
+  if (scaled_d < 0.5) {
+    // range 0.25 <= a < 0.5
+
+    // a in units of 1/512 rounded down.
+    int32_t q0 = static_cast<int32_t>(scaled_d * 512.0);
+    // reciprocal root r.
+    r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
+  } else {
+    // range 0.5 <= a < 1.0
+
+    // a in units of 1/256 rounded down.
+    int32_t q1 = static_cast<int32_t>(scaled_d * 256.0);
+    // reciprocal root r.
+    r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
+  }
+  // r in units of 1/256 rounded to nearest.
+  int32_t s = static_cast<int32_t>(256.0 * r + 0.5);
+  double estimate = static_cast<double>(s) / 256.0;
+  ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));
+
+  // result = 0 : result_exp<7:0> : estimate<51:29>
+  int32_t result_bits =
+      ((result_exp & 0xff) << 23) |
+      ((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
+  return bit_cast<float, int32_t>(result_bits);
+}
+
+float ReciprocalSqrtStep(float op1, float op2) {
+  // Computes (3 - op1 * op2) / 2. A zero multiplied by an infinity (in either
+  // operand order) is taken to be zero instead of NaN, so the result is 1.5.
+  const bool zero_times_infinity =
+      (op1 == 0.0f && isinf(op2)) || (isinf(op1) && op2 == 0.0f);
+  const float product = zero_times_infinity ? 0.0f : op1 * op2;
+
+  return (3.0f - product) / 2.0f;
+}
+
 }  // namespace arch_arm
diff --git a/runtime/vm/constants_arm.h b/runtime/vm/constants_arm.h
@@ -822,6 +822,16 @@ class Instr {
   DISALLOW_IMPLICIT_CONSTRUCTORS(Instr);
 };
 
+// Floating-point reciprocal estimate and step (see pages A2-85 and A2-86 of
+// ARM Architecture Reference Manual ARMv7-A edition).
+float ReciprocalEstimate(float op);
+float ReciprocalStep(float op1, float op2);
+
+// Floating-point reciprocal square root estimate and step (see pages A2-87 to
+// A2-90 of ARM Architecture Reference Manual ARMv7-A edition).
+float ReciprocalSqrtEstimate(float op);
+float ReciprocalSqrtStep(float op1, float op2);
+
 }  // namespace arch_arm
 
 #endif  // RUNTIME_VM_CONSTANTS_ARM_H_
diff --git a/runtime/vm/simulator_arm.cc b/runtime/vm/simulator_arm.cc