Backport from mainline

author olegendo <olegendo@138bc75d-0d04-0410-961f-82ee72b054a4>

Wed, 6 Mar 2013 22:54:11 +0000 (22:54 +0000)

committer olegendo <olegendo@138bc75d-0d04-0410-961f-82ee72b054a4>

Wed, 6 Mar 2013 22:54:11 +0000 (22:54 +0000)
author olegendo <olegendo@138bc75d-0d04-0410-961f-82ee72b054a4>
Wed, 6 Mar 2013 22:54:11 +0000 (22:54 +0000)
committer olegendo <olegendo@138bc75d-0d04-0410-961f-82ee72b054a4>
Wed, 6 Mar 2013 22:54:11 +0000 (22:54 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index db5f624331fee774ea8dce19345c7055350dac17..26fe42e05436a7badb233a0c9b716d31d6302b9e 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+2013-03-06  Oleg Endo  <olegendo@gcc.gnu.org>
+
+       Backport from mainline
+       2013-03-06  Oleg Endo  <olegendo@gcc.gnu.org>
+
+       PR target/56529
+       * config/sh/sh.c (sh_option_override): Check for TARGET_DYNSHIFT
+       instead of TARGET_SH2 for call-table case.  Do not set sh_div_strategy
+       to SH_DIV_CALL_TABLE for TARGET_SH2.
+       * config.gcc (sh_multilibs): Add m2 and m2a to sh*-*-linux* multilib
+       list.
+       * doc/invoke.texi (SH options): Use table for mdiv= option.  Document
+       mdiv= call-div1, call-fp, call-table options.
+
  2013-02-22  Sebastian Huber <sebastian.huber@embedded-brains.de>
  
         * config.gcc (arm*-*-rtemself*): New.
diff --git a/gcc/config.gcc b/gcc/config.gcc

index c161d215f670d647ab1d83d372917023c7ddcf63..7282a68cc40102a6852b9f39bbdccbd53d07c12f 100644 (file)
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -2355,7 +2355,7 @@ sh-*-elf* | sh[12346l]*-*-elf* | \
                 sh[1234]*)      sh_multilibs=${sh_cpu_target} ;;
                 sh64* | sh5*)   sh_multilibs=m5-32media,m5-32media-nofpu,m5-compact,m5-compact-nofpu,m5-64media,m5-64media-nofpu ;;
                 sh-superh-*)    sh_multilibs=m4,m4-single,m4-single-only,m4-nofpu ;;
-               sh*-*-linux*)   sh_multilibs=m1,m3e,m4 ;;
+               sh*-*-linux*)   sh_multilibs=m1,m2,m2a,m3e,m4 ;;
                 sh*-*-netbsd*)  sh_multilibs=m3,m3e,m4 ;;
                 *) sh_multilibs=m1,m2,m2e,m4,m4-single,m4-single-only,m2a,m2a-single ;;
                 esac
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c

index f828ed4972179c323ff495088282ce874af664a7..1c1ab62cd1ea6c847489b69b869b07a7bd4cf639 100644 (file)
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -686,7 +686,8 @@ sh_option_override (void)
                    || (TARGET_HARD_SH4 && TARGET_SH2E)
                    || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
         sh_div_strategy = SH_DIV_CALL_FP;
-      else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
+      else if (! strcmp (sh_div_str, "call-table")
+              && (TARGET_SH3 || TARGET_SH2A))
         sh_div_strategy = SH_DIV_CALL_TABLE;
        else
         /* Pick one that makes most sense for the target in general.
@@ -706,8 +707,6 @@ sh_option_override (void)
           sh_div_strategy = SH_DIV_CALL_FP;
          /* SH1 .. SH3 cores often go into small-footprint systems, so
            default to the smallest implementation available.  */
-       else if (TARGET_SH2)    /* ??? EXPERIMENTAL */
-         sh_div_strategy = SH_DIV_CALL_TABLE;
         else
           sh_div_strategy = SH_DIV_CALL_DIV1;
      }
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi

index 93a8968f12cb42aa3404a382a7e36e4cab623799..0a53235f6b28e230277aad3a29ecfcfaae9894f5 100644 (file)
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -18066,43 +18066,94 @@ Set the cost to assume for a multiply insn.
  
  @item -mdiv=@var{strategy}
  @opindex mdiv=@var{strategy}
-Set the division strategy to use for SHmedia code.  @var{strategy} must be
-one of: call, call2, fp, inv, inv:minlat, inv20u, inv20l, inv:call,
-inv:call2, inv:fp .
-"fp" performs the operation in floating point.  This has a very high latency,
+Set the division strategy to be used for integer division operations.
+For SHmedia @var{strategy} can be one of:
+
+@table @samp
+
+@item fp
+Performs the operation in floating point.  This has a very high latency,
  but needs only a few instructions, so it might be a good choice if
  your code has enough easily-exploitable ILP to allow the compiler to
  schedule the floating-point instructions together with other instructions.
  Division by zero causes a floating-point exception.
-"inv" uses integer operations to calculate the inverse of the divisor,
+
+@item inv
+Uses integer operations to calculate the inverse of the divisor,
  and then multiplies the dividend with the inverse.  This strategy allows
-cse and hoisting of the inverse calculation.  Division by zero calculates
+CSE and hoisting of the inverse calculation.  Division by zero calculates
  an unspecified result, but does not trap.
-"inv:minlat" is a variant of "inv" where if no cse / hoisting opportunities
+
+@item inv:minlat
+A variant of @samp{inv} where, if no CSE or hoisting opportunities
  have been found, or if the entire operation has been hoisted to the same
  place, the last stages of the inverse calculation are intertwined with the
  final multiply to reduce the overall latency, at the expense of using a few
  more instructions, and thus offering fewer scheduling opportunities with
  other code.
-"call" calls a library function that usually implements the inv:minlat
+
+@item call
+Calls a library function that usually implements the @samp{inv:minlat}
  strategy.
-This gives high code density for m5-*media-nofpu compilations.
-"call2" uses a different entry point of the same library function, where it
+This gives high code density for @code{m5-*media-nofpu} compilations.
+
+@item call2
+Uses a different entry point of the same library function, where it
  assumes that a pointer to a lookup table has already been set up, which
-exposes the pointer load to cse / code hoisting optimizations.
-"inv:call", "inv:call2" and "inv:fp" all use the "inv" algorithm for initial
-code generation, but if the code stays unoptimized, revert to the "call",
-"call2", or "fp" strategies, respectively.  Note that the
+exposes the pointer load to CSE and code hoisting optimizations.
+
+@item inv:call
+@itemx inv:call2
+@itemx inv:fp
+Use the @samp{inv} algorithm for initial
+code generation, but if the code stays unoptimized, revert to the @samp{call},
+@samp{call2}, or @samp{fp} strategies, respectively.  Note that the
  potentially-trapping side effect of division by zero is carried by a
  separate instruction, so it is possible that all the integer instructions
  are hoisted out, but the marker for the side effect stays where it is.
-A recombination to fp operations or a call is not possible in that case.
-"inv20u" and "inv20l" are variants of the "inv:minlat" strategy.  In the case
-that the inverse calculation was nor separated from the multiply, they speed
-up division where the dividend fits into 20 bits (plus sign where applicable),
+A recombination to floating-point operations or a call is not possible
+in that case.
+
+@item inv20u
+@itemx inv20l
+Variants of the @samp{inv:minlat} strategy.  In the case
+that the inverse calculation is not separated from the multiply, they speed
+up division where the dividend fits into 20 bits (plus sign where applicable)
  by inserting a test to skip a number of operations in this case; this test
-slows down the case of larger dividends.  inv20u assumes the case of a such
-a small dividend to be unlikely, and inv20l assumes it to be likely.
+slows down the case of larger dividends.  @samp{inv20u} assumes the case of such
+a small dividend to be unlikely, and @samp{inv20l} assumes it to be likely.
+
+@end table
+
+For targets other than SHmedia @var{strategy} can be one of:
+
+@table @samp
+
+@item call-div1
+Calls a library function that uses the single-step division instruction
+@code{div1} to perform the operation.  Division by zero calculates an
+unspecified result and does not trap.  This is the default except for SH4,
+SH2A and SHcompact.
+
+@item call-fp
+Calls a library function that performs the operation in double precision
+floating point.  Division by zero causes a floating-point exception.  This is
+the default for SHcompact with FPU.  Specifying this for targets that do not
+have a double precision FPU will default to @code{call-div1}.
+
+@item call-table
+Calls a library function that uses a lookup table for small divisors and
+the @code{div1} instruction with case distinction for larger divisors.  Division
+by zero calculates an unspecified result and does not trap.  This is the default
+for SH4.  Specifying this for targets that do not have dynamic shift
+instructions will default to @code{call-div1}.
+
+@end table
+
+When a division strategy has not been specified the default strategy will be
+selected based on the current target.  For SH2A the default strategy is to
+use the @code{divs} and @code{divu} instructions instead of library function
+calls.
  
  @item -maccumulate-outgoing-args
  @opindex maccumulate-outgoing-args
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog

index 81bacb2bca33816bc76818e8e9230002af5db9ff..f97b8479a2dc3d8cc684c20bd670a9af6307c77e 100644 (file)
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,12 @@
+2013-03-06  Oleg Endo  <olegendo@gcc.gnu.org>
+
+       Backport from mainline.
+       2013-03-06  Oleg Endo  <olegendo@gcc.gnu.org>
+
+       PR target/56529
+       * config/sh/lib1funcs.S (udivsi3_i4i, sdivsi3_i4i): Add __SH2A__ to
+       inclusion list.
+
  2013-02-22  Sebastian Huber <sebastian.huber@embedded-brains.de>
  
         * config.host (arm*-*-rtemself*): New.
diff --git a/libgcc/config/sh/lib1funcs.S b/libgcc/config/sh/lib1funcs.S

index 2f0ca16cd91937963a711b37930710552517b6d8..84a42e79901a5c6949115ef3272c96146c1cf307 100644 (file)
--- a/libgcc/config/sh/lib1funcs.S
+++ b/libgcc/config/sh/lib1funcs.S
@@ -3255,8 +3255,8 @@ GLOBAL(div_table):
         .word   17136
         .word   16639
  
-#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
-/* This code used shld, thus is not suitable for SH1 / SH2.  */
+#elif defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
+/* This code uses shld, thus is not suitable for SH1 / SH2.  */
  
  /* Signed / unsigned division without use of FPU, optimized for SH4.
     Uses a lookup table for divisors in the range -128 .. +128, and
author	olegendo <olegendo@138bc75d-0d04-0410-961f-82ee72b054a4>
	Wed, 6 Mar 2013 22:54:11 +0000 (22:54 +0000)
committer	olegendo <olegendo@138bc75d-0d04-0410-961f-82ee72b054a4>
	Wed, 6 Mar 2013 22:54:11 +0000 (22:54 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config.gcc		patch \| blob \| history
gcc/config/sh/sh.c		patch \| blob \| history
gcc/doc/invoke.texi		patch \| blob \| history
libgcc/ChangeLog		patch \| blob \| history
libgcc/config/sh/lib1funcs.S		patch \| blob \| history