import gcc-8.2.1-3.5.el8

This commit is contained in:
CentOS Sources 2019-05-07 00:34:54 -04:00
commit 0e369741f6
28 changed files with 8360 additions and 0 deletions

3
.gcc.metadata Normal file
View File

@ -0,0 +1,3 @@
1fe3aa7ce95faa0f4d7f08f0dfefd86ff4b43015 SOURCES/gcc-8.2.1-20180905.tar.xz
3bdb3cc01fa7690a0e20ea5cfffcbe690f7665eb SOURCES/nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.xz
ce8eb83be0ac37fb5d5388df455a980fe37b4f13 SOURCES/nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.xz

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
SOURCES/gcc-8.2.1-20180905.tar.xz
SOURCES/nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.xz
SOURCES/nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.xz

View File

@ -0,0 +1,27 @@
2017-02-25 Jakub Jelinek <jakub@redhat.com>
* configure.ac: When adding -Wno-format, also add -Wno-format-security.
* configure: Regenerated.
--- gcc/configure.ac.jj 2017-02-13 12:20:53.000000000 +0100
+++ gcc/configure.ac 2017-02-25 12:42:32.859175403 +0100
@@ -481,7 +481,7 @@ AC_ARG_ENABLE(build-format-warnings,
AS_HELP_STRING([--disable-build-format-warnings],[don't use -Wformat while building GCC]),
[],[enable_build_format_warnings=yes])
AS_IF([test $enable_build_format_warnings = no],
- [wf_opt=-Wno-format],[wf_opt=])
+ [wf_opt="-Wno-format -Wno-format-security"],[wf_opt=])
ACX_PROG_CXX_WARNING_OPTS(
m4_quote(m4_do([-W -Wall -Wno-narrowing -Wwrite-strings ],
[-Wcast-qual $wf_opt])), [loose_warn])
--- gcc/configure.jj 2017-02-13 12:20:52.000000000 +0100
+++ gcc/configure 2017-02-25 12:42:50.041946391 +0100
@@ -6647,7 +6647,7 @@ else
fi
if test $enable_build_format_warnings = no; then :
- wf_opt=-Wno-format
+ wf_opt="-Wno-format -Wno-format-security"
else
wf_opt=
fi

View File

@ -0,0 +1,117 @@
2017-01-20 Jakub Jelinek <jakub@redhat.com>
* gcc.c (offload_targets_default): New variable.
(process_command): Set it if -foffload is defaulted.
(driver::maybe_putenv_OFFLOAD_TARGETS): Add OFFLOAD_TARGET_DEFAULT=1
into environment if -foffload has been defaulted.
* lto-wrapper.c (OFFLOAD_TARGET_DEFAULT_ENV): Define.
(compile_images_for_offload_targets): If OFFLOAD_TARGET_DEFAULT
is in the environment, don't fail if corresponding mkoffload
can't be found. Free and clear offload_names if no valid offload
is found.
libgomp/
* target.c (gomp_load_plugin_for_device): If a plugin can't be
dlopened, silently assume it has no devices.
--- gcc/gcc.c.jj 2017-01-17 10:28:40.000000000 +0100
+++ gcc/gcc.c 2017-01-20 16:26:29.649962902 +0100
@@ -290,6 +290,10 @@ static const char *spec_host_machine = D
static char *offload_targets = NULL;
+/* Set to true if -foffload has not been used and offload_targets
+ has been set to the configured default. */
+static bool offload_targets_default;
+
/* Nonzero if cross-compiling.
When -b is used, the value comes from the `specs' file. */
@@ -4457,7 +4461,10 @@ process_command (unsigned int decoded_op
/* If the user didn't specify any, default to all configured offload
targets. */
if (ENABLE_OFFLOADING && offload_targets == NULL)
- handle_foffload_option (OFFLOAD_TARGETS);
+ {
+ handle_foffload_option (OFFLOAD_TARGETS);
+ offload_targets_default = true;
+ }
if (output_file
&& strcmp (output_file, "-") != 0
@@ -7693,6 +7700,8 @@ driver::maybe_putenv_OFFLOAD_TARGETS ()
obstack_grow (&collect_obstack, offload_targets,
strlen (offload_targets) + 1);
xputenv (XOBFINISH (&collect_obstack, char *));
+ if (offload_targets_default)
+ xputenv ("OFFLOAD_TARGET_DEFAULT=1");
}
free (offload_targets);
--- gcc/lto-wrapper.c.jj 2017-01-01 12:45:34.000000000 +0100
+++ gcc/lto-wrapper.c 2017-01-20 16:34:18.294016997 +0100
@@ -52,6 +52,7 @@ along with GCC; see the file COPYING3.
/* Environment variable, used for passing the names of offload targets from GCC
driver to lto-wrapper. */
#define OFFLOAD_TARGET_NAMES_ENV "OFFLOAD_TARGET_NAMES"
+#define OFFLOAD_TARGET_DEFAULT_ENV "OFFLOAD_TARGET_DEFAULT"
enum lto_mode_d {
LTO_MODE_NONE, /* Not doing LTO. */
@@ -790,8 +791,10 @@ compile_images_for_offload_targets (unsi
if (!target_names)
return;
unsigned num_targets = parse_env_var (target_names, &names, NULL);
+ const char *target_names_default = getenv (OFFLOAD_TARGET_DEFAULT_ENV);
int next_name_entry = 0;
+ bool hsa_seen = false;
const char *compiler_path = getenv ("COMPILER_PATH");
if (!compiler_path)
goto out;
@@ -804,18 +807,32 @@ compile_images_for_offload_targets (unsi
/* HSA does not use LTO-like streaming and a different compiler, skip
it. */
if (strcmp (names[i], "hsa") == 0)
- continue;
+ {
+ hsa_seen = true;
+ continue;
+ }
offload_names[next_name_entry]
= compile_offload_image (names[i], compiler_path, in_argc, in_argv,
compiler_opts, compiler_opt_count,
linker_opts, linker_opt_count);
if (!offload_names[next_name_entry])
- fatal_error (input_location,
- "problem with building target image for %s\n", names[i]);
+ {
+ if (target_names_default != NULL)
+ continue;
+ fatal_error (input_location,
+ "problem with building target image for %s\n",
+ names[i]);
+ }
next_name_entry++;
}
+ if (next_name_entry == 0 && !hsa_seen)
+ {
+ free (offload_names);
+ offload_names = NULL;
+ }
+
out:
free_array_of_ptrs ((void **) names, num_targets);
}
--- libgomp/target.c.jj 2017-01-01 12:45:52.000000000 +0100
+++ libgomp/target.c 2017-01-20 20:12:13.756710875 +0100
@@ -2356,7 +2356,7 @@ gomp_load_plugin_for_device (struct gomp
void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
if (!plugin_handle)
- goto dl_fail;
+ return 0;
/* Check if all required functions are available in the plugin and store
their handlers. None of the symbols can legitimately be NULL,

124
SOURCES/gcc8-hack.patch Normal file
View File

@ -0,0 +1,124 @@
--- libada/Makefile.in.jj 2009-01-14 12:07:35.000000000 +0100
+++ libada/Makefile.in 2009-01-15 14:25:33.000000000 +0100
@@ -66,18 +66,40 @@ libsubdir := $(libdir)/gcc/$(target_nonc
ADA_RTS_DIR=$(GCC_DIR)/ada/rts$(subst /,_,$(MULTISUBDIR))
ADA_RTS_SUBDIR=./rts$(subst /,_,$(MULTISUBDIR))
+DEFAULTMULTIFLAGS :=
+ifeq ($(MULTISUBDIR),)
+targ:=$(subst -, ,$(target))
+arch:=$(word 1,$(targ))
+ifeq ($(words $(targ)),2)
+osys:=$(word 2,$(targ))
+else
+osys:=$(word 3,$(targ))
+endif
+ifeq ($(strip $(filter-out i%86 x86_64 powerpc% ppc% s390% sparc% linux%, $(arch) $(osys))),)
+ifeq ($(shell $(CC) $(CFLAGS) -print-multi-os-directory),../lib64)
+DEFAULTMULTIFLAGS := -m64
+else
+ifeq ($(strip $(filter-out s390%, $(arch))),)
+DEFAULTMULTIFLAGS := -m31
+else
+DEFAULTMULTIFLAGS := -m32
+endif
+endif
+endif
+endif
+
# exeext should not be used because it's the *host* exeext. We're building
# a *target* library, aren't we?!? Likewise for CC. Still, provide bogus
# definitions just in case something slips through the safety net provided
# by recursive make invocations in gcc/ada/Makefile.in
LIBADA_FLAGS_TO_PASS = \
"MAKEOVERRIDES=" \
- "LDFLAGS=$(LDFLAGS)" \
+ "LDFLAGS=$(LDFLAGS) $(DEFAULTMULTIFLAGS)" \
"LN_S=$(LN_S)" \
"SHELL=$(SHELL)" \
- "GNATLIBFLAGS=$(GNATLIBFLAGS) $(MULTIFLAGS)" \
- "GNATLIBCFLAGS=$(GNATLIBCFLAGS) $(MULTIFLAGS)" \
- "GNATLIBCFLAGS_FOR_C=$(GNATLIBCFLAGS_FOR_C) $(MULTIFLAGS)" \
+ "GNATLIBFLAGS=$(GNATLIBFLAGS) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \
+ "GNATLIBCFLAGS=$(GNATLIBCFLAGS) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \
+ "GNATLIBCFLAGS_FOR_C=$(GNATLIBCFLAGS_FOR_C) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \
"PICFLAG_FOR_TARGET=$(PICFLAG)" \
"THREAD_KIND=$(THREAD_KIND)" \
"TRACE=$(TRACE)" \
@@ -88,7 +110,7 @@ LIBADA_FLAGS_TO_PASS = \
"exeext=.exeext.should.not.be.used " \
'CC=the.host.compiler.should.not.be.needed' \
"GCC_FOR_TARGET=$(CC)" \
- "CFLAGS=$(CFLAGS)"
+ "CFLAGS=$(CFLAGS) $(DEFAULTMULTIFLAGS)"
# Rules to build gnatlib.
.PHONY: gnatlib gnatlib-plain gnatlib-sjlj gnatlib-zcx gnatlib-shared osconstool
--- config-ml.in.jj 2010-06-30 09:50:44.000000000 +0200
+++ config-ml.in 2010-07-02 21:24:17.994211151 +0200
@@ -511,6 +511,8 @@ multi-do:
ADAFLAGS="$(ADAFLAGS) $${flags}" \
prefix="$(prefix)" \
exec_prefix="$(exec_prefix)" \
+ mandir="$(mandir)" \
+ infodir="$(infodir)" \
GOCFLAGS="$(GOCFLAGS) $${flags}" \
CXXFLAGS="$(CXXFLAGS) $${flags}" \
LIBCFLAGS="$(LIBCFLAGS) $${flags}" \
--- libcpp/macro.c.jj 2015-01-14 11:01:34.000000000 +0100
+++ libcpp/macro.c 2015-01-14 14:22:19.286949884 +0100
@@ -2947,8 +2947,6 @@ create_iso_definition (cpp_reader *pfile
cpp_token *token;
const cpp_token *ctoken;
bool following_paste_op = false;
- const char *paste_op_error_msg =
- N_("'##' cannot appear at either end of a macro expansion");
unsigned int num_extra_tokens = 0;
/* Get the first token of the expansion (or the '(' of a
@@ -3059,7 +3057,8 @@ create_iso_definition (cpp_reader *pfile
function-like macros, but not at the end. */
if (following_paste_op)
{
- cpp_error (pfile, CPP_DL_ERROR, paste_op_error_msg);
+ cpp_error (pfile, CPP_DL_ERROR,
+ "'##' cannot appear at either end of a macro expansion");
return false;
}
break;
@@ -3072,7 +3071,8 @@ create_iso_definition (cpp_reader *pfile
function-like macros, but not at the beginning. */
if (macro->count == 1)
{
- cpp_error (pfile, CPP_DL_ERROR, paste_op_error_msg);
+ cpp_error (pfile, CPP_DL_ERROR,
+ "'##' cannot appear at either end of a macro expansion");
return false;
}
--- libcpp/expr.c.jj 2015-01-14 11:01:34.000000000 +0100
+++ libcpp/expr.c 2015-01-14 14:35:52.851002344 +0100
@@ -672,16 +672,17 @@ cpp_classify_number (cpp_reader *pfile,
if ((result & CPP_N_WIDTH) == CPP_N_LARGE
&& CPP_OPTION (pfile, cpp_warn_long_long))
{
- const char *message = CPP_OPTION (pfile, cplusplus)
- ? N_("use of C++11 long long integer constant")
- : N_("use of C99 long long integer constant");
-
if (CPP_OPTION (pfile, c99))
cpp_warning_with_line (pfile, CPP_W_LONG_LONG, virtual_location,
- 0, message);
+ 0, CPP_OPTION (pfile, cplusplus)
+ ? N_("use of C++11 long long integer constant")
+ : N_("use of C99 long long integer constant"));
else
cpp_pedwarning_with_line (pfile, CPP_W_LONG_LONG,
- virtual_location, 0, message);
+ virtual_location, 0,
+ CPP_OPTION (pfile, cplusplus)
+ ? N_("use of C++11 long long integer constant")
+ : N_("use of C99 long long integer constant"));
}
result |= CPP_N_INTEGER;

View File

@ -0,0 +1,11 @@
--- libgomp/configure.tgt.jj 2008-01-10 20:53:48.000000000 +0100
+++ libgomp/configure.tgt 2008-03-27 12:44:51.000000000 +0100
@@ -67,7 +67,7 @@ if test $enable_linux_futex = yes; then
;;
*)
if test -z "$with_arch"; then
- XCFLAGS="${XCFLAGS} -march=i486 -mtune=${target_cpu}"
+ XCFLAGS="${XCFLAGS} -march=i486 -mtune=generic"
fi
esac
;;

715
SOURCES/gcc8-isl-dl.patch Normal file
View File

@ -0,0 +1,715 @@
--- gcc/Makefile.in.jj 2015-06-06 10:00:25.000000000 +0200
+++ gcc/Makefile.in 2015-11-04 14:56:02.643536437 +0100
@@ -1046,7 +1046,7 @@ BUILD_LIBDEPS= $(BUILD_LIBIBERTY)
# and the system's installed libraries.
LIBS = @LIBS@ libcommon.a $(CPPLIB) $(LIBINTL) $(LIBICONV) $(LIBBACKTRACE) \
$(LIBIBERTY) $(LIBDECNUMBER) $(HOST_LIBS)
-BACKENDLIBS = $(ISLLIBS) $(GMPLIBS) $(PLUGINLIBS) $(HOST_LIBS) \
+BACKENDLIBS = $(if $(ISLLIBS),-ldl) $(GMPLIBS) $(PLUGINLIBS) $(HOST_LIBS) \
$(ZLIB)
# Any system libraries needed just for GNAT.
SYSLIBS = @GNAT_LIBEXC@
@@ -2196,6 +2196,15 @@ $(out_object_file): $(out_file)
$(common_out_object_file): $(common_out_file)
$(COMPILE) $<
$(POSTCOMPILE)
+
+graphite%.o : \
+ ALL_CFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CFLAGS))
+graphite.o : \
+ ALL_CFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CFLAGS))
+graphite%.o : \
+ ALL_CXXFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CXXFLAGS))
+graphite.o : \
+ ALL_CXXFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CXXFLAGS))
#
# Generate header and source files from the machine description,
# and compile them.
--- gcc/graphite.h.jj 2016-01-27 12:44:06.000000000 +0100
+++ gcc/graphite.h 2016-01-27 13:26:38.309876856 +0100
@@ -39,6 +39,590 @@ along with GCC; see the file COPYING3.
#include <isl/schedule_node.h>
#include <isl/id.h>
#include <isl/space.h>
+#include <isl/version.h>
+#include <dlfcn.h>
+
+#define DYNSYMS \
+ DYNSYM (isl_aff_add_coefficient_si); \
+ DYNSYM (isl_aff_free); \
+ DYNSYM (isl_aff_get_space); \
+ DYNSYM (isl_aff_set_coefficient_si); \
+ DYNSYM (isl_aff_set_constant_si); \
+ DYNSYM (isl_aff_zero_on_domain); \
+ DYNSYM (isl_band_free); \
+ DYNSYM (isl_band_get_children); \
+ DYNSYM (isl_band_get_partial_schedule); \
+ DYNSYM (isl_band_has_children); \
+ DYNSYM (isl_band_list_free); \
+ DYNSYM (isl_band_list_get_band); \
+ DYNSYM (isl_band_list_get_ctx); \
+ DYNSYM (isl_band_list_n_band); \
+ DYNSYM (isl_band_n_member); \
+ DYNSYM (isl_basic_map_add_constraint); \
+ DYNSYM (isl_basic_map_project_out); \
+ DYNSYM (isl_basic_map_universe); \
+ DYNSYM (isl_constraint_set_coefficient_si); \
+ DYNSYM (isl_constraint_set_constant_si); \
+ DYNSYM (isl_ctx_alloc); \
+ DYNSYM (isl_ctx_free); \
+ DYNSYM (isl_equality_alloc); \
+ DYNSYM (isl_id_alloc); \
+ DYNSYM (isl_id_copy); \
+ DYNSYM (isl_id_free); \
+ DYNSYM (isl_inequality_alloc); \
+ DYNSYM (isl_local_space_copy); \
+ DYNSYM (isl_local_space_free); \
+ DYNSYM (isl_local_space_from_space); \
+ DYNSYM (isl_local_space_range); \
+ DYNSYM (isl_map_add_constraint); \
+ DYNSYM (isl_map_add_dims); \
+ DYNSYM (isl_map_align_params); \
+ DYNSYM (isl_map_apply_range); \
+ DYNSYM (isl_map_copy); \
+ DYNSYM (isl_map_dim); \
+ DYNSYM (isl_map_dump); \
+ DYNSYM (isl_map_equate); \
+ DYNSYM (isl_map_fix_si); \
+ DYNSYM (isl_map_flat_product); \
+ DYNSYM (isl_map_flat_range_product); \
+ DYNSYM (isl_map_free); \
+ DYNSYM (isl_map_from_basic_map); \
+ DYNSYM (isl_map_from_pw_aff); \
+ DYNSYM (isl_map_from_union_map); \
+ DYNSYM (isl_map_get_ctx); \
+ DYNSYM (isl_map_get_space); \
+ DYNSYM (isl_map_get_tuple_id); \
+ DYNSYM (isl_map_insert_dims); \
+ DYNSYM (isl_map_intersect); \
+ DYNSYM (isl_map_intersect_domain); \
+ DYNSYM (isl_map_intersect_range); \
+ DYNSYM (isl_map_is_empty); \
+ DYNSYM (isl_map_lex_ge); \
+ DYNSYM (isl_map_lex_le); \
+ DYNSYM (isl_map_n_out); \
+ DYNSYM (isl_map_range); \
+ DYNSYM (isl_map_set_tuple_id); \
+ DYNSYM (isl_map_universe); \
+ DYNSYM (isl_options_set_on_error); \
+ DYNSYM (isl_options_set_schedule_serialize_sccs); \
+ DYNSYM (isl_printer_set_yaml_style); \
+ DYNSYM (isl_options_set_schedule_max_constant_term); \
+ DYNSYM (isl_options_set_schedule_maximize_band_depth); \
+ DYNSYM (isl_printer_free); \
+ DYNSYM (isl_printer_print_aff); \
+ DYNSYM (isl_printer_print_constraint); \
+ DYNSYM (isl_printer_print_map); \
+ DYNSYM (isl_printer_print_set); \
+ DYNSYM (isl_printer_to_file); \
+ DYNSYM (isl_pw_aff_add); \
+ DYNSYM (isl_pw_aff_alloc); \
+ DYNSYM (isl_pw_aff_copy); \
+ DYNSYM (isl_pw_aff_eq_set); \
+ DYNSYM (isl_pw_aff_free); \
+ DYNSYM (isl_pw_aff_from_aff); \
+ DYNSYM (isl_pw_aff_ge_set); \
+ DYNSYM (isl_pw_aff_gt_set); \
+ DYNSYM (isl_pw_aff_is_cst); \
+ DYNSYM (isl_pw_aff_le_set); \
+ DYNSYM (isl_pw_aff_lt_set); \
+ DYNSYM (isl_pw_aff_mul); \
+ DYNSYM (isl_pw_aff_ne_set); \
+ DYNSYM (isl_pw_aff_nonneg_set); \
+ DYNSYM (isl_pw_aff_set_tuple_id); \
+ DYNSYM (isl_pw_aff_sub); \
+ DYNSYM (isl_pw_aff_zero_set); \
+ DYNSYM (isl_schedule_free); \
+ DYNSYM (isl_schedule_get_band_forest); \
+ DYNSYM (isl_set_add_constraint); \
+ DYNSYM (isl_set_add_dims); \
+ DYNSYM (isl_set_apply); \
+ DYNSYM (isl_set_coalesce); \
+ DYNSYM (isl_set_copy); \
+ DYNSYM (isl_set_dim); \
+ DYNSYM (isl_set_fix_si); \
+ DYNSYM (isl_set_free); \
+ DYNSYM (isl_set_get_space); \
+ DYNSYM (isl_set_get_tuple_id); \
+ DYNSYM (isl_set_intersect); \
+ DYNSYM (isl_set_is_empty); \
+ DYNSYM (isl_set_n_dim); \
+ DYNSYM (isl_set_nat_universe); \
+ DYNSYM (isl_set_project_out); \
+ DYNSYM (isl_set_set_tuple_id); \
+ DYNSYM (isl_set_universe); \
+ DYNSYM (isl_space_add_dims); \
+ DYNSYM (isl_space_alloc); \
+ DYNSYM (isl_space_copy); \
+ DYNSYM (isl_space_dim); \
+ DYNSYM (isl_space_domain); \
+ DYNSYM (isl_space_find_dim_by_id); \
+ DYNSYM (isl_space_free); \
+ DYNSYM (isl_space_from_domain); \
+ DYNSYM (isl_space_get_tuple_id); \
+ DYNSYM (isl_space_params_alloc); \
+ DYNSYM (isl_space_range); \
+ DYNSYM (isl_space_set_alloc); \
+ DYNSYM (isl_space_set_dim_id); \
+ DYNSYM (isl_space_set_tuple_id); \
+ DYNSYM (isl_union_map_add_map); \
+ DYNSYM (isl_union_map_align_params); \
+ DYNSYM (isl_union_map_apply_domain); \
+ DYNSYM (isl_union_map_apply_range); \
+ DYNSYM (isl_union_map_compute_flow); \
+ DYNSYM (isl_union_map_copy); \
+ DYNSYM (isl_union_map_empty); \
+ DYNSYM (isl_union_map_flat_range_product); \
+ DYNSYM (isl_union_map_foreach_map); \
+ DYNSYM (isl_union_map_free); \
+ DYNSYM (isl_union_map_from_map); \
+ DYNSYM (isl_union_map_get_ctx); \
+ DYNSYM (isl_union_map_get_space); \
+ DYNSYM (isl_union_map_gist_domain); \
+ DYNSYM (isl_union_map_gist_range); \
+ DYNSYM (isl_union_map_intersect_domain); \
+ DYNSYM (isl_union_map_is_empty); \
+ DYNSYM (isl_union_map_subtract); \
+ DYNSYM (isl_union_map_union); \
+ DYNSYM (isl_union_set_add_set); \
+ DYNSYM (isl_union_set_compute_schedule); \
+ DYNSYM (isl_union_set_copy); \
+ DYNSYM (isl_union_set_empty); \
+ DYNSYM (isl_union_set_from_set); \
+ DYNSYM (isl_aff_add_constant_val); \
+ DYNSYM (isl_aff_get_coefficient_val); \
+ DYNSYM (isl_aff_get_ctx); \
+ DYNSYM (isl_aff_mod_val); \
+ DYNSYM (isl_ast_build_ast_from_schedule); \
+ DYNSYM (isl_ast_build_free); \
+ DYNSYM (isl_ast_build_from_context); \
+ DYNSYM (isl_ast_build_get_ctx); \
+ DYNSYM (isl_ast_build_get_schedule); \
+ DYNSYM (isl_ast_build_get_schedule_space); \
+ DYNSYM (isl_ast_build_set_before_each_for); \
+ DYNSYM (isl_ast_build_set_options); \
+ DYNSYM (isl_ast_expr_free); \
+ DYNSYM (isl_ast_expr_from_val); \
+ DYNSYM (isl_ast_expr_get_ctx); \
+ DYNSYM (isl_ast_expr_get_id); \
+ DYNSYM (isl_ast_expr_get_op_arg); \
+ DYNSYM (isl_ast_expr_get_op_n_arg); \
+ DYNSYM (isl_ast_expr_get_op_type); \
+ DYNSYM (isl_ast_expr_get_type); \
+ DYNSYM (isl_ast_expr_get_val); \
+ DYNSYM (isl_ast_expr_sub); \
+ DYNSYM (isl_ast_node_block_get_children); \
+ DYNSYM (isl_ast_node_for_get_body); \
+ DYNSYM (isl_ast_node_for_get_cond); \
+ DYNSYM (isl_ast_node_for_get_inc); \
+ DYNSYM (isl_ast_node_for_get_init); \
+ DYNSYM (isl_ast_node_for_get_iterator); \
+ DYNSYM (isl_ast_node_free); \
+ DYNSYM (isl_ast_node_get_annotation); \
+ DYNSYM (isl_ast_node_get_type); \
+ DYNSYM (isl_ast_node_if_get_cond); \
+ DYNSYM (isl_ast_node_if_get_else); \
+ DYNSYM (isl_ast_node_if_get_then); \
+ DYNSYM (isl_ast_node_list_free); \
+ DYNSYM (isl_ast_node_list_get_ast_node); \
+ DYNSYM (isl_ast_node_list_n_ast_node); \
+ DYNSYM (isl_ast_node_user_get_expr); \
+ DYNSYM (isl_constraint_set_coefficient_val); \
+ DYNSYM (isl_constraint_set_constant_val); \
+ DYNSYM (isl_id_get_user); \
+ DYNSYM (isl_local_space_get_ctx); \
+ DYNSYM (isl_map_fix_val); \
+ DYNSYM (isl_options_set_ast_build_atomic_upper_bound); \
+ DYNSYM (isl_printer_print_ast_node); \
+ DYNSYM (isl_printer_print_str); \
+ DYNSYM (isl_printer_set_output_format); \
+ DYNSYM (isl_pw_aff_mod_val); \
+ DYNSYM (isl_schedule_constraints_compute_schedule); \
+ DYNSYM (isl_schedule_constraints_on_domain); \
+ DYNSYM (isl_schedule_constraints_set_coincidence); \
+ DYNSYM (isl_schedule_constraints_set_proximity); \
+ DYNSYM (isl_schedule_constraints_set_validity); \
+ DYNSYM (isl_set_get_dim_id); \
+ DYNSYM (isl_set_max_val); \
+ DYNSYM (isl_set_min_val); \
+ DYNSYM (isl_set_params); \
+ DYNSYM (isl_space_align_params); \
+ DYNSYM (isl_space_map_from_domain_and_range); \
+ DYNSYM (isl_space_set_tuple_name); \
+ DYNSYM (isl_space_wrap); \
+ DYNSYM (isl_union_map_from_domain_and_range); \
+ DYNSYM (isl_union_map_range); \
+ DYNSYM (isl_union_set_union); \
+ DYNSYM (isl_union_set_universe); \
+ DYNSYM (isl_val_2exp); \
+ DYNSYM (isl_val_add_ui); \
+ DYNSYM (isl_val_copy); \
+ DYNSYM (isl_val_free); \
+ DYNSYM (isl_val_int_from_si); \
+ DYNSYM (isl_val_int_from_ui); \
+ DYNSYM (isl_val_mul); \
+ DYNSYM (isl_val_neg); \
+ DYNSYM (isl_val_sub); \
+ DYNSYM (isl_printer_print_union_map); \
+ DYNSYM (isl_pw_aff_get_ctx); \
+ DYNSYM (isl_val_is_int); \
+ DYNSYM (isl_ctx_get_max_operations); \
+ DYNSYM (isl_ctx_set_max_operations); \
+ DYNSYM (isl_ctx_last_error); \
+ DYNSYM (isl_ctx_reset_operations); \
+ DYNSYM (isl_map_coalesce); \
+ DYNSYM (isl_printer_print_schedule); \
+ DYNSYM (isl_set_set_dim_id); \
+ DYNSYM (isl_union_map_coalesce); \
+ DYNSYM (isl_multi_val_set_val); \
+ DYNSYM (isl_multi_val_zero); \
+ DYNSYM (isl_options_set_schedule_max_coefficient); \
+ DYNSYM (isl_options_set_tile_scale_tile_loops); \
+ DYNSYM (isl_schedule_copy); \
+ DYNSYM (isl_schedule_get_map); \
+ DYNSYM (isl_schedule_map_schedule_node_bottom_up); \
+ DYNSYM (isl_schedule_node_band_get_permutable); \
+ DYNSYM (isl_schedule_node_band_get_space); \
+ DYNSYM (isl_schedule_node_band_tile); \
+ DYNSYM (isl_schedule_node_child); \
+ DYNSYM (isl_schedule_node_free); \
+ DYNSYM (isl_schedule_node_get_child); \
+ DYNSYM (isl_schedule_node_get_ctx); \
+ DYNSYM (isl_schedule_node_get_type); \
+ DYNSYM (isl_schedule_node_n_children); \
+ DYNSYM (isl_union_map_is_equal); \
+ DYNSYM (isl_union_access_info_compute_flow); \
+ DYNSYM (isl_union_access_info_from_sink); \
+ DYNSYM (isl_union_access_info_set_may_source); \
+ DYNSYM (isl_union_access_info_set_must_source); \
+ DYNSYM (isl_union_access_info_set_schedule); \
+ DYNSYM (isl_union_flow_free); \
+ DYNSYM (isl_union_flow_get_may_dependence); \
+ DYNSYM (isl_union_flow_get_must_dependence); \
+ DYNSYM (isl_aff_var_on_domain); \
+ DYNSYM (isl_multi_aff_from_aff); \
+ DYNSYM (isl_schedule_get_ctx); \
+ DYNSYM (isl_multi_aff_set_tuple_id); \
+ DYNSYM (isl_multi_aff_dim); \
+ DYNSYM (isl_schedule_get_domain); \
+ DYNSYM (isl_union_set_is_empty); \
+ DYNSYM (isl_union_set_get_space); \
+ DYNSYM (isl_union_pw_multi_aff_empty); \
+ DYNSYM (isl_union_set_foreach_set); \
+ DYNSYM (isl_union_set_free); \
+ DYNSYM (isl_multi_union_pw_aff_from_union_pw_multi_aff); \
+ DYNSYM (isl_multi_union_pw_aff_apply_multi_aff); \
+ DYNSYM (isl_schedule_insert_partial_schedule); \
+ DYNSYM (isl_union_pw_multi_aff_free); \
+ DYNSYM (isl_pw_multi_aff_project_out_map); \
+ DYNSYM (isl_union_pw_multi_aff_add_pw_multi_aff); \
+ DYNSYM (isl_schedule_from_domain); \
+ DYNSYM (isl_schedule_sequence); \
+ DYNSYM (isl_ast_build_node_from_schedule); \
+ DYNSYM (isl_ast_node_mark_get_node); \
+ DYNSYM (isl_schedule_node_band_member_get_ast_loop_type); \
+ DYNSYM (isl_schedule_node_band_member_set_ast_loop_type); \
+ DYNSYM (isl_val_n_abs_num_chunks); \
+ DYNSYM (isl_val_get_abs_num_chunks); \
+ DYNSYM (isl_val_int_from_chunks); \
+ DYNSYM (isl_val_is_neg); \
+ DYNSYM (isl_version); \
+ DYNSYM (isl_options_get_on_error); \
+ DYNSYM (isl_ctx_reset_error);
+
+extern struct isl_pointers_s__
+{
+ bool inited;
+ void *h;
+#define DYNSYM(x) __typeof (x) *p_##x
+ DYNSYMS
+#undef DYNSYM
+} isl_pointers__;
+
+#define isl_aff_add_coefficient_si (*isl_pointers__.p_isl_aff_add_coefficient_si)
+#define isl_aff_free (*isl_pointers__.p_isl_aff_free)
+#define isl_aff_get_space (*isl_pointers__.p_isl_aff_get_space)
+#define isl_aff_set_coefficient_si (*isl_pointers__.p_isl_aff_set_coefficient_si)
+#define isl_aff_set_constant_si (*isl_pointers__.p_isl_aff_set_constant_si)
+#define isl_aff_zero_on_domain (*isl_pointers__.p_isl_aff_zero_on_domain)
+#define isl_band_free (*isl_pointers__.p_isl_band_free)
+#define isl_band_get_children (*isl_pointers__.p_isl_band_get_children)
+#define isl_band_get_partial_schedule (*isl_pointers__.p_isl_band_get_partial_schedule)
+#define isl_band_has_children (*isl_pointers__.p_isl_band_has_children)
+#define isl_band_list_free (*isl_pointers__.p_isl_band_list_free)
+#define isl_band_list_get_band (*isl_pointers__.p_isl_band_list_get_band)
+#define isl_band_list_get_ctx (*isl_pointers__.p_isl_band_list_get_ctx)
+#define isl_band_list_n_band (*isl_pointers__.p_isl_band_list_n_band)
+#define isl_band_n_member (*isl_pointers__.p_isl_band_n_member)
+#define isl_basic_map_add_constraint (*isl_pointers__.p_isl_basic_map_add_constraint)
+#define isl_basic_map_project_out (*isl_pointers__.p_isl_basic_map_project_out)
+#define isl_basic_map_universe (*isl_pointers__.p_isl_basic_map_universe)
+#define isl_constraint_set_coefficient_si (*isl_pointers__.p_isl_constraint_set_coefficient_si)
+#define isl_constraint_set_constant_si (*isl_pointers__.p_isl_constraint_set_constant_si)
+#define isl_ctx_alloc (*isl_pointers__.p_isl_ctx_alloc)
+#define isl_ctx_free (*isl_pointers__.p_isl_ctx_free)
+#define isl_equality_alloc (*isl_pointers__.p_isl_equality_alloc)
+#define isl_id_alloc (*isl_pointers__.p_isl_id_alloc)
+#define isl_id_copy (*isl_pointers__.p_isl_id_copy)
+#define isl_id_free (*isl_pointers__.p_isl_id_free)
+#define isl_inequality_alloc (*isl_pointers__.p_isl_inequality_alloc)
+#define isl_local_space_copy (*isl_pointers__.p_isl_local_space_copy)
+#define isl_local_space_free (*isl_pointers__.p_isl_local_space_free)
+#define isl_local_space_from_space (*isl_pointers__.p_isl_local_space_from_space)
+#define isl_local_space_range (*isl_pointers__.p_isl_local_space_range)
+#define isl_map_add_constraint (*isl_pointers__.p_isl_map_add_constraint)
+#define isl_map_add_dims (*isl_pointers__.p_isl_map_add_dims)
+#define isl_map_align_params (*isl_pointers__.p_isl_map_align_params)
+#define isl_map_apply_range (*isl_pointers__.p_isl_map_apply_range)
+#define isl_map_copy (*isl_pointers__.p_isl_map_copy)
+#define isl_map_dim (*isl_pointers__.p_isl_map_dim)
+#define isl_map_dump (*isl_pointers__.p_isl_map_dump)
+#define isl_map_equate (*isl_pointers__.p_isl_map_equate)
+#define isl_map_fix_si (*isl_pointers__.p_isl_map_fix_si)
+#define isl_map_flat_product (*isl_pointers__.p_isl_map_flat_product)
+#define isl_map_flat_range_product (*isl_pointers__.p_isl_map_flat_range_product)
+#define isl_map_free (*isl_pointers__.p_isl_map_free)
+#define isl_map_from_basic_map (*isl_pointers__.p_isl_map_from_basic_map)
+#define isl_map_from_pw_aff (*isl_pointers__.p_isl_map_from_pw_aff)
+#define isl_map_from_union_map (*isl_pointers__.p_isl_map_from_union_map)
+#define isl_map_get_ctx (*isl_pointers__.p_isl_map_get_ctx)
+#define isl_map_get_space (*isl_pointers__.p_isl_map_get_space)
+#define isl_map_get_tuple_id (*isl_pointers__.p_isl_map_get_tuple_id)
+#define isl_map_insert_dims (*isl_pointers__.p_isl_map_insert_dims)
+#define isl_map_intersect (*isl_pointers__.p_isl_map_intersect)
+#define isl_map_intersect_domain (*isl_pointers__.p_isl_map_intersect_domain)
+#define isl_map_intersect_range (*isl_pointers__.p_isl_map_intersect_range)
+#define isl_map_is_empty (*isl_pointers__.p_isl_map_is_empty)
+#define isl_map_lex_ge (*isl_pointers__.p_isl_map_lex_ge)
+#define isl_map_lex_le (*isl_pointers__.p_isl_map_lex_le)
+#define isl_map_n_out (*isl_pointers__.p_isl_map_n_out)
+#define isl_map_range (*isl_pointers__.p_isl_map_range)
+#define isl_map_set_tuple_id (*isl_pointers__.p_isl_map_set_tuple_id)
+#define isl_map_universe (*isl_pointers__.p_isl_map_universe)
+#define isl_options_set_on_error (*isl_pointers__.p_isl_options_set_on_error)
+#define isl_options_set_schedule_serialize_sccs (*isl_pointers__.p_isl_options_set_schedule_serialize_sccs)
+#define isl_printer_set_yaml_style (*isl_pointers__.p_isl_printer_set_yaml_style)
+#define isl_options_set_schedule_max_constant_term (*isl_pointers__.p_isl_options_set_schedule_max_constant_term)
+#define isl_options_set_schedule_maximize_band_depth (*isl_pointers__.p_isl_options_set_schedule_maximize_band_depth)
+#define isl_printer_free (*isl_pointers__.p_isl_printer_free)
+#define isl_printer_print_aff (*isl_pointers__.p_isl_printer_print_aff)
+#define isl_printer_print_constraint (*isl_pointers__.p_isl_printer_print_constraint)
+#define isl_printer_print_map (*isl_pointers__.p_isl_printer_print_map)
+#define isl_printer_print_set (*isl_pointers__.p_isl_printer_print_set)
+#define isl_printer_to_file (*isl_pointers__.p_isl_printer_to_file)
+#define isl_pw_aff_add (*isl_pointers__.p_isl_pw_aff_add)
+#define isl_pw_aff_alloc (*isl_pointers__.p_isl_pw_aff_alloc)
+#define isl_pw_aff_copy (*isl_pointers__.p_isl_pw_aff_copy)
+#define isl_pw_aff_eq_set (*isl_pointers__.p_isl_pw_aff_eq_set)
+#define isl_pw_aff_free (*isl_pointers__.p_isl_pw_aff_free)
+#define isl_pw_aff_from_aff (*isl_pointers__.p_isl_pw_aff_from_aff)
+#define isl_pw_aff_ge_set (*isl_pointers__.p_isl_pw_aff_ge_set)
+#define isl_pw_aff_gt_set (*isl_pointers__.p_isl_pw_aff_gt_set)
+#define isl_pw_aff_is_cst (*isl_pointers__.p_isl_pw_aff_is_cst)
+#define isl_pw_aff_le_set (*isl_pointers__.p_isl_pw_aff_le_set)
+#define isl_pw_aff_lt_set (*isl_pointers__.p_isl_pw_aff_lt_set)
+#define isl_pw_aff_mul (*isl_pointers__.p_isl_pw_aff_mul)
+#define isl_pw_aff_ne_set (*isl_pointers__.p_isl_pw_aff_ne_set)
+#define isl_pw_aff_nonneg_set (*isl_pointers__.p_isl_pw_aff_nonneg_set)
+#define isl_pw_aff_set_tuple_id (*isl_pointers__.p_isl_pw_aff_set_tuple_id)
+#define isl_pw_aff_sub (*isl_pointers__.p_isl_pw_aff_sub)
+#define isl_pw_aff_zero_set (*isl_pointers__.p_isl_pw_aff_zero_set)
+#define isl_schedule_free (*isl_pointers__.p_isl_schedule_free)
+#define isl_schedule_get_band_forest (*isl_pointers__.p_isl_schedule_get_band_forest)
+#define isl_set_add_constraint (*isl_pointers__.p_isl_set_add_constraint)
+#define isl_set_add_dims (*isl_pointers__.p_isl_set_add_dims)
+#define isl_set_apply (*isl_pointers__.p_isl_set_apply)
+#define isl_set_coalesce (*isl_pointers__.p_isl_set_coalesce)
+#define isl_set_copy (*isl_pointers__.p_isl_set_copy)
+#define isl_set_dim (*isl_pointers__.p_isl_set_dim)
+#define isl_set_fix_si (*isl_pointers__.p_isl_set_fix_si)
+#define isl_set_free (*isl_pointers__.p_isl_set_free)
+#define isl_set_get_space (*isl_pointers__.p_isl_set_get_space)
+#define isl_set_get_tuple_id (*isl_pointers__.p_isl_set_get_tuple_id)
+#define isl_set_intersect (*isl_pointers__.p_isl_set_intersect)
+#define isl_set_is_empty (*isl_pointers__.p_isl_set_is_empty)
+#define isl_set_n_dim (*isl_pointers__.p_isl_set_n_dim)
+#define isl_set_nat_universe (*isl_pointers__.p_isl_set_nat_universe)
+#define isl_set_project_out (*isl_pointers__.p_isl_set_project_out)
+#define isl_set_set_tuple_id (*isl_pointers__.p_isl_set_set_tuple_id)
+#define isl_set_universe (*isl_pointers__.p_isl_set_universe)
+#define isl_space_add_dims (*isl_pointers__.p_isl_space_add_dims)
+#define isl_space_alloc (*isl_pointers__.p_isl_space_alloc)
+#define isl_space_copy (*isl_pointers__.p_isl_space_copy)
+#define isl_space_dim (*isl_pointers__.p_isl_space_dim)
+#define isl_space_domain (*isl_pointers__.p_isl_space_domain)
+#define isl_space_find_dim_by_id (*isl_pointers__.p_isl_space_find_dim_by_id)
+#define isl_space_free (*isl_pointers__.p_isl_space_free)
+#define isl_space_from_domain (*isl_pointers__.p_isl_space_from_domain)
+#define isl_space_get_tuple_id (*isl_pointers__.p_isl_space_get_tuple_id)
+#define isl_space_params_alloc (*isl_pointers__.p_isl_space_params_alloc)
+#define isl_space_range (*isl_pointers__.p_isl_space_range)
+#define isl_space_set_alloc (*isl_pointers__.p_isl_space_set_alloc)
+#define isl_space_set_dim_id (*isl_pointers__.p_isl_space_set_dim_id)
+#define isl_space_set_tuple_id (*isl_pointers__.p_isl_space_set_tuple_id)
+#define isl_union_map_add_map (*isl_pointers__.p_isl_union_map_add_map)
+#define isl_union_map_align_params (*isl_pointers__.p_isl_union_map_align_params)
+#define isl_union_map_apply_domain (*isl_pointers__.p_isl_union_map_apply_domain)
+#define isl_union_map_apply_range (*isl_pointers__.p_isl_union_map_apply_range)
+#define isl_union_map_compute_flow (*isl_pointers__.p_isl_union_map_compute_flow)
+#define isl_union_map_copy (*isl_pointers__.p_isl_union_map_copy)
+#define isl_union_map_empty (*isl_pointers__.p_isl_union_map_empty)
+#define isl_union_map_flat_range_product (*isl_pointers__.p_isl_union_map_flat_range_product)
+#define isl_union_map_foreach_map (*isl_pointers__.p_isl_union_map_foreach_map)
+#define isl_union_map_free (*isl_pointers__.p_isl_union_map_free)
+#define isl_union_map_from_map (*isl_pointers__.p_isl_union_map_from_map)
+#define isl_union_map_get_ctx (*isl_pointers__.p_isl_union_map_get_ctx)
+#define isl_union_map_get_space (*isl_pointers__.p_isl_union_map_get_space)
+#define isl_union_map_gist_domain (*isl_pointers__.p_isl_union_map_gist_domain)
+#define isl_union_map_gist_range (*isl_pointers__.p_isl_union_map_gist_range)
+#define isl_union_map_intersect_domain (*isl_pointers__.p_isl_union_map_intersect_domain)
+#define isl_union_map_is_empty (*isl_pointers__.p_isl_union_map_is_empty)
+#define isl_union_map_subtract (*isl_pointers__.p_isl_union_map_subtract)
+#define isl_union_map_union (*isl_pointers__.p_isl_union_map_union)
+#define isl_union_set_add_set (*isl_pointers__.p_isl_union_set_add_set)
+#define isl_union_set_compute_schedule (*isl_pointers__.p_isl_union_set_compute_schedule)
+#define isl_union_set_copy (*isl_pointers__.p_isl_union_set_copy)
+#define isl_union_set_empty (*isl_pointers__.p_isl_union_set_empty)
+#define isl_union_set_from_set (*isl_pointers__.p_isl_union_set_from_set)
+#define isl_aff_add_constant_val (*isl_pointers__.p_isl_aff_add_constant_val)
+#define isl_aff_get_coefficient_val (*isl_pointers__.p_isl_aff_get_coefficient_val)
+#define isl_aff_get_ctx (*isl_pointers__.p_isl_aff_get_ctx)
+#define isl_aff_mod_val (*isl_pointers__.p_isl_aff_mod_val)
+#define isl_ast_build_ast_from_schedule (*isl_pointers__.p_isl_ast_build_ast_from_schedule)
+#define isl_ast_build_free (*isl_pointers__.p_isl_ast_build_free)
+#define isl_ast_build_from_context (*isl_pointers__.p_isl_ast_build_from_context)
+#define isl_ast_build_get_ctx (*isl_pointers__.p_isl_ast_build_get_ctx)
+#define isl_ast_build_get_schedule (*isl_pointers__.p_isl_ast_build_get_schedule)
+#define isl_ast_build_get_schedule_space (*isl_pointers__.p_isl_ast_build_get_schedule_space)
+#define isl_ast_build_set_before_each_for (*isl_pointers__.p_isl_ast_build_set_before_each_for)
+#define isl_ast_build_set_options (*isl_pointers__.p_isl_ast_build_set_options)
+#define isl_ast_expr_free (*isl_pointers__.p_isl_ast_expr_free)
+#define isl_ast_expr_from_val (*isl_pointers__.p_isl_ast_expr_from_val)
+#define isl_ast_expr_get_ctx (*isl_pointers__.p_isl_ast_expr_get_ctx)
+#define isl_ast_expr_get_id (*isl_pointers__.p_isl_ast_expr_get_id)
+#define isl_ast_expr_get_op_arg (*isl_pointers__.p_isl_ast_expr_get_op_arg)
+#define isl_ast_expr_get_op_n_arg (*isl_pointers__.p_isl_ast_expr_get_op_n_arg)
+#define isl_ast_expr_get_op_type (*isl_pointers__.p_isl_ast_expr_get_op_type)
+#define isl_ast_expr_get_type (*isl_pointers__.p_isl_ast_expr_get_type)
+#define isl_ast_expr_get_val (*isl_pointers__.p_isl_ast_expr_get_val)
+#define isl_ast_expr_sub (*isl_pointers__.p_isl_ast_expr_sub)
+#define isl_ast_node_block_get_children (*isl_pointers__.p_isl_ast_node_block_get_children)
+#define isl_ast_node_for_get_body (*isl_pointers__.p_isl_ast_node_for_get_body)
+#define isl_ast_node_for_get_cond (*isl_pointers__.p_isl_ast_node_for_get_cond)
+#define isl_ast_node_for_get_inc (*isl_pointers__.p_isl_ast_node_for_get_inc)
+#define isl_ast_node_for_get_init (*isl_pointers__.p_isl_ast_node_for_get_init)
+#define isl_ast_node_for_get_iterator (*isl_pointers__.p_isl_ast_node_for_get_iterator)
+#define isl_ast_node_free (*isl_pointers__.p_isl_ast_node_free)
+#define isl_ast_node_get_annotation (*isl_pointers__.p_isl_ast_node_get_annotation)
+#define isl_ast_node_get_type (*isl_pointers__.p_isl_ast_node_get_type)
+#define isl_ast_node_if_get_cond (*isl_pointers__.p_isl_ast_node_if_get_cond)
+#define isl_ast_node_if_get_else (*isl_pointers__.p_isl_ast_node_if_get_else)
+#define isl_ast_node_if_get_then (*isl_pointers__.p_isl_ast_node_if_get_then)
+#define isl_ast_node_list_free (*isl_pointers__.p_isl_ast_node_list_free)
+#define isl_ast_node_list_get_ast_node (*isl_pointers__.p_isl_ast_node_list_get_ast_node)
+#define isl_ast_node_list_n_ast_node (*isl_pointers__.p_isl_ast_node_list_n_ast_node)
+#define isl_ast_node_user_get_expr (*isl_pointers__.p_isl_ast_node_user_get_expr)
+#define isl_constraint_set_coefficient_val (*isl_pointers__.p_isl_constraint_set_coefficient_val)
+#define isl_constraint_set_constant_val (*isl_pointers__.p_isl_constraint_set_constant_val)
+#define isl_id_get_user (*isl_pointers__.p_isl_id_get_user)
+#define isl_local_space_get_ctx (*isl_pointers__.p_isl_local_space_get_ctx)
+#define isl_map_fix_val (*isl_pointers__.p_isl_map_fix_val)
+#define isl_options_set_ast_build_atomic_upper_bound (*isl_pointers__.p_isl_options_set_ast_build_atomic_upper_bound)
+#define isl_printer_print_ast_node (*isl_pointers__.p_isl_printer_print_ast_node)
+#define isl_printer_print_str (*isl_pointers__.p_isl_printer_print_str)
+#define isl_printer_set_output_format (*isl_pointers__.p_isl_printer_set_output_format)
+#define isl_pw_aff_mod_val (*isl_pointers__.p_isl_pw_aff_mod_val)
+#define isl_schedule_constraints_compute_schedule (*isl_pointers__.p_isl_schedule_constraints_compute_schedule)
+#define isl_schedule_constraints_on_domain (*isl_pointers__.p_isl_schedule_constraints_on_domain)
+#define isl_schedule_constraints_set_coincidence (*isl_pointers__.p_isl_schedule_constraints_set_coincidence)
+#define isl_schedule_constraints_set_proximity (*isl_pointers__.p_isl_schedule_constraints_set_proximity)
+#define isl_schedule_constraints_set_validity (*isl_pointers__.p_isl_schedule_constraints_set_validity)
+#define isl_set_get_dim_id (*isl_pointers__.p_isl_set_get_dim_id)
+#define isl_set_max_val (*isl_pointers__.p_isl_set_max_val)
+#define isl_set_min_val (*isl_pointers__.p_isl_set_min_val)
+#define isl_set_params (*isl_pointers__.p_isl_set_params)
+#define isl_space_align_params (*isl_pointers__.p_isl_space_align_params)
+#define isl_space_map_from_domain_and_range (*isl_pointers__.p_isl_space_map_from_domain_and_range)
+#define isl_space_set_tuple_name (*isl_pointers__.p_isl_space_set_tuple_name)
+#define isl_space_wrap (*isl_pointers__.p_isl_space_wrap)
+#define isl_union_map_from_domain_and_range (*isl_pointers__.p_isl_union_map_from_domain_and_range)
+#define isl_union_map_range (*isl_pointers__.p_isl_union_map_range)
+#define isl_union_set_union (*isl_pointers__.p_isl_union_set_union)
+#define isl_union_set_universe (*isl_pointers__.p_isl_union_set_universe)
+#define isl_val_2exp (*isl_pointers__.p_isl_val_2exp)
+#define isl_val_add_ui (*isl_pointers__.p_isl_val_add_ui)
+#define isl_val_copy (*isl_pointers__.p_isl_val_copy)
+#define isl_val_free (*isl_pointers__.p_isl_val_free)
+#define isl_val_int_from_si (*isl_pointers__.p_isl_val_int_from_si)
+#define isl_val_int_from_ui (*isl_pointers__.p_isl_val_int_from_ui)
+#define isl_val_mul (*isl_pointers__.p_isl_val_mul)
+#define isl_val_neg (*isl_pointers__.p_isl_val_neg)
+#define isl_val_sub (*isl_pointers__.p_isl_val_sub)
+#define isl_printer_print_union_map (*isl_pointers__.p_isl_printer_print_union_map)
+#define isl_pw_aff_get_ctx (*isl_pointers__.p_isl_pw_aff_get_ctx)
+#define isl_val_is_int (*isl_pointers__.p_isl_val_is_int)
+#define isl_ctx_get_max_operations (*isl_pointers__.p_isl_ctx_get_max_operations)
+#define isl_ctx_set_max_operations (*isl_pointers__.p_isl_ctx_set_max_operations)
+#define isl_ctx_last_error (*isl_pointers__.p_isl_ctx_last_error)
+#define isl_ctx_reset_operations (*isl_pointers__.p_isl_ctx_reset_operations)
+#define isl_map_coalesce (*isl_pointers__.p_isl_map_coalesce)
+#define isl_printer_print_schedule (*isl_pointers__.p_isl_printer_print_schedule)
+#define isl_set_set_dim_id (*isl_pointers__.p_isl_set_set_dim_id)
+#define isl_union_map_coalesce (*isl_pointers__.p_isl_union_map_coalesce)
+#define isl_multi_val_set_val (*isl_pointers__.p_isl_multi_val_set_val)
+#define isl_multi_val_zero (*isl_pointers__.p_isl_multi_val_zero)
+#define isl_options_set_schedule_max_coefficient (*isl_pointers__.p_isl_options_set_schedule_max_coefficient)
+#define isl_options_set_tile_scale_tile_loops (*isl_pointers__.p_isl_options_set_tile_scale_tile_loops)
+#define isl_schedule_copy (*isl_pointers__.p_isl_schedule_copy)
+#define isl_schedule_get_map (*isl_pointers__.p_isl_schedule_get_map)
+#define isl_schedule_map_schedule_node_bottom_up (*isl_pointers__.p_isl_schedule_map_schedule_node_bottom_up)
+#define isl_schedule_node_band_get_permutable (*isl_pointers__.p_isl_schedule_node_band_get_permutable)
+#define isl_schedule_node_band_get_space (*isl_pointers__.p_isl_schedule_node_band_get_space)
+#define isl_schedule_node_band_tile (*isl_pointers__.p_isl_schedule_node_band_tile)
+#define isl_schedule_node_child (*isl_pointers__.p_isl_schedule_node_child)
+#define isl_schedule_node_free (*isl_pointers__.p_isl_schedule_node_free)
+#define isl_schedule_node_get_child (*isl_pointers__.p_isl_schedule_node_get_child)
+#define isl_schedule_node_get_ctx (*isl_pointers__.p_isl_schedule_node_get_ctx)
+#define isl_schedule_node_get_type (*isl_pointers__.p_isl_schedule_node_get_type)
+#define isl_schedule_node_n_children (*isl_pointers__.p_isl_schedule_node_n_children)
+#define isl_union_map_is_equal (*isl_pointers__.p_isl_union_map_is_equal)
+#define isl_union_access_info_compute_flow (*isl_pointers__.p_isl_union_access_info_compute_flow)
+#define isl_union_access_info_from_sink (*isl_pointers__.p_isl_union_access_info_from_sink)
+#define isl_union_access_info_set_may_source (*isl_pointers__.p_isl_union_access_info_set_may_source)
+#define isl_union_access_info_set_must_source (*isl_pointers__.p_isl_union_access_info_set_must_source)
+#define isl_union_access_info_set_schedule (*isl_pointers__.p_isl_union_access_info_set_schedule)
+#define isl_union_flow_free (*isl_pointers__.p_isl_union_flow_free)
+#define isl_union_flow_get_may_dependence (*isl_pointers__.p_isl_union_flow_get_may_dependence)
+#define isl_union_flow_get_must_dependence (*isl_pointers__.p_isl_union_flow_get_must_dependence)
+#define isl_aff_var_on_domain (*isl_pointers__.p_isl_aff_var_on_domain)
+#define isl_multi_aff_from_aff (*isl_pointers__.p_isl_multi_aff_from_aff)
+#define isl_schedule_get_ctx (*isl_pointers__.p_isl_schedule_get_ctx)
+#define isl_multi_aff_set_tuple_id (*isl_pointers__.p_isl_multi_aff_set_tuple_id)
+#define isl_multi_aff_dim (*isl_pointers__.p_isl_multi_aff_dim)
+#define isl_schedule_get_domain (*isl_pointers__.p_isl_schedule_get_domain)
+#define isl_union_set_is_empty (*isl_pointers__.p_isl_union_set_is_empty)
+#define isl_union_set_get_space (*isl_pointers__.p_isl_union_set_get_space)
+#define isl_union_pw_multi_aff_empty (*isl_pointers__.p_isl_union_pw_multi_aff_empty)
+#define isl_union_set_foreach_set (*isl_pointers__.p_isl_union_set_foreach_set)
+#define isl_union_set_free (*isl_pointers__.p_isl_union_set_free)
+#define isl_multi_union_pw_aff_from_union_pw_multi_aff (*isl_pointers__.p_isl_multi_union_pw_aff_from_union_pw_multi_aff)
+#define isl_multi_union_pw_aff_apply_multi_aff (*isl_pointers__.p_isl_multi_union_pw_aff_apply_multi_aff)
+#define isl_schedule_insert_partial_schedule (*isl_pointers__.p_isl_schedule_insert_partial_schedule)
+#define isl_union_pw_multi_aff_free (*isl_pointers__.p_isl_union_pw_multi_aff_free)
+#define isl_pw_multi_aff_project_out_map (*isl_pointers__.p_isl_pw_multi_aff_project_out_map)
+#define isl_union_pw_multi_aff_add_pw_multi_aff (*isl_pointers__.p_isl_union_pw_multi_aff_add_pw_multi_aff)
+#define isl_schedule_from_domain (*isl_pointers__.p_isl_schedule_from_domain)
+#define isl_schedule_sequence (*isl_pointers__.p_isl_schedule_sequence)
+#define isl_ast_build_node_from_schedule (*isl_pointers__.p_isl_ast_build_node_from_schedule)
+#define isl_ast_node_mark_get_node (*isl_pointers__.p_isl_ast_node_mark_get_node)
+#define isl_schedule_node_band_member_get_ast_loop_type (*isl_pointers__.p_isl_schedule_node_band_member_get_ast_loop_type)
+#define isl_schedule_node_band_member_set_ast_loop_type (*isl_pointers__.p_isl_schedule_node_band_member_set_ast_loop_type)
+#define isl_val_n_abs_num_chunks (*isl_pointers__.p_isl_val_n_abs_num_chunks)
+#define isl_val_get_abs_num_chunks (*isl_pointers__.p_isl_val_get_abs_num_chunks)
+#define isl_val_int_from_chunks (*isl_pointers__.p_isl_val_int_from_chunks)
+#define isl_val_is_neg (*isl_pointers__.p_isl_val_is_neg)
+#define isl_version (*isl_pointers__.p_isl_version)
+#define isl_options_get_on_error (*isl_pointers__.p_isl_options_get_on_error)
+#define isl_ctx_reset_error (*isl_pointers__.p_isl_ctx_reset_error)
typedef struct poly_dr *poly_dr_p;
@@ -461,5 +1045,6 @@ extern void build_scops (vec<scop_p> *);
extern void dot_all_sese (FILE *, vec<sese_l> &);
extern void dot_sese (sese_l &);
extern void dot_cfg ();
+extern const char *get_isl_version (bool);
#endif
--- gcc/graphite.c.jj 2015-11-04 14:15:32.000000000 +0100
+++ gcc/graphite.c 2015-11-04 14:56:02.645536409 +0100
@@ -60,6 +60,35 @@ along with GCC; see the file COPYING3.
#include "tree-into-ssa.h"
#include "graphite.h"
+__typeof (isl_pointers__) isl_pointers__;
+
+static bool
+init_isl_pointers (void)
+{
+ void *h;
+
+ if (isl_pointers__.inited)
+ return isl_pointers__.h != NULL;
+ h = dlopen ("libisl.so.15", RTLD_LAZY);
+ isl_pointers__.h = h;
+ if (h == NULL)
+ return false;
+#define DYNSYM(x) \
+ do \
+ { \
+ union { __typeof (isl_pointers__.p_##x) p; void *q; } u; \
+ u.q = dlsym (h, #x); \
+ if (u.q == NULL) \
+ return false; \
+ isl_pointers__.p_##x = u.p; \
+ } \
+ while (0)
+ DYNSYMS
+#undef DYNSYM
+ isl_pointers__.inited = true;
+ return true;
+}
+
/* Print global statistics to FILE. */
static void
@@ -365,6 +394,15 @@ graphite_transform_loops (void)
if (parallelized_function_p (cfun->decl))
return;
+ if (number_of_loops (cfun) <= 1)
+ return;
+
+ if (!init_isl_pointers ())
+ {
+ sorry ("Graphite loop optimizations cannot be used");
+ return;
+ }
+
calculate_dominance_info (CDI_DOMINATORS);
/* We rely on post-dominators during merging of SESE regions so those
@@ -455,6 +493,14 @@ graphite_transform_loops (void)
}
}
+const char *
+get_isl_version (bool force)
+{
+ if (force)
+ init_isl_pointers ();
+ return (isl_pointers__.inited && isl_version) ? isl_version () : "none";
+}
+
#else /* If isl is not available: #ifndef HAVE_isl. */
static void
--- gcc/toplev.c.jj 2017-02-19 13:02:31.000000000 +0100
+++ gcc/toplev.c 2017-02-19 16:50:25.536301350 +0100
@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3.
#ifdef HAVE_isl
#include <isl/version.h>
+extern const char *get_isl_version (bool);
#endif
static void general_init (const char *, bool);
@@ -683,7 +684,7 @@ print_version (FILE *file, const char *i
#ifndef HAVE_isl
"none"
#else
- isl_version ()
+ get_isl_version (*indent == 0)
#endif
);
if (strcmp (GCC_GMP_STRINGIFY_VERSION, gmp_version))

View File

@ -0,0 +1,14 @@
--- libgcc/config/t-slibgcc.mp 2018-10-03 16:07:00.336990246 -0400
+++ libgcc/config/t-slibgcc 2018-10-03 16:06:26.719946740 -0400
@@ -30,9 +30,10 @@ SHLIB_LC = -lc
SHLIB_MAKE_SOLINK = $(LN_S) $(SHLIB_SONAME) $(SHLIB_DIR)/$(SHLIB_SOLINK)
SHLIB_INSTALL_SOLINK = $(LN_S) $(SHLIB_SONAME) \
$(DESTDIR)$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
+SHLIB_EXTRA_LDFLAGS = -Wl,-z,relro -Wl,-z,now
SHLIB_LINK = $(CC) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
- $(SHLIB_LDFLAGS) \
+ $(SHLIB_LDFLAGS) $(SHLIB_EXTRA_LDFLAGS) \
-o $(SHLIB_DIR)/$(SHLIB_SONAME).tmp @multilib_flags@ \
$(SHLIB_OBJS) $(SHLIB_LC) && \
rm -f $(SHLIB_DIR)/$(SHLIB_SOLINK) && \

View File

@ -0,0 +1,17 @@
2008-06-09 Jakub Jelinek <jakub@redhat.com>
* omp.h.in (omp_nest_lock_t): Fix up for Linux multilibs.
--- libgomp/omp.h.in.jj 2008-06-09 13:34:05.000000000 +0200
+++ libgomp/omp.h.in 2008-06-09 13:34:48.000000000 +0200
@@ -42,8 +42,8 @@ typedef struct
typedef struct
{
- unsigned char _x[@OMP_NEST_LOCK_SIZE@]
- __attribute__((__aligned__(@OMP_NEST_LOCK_ALIGN@)));
+ unsigned char _x[8 + sizeof (void *)]
+ __attribute__((__aligned__(sizeof (void *))));
} omp_nest_lock_t;
#endif

View File

@ -0,0 +1,24 @@
--- libstdc++-v3/doc/html/index.html.jj 2011-01-03 12:53:21.282829010 +0100
+++ libstdc++-v3/doc/html/index.html 2011-01-04 18:06:28.999851145 +0100
@@ -5,6 +5,8 @@
<a class="link" href="https://www.fsf.org" target="_top">FSF
</a>
</p><p>
+ Release 8.1.1
+ </p><p>
Permission is granted to copy, distribute and/or modify this
document under the terms of the GNU Free Documentation
License, Version 1.2 or any later version published by the
--- libstdc++-v3/doc/html/api.html.jj 2011-01-03 12:53:21.000000000 +0100
+++ libstdc++-v3/doc/html/api.html 2011-01-04 18:12:01.672757784 +0100
@@ -20,7 +20,9 @@
member functions for the library classes, finding out what is in a
particular include file, looking at inheritance diagrams, etc.
</p><p>
- The API documentation, rendered into HTML, can be viewed online
+ The API documentation, rendered into HTML, can be viewed locally
+ <a class="link" href="api/index.html" target="_top">for the 8.1.1 release</a>,
+ online
<a class="link" href="http://gcc.gnu.org/onlinedocs/" target="_top">for each GCC release</a>
and
<a class="link" href="http://gcc.gnu.org/onlinedocs/libstdc++/latest-doxygen/index.html" target="_top">

View File

@ -0,0 +1,27 @@
libtool sucks.
--- ltmain.sh.jj 2007-12-07 14:53:21.000000000 +0100
+++ ltmain.sh 2008-09-05 21:51:48.000000000 +0200
@@ -5394,6 +5394,7 @@ EOF
rpath="$finalize_rpath"
test "$mode" != relink && rpath="$compile_rpath$rpath"
for libdir in $rpath; do
+ case "$libdir" in /usr/lib|/usr/lib64|/usr/lib/../lib|/usr/lib/../lib64) continue;; esac
if test -n "$hardcode_libdir_flag_spec"; then
if test -n "$hardcode_libdir_separator"; then
if test -z "$hardcode_libdirs"; then
@@ -6071,6 +6072,7 @@ EOF
rpath=
hardcode_libdirs=
for libdir in $compile_rpath $finalize_rpath; do
+ case "$libdir" in /usr/lib|/usr/lib64|/usr/lib/../lib|/usr/lib/../lib64) continue;; esac
if test -n "$hardcode_libdir_flag_spec"; then
if test -n "$hardcode_libdir_separator"; then
if test -z "$hardcode_libdirs"; then
@@ -6120,6 +6122,7 @@ EOF
rpath=
hardcode_libdirs=
for libdir in $finalize_rpath; do
+ case "$libdir" in /usr/lib|/usr/lib64|/usr/lib/../lib|/usr/lib/../lib64) continue;; esac
if test -n "$hardcode_libdir_flag_spec"; then
if test -n "$hardcode_libdir_separator"; then
if test -z "$hardcode_libdirs"; then

17
SOURCES/gcc8-mcet.patch Normal file
View File

@ -0,0 +1,17 @@
2018-04-24 Jakub Jelinek <jakub@redhat.com>
* config/i386/i386.opt (mcet): Temporarily re-add as alias to -mshstk.
--- gcc/config/i386/i386.opt (revision 259613)
+++ gcc/config/i386/i386.opt (revision 259612)
@@ -1006,6 +1006,10 @@ mgeneral-regs-only
Target Report RejectNegative Mask(GENERAL_REGS_ONLY) Var(ix86_target_flags) Save
Generate code which uses only the general registers.
+mcet
+Target Undocumented Alias(mshstk)
+;; Deprecated
+
mshstk
Target Report Mask(ISA_SHSTK) Var(ix86_isa_flags) Save
Enable shadow stack built-in functions from Control-flow Enforcement

View File

@ -0,0 +1,50 @@
2010-02-08 Roland McGrath <roland@redhat.com>
* config/rs6000/sysv4.h (LINK_EH_SPEC): Pass --no-add-needed to the
linker.
* config/gnu-user.h (LINK_EH_SPEC): Likewise.
* config/alpha/elf.h (LINK_EH_SPEC): Likewise.
* config/ia64/linux.h (LINK_EH_SPEC): Likewise.
--- gcc/config/alpha/elf.h.jj 2011-01-03 12:52:31.118056764 +0100
+++ gcc/config/alpha/elf.h 2011-01-04 18:14:10.931874160 +0100
@@ -168,5 +168,5 @@ extern int alpha_this_gpdisp_sequence_nu
I imagine that other systems will catch up. In the meantime, it
doesn't harm to make sure that the data exists to be used later. */
#if defined(HAVE_LD_EH_FRAME_HDR)
-#define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} "
+#define LINK_EH_SPEC "--no-add-needed %{!static|static-pie:--eh-frame-hdr} "
#endif
--- gcc/config/ia64/linux.h.jj 2011-01-03 13:02:11.462994522 +0100
+++ gcc/config/ia64/linux.h 2011-01-04 18:14:10.931874160 +0100
@@ -76,7 +76,7 @@ do { \
Signalize that because we have fde-glibc, we don't need all C shared libs
linked against -lgcc_s. */
#undef LINK_EH_SPEC
-#define LINK_EH_SPEC ""
+#define LINK_EH_SPEC "--no-add-needed "
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ia64_soft_fp_init_libfuncs
--- gcc/config/gnu-user.h.jj 2011-01-03 12:53:03.739057299 +0100
+++ gcc/config/gnu-user.h 2011-01-04 18:14:10.932814884 +0100
@@ -133,7 +133,7 @@ see the files COPYING3 and COPYING.RUNTI
#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC
#if defined(HAVE_LD_EH_FRAME_HDR)
-#define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} "
+#define LINK_EH_SPEC "--no-add-needed %{!static|static-pie:--eh-frame-hdr} "
#endif
#undef LINK_GCC_C_SEQUENCE_SPEC
--- gcc/config/rs6000/sysv4.h.jj 2011-01-03 13:02:18.255994215 +0100
+++ gcc/config/rs6000/sysv4.h 2011-01-04 18:14:10.933888871 +0100
@@ -816,7 +816,7 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEF
-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}"
#if defined(HAVE_LD_EH_FRAME_HDR)
-# define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} "
+# define LINK_EH_SPEC "--no-add-needed %{!static|static-pie:--eh-frame-hdr} "
#endif
#define CPP_OS_LINUX_SPEC "-D__unix__ -D__gnu_linux__ -D__linux__ \

View File

@ -0,0 +1,84 @@
PR libgcc/60790
x86: Do not assume ELF constructors run before IFUNC resolvers.
* config/x86/host-config.h (libat_feat1_ecx, libat_feat1_edx):
Remove declarations.
(__libat_feat1, __libat_feat1_init): Declare.
(FEAT1_REGISTER): Define.
(load_feat1): New function.
(IFUNC_COND_1): Adjust.
* config/x86/init.c (libat_feat1_ecx, libat_feat1_edx)
(init_cpuid): Remove definitions.
(__libat_feat1): New variable.
(__libat_feat1_init): New function.
--- libatomic/config/x86/host-config.h (revision 264990)
+++ libatomic/config/x86/host-config.h (working copy)
@@ -25,13 +25,39 @@
#if HAVE_IFUNC
#include <cpuid.h>
-extern unsigned int libat_feat1_ecx HIDDEN;
-extern unsigned int libat_feat1_edx HIDDEN;
+#ifdef __x86_64__
+# define FEAT1_REGISTER ecx
+#else
+# define FEAT1_REGISTER edx
+#endif
+/* Value of the CPUID feature register FEAT1_REGISTER for the cmpxchg
+ bit for IFUNC_COND_1 below. */
+extern unsigned int __libat_feat1 HIDDEN;
+
+/* Initialize __libat_feat1 and return its value. */
+unsigned int __libat_feat1_init (void) HIDDEN;
+
+/* Return the value of the relevant feature register for the relevant
+ cmpxchg bit, or 0 if there is no CPUID support. */
+static inline unsigned int
+__attribute__ ((const))
+load_feat1 (void)
+{
+ /* See the store in __libat_feat1_init. */
+ unsigned int feat1 = __atomic_load_n (&__libat_feat1, __ATOMIC_RELAXED);
+ if (feat1 == 0)
+ /* Assume that initialization has not happened yet. This may get
+ called repeatedly if the CPU does not have any feature bits at
+ all. */
+ feat1 = __libat_feat1_init ();
+ return feat1;
+}
+
#ifdef __x86_64__
-# define IFUNC_COND_1 (libat_feat1_ecx & bit_CMPXCHG16B)
+# define IFUNC_COND_1 (load_feat1 () & bit_CMPXCHG16B)
#else
-# define IFUNC_COND_1 (libat_feat1_edx & bit_CMPXCHG8B)
+# define IFUNC_COND_1 (load_feat1 () & bit_CMPXCHG8B)
#endif
#ifdef __x86_64__
--- libatomic/config/x86/init.c (revision 264990)
+++ libatomic/config/x86/init.c (working copy)
@@ -26,13 +26,17 @@
#if HAVE_IFUNC
-unsigned int libat_feat1_ecx, libat_feat1_edx;
+unsigned int __libat_feat1;
-static void __attribute__((constructor))
-init_cpuid (void)
+unsigned int
+__libat_feat1_init (void)
{
- unsigned int eax, ebx;
- __get_cpuid (1, &eax, &ebx, &libat_feat1_ecx, &libat_feat1_edx);
+ unsigned int eax, ebx, ecx, edx;
+ FEAT1_REGISTER = 0;
+ __get_cpuid (1, &eax, &ebx, &ecx, &edx);
+ /* See the load in load_feat1. */
+ __atomic_store_n (&__libat_feat1, FEAT1_REGISTER, __ATOMIC_RELAXED);
+ return FEAT1_REGISTER;
}
#endif /* HAVE_IFUNC */

View File

@ -0,0 +1,445 @@
--- gcc/config/aarch64/aarch64.c
+++ gcc/config/aarch64/aarch64.c
@@ -3799,7 +3799,14 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
output_asm_insn ("sub\t%0, %0, %1", xops);
/* Probe at TEST_ADDR. */
- output_asm_insn ("str\txzr, [%0]", xops);
+ if (flag_stack_clash_protection)
+ {
+ gcc_assert (xops[0] == stack_pointer_rtx);
+ xops[1] = GEN_INT (PROBE_INTERVAL - 8);
+ output_asm_insn ("str\txzr, [%0, %1]", xops);
+ }
+ else
+ output_asm_insn ("str\txzr, [%0]", xops);
/* Test if TEST_ADDR == LAST_ADDR. */
xops[1] = reg2;
@@ -4589,6 +4596,133 @@ aarch64_set_handled_components (sbitmap components)
cfun->machine->reg_is_wrapped_separately[regno] = true;
}
+/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
+ registers. */
+
+static void
+aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ poly_int64 poly_size)
+{
+ HOST_WIDE_INT size;
+ if (!poly_size.is_constant (&size))
+ {
+ sorry ("stack probes for SVE frames");
+ return;
+ }
+
+ HOST_WIDE_INT probe_interval
+ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
+ HOST_WIDE_INT guard_size
+ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
+ HOST_WIDE_INT guard_used_by_caller = 1024;
+
+ /* SIZE should be large enough to require probing here, i.e. it
+ must be at least GUARD_SIZE - GUARD_USED_BY_CALLER.
+
+ We can allocate GUARD_SIZE - GUARD_USED_BY_CALLER as a single chunk
+ without any probing. */
+ gcc_assert (size >= guard_size - guard_used_by_caller);
+ aarch64_sub_sp (temp1, temp2, guard_size - guard_used_by_caller, true);
+ HOST_WIDE_INT orig_size = size;
+ size -= (guard_size - guard_used_by_caller);
+
+ HOST_WIDE_INT rounded_size = size & -probe_interval;
+ HOST_WIDE_INT residual = size - rounded_size;
+
+ /* We can handle a small number of allocations/probes inline. Otherwise
+ punt to a loop. */
+ if (rounded_size && rounded_size <= 4 * probe_interval)
+ {
+ /* We don't use aarch64_sub_sp here because we don't want to
+ repeatedly load TEMP1. */
+ rtx step = GEN_INT (-probe_interval);
+ if (probe_interval > ARITH_FACTOR)
+ {
+ emit_move_insn (temp1, step);
+ step = temp1;
+ }
+
+ for (HOST_WIDE_INT i = 0; i < rounded_size; i += probe_interval)
+ {
+ rtx_insn *insn = emit_insn (gen_add2_insn (stack_pointer_rtx, step));
+ add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
+
+ if (probe_interval > ARITH_FACTOR)
+ {
+ RTX_FRAME_RELATED_P (insn) = 1;
+ rtx adj = plus_constant (Pmode, stack_pointer_rtx, -probe_interval);
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
+ gen_rtx_SET (stack_pointer_rtx, adj));
+ }
+
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+ (probe_interval
+ - GET_MODE_SIZE (word_mode))));
+ emit_insn (gen_blockage ());
+ }
+ dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
+ }
+ else if (rounded_size)
+ {
+ /* Compute the ending address. */
+ unsigned int scratchreg = REGNO (temp1);
+ emit_move_insn (temp1, GEN_INT (-rounded_size));
+ rtx_insn *insn
+ = emit_insn (gen_add3_insn (temp1, stack_pointer_rtx, temp1));
+
+ /* For the initial allocation, we don't have a frame pointer
+ set up, so we always need CFI notes. If we're doing the
+ final allocation, then we may have a frame pointer, in which
+ case it is the CFA, otherwise we need CFI notes.
+
+ We can determine which allocation we are doing by looking at
+ the temporary register. IP0 is the initial allocation, IP1
+ is the final allocation. */
+ if (scratchreg == IP0_REGNUM || !frame_pointer_needed)
+ {
+ /* We want the CFA independent of the stack pointer for the
+ duration of the loop. */
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (Pmode, temp1,
+ (rounded_size + (orig_size - size))));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* This allocates and probes the stack.
+
+ It also probes at a 4k interval regardless of the value of
+ PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL. */
+ insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx,
+ stack_pointer_rtx, temp1));
+
+ /* Now reset the CFA register if needed. */
+ if (scratchreg == IP0_REGNUM || !frame_pointer_needed)
+ {
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (Pmode, stack_pointer_rtx,
+ (rounded_size + (orig_size - size))));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ emit_insn (gen_blockage ());
+ dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
+ }
+ else
+ dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
+
+ /* Handle any residuals.
+ Note that any residual must be probed. */
+ if (residual)
+ {
+ aarch64_sub_sp (temp1, temp2, residual, true);
+ add_reg_note (get_last_insn (), REG_STACK_CHECK, const0_rtx);
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+ (residual - GET_MODE_SIZE (word_mode))));
+ emit_insn (gen_blockage ());
+ }
+ return;
+}
+
/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
is saved at BASE + OFFSET. */
@@ -4686,7 +4820,54 @@ aarch64_expand_prologue (void)
rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
- aarch64_sub_sp (ip0_rtx, ip1_rtx, initial_adjust, true);
+ /* We do not fully protect aarch64 against stack clash style attacks
+ as doing so would be prohibitively expensive with less utility over
+ time as newer compilers are deployed.
+
+ We assume the guard is at least 64k. Furthermore, we assume that
+ the caller has not pushed the stack pointer more than 1k into
+ the guard. A caller that pushes the stack pointer more than 1k into
+ the guard is considered invalid.
+
+ Note that the caller's ability to push the stack pointer into the
+ guard is a function of the number and size of outgoing arguments and/or
+ dynamic stack allocations due to the mandatory save of the link register
+ in the caller's frame.
+
+ With those assumptions the callee can allocate up to 63k of stack
+ space without probing.
+
+ When probing is needed, we emit a probe at the start of the prologue
+ and every PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes thereafter.
+
+ We have to track how much space has been allocated, but we do not
+ track stores into the stack as implicit probes except for the
+ fp/lr store. */
+ HOST_WIDE_INT guard_size
+ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
+ HOST_WIDE_INT guard_used_by_caller = 1024;
+ if (flag_stack_clash_protection)
+ {
+ if (known_eq (frame_size, 0))
+ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
+ else if (known_lt (initial_adjust, guard_size - guard_used_by_caller)
+ && known_lt (final_adjust, guard_size - guard_used_by_caller))
+ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
+ }
+
+ /* In theory we should never have both an initial adjustment
+ and a callee save adjustment. Verify that is the case since the
+ code below does not handle it for -fstack-clash-protection. */
+ gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
+
+ /* Only probe if the initial adjustment is larger than the guard
+ less the amount of the guard reserved for use by the caller's
+ outgoing args. */
+ if (flag_stack_clash_protection
+ && maybe_ge (initial_adjust, guard_size - guard_used_by_caller))
+ aarch64_allocate_and_probe_stack_space (ip0_rtx, ip1_rtx, initial_adjust);
+ else
+ aarch64_sub_sp (ip0_rtx, ip1_rtx, initial_adjust, true);
if (callee_adjust != 0)
aarch64_push_regs (reg1, reg2, callee_adjust);
@@ -4742,7 +4923,31 @@ aarch64_expand_prologue (void)
callee_adjust != 0 || emit_frame_chain);
aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
callee_adjust != 0 || emit_frame_chain);
- aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
+
+ /* We may need to probe the final adjustment as well. */
+ if (flag_stack_clash_protection && maybe_ne (final_adjust, 0))
+ {
+ /* First probe if the final adjustment is larger than the guard size
+ less the amount of the guard reserved for use by the caller's
+ outgoing args. */
+ if (maybe_ge (final_adjust, guard_size - guard_used_by_caller))
+ aarch64_allocate_and_probe_stack_space (ip1_rtx, ip0_rtx,
+ final_adjust);
+ else
+ aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
+
+ /* We must also probe if the final adjustment is larger than the guard
+ that is assumed used by the caller. This may be sub-optimal. */
+ if (maybe_ge (final_adjust, guard_used_by_caller))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "Stack clash aarch64 large outgoing arg, probing\n");
+ emit_stack_probe (stack_pointer_rtx);
+ }
+ }
+ else
+ aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
}
/* Return TRUE if we can use a simple_return insn.
@@ -10476,6 +10681,12 @@ aarch64_override_options_internal (struct gcc_options *opts)
&& opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
opts->x_flag_prefetch_loop_arrays = 1;
+ /* We assume the guard page is 64k. */
+ maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
+ 16,
+ opts->x_param_values,
+ global_options_set.x_param_values);
+
aarch64_override_options_after_change_1 (opts);
}
@@ -17161,6 +17372,28 @@ aarch64_sched_can_speculate_insn (rtx_insn *insn)
}
}
+/* It has been decided to allow up to 1kb of outgoing argument
+ space to be allocated w/o probing. If more than 1kb of outgoing
+ argument space is allocated, then it must be probed and the last
+ probe must occur no more than 1kbyte away from the end of the
+ allocated space.
+
+ This implies that the residual part of an alloca allocation may
+ need probing in cases where the generic code might not otherwise
+ think a probe is needed.
+
+ This target hook returns TRUE when allocating RESIDUAL bytes of
+ alloca space requires an additional probe, otherwise FALSE is
+ returned. */
+
+static bool
+aarch64_stack_clash_protection_final_dynamic_probe (rtx residual)
+{
+ return (residual == CONST0_RTX (Pmode)
+ || GET_CODE (residual) != CONST_INT
+ || INTVAL (residual) >= 1024);
+}
+
/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
static int
@@ -17669,6 +17902,10 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
+#undef TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE
+#define TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE \
+ aarch64_stack_clash_protection_final_dynamic_probe
+
#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes
--- gcc/config/aarch64/aarch64.md
+++ gcc/config/aarch64/aarch64.md
@@ -5812,7 +5812,7 @@
)
(define_insn "probe_stack_range"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=rk")
(unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
(match_operand:DI 2 "register_operand" "r")]
UNSPECV_PROBE_STACK_RANGE))]
--- gcc/testsuite/gcc.target/aarch64/stack-check-12.c
+++ gcc/testsuite/gcc.target/aarch64/stack-check-12.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+
+extern void arf (unsigned long int *, unsigned long int *);
+void
+frob ()
+{
+ unsigned long int num[1000];
+ unsigned long int den[1000];
+ arf (den, num);
+}
+
+/* This verifies that the scheduler did not break the dependencies
+ by adjusting the offsets within the probe and that the scheduler
+ did not reorder around the stack probes. */
+/* { dg-final { scan-assembler-times "sub\\tsp, sp, #4096\\n\\tstr\\txzr, .sp, 4088." 3 } } */
+
+
+
--- gcc/testsuite/gcc.target/aarch64/stack-check-13.c
+++ gcc/testsuite/gcc.target/aarch64/stack-check-13.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+
+#define ARG32(X) X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+#define ARG192(X) ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X)
+void out1(ARG192(__int128));
+int t1(int);
+
+int t3(int x)
+{
+ if (x < 1000)
+ return t1 (x) + 1;
+
+ out1 (ARG192(1));
+ return 0;
+}
+
+
+
+/* This test creates a large (> 1k) outgoing argument area that needs
+ to be probed. We don't test the exact size of the space or the
+ exact offset to make the test a little less sensitive to trivial
+ output changes. */
+/* { dg-final { scan-assembler-times "sub\\tsp, sp, #....\\n\\tstr\\txzr, \\\[sp" 1 } } */
+
+
+
--- gcc/testsuite/gcc.target/aarch64/stack-check-14.c
+++ gcc/testsuite/gcc.target/aarch64/stack-check-14.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+
+int t1(int);
+
+int t2(int x)
+{
+ char *p = __builtin_alloca (4050);
+ x = t1 (x);
+ return p[x];
+}
+
+
+/* This test has a constant sized alloca that is smaller than the
+ probe interval. But it actually requires two probes instead
+ of one because of the optimistic assumptions we made in the
+ aarch64 prologue code WRT probing state.
+
+ The form can change quite a bit so we just check for two
+ probes without looking at the actual address. */
+/* { dg-final { scan-assembler-times "str\\txzr," 2 } } */
+
+
+
--- gcc/testsuite/gcc.target/aarch64/stack-check-15.c
+++ gcc/testsuite/gcc.target/aarch64/stack-check-15.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+
+int t1(int);
+
+int t2(int x)
+{
+ char *p = __builtin_alloca (x);
+ x = t1 (x);
+ return p[x];
+}
+
+
+/* This test has a variable sized alloca. It requires 3 probes.
+ One in the loop, one for the residual and one at the end of the
+ alloca area.
+
+ The form can change quite a bit so we just check for three
+ probes without looking at the actual address. */
+/* { dg-final { scan-assembler-times "str\\txzr," 3 } } */
+
+
+
--- gcc/testsuite/lib/target-supports.exp
+++ gcc/testsuite/lib/target-supports.exp
@@ -9201,14 +9201,9 @@ proc check_effective_target_autoincdec { } {
#
proc check_effective_target_supports_stack_clash_protection { } {
- # Temporary until the target bits are fully ACK'd.
-# if { [istarget aarch*-*-*] } {
-# return 1
-# }
-
if { [istarget x86_64-*-*] || [istarget i?86-*-*]
|| [istarget powerpc*-*-*] || [istarget rs6000*-*-*]
- || [istarget s390*-*-*] } {
+ || [istarget aarch64*-**] || [istarget s390*-*-*] } {
return 1
}
return 0
@@ -9217,9 +9212,9 @@ proc check_effective_target_supports_stack_clash_protection { } {
# Return 1 if the target creates a frame pointer for non-leaf functions
# Note we ignore cases where we apply tail call optimization here.
proc check_effective_target_frame_pointer_for_non_leaf { } {
- if { [istarget aarch*-*-*] } {
- return 1
- }
+# if { [istarget aarch*-*-*] } {
+# return 1
+# }
# Solaris/x86 defaults to -fno-omit-frame-pointer.
if { [istarget i?86-*-solaris*] || [istarget x86_64-*-solaris*] } {

View File

@ -0,0 +1,31 @@
crt files and statically linked libgcc objects cause false positives
in annobin coverage, so we add the assembler flag to generate notes
for them.
The patch also adds notes to libgcc_s.so, but this is harmless because
these notes only convey that there is no other annobin markup.
2018-07-25 Florian Weimer <fweimer@redhat.com>
* Makefile.in (LIBGCC2_CFLAGS, CRTSTUFF_CFLAGS): Add
-Wa,--generate-missing-build-notes=yes.
--- libgcc/Makefile.in 2018-01-13 13:05:41.000000000 +0100
+++ libgcc/Makefile.in 2018-07-25 13:15:02.036226940 +0200
@@ -244,6 +244,7 @@
LIBGCC2_CFLAGS = -O2 $(LIBGCC2_INCLUDES) $(GCC_CFLAGS) $(HOST_LIBGCC2_CFLAGS) \
$(LIBGCC2_DEBUG_CFLAGS) -DIN_LIBGCC2 \
-fbuilding-libgcc -fno-stack-protector \
+ -Wa,--generate-missing-build-notes=yes \
$(INHIBIT_LIBC_CFLAGS)
# Additional options to use when compiling libgcc2.a.
@@ -297,6 +298,7 @@
$(NO_PIE_CFLAGS) -finhibit-size-directive -fno-inline -fno-exceptions \
-fno-zero-initialized-in-bss -fno-toplevel-reorder -fno-tree-vectorize \
-fbuilding-libgcc -fno-stack-protector $(FORCE_EXPLICIT_EH_REGISTRY) \
+ -Wa,--generate-missing-build-notes=yes \
$(INHIBIT_LIBC_CFLAGS)
# Extra flags to use when compiling crt{begin,end}.o.

View File

@ -0,0 +1,85 @@
2018-08-03 David Malcolm <dmalcolm@redhat.com>
* doc/gcov.texi (-x): Remove duplicate "to".
* doc/invoke.texi (-Wnoexcept-type): Remove duplicate "calls".
(-Wif-not-aligned): Remove duplicate "is".
(-flto): Remove duplicate "the".
(MicroBlaze Options): In examples of "-mcpu=cpu-type", remove
duplicate "v5.00.b".
(MSP430 Options): Remove duplicate "and" from the description
of "-mgprel-sec=regexp".
(x86 Options): Remove duplicate copies of "vmldLog102" and
vmlsLog104 from description of "-mveclibabi=type".
--- gcc/doc/gcov.texi
+++ gcc/doc/gcov.texi
@@ -340,7 +340,7 @@ Print verbose informations related to basic blocks and arcs.
@item -x
@itemx --hash-filenames
-By default, gcov uses the full pathname of the source files to to create
+By default, gcov uses the full pathname of the source files to create
an output filename. This can lead to long filenames that can overflow
filesystem limits. This option creates names of the form
@file{@var{source-file}##@var{md5}.gcov},
--- gcc/doc/invoke.texi
+++ gcc/doc/invoke.texi
@@ -3056,7 +3056,7 @@ void h() @{ f(g); @}
@end smallexample
@noindent
-In C++14, @code{f} calls calls @code{f<void(*)()>}, but in
+In C++14, @code{f} calls @code{f<void(*)()>}, but in
C++17 it calls @code{f<void(*)()noexcept>}.
@item -Wclass-memaccess @r{(C++ and Objective-C++ only)}
@@ -4587,7 +4587,7 @@ The @option{-Wimplicit-fallthrough=3} warning is enabled by @option{-Wextra}.
@opindex Wif-not-aligned
@opindex Wno-if-not-aligned
Control if warning triggered by the @code{warn_if_not_aligned} attribute
-should be issued. This is is enabled by default.
+should be issued. This is enabled by default.
Use @option{-Wno-if-not-aligned} to disable it.
@item -Wignored-qualifiers @r{(C and C++ only)}
@@ -9613,7 +9613,7 @@ for LTO, use @command{gcc-ar} and @command{gcc-ranlib} instead of @command{ar}
and @command{ranlib};
to show the symbols of object files with GIMPLE bytecode, use
@command{gcc-nm}. Those commands require that @command{ar}, @command{ranlib}
-and @command{nm} have been compiled with plugin support. At link time, use the the
+and @command{nm} have been compiled with plugin support. At link time, use the
flag @option{-fuse-linker-plugin} to ensure that the library participates in
the LTO optimization process:
@@ -20159,7 +20159,7 @@ Use features of, and schedule code for, the given CPU.
Supported values are in the format @samp{v@var{X}.@var{YY}.@var{Z}},
where @var{X} is a major version, @var{YY} is the minor version, and
@var{Z} is compatibility code. Example values are @samp{v3.00.a},
-@samp{v4.00.b}, @samp{v5.00.a}, @samp{v5.00.b}, @samp{v5.00.b}, @samp{v6.00.a}.
+@samp{v4.00.b}, @samp{v5.00.a}, @samp{v5.00.b}, @samp{v6.00.a}.
@item -mxl-soft-mul
@opindex mxl-soft-mul
@@ -21839,7 +21839,7 @@ GP-relative addressing. It is most useful in conjunction with
The @var{regexp} is a POSIX Extended Regular Expression.
This option does not affect the behavior of the @option{-G} option, and
-and the specified sections are in addition to the standard @code{.sdata}
+the specified sections are in addition to the standard @code{.sdata}
and @code{.sbss} small-data sections that are recognized by @option{-mgpopt}.
@item -mr0rel-sec=@var{regexp}
@@ -27613,11 +27613,11 @@ To use this option, both @option{-ftree-vectorize} and
ABI-compatible library must be specified at link time.
GCC currently emits calls to @code{vmldExp2},
-@code{vmldLn2}, @code{vmldLog102}, @code{vmldLog102}, @code{vmldPow2},
+@code{vmldLn2}, @code{vmldLog102}, @code{vmldPow2},
@code{vmldTanh2}, @code{vmldTan2}, @code{vmldAtan2}, @code{vmldAtanh2},
@code{vmldCbrt2}, @code{vmldSinh2}, @code{vmldSin2}, @code{vmldAsinh2},
@code{vmldAsin2}, @code{vmldCosh2}, @code{vmldCos2}, @code{vmldAcosh2},
-@code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4}, @code{vmlsLog104},
+@code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4},
@code{vmlsLog104}, @code{vmlsPow4}, @code{vmlsTanh4}, @code{vmlsTan4},
@code{vmlsAtan4}, @code{vmlsAtanh4}, @code{vmlsCbrt4}, @code{vmlsSinh4},
@code{vmlsSin4}, @code{vmlsAsinh4}, @code{vmlsAsin4}, @code{vmlsCosh4},

View File

@ -0,0 +1,124 @@
commit e7c4d49ab27338e6bc8b0272c4036da58482bde0
Author: krebbel <krebbel@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Mon Nov 26 15:15:57 2018 +0000
S/390: Fix flogr RTX.
The flogr instruction uses a 64 bit register pair target operand. In
the RTX we model this as a write to a TImode register. Unfortunately
the RTX's being assigned to the two parts of the target operand were
swapped. This is no problem if in the end the flogr instruction will
be emitted since the instruction still does what the clzdi expander
expects. However, a problem arises when the RTX is used to optimize
CLZ for a constant input operand. Even then it matters only if the
expression couldn't be folded on tree level already.
In the testcase this happened thanks to loop unrolling on RTL level.
The iteration variable is used as an argument to the clz
builtin. Due to the loop unrolling it becomes a constant and after
folding the broken RTX leads to a wrong assumption.
gcc/ChangeLog:
2018-11-26 Andreas Krebbel <krebbel@linux.ibm.com>
Backport from mainline
2018-11-20 Andreas Krebbel <krebbel@linux.ibm.com>
* config/s390/s390.md ("clztidi2"): Swap the RTX's written to the
DImode parts of the target operand.
gcc/testsuite/ChangeLog:
2018-11-26 Andreas Krebbel <krebbel@linux.ibm.com>
Backport from mainline
2018-11-20 Andreas Krebbel <krebbel@linux.ibm.com>
* gcc.target/s390/flogr-1.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-8-branch@266465 138bc75d-0d04-0410-961f-82ee72b054a4
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index c4d391bc9b5..53bb1985285 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -8861,17 +8861,17 @@
DONE;
})
+; CLZ result is in hard reg op0 - this is the high part of the target operand
+; The source with the left-most one bit cleared is in hard reg op0 + 1 - the low part
(define_insn "clztidi2"
[(set (match_operand:TI 0 "register_operand" "=d")
(ior:TI
- (ashift:TI
- (zero_extend:TI
- (xor:DI (match_operand:DI 1 "register_operand" "d")
- (lshiftrt (match_operand:DI 2 "const_int_operand" "")
- (subreg:SI (clz:DI (match_dup 1)) 4))))
-
- (const_int 64))
- (zero_extend:TI (clz:DI (match_dup 1)))))
+ (ashift:TI (zero_extend:TI (clz:DI (match_operand:DI 1 "register_operand" "d")))
+ (const_int 64))
+ (zero_extend:TI
+ (xor:DI (match_dup 1)
+ (lshiftrt (match_operand:DI 2 "const_int_operand" "")
+ (subreg:SI (clz:DI (match_dup 1)) 4))))))
(clobber (reg:CC CC_REGNUM))]
"UINTVAL (operands[2]) == HOST_WIDE_INT_1U << 63
&& TARGET_EXTIMM && TARGET_ZARCH"
diff --git a/gcc/testsuite/gcc.target/s390/flogr-1.c b/gcc/testsuite/gcc.target/s390/flogr-1.c
new file mode 100644
index 00000000000..a3869000d62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/flogr-1.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -funroll-loops -march=z9-109" } */
+/* { dg-require-effective-target stdint_types } */
+
+/* Folding of the FLOGR caused a wrong value to be returned by
+ __builtin_clz because of a problem in the RTX we emit for FLOGR.
+ The problematic folding can only be triggered with constant inputs
+ introduced on RTL level. In this case it happens with loop
+ unrolling. */
+
+#include <stdint.h>
+#include <assert.h>
+
+static inline uint32_t pow2_ceil_u32(uint32_t x) {
+ if (x <= 1) {
+ return x;
+ }
+ int msb_on_index;
+ msb_on_index = (31 ^ __builtin_clz(x - 1));
+ assert(msb_on_index < 31);
+ return 1U << (msb_on_index + 1);
+}
+
+void __attribute__((noinline,noclone))
+die (int a)
+{
+ if (a)
+ __builtin_abort ();
+}
+
+void test_pow2_ceil_u32(void) {
+ unsigned i;
+
+ for (i = 0; i < 18; i++) {
+ uint32_t a_ = (pow2_ceil_u32(((uint32_t)1) << i));
+ if (!(a_ == (((uint32_t)1) << i))) {
+ die(1);
+ }
+ }
+}
+
+int
+main(void) {
+ test_pow2_ceil_u32();
+
+ return 0;
+}

View File

@ -0,0 +1,572 @@
commit 87c504d3b293ebe6d36f3b50696cd307b02b0daa
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Tue Jun 19 21:23:39 2018 +0000
2018-06-19 Aaron Sawdey <acsawdey@linux.ibm.com>
* config/rs6000/rs6000-string.c (select_block_compare_mode): Check
TARGET_EFFICIENT_OVERLAPPING_UNALIGNED here instead of in caller.
(do_and3, do_and3_mask, do_compb3, do_rotl3): New functions.
(expand_block_compare): Change select_block_compare_mode call.
(expand_strncmp_align_check): Use new functions, fix comment.
(emit_final_str_compare_gpr): New function.
(expand_strn_compare): Refactor and clean up code.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Remove *.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@261769 138bc75d-0d04-0410-961f-82ee72b054a4
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 632d3359711..f9dd54eb639 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -266,6 +266,7 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
else if (bytes == GET_MODE_SIZE (QImode))
return QImode;
else if (bytes < GET_MODE_SIZE (SImode)
+ && TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
&& offset >= GET_MODE_SIZE (SImode) - bytes)
/* This matches the case were we have SImode and 3 bytes
and offset >= 1 and permits us to move back one and overlap
@@ -273,6 +274,7 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
unwanted bytes off of the input. */
return SImode;
else if (word_mode_ok && bytes < UNITS_PER_WORD
+ && TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
&& offset >= UNITS_PER_WORD-bytes)
/* Similarly, if we can use DImode it will get matched here and
can do an overlapping read that ends at the end of the block. */
@@ -408,6 +410,54 @@ do_add3 (rtx dest, rtx src1, rtx src2)
emit_insn (gen_addsi3 (dest, src1, src2));
}
+/* Emit an and of the proper mode for DEST.
+
+ DEST is the destination register for the and.
+ SRC1 is the first and input.
+ SRC2 is the second and input.
+
+ Computes DEST = SRC1&SRC2. */
+static void
+do_and3 (rtx dest, rtx src1, rtx src2)
+{
+ if (GET_MODE (dest) == DImode)
+ emit_insn (gen_anddi3 (dest, src1, src2));
+ else
+ emit_insn (gen_andsi3 (dest, src1, src2));
+}
+
+/* Emit a cmpb of the proper mode for DEST.
+
+ DEST is the destination register for the cmpb.
+ SRC1 is the first input.
+ SRC2 is the second input.
+
+ Computes cmpb of SRC1, SRC2. */
+static void
+do_cmpb3 (rtx dest, rtx src1, rtx src2)
+{
+ if (GET_MODE (dest) == DImode)
+ emit_insn (gen_cmpbdi3 (dest, src1, src2));
+ else
+ emit_insn (gen_cmpbsi3 (dest, src1, src2));
+}
+
+/* Emit a rotl of the proper mode for DEST.
+
+ DEST is the destination register for the rotate.
+ SRC1 is the value to be rotated.
+ SRC2 is the rotate amount.
+
+ Computes DEST = SRC1 rotated left by SRC2. */
+static void
+do_rotl3 (rtx dest, rtx src1, rtx src2)
+{
+ if (GET_MODE (dest) == DImode)
+ emit_insn (gen_rotldi3 (dest, src1, src2));
+ else
+ emit_insn (gen_rotlsi3 (dest, src1, src2));
+}
+
/* Generate rtl for a load, shift, and compare of less than a full word.
LOAD_MODE is the machine mode for the loads.
@@ -1395,11 +1445,8 @@ expand_block_compare (rtx operands[])
while (bytes > 0)
{
unsigned int align = compute_current_alignment (base_align, offset);
- if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
- load_mode = select_block_compare_mode (offset, bytes, align,
- word_mode_ok);
- else
- load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
+ load_mode = select_block_compare_mode (offset, bytes,
+ align, word_mode_ok);
load_mode_size = GET_MODE_SIZE (load_mode);
if (bytes >= load_mode_size)
cmp_bytes = load_mode_size;
@@ -1627,22 +1674,19 @@ expand_block_compare (rtx operands[])
return true;
}
-/* Generate alignment check and branch code to set up for
+/* Generate page crossing check and branch code to set up for
strncmp when we don't have DI alignment.
STRNCMP_LABEL is the label to branch if there is a page crossing.
- SRC is the string pointer to be examined.
+ SRC_ADDR is the string address to be examined.
BYTES is the max number of bytes to compare. */
static void
-expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
+expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes)
{
rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
- rtx src_check = copy_addr_to_reg (XEXP (src, 0));
- if (GET_MODE (src_check) == SImode)
- emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
- else
- emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
+ rtx src_pgoff = gen_reg_rtx (GET_MODE (src_addr));
+ do_and3 (src_pgoff, src_addr, GEN_INT (0xfff));
rtx cond = gen_reg_rtx (CCmode);
- emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
+ emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_pgoff,
GEN_INT (4096 - bytes)));
rtx cmp_rtx = gen_rtx_GE (VOIDmode, cond, const0_rtx);
@@ -1654,6 +1698,76 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
LABEL_NUSES (strncmp_label) += 1;
}
+/* Generate the final sequence that identifies the differing
+ byte and generates the final result, taking into account
+ zero bytes:
+
+ cmpb cmpb_result1, src1, src2
+ cmpb cmpb_result2, src1, zero
+ orc cmpb_result1, cmpb_result1, cmpb_result2
+ cntlzd get bit of first zero/diff byte
+ addi convert for rldcl use
+ rldcl rldcl extract diff/zero byte
+ subf subtract for final result
+
+ STR1 is the reg rtx for data from string 1.
+ STR2 is the reg rtx for data from string 2.
+ RESULT is the reg rtx for the comparison result. */
+
+static void
+emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
+{
+ machine_mode m = GET_MODE (str1);
+ rtx cmpb_diff = gen_reg_rtx (m);
+ rtx cmpb_zero = gen_reg_rtx (m);
+ rtx rot_amt = gen_reg_rtx (m);
+ rtx zero_reg = gen_reg_rtx (m);
+
+ rtx rot1_1 = gen_reg_rtx (m);
+ rtx rot1_2 = gen_reg_rtx (m);
+ rtx rot2_1 = gen_reg_rtx (m);
+ rtx rot2_2 = gen_reg_rtx (m);
+
+ if (m == SImode)
+ {
+ emit_insn (gen_cmpbsi3 (cmpb_diff, str1, str2));
+ emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
+ emit_insn (gen_cmpbsi3 (cmpb_zero, str1, zero_reg));
+ emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
+ emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
+ emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
+ emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
+ emit_insn (gen_rotlsi3 (rot1_1, str1,
+ gen_lowpart (SImode, rot_amt)));
+ emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
+ emit_insn (gen_rotlsi3 (rot2_1, str2,
+ gen_lowpart (SImode, rot_amt)));
+ emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
+ emit_insn (gen_subsi3 (result, rot1_2, rot2_2));
+ }
+ else if (m == DImode)
+ {
+ emit_insn (gen_cmpbdi3 (cmpb_diff, str1, str2));
+ emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
+ emit_insn (gen_cmpbdi3 (cmpb_zero, str1, zero_reg));
+ emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
+ emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
+ emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
+ emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
+ emit_insn (gen_rotldi3 (rot1_1, str1,
+ gen_lowpart (SImode, rot_amt)));
+ emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
+ emit_insn (gen_rotldi3 (rot2_1, str2,
+ gen_lowpart (SImode, rot_amt)));
+ emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
+ emit_insn (gen_subdi3 (result, rot1_2, rot2_2));
+ }
+ else
+ gcc_unreachable ();
+
+ return;
+}
+
/* Expand a string compare operation with length, and return
true if successful. Return false if we should let the
compiler generate normal code, probably a strncmp call.
@@ -1684,8 +1798,8 @@ expand_strn_compare (rtx operands[], int no_length)
align_rtx = operands[4];
}
unsigned HOST_WIDE_INT cmp_bytes = 0;
- rtx src1 = orig_src1;
- rtx src2 = orig_src2;
+ rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
+ rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
/* If we have a length, it must be constant. This simplifies things
a bit as we don't have to generate code to check if we've exceeded
@@ -1698,8 +1812,8 @@ expand_strn_compare (rtx operands[], int no_length)
return false;
unsigned int base_align = UINTVAL (align_rtx);
- int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
- int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
+ unsigned int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
+ unsigned int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
/* targetm.slow_unaligned_access -- don't do unaligned stuff. */
if (targetm.slow_unaligned_access (word_mode, align1)
@@ -1751,8 +1865,9 @@ expand_strn_compare (rtx operands[], int no_length)
rtx final_move_label = gen_label_rtx ();
rtx final_label = gen_label_rtx ();
rtx begin_compare_label = NULL;
+ unsigned int required_align = 8;
- if (base_align < 8)
+ if (base_align < required_align)
{
/* Generate code that checks distance to 4k boundary for this case. */
begin_compare_label = gen_label_rtx ();
@@ -1775,14 +1890,14 @@ expand_strn_compare (rtx operands[], int no_length)
}
else
{
- align_test = ROUND_UP (align_test, 8);
- base_align = 8;
+ align_test = ROUND_UP (align_test, required_align);
+ base_align = required_align;
}
- if (align1 < 8)
- expand_strncmp_align_check (strncmp_label, src1, align_test);
- if (align2 < 8)
- expand_strncmp_align_check (strncmp_label, src2, align_test);
+ if (align1 < required_align)
+ expand_strncmp_align_check (strncmp_label, src1_addr, align_test);
+ if (align2 < required_align)
+ expand_strncmp_align_check (strncmp_label, src2_addr, align_test);
/* Now generate the following sequence:
- branch to begin_compare
@@ -1799,25 +1914,13 @@ expand_strn_compare (rtx operands[], int no_length)
emit_label (strncmp_label);
- if (!REG_P (XEXP (src1, 0)))
- {
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
- src1 = replace_equiv_address (src1, src1_reg);
- }
-
- if (!REG_P (XEXP (src2, 0)))
- {
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
- src2 = replace_equiv_address (src2, src2_reg);
- }
-
if (no_length)
{
tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
target, LCT_NORMAL, GET_MODE (target),
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
- force_reg (Pmode, XEXP (src2, 0)), Pmode);
+ force_reg (Pmode, src1_addr), Pmode,
+ force_reg (Pmode, src2_addr), Pmode);
}
else
{
@@ -1830,8 +1933,8 @@ expand_strn_compare (rtx operands[], int no_length)
tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
target, LCT_NORMAL, GET_MODE (target),
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
- force_reg (Pmode, XEXP (src2, 0)), Pmode,
+ force_reg (Pmode, src1_addr), Pmode,
+ force_reg (Pmode, src2_addr), Pmode,
len_rtx, Pmode);
}
@@ -1847,12 +1950,12 @@ expand_strn_compare (rtx operands[], int no_length)
rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
- /* Generate sequence of ld/ldbrx, cmpb to compare out
+ /* Generate a sequence of GPR or VEC/VSX instructions to compare out
to the length specified. */
unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
while (bytes_to_compare > 0)
{
- /* Compare sequence:
+ /* GPR compare sequence:
check each 8B with: ld/ld cmpd bne
If equal, use rldicr/cmpb to check for zero byte.
cleanup code at end:
@@ -1866,13 +1969,10 @@ expand_strn_compare (rtx operands[], int no_length)
The last compare can branch around the cleanup code if the
result is zero because the strings are exactly equal. */
+
unsigned int align = compute_current_alignment (base_align, offset);
- if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
- load_mode = select_block_compare_mode (offset, bytes_to_compare, align,
- word_mode_ok);
- else
- load_mode = select_block_compare_mode (0, bytes_to_compare, align,
- word_mode_ok);
+ load_mode = select_block_compare_mode (offset, bytes_to_compare,
+ align, word_mode_ok);
load_mode_size = GET_MODE_SIZE (load_mode);
if (bytes_to_compare >= load_mode_size)
cmp_bytes = load_mode_size;
@@ -1895,25 +1995,10 @@ expand_strn_compare (rtx operands[], int no_length)
rid of the extra bytes. */
cmp_bytes = bytes_to_compare;
- src1 = adjust_address (orig_src1, load_mode, offset);
- src2 = adjust_address (orig_src2, load_mode, offset);
-
- if (!REG_P (XEXP (src1, 0)))
- {
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
- src1 = replace_equiv_address (src1, src1_reg);
- }
- set_mem_size (src1, load_mode_size);
-
- if (!REG_P (XEXP (src2, 0)))
- {
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
- src2 = replace_equiv_address (src2, src2_reg);
- }
- set_mem_size (src2, load_mode_size);
-
- do_load_for_compare (tmp_reg_src1, src1, load_mode);
- do_load_for_compare (tmp_reg_src2, src2, load_mode);
+ rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
+ rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
/* We must always left-align the data we read, and
clear any bytes to the right that are beyond the string.
@@ -1926,16 +2011,8 @@ expand_strn_compare (rtx operands[], int no_length)
{
/* Rotate left first. */
rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
- if (word_mode == DImode)
- {
- emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
- emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
- }
- else
- {
- emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
- emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
- }
+ do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
+ do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
}
if (cmp_bytes < word_mode_size)
@@ -1944,16 +2021,8 @@ expand_strn_compare (rtx operands[], int no_length)
turned into a rldicr instruction. */
HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
- if (word_mode == DImode)
- {
- emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
- emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
- }
- else
- {
- emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
- emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
- }
+ do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
+ do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
}
/* Cases to handle. A and B are chunks of the two strings.
@@ -2010,31 +2079,16 @@ expand_strn_compare (rtx operands[], int no_length)
rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
rtx condz = gen_reg_rtx (CCmode);
rtx zero_reg = gen_reg_rtx (word_mode);
- if (word_mode == SImode)
- {
- emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
- emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
- if (cmp_bytes < word_mode_size)
- {
- /* Don't want to look at zero bytes past end. */
- HOST_WIDE_INT mb =
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
- emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask));
- }
- }
- else
+ emit_move_insn (zero_reg, GEN_INT (0));
+ do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
+
+ if (cmp_bytes < word_mode_size)
{
- emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
- emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
- if (cmp_bytes < word_mode_size)
- {
- /* Don't want to look at zero bytes past end. */
- HOST_WIDE_INT mb =
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
- emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask));
- }
+ /* Don't want to look at zero bytes past end. */
+ HOST_WIDE_INT mb =
+ BITS_PER_UNIT * (word_mode_size - cmp_bytes);
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
+ do_and3 (cmpb_zero, cmpb_zero, mask);
}
emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
@@ -2054,22 +2108,10 @@ expand_strn_compare (rtx operands[], int no_length)
if (equality_compare_rest)
{
/* Update pointers past what has been compared already. */
- src1 = adjust_address (orig_src1, load_mode, offset);
- src2 = adjust_address (orig_src2, load_mode, offset);
-
- if (!REG_P (XEXP (src1, 0)))
- {
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
- src1 = replace_equiv_address (src1, src1_reg);
- }
- set_mem_size (src1, load_mode_size);
-
- if (!REG_P (XEXP (src2, 0)))
- {
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
- src2 = replace_equiv_address (src2, src2_reg);
- }
- set_mem_size (src2, load_mode_size);
+ rtx src1 = force_reg (Pmode,
+ gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset)));
+ rtx src2 = force_reg (Pmode,
+ gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset)));
/* Construct call to strcmp/strncmp to compare the rest of the string. */
if (no_length)
@@ -2077,8 +2119,7 @@ expand_strn_compare (rtx operands[], int no_length)
tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
target, LCT_NORMAL, GET_MODE (target),
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
- force_reg (Pmode, XEXP (src2, 0)), Pmode);
+ src1, Pmode, src2, Pmode);
}
else
{
@@ -2087,9 +2128,7 @@ expand_strn_compare (rtx operands[], int no_length)
tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
target, LCT_NORMAL, GET_MODE (target),
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
- force_reg (Pmode, XEXP (src2, 0)), Pmode,
- len_rtx, Pmode);
+ src1, Pmode, src2, Pmode, len_rtx, Pmode);
}
rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
@@ -2102,63 +2141,7 @@ expand_strn_compare (rtx operands[], int no_length)
if (cleanup_label)
emit_label (cleanup_label);
- /* Generate the final sequence that identifies the differing
- byte and generates the final result, taking into account
- zero bytes:
-
- cmpb cmpb_result1, src1, src2
- cmpb cmpb_result2, src1, zero
- orc cmpb_result1, cmp_result1, cmpb_result2
- cntlzd get bit of first zero/diff byte
- addi convert for rldcl use
- rldcl rldcl extract diff/zero byte
- subf subtract for final result
- */
-
- rtx cmpb_diff = gen_reg_rtx (word_mode);
- rtx cmpb_zero = gen_reg_rtx (word_mode);
- rtx rot_amt = gen_reg_rtx (word_mode);
- rtx zero_reg = gen_reg_rtx (word_mode);
-
- rtx rot1_1 = gen_reg_rtx (word_mode);
- rtx rot1_2 = gen_reg_rtx (word_mode);
- rtx rot2_1 = gen_reg_rtx (word_mode);
- rtx rot2_2 = gen_reg_rtx (word_mode);
-
- if (word_mode == SImode)
- {
- emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
- emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
- emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
- emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
- emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
- emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
- emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
- emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
- gen_lowpart (SImode, rot_amt)));
- emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
- emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
- gen_lowpart (SImode, rot_amt)));
- emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
- emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
- }
- else
- {
- emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
- emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
- emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
- emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
- emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
- emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
- emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
- emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
- gen_lowpart (SImode, rot_amt)));
- emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
- emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
- gen_lowpart (SImode, rot_amt)));
- emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
- emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
- }
+ emit_final_str_compare_gpr (tmp_reg_src1, tmp_reg_src2, result_reg);
emit_label (final_move_label);
emit_insn (gen_movsi (target,
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0fc77aa18b0..e6921e96a3d 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1210,7 +1210,7 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
;; VSX 0/-1 GPR 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
-(define_insn "*vsx_mov<mode>_64bit"
+(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, <VSa>, <VSa>, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wo, v,

View File

@ -0,0 +1,472 @@
commit c7a833caa029b84ad579c3fabe006a80f718d7e1
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Thu Aug 2 18:11:54 2018 +0000
2018-07-31 Aaron Sawdey <acsawdey@linux.ibm.com>
* config/rs6000/rs6000-string.c (select_block_compare_mode): Move test
for word_mode_ok here instead of passing as argument.
(expand_block_compare): Change select_block_compare_mode() call.
(expand_strncmp_gpr_sequence): New function.
(expand_strn_compare): Make use of expand_strncmp_gpr_sequence.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@263273 138bc75d-0d04-0410-961f-82ee72b054a4
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index f9dd54eb639..451e9ed33da 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -238,13 +238,11 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
OFFSET is the current read offset from the beginning of the block.
BYTES is the number of bytes remaining to be read.
- ALIGN is the minimum alignment of the memory blocks being compared in bytes.
- WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
- the largest allowable mode. */
+ ALIGN is the minimum alignment of the memory blocks being compared in bytes. */
static machine_mode
select_block_compare_mode (unsigned HOST_WIDE_INT offset,
unsigned HOST_WIDE_INT bytes,
- unsigned HOST_WIDE_INT align, bool word_mode_ok)
+ unsigned HOST_WIDE_INT align)
{
/* First see if we can do a whole load unit
as that will be more efficient than a larger load + shift. */
@@ -257,6 +255,11 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
/* The most we can read without potential page crossing. */
unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
+ /* If we have an LE target without ldbrx and word_mode is DImode,
+ then we must avoid using word_mode. */
+ int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
+ && word_mode == DImode);
+
if (word_mode_ok && bytes >= UNITS_PER_WORD)
return word_mode;
else if (bytes == GET_MODE_SIZE (SImode))
@@ -1382,16 +1385,11 @@ expand_block_compare (rtx operands[])
else
cond = gen_reg_rtx (CCmode);
- /* If we have an LE target without ldbrx and word_mode is DImode,
- then we must avoid using word_mode. */
- int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
- && word_mode == DImode);
-
/* Strategy phase. How many ops will this take and should we expand it? */
unsigned HOST_WIDE_INT offset = 0;
machine_mode load_mode =
- select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
+ select_block_compare_mode (offset, bytes, base_align);
unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
/* We don't want to generate too much code. The loop code can take
@@ -1445,8 +1443,7 @@ expand_block_compare (rtx operands[])
while (bytes > 0)
{
unsigned int align = compute_current_alignment (base_align, offset);
- load_mode = select_block_compare_mode (offset, bytes,
- align, word_mode_ok);
+ load_mode = select_block_compare_mode (offset, bytes, align);
load_mode_size = GET_MODE_SIZE (load_mode);
if (bytes >= load_mode_size)
cmp_bytes = load_mode_size;
@@ -1698,6 +1695,189 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes
LABEL_NUSES (strncmp_label) += 1;
}
+/* Generate the sequence of compares for strcmp/strncmp using gpr instructions.
+ BYTES_TO_COMPARE is the number of bytes to be compared.
+ BASE_ALIGN is the smaller of the alignment of the two strings.
+ ORIG_SRC1 is the unmodified rtx for the first string.
+ ORIG_SRC2 is the unmodified rtx for the second string.
+ TMP_REG_SRC1 is the register for loading the first string.
+ TMP_REG_SRC2 is the register for loading the second string.
+ RESULT_REG is the rtx for the result register.
+ EQUALITY_COMPARE_REST is a flag to indicate we need to make a cleanup call
+ to strcmp/strncmp if we have equality at the end of the inline comparison.
+ CLEANUP_LABEL is rtx for a label we generate if we need code to clean up
+ and generate the final comparison result.
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
+ set the final result. */
+static void
+expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
+ unsigned int base_align,
+ rtx orig_src1, rtx orig_src2,
+ rtx tmp_reg_src1, rtx tmp_reg_src2, rtx result_reg,
+ bool equality_compare_rest, rtx &cleanup_label,
+ rtx final_move_label)
+{
+ unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
+ machine_mode load_mode;
+ unsigned int load_mode_size;
+ unsigned HOST_WIDE_INT cmp_bytes = 0;
+ unsigned HOST_WIDE_INT offset = 0;
+ rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
+ rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
+
+ while (bytes_to_compare > 0)
+ {
+ /* GPR compare sequence:
+ check each 8B with: ld/ld cmpd bne
+ If equal, use rldicr/cmpb to check for zero byte.
+ cleanup code at end:
+ cmpb get byte that differs
+ cmpb look for zero byte
+ orc combine
+ cntlzd get bit of first zero/diff byte
+ subfic convert for rldcl use
+ rldcl rldcl extract diff/zero byte
+ subf subtract for final result
+
+ The last compare can branch around the cleanup code if the
+ result is zero because the strings are exactly equal. */
+
+ unsigned int align = compute_current_alignment (base_align, offset);
+ load_mode = select_block_compare_mode (offset, bytes_to_compare, align);
+ load_mode_size = GET_MODE_SIZE (load_mode);
+ if (bytes_to_compare >= load_mode_size)
+ cmp_bytes = load_mode_size;
+ else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
+ {
+ /* Move this load back so it doesn't go past the end.
+ P8/P9 can do this efficiently. */
+ unsigned int extra_bytes = load_mode_size - bytes_to_compare;
+ cmp_bytes = bytes_to_compare;
+ if (extra_bytes < offset)
+ {
+ offset -= extra_bytes;
+ cmp_bytes = load_mode_size;
+ bytes_to_compare = cmp_bytes;
+ }
+ }
+ else
+ /* P7 and earlier can't do the overlapping load trick fast,
+ so this forces a non-overlapping load and a shift to get
+ rid of the extra bytes. */
+ cmp_bytes = bytes_to_compare;
+
+ rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
+ rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
+
+ /* We must always left-align the data we read, and
+ clear any bytes to the right that are beyond the string.
+ Otherwise the cmpb sequence won't produce the correct
+ results. The beginning of the compare will be done
+ with word_mode so will not have any extra shifts or
+ clear rights. */
+
+ if (load_mode_size < word_mode_size)
+ {
+ /* Rotate left first. */
+ rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
+ do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
+ do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
+ }
+
+ if (cmp_bytes < word_mode_size)
+ {
+ /* Now clear right. This plus the rotate can be
+ turned into a rldicr instruction. */
+ HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
+ do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
+ do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
+ }
+
+ /* Cases to handle. A and B are chunks of the two strings.
+ 1: Not end of comparison:
+ A != B: branch to cleanup code to compute result.
+ A == B: check for 0 byte, next block if not found.
+ 2: End of the inline comparison:
+ A != B: branch to cleanup code to compute result.
+ A == B: check for 0 byte, call strcmp/strncmp
+ 3: compared requested N bytes:
+ A == B: branch to result 0.
+ A != B: cleanup code to compute result. */
+
+ unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
+
+ rtx dst_label;
+ if (remain > 0 || equality_compare_rest)
+ {
+ /* Branch to cleanup code, otherwise fall through to do
+ more compares. */
+ if (!cleanup_label)
+ cleanup_label = gen_label_rtx ();
+ dst_label = cleanup_label;
+ }
+ else
+ /* Branch to end and produce result of 0. */
+ dst_label = final_move_label;
+
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
+ rtx cond = gen_reg_rtx (CCmode);
+
+ /* Always produce the 0 result, it is needed if
+ cmpb finds a 0 byte in this chunk. */
+ rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
+ rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
+
+ rtx cmp_rtx;
+ if (remain == 0 && !equality_compare_rest)
+ cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
+ else
+ cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
+ lab_ref, pc_rtx);
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+ JUMP_LABEL (j) = dst_label;
+ LABEL_NUSES (dst_label) += 1;
+
+ if (remain > 0 || equality_compare_rest)
+ {
+ /* Generate a cmpb to test for a 0 byte and branch
+ to final result if found. */
+ rtx cmpb_zero = gen_reg_rtx (word_mode);
+ rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
+ rtx condz = gen_reg_rtx (CCmode);
+ rtx zero_reg = gen_reg_rtx (word_mode);
+ emit_move_insn (zero_reg, GEN_INT (0));
+ do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
+
+ if (cmp_bytes < word_mode_size)
+ {
+ /* Don't want to look at zero bytes past end. */
+ HOST_WIDE_INT mb =
+ BITS_PER_UNIT * (word_mode_size - cmp_bytes);
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
+ do_and3 (cmpb_zero, cmpb_zero, mask);
+ }
+
+ emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
+ rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
+ lab_ref_fin, pc_rtx);
+ rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+ JUMP_LABEL (j2) = final_move_label;
+ LABEL_NUSES (final_move_label) += 1;
+
+ }
+
+ offset += cmp_bytes;
+ bytes_to_compare -= cmp_bytes;
+ }
+
+}
+
/* Generate the final sequence that identifies the differing
byte and generates the final result, taking into account
zero bytes:
@@ -1797,7 +1977,7 @@ expand_strn_compare (rtx operands[], int no_length)
bytes_rtx = operands[3];
align_rtx = operands[4];
}
- unsigned HOST_WIDE_INT cmp_bytes = 0;
+
rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
@@ -1822,11 +2002,6 @@ expand_strn_compare (rtx operands[], int no_length)
gcc_assert (GET_MODE (target) == SImode);
- /* If we have an LE target without ldbrx and word_mode is DImode,
- then we must avoid using word_mode. */
- int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
- && word_mode == DImode);
-
unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
unsigned HOST_WIDE_INT offset = 0;
@@ -1839,7 +2014,7 @@ expand_strn_compare (rtx operands[], int no_length)
bytes = UINTVAL (bytes_rtx);
machine_mode load_mode =
- select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
+ select_block_compare_mode (0, bytes, base_align);
unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
compare_length = rs6000_string_compare_inline_limit * load_mode_size;
@@ -1867,6 +2042,8 @@ expand_strn_compare (rtx operands[], int no_length)
rtx begin_compare_label = NULL;
unsigned int required_align = 8;
+ required_align = 8;
+
if (base_align < required_align)
{
/* Generate code that checks distance to 4k boundary for this case. */
@@ -1952,159 +2129,15 @@ expand_strn_compare (rtx operands[], int no_length)
/* Generate a sequence of GPR or VEC/VSX instructions to compare out
to the length specified. */
- unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
- while (bytes_to_compare > 0)
- {
- /* GPR compare sequence:
- check each 8B with: ld/ld cmpd bne
- If equal, use rldicr/cmpb to check for zero byte.
- cleanup code at end:
- cmpb get byte that differs
- cmpb look for zero byte
- orc combine
- cntlzd get bit of first zero/diff byte
- subfic convert for rldcl use
- rldcl rldcl extract diff/zero byte
- subf subtract for final result
-
- The last compare can branch around the cleanup code if the
- result is zero because the strings are exactly equal. */
-
- unsigned int align = compute_current_alignment (base_align, offset);
- load_mode = select_block_compare_mode (offset, bytes_to_compare,
- align, word_mode_ok);
- load_mode_size = GET_MODE_SIZE (load_mode);
- if (bytes_to_compare >= load_mode_size)
- cmp_bytes = load_mode_size;
- else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
- {
- /* Move this load back so it doesn't go past the end.
- P8/P9 can do this efficiently. */
- unsigned int extra_bytes = load_mode_size - bytes_to_compare;
- cmp_bytes = bytes_to_compare;
- if (extra_bytes < offset)
- {
- offset -= extra_bytes;
- cmp_bytes = load_mode_size;
- bytes_to_compare = cmp_bytes;
- }
- }
- else
- /* P7 and earlier can't do the overlapping load trick fast,
- so this forces a non-overlapping load and a shift to get
- rid of the extra bytes. */
- cmp_bytes = bytes_to_compare;
-
- rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
- do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
- rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
- do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
-
- /* We must always left-align the data we read, and
- clear any bytes to the right that are beyond the string.
- Otherwise the cmpb sequence won't produce the correct
- results. The beginning of the compare will be done
- with word_mode so will not have any extra shifts or
- clear rights. */
-
- if (load_mode_size < word_mode_size)
- {
- /* Rotate left first. */
- rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
- do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
- do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
- }
-
- if (cmp_bytes < word_mode_size)
- {
- /* Now clear right. This plus the rotate can be
- turned into a rldicr instruction. */
- HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
- do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
- do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
- }
-
- /* Cases to handle. A and B are chunks of the two strings.
- 1: Not end of comparison:
- A != B: branch to cleanup code to compute result.
- A == B: check for 0 byte, next block if not found.
- 2: End of the inline comparison:
- A != B: branch to cleanup code to compute result.
- A == B: check for 0 byte, call strcmp/strncmp
- 3: compared requested N bytes:
- A == B: branch to result 0.
- A != B: cleanup code to compute result. */
-
- unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
-
- rtx dst_label;
- if (remain > 0 || equality_compare_rest)
- {
- /* Branch to cleanup code, otherwise fall through to do
- more compares. */
- if (!cleanup_label)
- cleanup_label = gen_label_rtx ();
- dst_label = cleanup_label;
- }
- else
- /* Branch to end and produce result of 0. */
- dst_label = final_move_label;
-
- rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
- rtx cond = gen_reg_rtx (CCmode);
-
- /* Always produce the 0 result, it is needed if
- cmpb finds a 0 byte in this chunk. */
- rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
- rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
-
- rtx cmp_rtx;
- if (remain == 0 && !equality_compare_rest)
- cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
- else
- cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
-
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
- lab_ref, pc_rtx);
- rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
- JUMP_LABEL (j) = dst_label;
- LABEL_NUSES (dst_label) += 1;
-
- if (remain > 0 || equality_compare_rest)
- {
- /* Generate a cmpb to test for a 0 byte and branch
- to final result if found. */
- rtx cmpb_zero = gen_reg_rtx (word_mode);
- rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
- rtx condz = gen_reg_rtx (CCmode);
- rtx zero_reg = gen_reg_rtx (word_mode);
- emit_move_insn (zero_reg, GEN_INT (0));
- do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
-
- if (cmp_bytes < word_mode_size)
- {
- /* Don't want to look at zero bytes past end. */
- HOST_WIDE_INT mb =
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
- do_and3 (cmpb_zero, cmpb_zero, mask);
- }
-
- emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
- rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
- lab_ref_fin, pc_rtx);
- rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
- JUMP_LABEL (j2) = final_move_label;
- LABEL_NUSES (final_move_label) += 1;
-
- }
-
- offset += cmp_bytes;
- bytes_to_compare -= cmp_bytes;
- }
-
+ expand_strncmp_gpr_sequence(compare_length, base_align,
+ orig_src1, orig_src2,
+ tmp_reg_src1, tmp_reg_src2,
+ result_reg,
+ equality_compare_rest,
+ cleanup_label, final_move_label);
+
+ offset = compare_length;
+
if (equality_compare_rest)
{
/* Update pointers past what has been compared already. */

View File

@ -0,0 +1,613 @@
commit e4108e7e619dcf7f21224382bc37ba2ef651eb43
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Thu Aug 30 18:17:00 2018 +0000
2018-08-30 Aaron Sawdey <acsawdey@linux.ibm.com>
* config/rs6000/altivec.md (altivec_eq<mode>): Remove star.
(altivec_vcmpequ<VI_char>_p): Remove star.
* config/rs6000/rs6000-string.c (do_load_for_compare): Support
vector load modes.
(expand_strncmp_vec_sequence): New function.
(emit_final_str_compare_vec): New function.
(expand_strn_compare): Add support for vector strncmp.
* config/rs6000/rs6000.opt (-mstring-compare-inline-limit): Change
length specification to bytes.
* config/rs6000/vsx.md (vsx_ld_elemrev_v16qi_internal): Remove star.
(vcmpnezb_p): New pattern.
* doc/invoke.texi (RS/6000 and PowerPC Options): Update documentation
for option -mstring-compare-inline-limit.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@263991 138bc75d-0d04-0410-961f-82ee72b054a4
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 13f4654db6a..db4f926bd15 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -608,7 +608,7 @@
"vcmpbfp %0,%1,%2"
[(set_attr "type" "veccmp")])
-(define_insn "*altivec_eq<mode>"
+(define_insn "altivec_eq<mode>"
[(set (match_operand:VI2 0 "altivec_register_operand" "=v")
(eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
(match_operand:VI2 2 "altivec_register_operand" "v")))]
@@ -2438,7 +2438,7 @@
;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
-(define_insn "*altivec_vcmpequ<VI_char>_p"
+(define_insn "altivec_vcmpequ<VI_char>_p"
[(set (reg:CC CR6_REGNO)
(unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v")
(match_operand:VI2 2 "register_operand" "v"))]
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 451e9ed33da..ff0414586d0 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -157,6 +157,33 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
{
switch (GET_MODE (reg))
{
+ case E_V16QImode:
+ switch (mode)
+ {
+ case E_V16QImode:
+ if (!BYTES_BIG_ENDIAN)
+ {
+ if (TARGET_P9_VECTOR)
+ emit_insn (gen_vsx_ld_elemrev_v16qi_internal (reg, mem));
+ else
+ {
+ rtx reg_v2di = simplify_gen_subreg (V2DImode, reg,
+ V16QImode, 0);
+ gcc_assert (MEM_P (mem));
+ rtx addr = XEXP (mem, 0);
+ rtx mem_v2di = gen_rtx_MEM (V2DImode, addr);
+ MEM_COPY_ATTRIBUTES (mem_v2di, mem);
+ set_mem_size (mem, GET_MODE_SIZE (V2DImode));
+ emit_insn (gen_vsx_ld_elemrev_v2di (reg_v2di, mem_v2di));
+ }
+ }
+ else
+ emit_insn (gen_vsx_movv2di_64bit (reg, mem));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
case E_DImode:
switch (mode)
{
@@ -227,6 +254,12 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
gcc_unreachable ();
}
break;
+
+ case E_QImode:
+ gcc_assert (mode == E_QImode);
+ emit_move_insn (reg, mem);
+ break;
+
default:
gcc_unreachable ();
break;
@@ -1705,17 +1738,17 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes
RESULT_REG is the rtx for the result register.
EQUALITY_COMPARE_REST is a flag to indicate we need to make a cleanup call
to strcmp/strncmp if we have equality at the end of the inline comparison.
- CLEANUP_LABEL is rtx for a label we generate if we need code to clean up
- and generate the final comparison result.
+ P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code
+ to clean up and generate the final comparison result.
FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
set the final result. */
static void
-expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
- unsigned int base_align,
- rtx orig_src1, rtx orig_src2,
- rtx tmp_reg_src1, rtx tmp_reg_src2, rtx result_reg,
- bool equality_compare_rest, rtx &cleanup_label,
- rtx final_move_label)
+expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
+ unsigned int base_align,
+ rtx orig_src1, rtx orig_src2,
+ rtx tmp_reg_src1, rtx tmp_reg_src2, rtx result_reg,
+ bool equality_compare_rest, rtx *p_cleanup_label,
+ rtx final_move_label)
{
unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
machine_mode load_mode;
@@ -1724,6 +1757,8 @@ expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
unsigned HOST_WIDE_INT offset = 0;
rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
+ gcc_assert (p_cleanup_label != NULL);
+ rtx cleanup_label = *p_cleanup_label;
while (bytes_to_compare > 0)
{
@@ -1876,6 +1911,178 @@ expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
bytes_to_compare -= cmp_bytes;
}
+ *p_cleanup_label = cleanup_label;
+ return;
+}
+
+/* Generate the sequence of compares for strcmp/strncmp using vec/vsx
+ instructions.
+
+ BYTES_TO_COMPARE is the number of bytes to be compared.
+ ORIG_SRC1 is the unmodified rtx for the first string.
+ ORIG_SRC2 is the unmodified rtx for the second string.
+ S1ADDR is the register to use for the base address of the first string.
+ S2ADDR is the register to use for the base address of the second string.
+ OFF_REG is the register to use for the string offset for loads.
+ S1DATA is the register for loading the first string.
+ S2DATA is the register for loading the second string.
+ VEC_RESULT is the rtx for the vector result indicating the byte difference.
+ EQUALITY_COMPARE_REST is a flag to indicate we need to make a cleanup call
+ to strcmp/strncmp if we have equality at the end of the inline comparison.
+ P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code to clean up
+ and generate the final comparison result.
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
+ set the final result. */
+static void
+expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
+ rtx orig_src1, rtx orig_src2,
+ rtx s1addr, rtx s2addr, rtx off_reg,
+ rtx s1data, rtx s2data,
+ rtx vec_result, bool equality_compare_rest,
+ rtx *p_cleanup_label, rtx final_move_label)
+{
+ machine_mode load_mode;
+ unsigned int load_mode_size;
+ unsigned HOST_WIDE_INT cmp_bytes = 0;
+ unsigned HOST_WIDE_INT offset = 0;
+
+ gcc_assert (p_cleanup_label != NULL);
+ rtx cleanup_label = *p_cleanup_label;
+
+ emit_move_insn (s1addr, force_reg (Pmode, XEXP (orig_src1, 0)));
+ emit_move_insn (s2addr, force_reg (Pmode, XEXP (orig_src2, 0)));
+
+ unsigned int i;
+ rtx zr[16];
+ for (i = 0; i < 16; i++)
+ zr[i] = GEN_INT (0);
+ rtvec zv = gen_rtvec_v (16, zr);
+ rtx zero_reg = gen_reg_rtx (V16QImode);
+ rs6000_expand_vector_init (zero_reg, gen_rtx_PARALLEL (V16QImode, zv));
+
+ while (bytes_to_compare > 0)
+ {
+ /* VEC/VSX compare sequence for P8:
+ check each 16B with:
+ lxvd2x 32,28,8
+ lxvd2x 33,29,8
+ vcmpequb 2,0,1 # compare strings
+ vcmpequb 4,0,3 # compare w/ 0
+ xxlorc 37,36,34 # first FF byte is either mismatch or end of string
+ vcmpequb. 7,5,3 # reg 7 contains 0
+ bnl 6,.Lmismatch
+
+ For the P8 LE case, we use lxvd2x and compare full 16 bytes
+ but then use vgbbd and a shift to get two bytes with the
+ information we need in the correct order.
+
+ VEC/VSX compare sequence if TARGET_P9_VECTOR:
+ lxvb16x/lxvb16x # load 16B of each string
+ vcmpnezb. # produces difference location or zero byte location
+ bne 6,.Lmismatch
+
+ Use the overlapping compare trick for the last block if it is
+ less than 16 bytes.
+ */
+
+ load_mode = V16QImode;
+ load_mode_size = GET_MODE_SIZE (load_mode);
+
+ if (bytes_to_compare >= load_mode_size)
+ cmp_bytes = load_mode_size;
+ else
+ {
+ /* Move this load back so it doesn't go past the end. P8/P9
+ can do this efficiently. This is never called with less
+ than 16 bytes so we should always be able to do this. */
+ unsigned int extra_bytes = load_mode_size - bytes_to_compare;
+ cmp_bytes = bytes_to_compare;
+ gcc_assert (offset > extra_bytes);
+ offset -= extra_bytes;
+ cmp_bytes = load_mode_size;
+ bytes_to_compare = cmp_bytes;
+ }
+
+ /* The offset currently used is always kept in off_reg so that the
+ cleanup code on P8 can use it to extract the differing byte. */
+ emit_move_insn (off_reg, GEN_INT (offset));
+
+ rtx addr1 = gen_rtx_PLUS (Pmode, s1addr, off_reg);
+ do_load_for_compare_from_addr (load_mode, s1data, addr1, orig_src1);
+ rtx addr2 = gen_rtx_PLUS (Pmode, s2addr, off_reg);
+ do_load_for_compare_from_addr (load_mode, s2data, addr2, orig_src2);
+
+ /* Cases to handle. A and B are chunks of the two strings.
+ 1: Not end of comparison:
+ A != B: branch to cleanup code to compute result.
+ A == B: next block
+ 2: End of the inline comparison:
+ A != B: branch to cleanup code to compute result.
+ A == B: call strcmp/strncmp
+ 3: compared requested N bytes:
+ A == B: branch to result 0.
+ A != B: cleanup code to compute result. */
+
+ unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
+
+ if (TARGET_P9_VECTOR)
+ emit_insn (gen_vcmpnezb_p (vec_result, s1data, s2data));
+ else
+ {
+ /* Emit instructions to do comparison and zero check. */
+ rtx cmp_res = gen_reg_rtx (load_mode);
+ rtx cmp_zero = gen_reg_rtx (load_mode);
+ rtx cmp_combined = gen_reg_rtx (load_mode);
+ emit_insn (gen_altivec_eqv16qi (cmp_res, s1data, s2data));
+ emit_insn (gen_altivec_eqv16qi (cmp_zero, s1data, zero_reg));
+ emit_insn (gen_orcv16qi3 (vec_result, cmp_zero, cmp_res));
+ emit_insn (gen_altivec_vcmpequb_p (cmp_combined, vec_result, zero_reg));
+ }
+
+ bool branch_to_cleanup = (remain > 0 || equality_compare_rest);
+ rtx cr6 = gen_rtx_REG (CCmode, CR6_REGNO);
+ rtx dst_label;
+ rtx cmp_rtx;
+ if (branch_to_cleanup)
+ {
+ /* Branch to cleanup code, otherwise fall through to do more
+ compares. P8 and P9 use different CR bits because on P8
+ we are looking at the result of a comparison vs a
+ register of zeroes so the all-true condition means no
+ difference or zero was found. On P9, vcmpnezb sets a byte
+ to 0xff if there is a mismatch or zero, so the all-false
+ condition indicates we found no difference or zero. */
+ if (!cleanup_label)
+ cleanup_label = gen_label_rtx ();
+ dst_label = cleanup_label;
+ if (TARGET_P9_VECTOR)
+ cmp_rtx = gen_rtx_NE (VOIDmode, cr6, const0_rtx);
+ else
+ cmp_rtx = gen_rtx_GE (VOIDmode, cr6, const0_rtx);
+ }
+ else
+ {
+ /* Branch to final return or fall through to cleanup,
+ result is already set to 0. */
+ dst_label = final_move_label;
+ if (TARGET_P9_VECTOR)
+ cmp_rtx = gen_rtx_EQ (VOIDmode, cr6, const0_rtx);
+ else
+ cmp_rtx = gen_rtx_LT (VOIDmode, cr6, const0_rtx);
+ }
+
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
+ lab_ref, pc_rtx);
+ rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+ JUMP_LABEL (j2) = dst_label;
+ LABEL_NUSES (dst_label) += 1;
+
+ offset += cmp_bytes;
+ bytes_to_compare -= cmp_bytes;
+ }
+ *p_cleanup_label = cleanup_label;
+ return;
}
/* Generate the final sequence that identifies the differing
@@ -1948,6 +2155,96 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
return;
}
+/* Generate the final sequence that identifies the differing
+ byte and generates the final result, taking into account
+ zero bytes:
+
+ P8:
+ vgbbd 0,0
+ vsldoi 0,0,0,9
+ mfvsrd 9,32
+ addi 10,9,-1 # count trailing zero bits
+ andc 9,10,9
+ popcntd 9,9
+ lbzx 10,28,9 # use that offset to load differing byte
+ lbzx 3,29,9
+ subf 3,3,10 # subtract for final result
+
+ P9:
+ vclzlsbb # counts trailing bytes with lsb=0
+ vextublx # extract differing byte
+
+ STR1 is the reg rtx for data from string 1.
+ STR2 is the reg rtx for data from string 2.
+ RESULT is the reg rtx for the comparison result.
+ S1ADDR is the register to use for the base address of the first string.
+ S2ADDR is the register to use for the base address of the second string.
+ ORIG_SRC1 is the unmodified rtx for the first string.
+ ORIG_SRC2 is the unmodified rtx for the second string.
+ OFF_REG is the register to use for the string offset for loads.
+ VEC_RESULT is the rtx for the vector result indicating the byte difference.
+
+ The two paths consume different arguments. The P9 path takes the
+ bytes straight from STR1/STR2 and does not touch S1ADDR, S2ADDR,
+ ORIG_SRC1, ORIG_SRC2 or OFF_REG. The P8 path does not use STR1/STR2;
+ it adds the computed byte index to OFF_REG (OFF_REG is both source
+ and destination of that add) and loads one byte from each string at
+ S1ADDR + OFF_REG and S2ADDR + OFF_REG.
+ */
+
+static void
+emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
+ rtx s1addr, rtx s2addr,
+ rtx orig_src1, rtx orig_src2,
+ rtx off_reg, rtx vec_result)
+{
+ if (TARGET_P9_VECTOR)
+ {
+ rtx diffix = gen_reg_rtx (SImode);
+ rtx chr1 = gen_reg_rtx (SImode);
+ rtx chr2 = gen_reg_rtx (SImode);
+ rtx chr1_di = simplify_gen_subreg (DImode, chr1, SImode, 0);
+ rtx chr2_di = simplify_gen_subreg (DImode, chr2, SImode, 0); /* DImode views of chr1/chr2, used only as the do_sub3 operands below. */
+ emit_insn (gen_vclzlsbb_v16qi (diffix, vec_result));
+ emit_insn (gen_vextublx (chr1, diffix, str1));
+ emit_insn (gen_vextublx (chr2, diffix, str2));
+ do_sub3 (result, chr1_di, chr2_di);
+ }
+ else
+ {
+ rtx diffix = gen_reg_rtx (DImode);
+ rtx result_gbbd = gen_reg_rtx (V16QImode);
+ /* Since each byte of the input is either 00 or FF, the bytes in
+ dw0 and dw1 after vgbbd are all identical to each other. */
+ emit_insn (gen_p8v_vgbbd (result_gbbd, vec_result));
+ /* For LE, we shift by 9 and get BA in the low two bytes then CTZ.
+ For BE, we shift by 7 and get AB in the high two bytes then CLZ.
+ Both vsldoi inputs are result_gbbd, so the shift is in effect a
+ byte rotate of that one vector. */
+ rtx result_shifted = gen_reg_rtx (V16QImode);
+ int shift_amt = (BYTES_BIG_ENDIAN) ? 7 : 9;
+ emit_insn (gen_altivec_vsldoi_v16qi (result_shifted,result_gbbd,result_gbbd, GEN_INT (shift_amt)));
+
+ rtx diffix_df = simplify_gen_subreg (DFmode, diffix, DImode, 0); /* DFmode view of diffix, passed as the mfvsrd destination. */
+ emit_insn (gen_p8_mfvsrd_3_v16qi (diffix_df, result_shifted));
+ rtx count = gen_reg_rtx (DImode);
+
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_clzdi2 (count, diffix));
+ else
+ emit_insn (gen_ctzdi2 (count, diffix));
+
+ /* P8 doesn't have a good solution for extracting one byte from
+ a vsx reg like vextublx on P9 so we just compute the offset
+ of the differing byte and load it from each string. */
+ do_add3 (off_reg, off_reg, count);
+
+ rtx chr1 = gen_reg_rtx (QImode);
+ rtx chr2 = gen_reg_rtx (QImode);
+ rtx addr1 = gen_rtx_PLUS (Pmode, s1addr, off_reg);
+ do_load_for_compare_from_addr (QImode, chr1, addr1, orig_src1);
+ rtx addr2 = gen_rtx_PLUS (Pmode, s2addr, off_reg);
+ do_load_for_compare_from_addr (QImode, chr2, addr2, orig_src2);
+ machine_mode rmode = GET_MODE (result); /* The subtract operands are viewed in RESULT's own mode. */
+ rtx chr1_rm = simplify_gen_subreg (rmode, chr1, QImode, 0);
+ rtx chr2_rm = simplify_gen_subreg (rmode, chr2, QImode, 0);
+ do_sub3 (result, chr1_rm, chr2_rm);
+ }
+
+ return;
+}
+
/* Expand a string compare operation with length, and return
true if successful. Return false if we should let the
compiler generate normal code, probably a strncmp call.
@@ -2002,21 +2299,43 @@ expand_strn_compare (rtx operands[], int no_length)
gcc_assert (GET_MODE (target) == SImode);
- unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
+ unsigned int required_align = 8;
unsigned HOST_WIDE_INT offset = 0;
unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available. */
unsigned HOST_WIDE_INT compare_length; /* How much to compare inline. */
+
if (no_length)
- /* Use this as a standin to determine the mode to use. */
- bytes = rs6000_string_compare_inline_limit * word_mode_size;
+ bytes = rs6000_string_compare_inline_limit;
else
bytes = UINTVAL (bytes_rtx);
- machine_mode load_mode =
- select_block_compare_mode (0, bytes, base_align);
- unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
- compare_length = rs6000_string_compare_inline_limit * load_mode_size;
+ /* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
+ least POWER7 but we use TARGET_EFFICIENT_UNALIGNED_VSX which is
+ at least POWER8. That way we can rely on overlapping compares to
+ do the final comparison of less than 16 bytes. Also I do not want
+ to deal with making this work for 32 bits. */
+ int use_vec = (bytes >= 16 && !TARGET_32BIT && TARGET_EFFICIENT_UNALIGNED_VSX);
+
+ if (use_vec)
+ required_align = 16;
+
+ machine_mode load_mode;
+ rtx tmp_reg_src1, tmp_reg_src2;
+ if (use_vec)
+ {
+ load_mode = V16QImode;
+ tmp_reg_src1 = gen_reg_rtx (V16QImode);
+ tmp_reg_src2 = gen_reg_rtx (V16QImode);
+ }
+ else
+ {
+ load_mode = select_block_compare_mode (0, bytes, base_align);
+ tmp_reg_src1 = gen_reg_rtx (word_mode);
+ tmp_reg_src2 = gen_reg_rtx (word_mode);
+ }
+
+ compare_length = rs6000_string_compare_inline_limit;
/* If we have equality at the end of the last compare and we have not
found the end of the string, we need to call strcmp/strncmp to
@@ -2040,10 +2359,7 @@ expand_strn_compare (rtx operands[], int no_length)
rtx final_move_label = gen_label_rtx ();
rtx final_label = gen_label_rtx ();
rtx begin_compare_label = NULL;
- unsigned int required_align = 8;
-
- required_align = 8;
-
+
if (base_align < required_align)
{
/* Generate code that checks distance to 4k boundary for this case. */
@@ -2060,7 +2376,7 @@ expand_strn_compare (rtx operands[], int no_length)
the subsequent code generation are in agreement so we do not
go past the length we tested for a 4k boundary crossing. */
unsigned HOST_WIDE_INT align_test = compare_length;
- if (align_test < 8)
+ if (align_test < required_align)
{
align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test);
base_align = align_test;
@@ -2102,7 +2418,7 @@ expand_strn_compare (rtx operands[], int no_length)
else
{
/* -m32 -mpowerpc64 results in word_mode being DImode even
- though otherwise it is 32-bit. The length arg to strncmp
+ though otherwise it is 32-bit. The length arg to strncmp
is a size_t which will be the same size as pointers. */
rtx len_rtx = gen_reg_rtx (Pmode);
emit_move_insn (len_rtx, gen_int_mode (bytes, Pmode));
@@ -2124,17 +2440,32 @@ expand_strn_compare (rtx operands[], int no_length)
}
rtx cleanup_label = NULL;
- rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
- rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
+ rtx s1addr = NULL, s2addr = NULL, off_reg = NULL, vec_result = NULL;
/* Generate a sequence of GPR or VEC/VSX instructions to compare out
to the length specified. */
- expand_strncmp_gpr_sequence(compare_length, base_align,
- orig_src1, orig_src2,
- tmp_reg_src1, tmp_reg_src2,
- result_reg,
- equality_compare_rest,
- cleanup_label, final_move_label);
+ if (use_vec)
+ {
+ s1addr = gen_reg_rtx (Pmode);
+ s2addr = gen_reg_rtx (Pmode);
+ off_reg = gen_reg_rtx (Pmode);
+ vec_result = gen_reg_rtx (load_mode);
+ emit_move_insn (result_reg, GEN_INT (0));
+ expand_strncmp_vec_sequence (compare_length,
+ orig_src1, orig_src2,
+ s1addr, s2addr, off_reg,
+ tmp_reg_src1, tmp_reg_src2,
+ vec_result,
+ equality_compare_rest,
+ &cleanup_label, final_move_label);
+ }
+ else
+ expand_strncmp_gpr_sequence (compare_length, base_align,
+ orig_src1, orig_src2,
+ tmp_reg_src1, tmp_reg_src2,
+ result_reg,
+ equality_compare_rest,
+ &cleanup_label, final_move_label);
offset = compare_length;
@@ -2174,7 +2505,12 @@ expand_strn_compare (rtx operands[], int no_length)
if (cleanup_label)
emit_label (cleanup_label);
- emit_final_str_compare_gpr (tmp_reg_src1, tmp_reg_src2, result_reg);
+ if (use_vec)
+ emit_final_str_compare_vec (tmp_reg_src1, tmp_reg_src2, result_reg,
+ s1addr, s2addr, orig_src1, orig_src2,
+ off_reg, vec_result);
+ else
+ emit_final_str_compare_gpr (tmp_reg_src1, tmp_reg_src2, result_reg);
emit_label (final_move_label);
emit_insn (gen_movsi (target,
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index ace8a477550..ad1b8a29ac6 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -342,8 +342,8 @@ Target Report Var(rs6000_block_compare_inline_loop_limit) Init(-1) RejectNegativ
Max number of bytes to compare with loops.
mstring-compare-inline-limit=
-Target Report Var(rs6000_string_compare_inline_limit) Init(8) RejectNegative Joined UInteger Save
-Max number of pairs of load insns for compare.
+Target Report Var(rs6000_string_compare_inline_limit) Init(64) RejectNegative Joined UInteger Save
+Max number of bytes to compare.
misel
Target Report Mask(ISEL) Var(rs6000_isa_flags)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index e6921e96a3d..01fb4213001 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1429,7 +1429,7 @@
}
})
-(define_insn "*vsx_ld_elemrev_v16qi_internal"
+(define_insn "vsx_ld_elemrev_v16qi_internal"
[(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
(vec_select:V16QI
(match_operand:V16QI 1 "memory_operand" "Z")
@@ -5107,6 +5107,22 @@
"vcmpnezb %0,%1,%2"
[(set_attr "type" "vecsimple")])
+;; Vector Compare Not Equal or Zero Byte, record form (vcmpnezb.): sets CR6 as well as the V16QI result in operand 0.
+(define_insn "vcmpnezb_p"
+ [(set (reg:CC CR6_REGNO)
+ (unspec:CC
+ [(match_operand:V16QI 1 "altivec_register_operand" "v")
+ (match_operand:V16QI 2 "altivec_register_operand" "v")]
+ UNSPEC_VCMPNEZB))
+ (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
+ (unspec:V16QI
+ [(match_dup 1)
+ (match_dup 2)]
+ UNSPEC_VCMPNEZB))]
+ "TARGET_P9_VECTOR"
+ "vcmpnezb. %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
;; Vector Compare Not Equal Half Word (specified/not+eq:)
(define_insn "vcmpneh"
[(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index f2dd12b3d73..291e414fea2 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -24165,12 +24165,10 @@ target-specific.
@item -mstring-compare-inline-limit=@var{num}
@opindex mstring-compare-inline-limit
-Generate at most @var{num} pairs of load instructions to compare the
-string inline. If the difference or end of string is not found at the
+Compare at most @var{num} string bytes with inline code.
+If the difference or end of string is not found at the
end of the inline compare a call to @code{strcmp} or @code{strncmp} will
-take care of the rest of the comparison. The default is 8 pairs of
-loads, which will compare 64 bytes on a 64-bit target and 32 bytes on a
-32-bit target.
+take care of the rest of the comparison. The default is 64 bytes.
@item -G @var{num}
@opindex G

View File

@ -0,0 +1,40 @@
commit 6f1a7440d9aac59fba0f2e2d8d0a9a0b82f480cb
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Tue Oct 2 17:31:53 2018 +0000
2018-10-02 Aaron Sawdey <acsawdey@linux.ibm.com>
PR target/87474
* config/rs6000/rs6000-string.c (expand_strn_compare): Check that both
P8_VECTOR and VSX are enabled.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@264799 138bc75d-0d04-0410-961f-82ee72b054a4
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index ff0414586d0..9c25bad97a1 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -2205,6 +2205,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
}
else
{
+ gcc_assert (TARGET_P8_VECTOR);
rtx diffix = gen_reg_rtx (DImode);
rtx result_gbbd = gen_reg_rtx (V16QImode);
/* Since each byte of the input is either 00 or FF, the bytes in
@@ -2313,9 +2314,12 @@ expand_strn_compare (rtx operands[], int no_length)
/* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
least POWER7 but we use TARGET_EFFICIENT_UNALIGNED_VSX which is
at least POWER8. That way we can rely on overlapping compares to
- do the final comparison of less than 16 bytes. Also I do not want
- to deal with making this work for 32 bits. */
- int use_vec = (bytes >= 16 && !TARGET_32BIT && TARGET_EFFICIENT_UNALIGNED_VSX);
+ do the final comparison of less than 16 bytes. Also I do not
+ want to deal with making this work for 32 bits. In addition, we
+ have to make sure that we have at least P8_VECTOR (we don't allow
+ P9_VECTOR without P8_VECTOR). */
+ int use_vec = (bytes >= 16 && !TARGET_32BIT
+ && TARGET_EFFICIENT_UNALIGNED_VSX && TARGET_P8_VECTOR);
if (use_vec)
required_align = 16;

View File

@ -0,0 +1,510 @@
commit 08869d85bd2a7ec5468b3bb3f01a930eb93e6381
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Fri Oct 26 19:33:31 2018 +0000
2018-10-26 Aaron Sawdey <acsawdey@linux.ibm.com>
* config/rs6000/rs6000-string.c (expand_strncmp_gpr_sequence): Change to
a shorter sequence with fewer branches.
(emit_final_str_compare_gpr): Ditto.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@265546 138bc75d-0d04-0410-961f-82ee72b054a4
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 9c25bad97a1..96729d9663c 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -259,7 +259,7 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
gcc_assert (mode == E_QImode);
emit_move_insn (reg, mem);
break;
-
+
default:
gcc_unreachable ();
break;
@@ -726,7 +726,7 @@ expand_compare_loop (rtx operands[])
{
if (GET_MODE_SIZE (GET_MODE (bytes_rtx)) > GET_MODE_SIZE (word_mode))
/* Do not expect length longer than word_mode. */
- return false;
+ return false;
else if (GET_MODE_SIZE (GET_MODE (bytes_rtx)) < GET_MODE_SIZE (word_mode))
{
bytes_rtx = force_reg (GET_MODE (bytes_rtx), bytes_rtx);
@@ -770,7 +770,7 @@ expand_compare_loop (rtx operands[])
rtx j;
/* Example of generated code for 35 bytes aligned 1 byte.
-
+
mtctr 8
li 6,0
li 5,8
@@ -798,7 +798,7 @@ expand_compare_loop (rtx operands[])
popcntd 9,9
subfe 10,10,10
or 9,9,10
-
+
Compiled with -fno-reorder-blocks for clarity. */
/* Structure of what we're going to do:
@@ -1041,7 +1041,7 @@ expand_compare_loop (rtx operands[])
if (!bytes_is_const)
{
/* If we're dealing with runtime length, we have to check if
- it's zero after the loop. When length is known at compile
+ it's zero after the loop. When length is known at compile
time the no-remainder condition is dealt with above. By
doing this after cleanup_label, we also deal with the
case where length is 0 at the start and we bypass the
@@ -1411,7 +1411,7 @@ expand_block_compare (rtx operands[])
rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
/* P7/P8 code uses cond for subfc. but P9 uses
- it for cmpld which needs CCUNSmode. */
+ it for cmpld which needs CCUNSmode. */
rtx cond;
if (TARGET_P9_MISC)
cond = gen_reg_rtx (CCUNSmode);
@@ -1655,7 +1655,7 @@ expand_block_compare (rtx operands[])
emit_label (convert_label);
/* We need to produce DI result from sub, then convert to target SI
- while maintaining <0 / ==0 / >0 properties. This sequence works:
+ while maintaining <0 / ==0 / >0 properties. This sequence works:
subfc L,A,B
subfe H,H,H
popcntd L,L
@@ -1740,7 +1740,7 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes
to strcmp/strncmp if we have equality at the end of the inline comparison.
P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code
to clean up and generate the final comparison result.
- FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
set the final result. */
static void
expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
@@ -1763,12 +1763,9 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
while (bytes_to_compare > 0)
{
/* GPR compare sequence:
- check each 8B with: ld/ld cmpd bne
- If equal, use rldicr/cmpb to check for zero byte.
+ check each 8B with: ld/ld/cmpb/cmpb/orc./bne
+
cleanup code at end:
- cmpb get byte that differs
- cmpb look for zero byte
- orc combine
cntlzd get bit of first zero/diff byte
subfic convert for rldcl use
rldcl rldcl extract diff/zero byte
@@ -1776,7 +1773,7 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
The last compare can branch around the cleanup code if the
result is zero because the strings are exactly equal. */
-
+
unsigned int align = compute_current_alignment (base_align, offset);
load_mode = select_block_compare_mode (offset, bytes_to_compare, align);
load_mode_size = GET_MODE_SIZE (load_mode);
@@ -1801,34 +1798,49 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
rid of the extra bytes. */
cmp_bytes = bytes_to_compare;
- rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
+ rtx offset_reg = gen_reg_rtx (Pmode);
+ emit_move_insn (offset_reg, GEN_INT (offset));
+
+ rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, offset_reg);
do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
- rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
+ rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, offset_reg);
do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
/* We must always left-align the data we read, and
clear any bytes to the right that are beyond the string.
Otherwise the cmpb sequence won't produce the correct
- results. The beginning of the compare will be done
- with word_mode so will not have any extra shifts or
- clear rights. */
+ results. However if there is only one byte left, we
+ can just subtract to get the final result so the shifts
+ and clears are not needed. */
- if (load_mode_size < word_mode_size)
- {
- /* Rotate left first. */
- rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
- do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
- do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
- }
+ unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
- if (cmp_bytes < word_mode_size)
+ /* Loading just a single byte is a special case. If we are
+ loading more than that, we have to check whether we are
+ looking at the entire chunk of data. If not, rotate left and
+ clear right so that bytes we aren't supposed to look at are
+ zeroed, and the first byte we are supposed to compare is
+ leftmost. */
+ if (load_mode_size != 1)
{
- /* Now clear right. This plus the rotate can be
- turned into a rldicr instruction. */
- HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
- do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
- do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
+ if (load_mode_size < word_mode_size)
+ {
+ /* Rotate left first. */
+ rtx sh = GEN_INT (BITS_PER_UNIT
+ * (word_mode_size - load_mode_size));
+ do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
+ do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
+ }
+
+ if (cmp_bytes < word_mode_size)
+ {
+ /* Now clear right. This plus the rotate can be
+ turned into a rldicr instruction. */
+ HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
+ do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
+ do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
+ }
}
/* Cases to handle. A and B are chunks of the two strings.
@@ -1842,8 +1854,6 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
A == B: branch to result 0.
A != B: cleanup code to compute result. */
- unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
-
rtx dst_label;
if (remain > 0 || equality_compare_rest)
{
@@ -1857,54 +1867,89 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
/* Branch to end and produce result of 0. */
dst_label = final_move_label;
- rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
- rtx cond = gen_reg_rtx (CCmode);
+ if (load_mode_size == 1)
+ {
+ /* Special case for comparing just single byte. */
+ if (equality_compare_rest)
+ {
+ /* Use subf./bne to branch to final_move_label if the
+ byte differs, otherwise fall through to the strncmp
+ call. We must also check for a zero byte here as we
+ must not make the library call if this is the end of
+ the string. */
+
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
+ rtx cond = gen_reg_rtx (CCmode);
+ rtx diff_rtx = gen_rtx_MINUS (word_mode,
+ tmp_reg_src1, tmp_reg_src2);
+ rs6000_emit_dot_insn (result_reg, diff_rtx, 2, cond);
+ rtx cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
+ lab_ref, pc_rtx);
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+ JUMP_LABEL (j) = final_move_label;
+ LABEL_NUSES (final_move_label) += 1;
- /* Always produce the 0 result, it is needed if
- cmpb finds a 0 byte in this chunk. */
- rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
- rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
+ /* Check for zero byte here before fall through to
+ library call. This catches the case where the
+ strings are equal and end in a zero byte at this
+ position. */
- rtx cmp_rtx;
- if (remain == 0 && !equality_compare_rest)
- cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
- else
- cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ rtx cond0 = gen_reg_rtx (CCmode);
+ emit_move_insn (cond0, gen_rtx_COMPARE (CCmode, tmp_reg_src1,
+ const0_rtx));
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
- lab_ref, pc_rtx);
- rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
- JUMP_LABEL (j) = dst_label;
- LABEL_NUSES (dst_label) += 1;
+ rtx cmp0eq_rtx = gen_rtx_EQ (VOIDmode, cond0, const0_rtx);
- if (remain > 0 || equality_compare_rest)
+ rtx ifelse0 = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp0eq_rtx,
+ lab_ref, pc_rtx);
+ rtx j0 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse0));
+ JUMP_LABEL (j0) = final_move_label;
+ LABEL_NUSES (final_move_label) += 1;
+ }
+ else
+ {
+ /* This is the last byte to be compared so we can use
+ subf to compute the final result and branch
+ unconditionally to final_move_label. */
+
+ do_sub3 (result_reg, tmp_reg_src1, tmp_reg_src2);
+
+ rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
+ JUMP_LABEL (j) = final_move_label;
+ LABEL_NUSES (final_move_label) += 1;
+ emit_barrier ();
+ }
+ }
+ else
{
- /* Generate a cmpb to test for a 0 byte and branch
- to final result if found. */
rtx cmpb_zero = gen_reg_rtx (word_mode);
- rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
- rtx condz = gen_reg_rtx (CCmode);
+ rtx cmpb_diff = gen_reg_rtx (word_mode);
rtx zero_reg = gen_reg_rtx (word_mode);
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
+ rtx cond = gen_reg_rtx (CCmode);
+
emit_move_insn (zero_reg, GEN_INT (0));
+ do_cmpb3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2);
do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
+ rtx not_diff = gen_rtx_NOT (word_mode, cmpb_diff);
+ rtx orc_rtx = gen_rtx_IOR (word_mode, not_diff, cmpb_zero);
- if (cmp_bytes < word_mode_size)
- {
- /* Don't want to look at zero bytes past end. */
- HOST_WIDE_INT mb =
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
- do_and3 (cmpb_zero, cmpb_zero, mask);
- }
+ rs6000_emit_dot_insn (result_reg, orc_rtx, 2, cond);
- emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
- rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
- lab_ref_fin, pc_rtx);
- rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
- JUMP_LABEL (j2) = final_move_label;
- LABEL_NUSES (final_move_label) += 1;
+ rtx cmp_rtx;
+ if (remain == 0 && !equality_compare_rest)
+ cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
+ else
+ cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
+ lab_ref, pc_rtx);
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+ JUMP_LABEL (j) = dst_label;
+ LABEL_NUSES (dst_label) += 1;
}
offset += cmp_bytes;
@@ -1915,7 +1960,7 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
return;
}
-/* Generate the sequence of compares for strcmp/strncmp using vec/vsx
+/* Generate the sequence of compares for strcmp/strncmp using vec/vsx
instructions.
BYTES_TO_COMPARE is the number of bytes to be compared.
@@ -1931,7 +1976,7 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
to strcmp/strncmp if we have equality at the end of the inline comparison.
P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code to clean up
and generate the final comparison result.
- FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
set the final result. */
static void
expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
@@ -1982,12 +2027,12 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
bne 6,.Lmismatch
Use the overlapping compare trick for the last block if it is
- less than 16 bytes.
+ less than 16 bytes.
*/
load_mode = V16QImode;
load_mode_size = GET_MODE_SIZE (load_mode);
-
+
if (bytes_to_compare >= load_mode_size)
cmp_bytes = load_mode_size;
else
@@ -2046,10 +2091,10 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
if (branch_to_cleanup)
{
/* Branch to cleanup code, otherwise fall through to do more
- compares. P8 and P9 use different CR bits because on P8
+ compares. P8 and P9 use different CR bits because on P8
we are looking at the result of a comparsion vs a
register of zeroes so the all-true condition means no
- difference or zero was found. On P9, vcmpnezb sets a byte
+ difference or zero was found. On P9, vcmpnezb sets a byte
to 0xff if there is a mismatch or zero, so the all-false
condition indicates we found no difference or zero. */
if (!cleanup_label)
@@ -2062,7 +2107,7 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
}
else
{
- /* Branch to final return or fall through to cleanup,
+ /* Branch to final return or fall through to cleanup,
result is already set to 0. */
dst_label = final_move_label;
if (TARGET_P9_VECTOR)
@@ -2088,10 +2133,7 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
/* Generate the final sequence that identifies the differing
byte and generates the final result, taking into account
zero bytes:
-
- cmpb cmpb_result1, src1, src2
- cmpb cmpb_result2, src1, zero
- orc cmpb_result1, cmp_result1, cmpb_result2
+
cntlzd get bit of first zero/diff byte
addi convert for rldcl use
rldcl rldcl extract diff/zero byte
@@ -2105,10 +2147,7 @@ static void
emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
{
machine_mode m = GET_MODE (str1);
- rtx cmpb_diff = gen_reg_rtx (m);
- rtx cmpb_zero = gen_reg_rtx (m);
rtx rot_amt = gen_reg_rtx (m);
- rtx zero_reg = gen_reg_rtx (m);
rtx rot1_1 = gen_reg_rtx (m);
rtx rot1_2 = gen_reg_rtx (m);
@@ -2117,12 +2156,7 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
if (m == SImode)
{
- emit_insn (gen_cmpbsi3 (cmpb_diff, str1, str2));
- emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
- emit_insn (gen_cmpbsi3 (cmpb_zero, str1, zero_reg));
- emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
- emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
- emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
+ emit_insn (gen_clzsi2 (rot_amt, result));
emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
emit_insn (gen_rotlsi3 (rot1_1, str1,
gen_lowpart (SImode, rot_amt)));
@@ -2134,12 +2168,7 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
}
else if (m == DImode)
{
- emit_insn (gen_cmpbdi3 (cmpb_diff, str1, str2));
- emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
- emit_insn (gen_cmpbdi3 (cmpb_zero, str1, zero_reg));
- emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
- emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
- emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
+ emit_insn (gen_clzdi2 (rot_amt, result));
emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
emit_insn (gen_rotldi3 (rot1_1, str1,
gen_lowpart (SImode, rot_amt)));
@@ -2151,7 +2180,7 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
}
else
gcc_unreachable ();
-
+
return;
}
@@ -2169,10 +2198,10 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
lbzx 10,28,9 # use that offset to load differing byte
lbzx 3,29,9
subf 3,3,10 # subtract for final result
-
+
P9:
vclzlsbb # counts trailing bytes with lsb=0
- vextublx # extract differing byte
+ vextublx # extract differing byte
STR1 is the reg rtx for data from string 1.
STR2 is the reg rtx for data from string 2.
@@ -2208,7 +2237,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
gcc_assert (TARGET_P8_VECTOR);
rtx diffix = gen_reg_rtx (DImode);
rtx result_gbbd = gen_reg_rtx (V16QImode);
- /* Since each byte of the input is either 00 or FF, the bytes in
+ /* Since each byte of the input is either 00 or FF, the bytes in
dw0 and dw1 after vgbbd are all identical to each other. */
emit_insn (gen_p8v_vgbbd (result_gbbd, vec_result));
/* For LE, we shift by 9 and get BA in the low two bytes then CTZ.
@@ -2226,7 +2255,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
else
emit_insn (gen_ctzdi2 (count, diffix));
- /* P8 doesn't have a good solution for extracting one byte from
+ /* P8 doesn't have a good solution for extracting one byte from
a vsx reg like vextublx on P9 so we just compute the offset
of the differing byte and load it from each string. */
do_add3 (off_reg, off_reg, count);
@@ -2247,7 +2276,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
}
/* Expand a string compare operation with length, and return
- true if successful. Return false if we should let the
+ true if successful. Return false if we should let the
compiler generate normal code, probably a strncmp call.
OPERANDS[0] is the target (result).
@@ -2279,9 +2308,9 @@ expand_strn_compare (rtx operands[], int no_length)
rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
- /* If we have a length, it must be constant. This simplifies things
+ /* If we have a length, it must be constant. This simplifies things
a bit as we don't have to generate code to check if we've exceeded
- the length. Later this could be expanded to handle this case. */
+ the length. Later this could be expanded to handle this case. */
if (!no_length && !CONST_INT_P (bytes_rtx))
return false;
@@ -2311,7 +2340,7 @@ expand_strn_compare (rtx operands[], int no_length)
else
bytes = UINTVAL (bytes_rtx);
- /* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
+ /* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
least POWER7 but we use TARGET_EFFICIENT_UNALIGNED_VSX which is
at least POWER8. That way we can rely on overlapping compares to
do the final comparison of less than 16 bytes. Also I do not
@@ -2363,7 +2392,7 @@ expand_strn_compare (rtx operands[], int no_length)
rtx final_move_label = gen_label_rtx ();
rtx final_label = gen_label_rtx ();
rtx begin_compare_label = NULL;
-
+
if (base_align < required_align)
{
/* Generate code that checks distance to 4k boundary for this case. */
@@ -2472,7 +2501,7 @@ expand_strn_compare (rtx operands[], int no_length)
&cleanup_label, final_move_label);
offset = compare_length;
-
+
if (equality_compare_rest)
{
/* Update pointers past what has been compared already. */

View File

@ -0,0 +1,40 @@
--- gcc/config.gcc.jj 2008-04-24 15:42:46.000000000 -0500
+++ gcc/config.gcc 2008-04-24 15:44:51.000000000 -0500
@@ -2790,7 +2790,7 @@ sparc-*-rtems*)
tm_file="${tm_file} dbxelf.h elfos.h sparc/sysv4.h sparc/sp-elf.h sparc/rtemself.h rtems.h newlib-stdint.h"
tmake_file="${tmake_file} sparc/t-sparc sparc/t-rtems"
;;
-sparc-*-linux*)
+sparc-*-linux* | sparcv9-*-linux*)
tm_file="${tm_file} dbxelf.h elfos.h sparc/sysv4.h gnu-user.h linux.h glibc-stdint.h sparc/tso.h"
extra_options="${extra_options} sparc/long-double-switch.opt"
case ${target} in
@@ -2844,7 +2844,7 @@ sparc64-*-rtems*)
extra_options="${extra_options}"
tmake_file="${tmake_file} sparc/t-sparc sparc/t-rtems-64"
;;
-sparc64-*-linux*)
+sparc64*-*-linux*)
tm_file="sparc/biarch64.h ${tm_file} dbxelf.h elfos.h sparc/sysv4.h gnu-user.h linux.h glibc-stdint.h sparc/default64.h sparc/linux64.h sparc/tso.h"
extra_options="${extra_options} sparc/long-double-switch.opt"
tmake_file="${tmake_file} sparc/t-sparc sparc/t-linux64"
--- libgcc/config.host.jj 2008-04-24 15:46:19.000000000 -0500
+++ libgcc/config.host 2008-04-24 15:46:49.000000000 -0500
@@ -1002,7 +1002,7 @@ sparc-*-elf*)
tmake_file="${tmake_file} t-fdpbit t-crtfm"
extra_parts="$extra_parts crti.o crtn.o crtfastmath.o"
;;
-sparc-*-linux*) # SPARC's running GNU/Linux, libc6
+sparc-*-linux* | sparcv9-*-linux*) # SPARC's running GNU/Linux, libc6
tmake_file="${tmake_file} t-crtfm"
if test "${host_address}" = 64; then
tmake_file="$tmake_file sparc/t-linux64"
@@ -1050,7 +1050,7 @@ sparc64-*-freebsd*|ultrasparc-*-freebsd*
tmake_file="$tmake_file t-crtfm"
extra_parts="$extra_parts crtfastmath.o"
;;
-sparc64-*-linux*) # 64-bit SPARC's running GNU/Linux
+sparc64*-*-linux*) # 64-bit SPARC's running GNU/Linux
extra_parts="$extra_parts crtfastmath.o"
tmake_file="${tmake_file} t-crtfm sparc/t-linux"
if test "${host_address}" = 64; then

View File

@ -0,0 +1,11 @@
--- nvptx-tools/nvptx-as.c.jj 2017-01-20 12:40:18.000000000 +0100
+++ nvptx-tools/nvptx-as.c 2017-01-20 12:43:53.864271442 +0100
@@ -939,7 +939,7 @@ fork_execute (const char *prog, char *co
fatal_error ("%s: %m", errmsg);
}
else
- fatal_error (errmsg);
+ fatal_error ("%s", errmsg);
}
do_wait (prog, pex);
}

View File

@ -0,0 +1,32 @@
--- nvptx-tools/configure.ac.jj 2017-01-13 12:48:31.000000000 +0100
+++ nvptx-tools/configure.ac 2017-05-03 10:26:57.076092259 +0200
@@ -66,6 +66,8 @@ CPPFLAGS=$save_CPPFLAGS
LDFLAGS=$save_LDFLAGS
LIBS=$save_LIBS
+AC_CHECK_DECLS(getopt)
+
AC_CONFIG_SUBDIRS([libiberty])
AC_CONFIG_FILES([Makefile dejagnu.exp])
AC_OUTPUT
--- nvptx-tools/configure.jj 2017-01-13 12:48:54.000000000 +0100
+++ nvptx-tools/configure 2017-05-03 10:27:13.503876809 +0200
@@ -3963,6 +3963,18 @@ CPPFLAGS=$save_CPPFLAGS
LDFLAGS=$save_LDFLAGS
LIBS=$save_LIBS
+ac_fn_c_check_decl "$LINENO" "getopt" "ac_cv_have_decl_getopt" "$ac_includes_default"
+if test "x$ac_cv_have_decl_getopt" = x""yes; then :
+ ac_have_decl=1
+else
+ ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_GETOPT $ac_have_decl
+_ACEOF
+
+
subdirs="$subdirs libiberty"

View File

@ -0,0 +1,947 @@
--- nvptx-tools/configure.ac
+++ nvptx-tools/configure.ac
@@ -51,6 +51,7 @@ LIBS="$LIBS -lcuda"
AC_CHECK_FUNCS([[cuGetErrorName] [cuGetErrorString]])
AC_CHECK_DECLS([[cuGetErrorName], [cuGetErrorString]],
[], [], [[#include <cuda.h>]])
+AC_CHECK_HEADERS(unistd.h sys/stat.h)
AC_MSG_CHECKING([for extra programs to build requiring -lcuda])
NVPTX_RUN=
--- nvptx-tools/include/libiberty.h
+++ nvptx-tools/include/libiberty.h
@@ -390,6 +390,17 @@ extern void hex_init (void);
/* Save files used for communication between processes. */
#define PEX_SAVE_TEMPS 0x4
+/* Max number of alloca bytes per call before we must switch to malloc.
+
+ ?? Swiped from gnulib's regex_internal.h header. Is this actually
+ the case? This number seems arbitrary, though sane.
+
+ The OS usually guarantees only one guard page at the bottom of the stack,
+ and a page size can be as small as 4096 bytes. So we cannot safely
+ allocate anything larger than 4096 bytes. Also care for the possibility
+ of a few compiler-allocated temporary stack slots. */
+#define MAX_ALLOCA_SIZE 4032
+
/* Prepare to execute one or more programs, with standard output of
each program fed to standard input of the next.
FLAGS As above.
--- nvptx-tools/nvptx-as.c
+++ nvptx-tools/nvptx-as.c
@@ -30,6 +30,9 @@
#include <string.h>
#include <wait.h>
#include <unistd.h>
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
#include <errno.h>
#define obstack_chunk_alloc malloc
#define obstack_chunk_free free
@@ -42,6 +45,38 @@
#include "version.h"
+#ifndef R_OK
+#define R_OK 4
+#define W_OK 2
+#define X_OK 1
+#endif
+
+#ifndef DIR_SEPARATOR
+# define DIR_SEPARATOR '/'
+#endif
+
+#if defined (_WIN32) || defined (__MSDOS__) \
+ || defined (__DJGPP__) || defined (__OS2__)
+# define HAVE_DOS_BASED_FILE_SYSTEM
+# define HAVE_HOST_EXECUTABLE_SUFFIX
+# define HOST_EXECUTABLE_SUFFIX ".exe"
+# ifndef DIR_SEPARATOR_2
+# define DIR_SEPARATOR_2 '\\'
+# endif
+# define PATH_SEPARATOR ';'
+#else
+# define PATH_SEPARATOR ':'
+#endif
+
+#ifndef DIR_SEPARATOR_2
+# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR)
+#else
+# define IS_DIR_SEPARATOR(ch) \
+ (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2))
+#endif
+
+#define DIR_UP ".."
+
static const char *outname = NULL;
static void __attribute__ ((format (printf, 1, 2)))
@@ -816,7 +851,7 @@ traverse (void **slot, void *data)
}
static void
-process (FILE *in, FILE *out)
+process (FILE *in, FILE *out, int verify, const char *outname)
{
symbol_table = htab_create (500, hash_string_hash, hash_string_eq,
NULL);
@@ -824,6 +859,18 @@ process (FILE *in, FILE *out)
const char *input = read_file (in);
Token *tok = tokenize (input);
+ /* By default, when ptxas is not in PATH, do minimalistic verification,
+ just require that the first non-comment directive is .version. */
+ if (verify < 0)
+ {
+ size_t i;
+ for (i = 0; tok[i].kind == K_comment; i++)
+ ;
+ if (tok[i].kind != K_dotted || !is_keyword (&tok[i], "version"))
+ fatal_error ("missing .version directive at start of file '%s'",
+ outname);
+ }
+
do
tok = parse_file (tok);
while (tok->kind);
@@ -897,9 +944,100 @@ fork_execute (const char *prog, char *const *argv)
 do_wait (prog, pex);
 }
+/* Determine if PROGNAME is available in PATH.
+   Every PATH_SEPARATOR-delimited entry of the PATH environment variable
+   is tried in order; an empty entry is treated as "./".  PROGNAME is
+   appended to the entry and checked with access (X_OK), and when
+   HAVE_HOST_EXECUTABLE_SUFFIX is defined the suffixed name is checked too.
+   When HAVE_SYS_STAT_H and S_ISREG are defined the match must also be
+   a regular file.  Returns true on the first match, false when PATH is
+   unset or nothing matches.  */
+static bool
+program_available (const char *progname)
+{
+  char *temp = getenv ("PATH");
+  if (temp)
+    {
+      char *startp, *endp, *nstore, *alloc_ptr = NULL;
+      size_t prefixlen = strlen (temp) + 1;
+      size_t len;
+      /* An empty PATH entry is spelled "./", which needs two bytes.  */
+      if (prefixlen < 2)
+        prefixlen = 2;
+
+      /* Room for the longest possible directory prefix (including its
+         trailing separator), PROGNAME, the optional suffix and the NUL.  */
+      len = prefixlen + strlen (progname) + 1;
+#ifdef HAVE_HOST_EXECUTABLE_SUFFIX
+      len += strlen (HOST_EXECUTABLE_SUFFIX);
+#endif
+      if (len < MAX_ALLOCA_SIZE)
+        nstore = (char *) alloca (len);
+      else
+        {
+          /* The buffer is written to unconditionally below, so a failed
+             allocation has to be reported here instead of dereferenced.  */
+          alloc_ptr = nstore = (char *) malloc (len);
+          if (nstore == NULL)
+            fatal_error ("out of memory");
+        }
+
+      startp = endp = temp;
+      while (1)
+        {
+          if (*endp == PATH_SEPARATOR || *endp == 0)
+            {
+              if (endp == startp)
+                {
+                  nstore[0] = '.';
+                  nstore[1] = DIR_SEPARATOR;
+                  nstore[2] = '\0';
+                }
+              else
+                {
+                  memcpy (nstore, startp, endp - startp);
+                  if (! IS_DIR_SEPARATOR (endp[-1]))
+                    {
+                      nstore[endp - startp] = DIR_SEPARATOR;
+                      nstore[endp - startp + 1] = 0;
+                    }
+                  else
+                    nstore[endp - startp] = 0;
+                }
+              strcat (nstore, progname);
+              if (! access (nstore, X_OK)
+#ifdef HAVE_HOST_EXECUTABLE_SUFFIX
+                  || ! access (strcat (nstore, HOST_EXECUTABLE_SUFFIX), X_OK)
+#endif
+                  )
+                {
+#if defined (HAVE_SYS_STAT_H) && defined (S_ISREG)
+                  struct stat st;
+                  if (stat (nstore, &st) >= 0 && S_ISREG (st.st_mode))
+#endif
+                    {
+                      /* alloc_ptr is NULL when the buffer came from alloca.  */
+                      free (alloc_ptr);
+                      return true;
+                    }
+                }
+
+              if (*endp == 0)
+                break;
+              endp = startp = endp + 1;
+            }
+          else
+            endp++;
+        }
+      free (alloc_ptr);
+    }
+  return false;
+}
+
static struct option long_options[] = {
{"traditional-format", no_argument, 0, 0 },
{"save-temps", no_argument, 0, 0 },
+ {"verify", no_argument, 0, 0 },
{"no-verify", no_argument, 0, 0 },
{"help", no_argument, 0, 'h' },
{"version", no_argument, 0, 'V' },
@@ -912,7 +1033,7 @@ main (int argc, char **argv)
FILE *in = stdin;
FILE *out = stdout;
bool verbose __attribute__((unused)) = false;
- bool verify = true;
+ int verify = -1;
const char *smver = "sm_30";
int o;
@@ -923,7 +1044,9 @@ main (int argc, char **argv)
{
case 0:
if (option_index == 2)
- verify = false;
+ verify = 1;
+ else if (option_index == 3)
+ verify = 0;
break;
case 'v':
verbose = true;
@@ -948,7 +1071,8 @@ Usage: nvptx-none-as [option...] [asmfile]\n\
Options:\n\
-o FILE Write output to FILE\n\
-v Be verbose\n\
+ --verify Do verify output is acceptable to ptxas\n\
--no-verify Do not verify output is acceptable to ptxas\n\
--help Print this help and exit\n\
--version Print version number and exit\n\
\n\
@@ -983,11 +1108,17 @@ This program has absolutely no warranty.\n",
if (!in)
fatal_error ("cannot open input ptx file");
- process (in, out);
- if (outname)
+ if (outname == NULL)
+ verify = 0;
+ else if (verify == -1)
+ if (program_available ("ptxas"))
+ verify = 1;
+
+ process (in, out, verify, outname);
+ if (outname)
fclose (out);
- if (verify && outname)
+ if (verify > 0)
{
struct obstack argv_obstack;
obstack_init (&argv_obstack);
--- nvptx-tools/configure
+++ nvptx-tools/configure
@@ -168,7 +168,8 @@ test x\$exitcode = x0 || exit 1"
as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
- test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1"
+ test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1
+test \$(( 1 + 1 )) = 2 || exit 1"
if (eval "$as_required") 2>/dev/null; then :
as_have_required=yes
else
@@ -552,11 +553,50 @@ PACKAGE_URL=
ac_unique_file="nvptx-tools"
ac_unique_file="nvptx-as.c"
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stdio.h>
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# ifdef HAVE_STDLIB_H
+# include <stdlib.h>
+# endif
+#endif
+#ifdef HAVE_STRING_H
+# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#endif
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
enable_option_checking=no
ac_subst_vars='LTLIBOBJS
LIBOBJS
subdirs
NVPTX_RUN
+EGREP
+GREP
+CPP
CUDA_DRIVER_LDFLAGS
CUDA_DRIVER_CPPFLAGS
AR
@@ -635,7 +675,8 @@ LIBS
CPPFLAGS
CXX
CXXFLAGS
-CCC'
+CCC
+CPP'
ac_subdirs_all='libiberty'
# Initialize some variables set by options.
@@ -1267,6 +1308,7 @@ Some influential environment variables:
you have headers in a nonstandard directory <include dir>
CXX C++ compiler command
CXXFLAGS C++ compiler flags
+ CPP C preprocessor
Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.
@@ -1575,6 +1617,203 @@ $as_echo "$ac_res" >&6; }
eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
} # ac_fn_c_check_decl
+
+# ac_fn_c_try_cpp LINENO
+# ----------------------
+# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_cpp ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ if { { ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ grep -v '^ *+' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ mv -f conftest.er1 conftest.err
+ fi
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=1
+fi
+ eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ return $ac_retval
+
+} # ac_fn_c_try_cpp
+
+# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists, giving a warning if it cannot be compiled using
+# the include files in INCLUDES and setting the cache variable VAR
+# accordingly.
+ac_fn_c_check_header_mongrel ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+ $as_echo_n "(cached) " >&6
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
+$as_echo_n "checking $2 usability... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_header_compiler=yes
+else
+ ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
+$as_echo_n "checking $2 presence... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <$2>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+ ac_header_preproc=yes
+else
+ ac_header_preproc=no
+fi
+rm -f conftest.err conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
+ yes:no: )
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+ ;;
+ no:yes:* )
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+ ;;
+esac
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+ $as_echo_n "(cached) " >&6
+else
+ eval "$3=\$ac_header_compiler"
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+fi
+ eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+
+} # ac_fn_c_check_header_mongrel
+
+# ac_fn_c_try_run LINENO
+# ----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
+# that executables *can* be run.
+ac_fn_c_try_run ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ if { { ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
+ { { case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: program exited with status $ac_status" >&5
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=$ac_status
+fi
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+ eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ return $ac_retval
+
+} # ac_fn_c_try_run
+
+# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists and can be compiled using the include files in
+# INCLUDES, setting the cache variable VAR accordingly.
+ac_fn_c_check_header_compile ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ eval "$3=yes"
+else
+ eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+ eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+
+} # ac_fn_c_check_header_compile
cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
@@ -3284,6 +3523,418 @@ cat >>confdefs.h <<_ACEOF
#define HAVE_DECL_CUGETERRORSTRING $ac_have_decl
_ACEOF
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
+$as_echo_n "checking how to run the C preprocessor... " >&6; }
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+ CPP=
+fi
+if test -z "$CPP"; then
+ if test "${ac_cv_prog_CPP+set}" = set; then :
+ $as_echo_n "(cached) " >&6
+else
+ # Double quotes because CPP needs to be expanded
+ for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
+ do
+ ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ # <limits.h> exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+ Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+ # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether nonexistent headers
+ # can be detected and how.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+ # Broken: success on invalid input.
+continue
+else
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+ break
+fi
+
+ done
+ ac_cv_prog_CPP=$CPP
+
+fi
+ CPP=$ac_cv_prog_CPP
+else
+ ac_cv_prog_CPP=$CPP
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5
+$as_echo "$CPP" >&6; }
+ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ # <limits.h> exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+ Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+ # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether nonexistent headers
+ # can be detected and how.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+ # Broken: success on invalid input.
+continue
+else
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error "C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." "$LINENO" 5; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
+if test "${ac_cv_path_GREP+set}" = set; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -z "$GREP"; then
+ ac_path_GREP_found=false
+ # Loop through the user's path and test for each of PROGNAME-LIST
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in grep ggrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+ { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+ # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+ ac_count=0
+ $as_echo_n 0123456789 >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ $as_echo 'GREP' >> "conftest.nl"
+ "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
+ if test $ac_count -gt ${ac_path_GREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_GREP="$ac_path_GREP"
+ ac_path_GREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+ $ac_path_GREP_found && break 3
+ done
+ done
+ done
+IFS=$as_save_IFS
+ if test -z "$ac_cv_path_GREP"; then
+ as_fn_error "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ fi
+else
+ ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+$as_echo "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+$as_echo_n "checking for egrep... " >&6; }
+if test "${ac_cv_path_EGREP+set}" = set; then :
+ $as_echo_n "(cached) " >&6
+else
+ if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+ then ac_cv_path_EGREP="$GREP -E"
+ else
+ if test -z "$EGREP"; then
+ ac_path_EGREP_found=false
+ # Loop through the user's path and test for each of PROGNAME-LIST
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in egrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+ { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
+# Check for GNU ac_path_EGREP and select it if it is found.
+ # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+ ac_count=0
+ $as_echo_n 0123456789 >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ $as_echo 'EGREP' >> "conftest.nl"
+ "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
+ if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_EGREP="$ac_path_EGREP"
+ ac_path_EGREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+ $ac_path_EGREP_found && break 3
+ done
+ done
+ done
+IFS=$as_save_IFS
+ if test -z "$ac_cv_path_EGREP"; then
+ as_fn_error "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ fi
+else
+ ac_cv_path_EGREP=$EGREP
+fi
+
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+$as_echo "$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
+$as_echo_n "checking for ANSI C header files... " >&6; }
+if test "${ac_cv_header_stdc+set}" = set; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_header_stdc=yes
+else
+ ac_cv_header_stdc=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "memchr" >/dev/null 2>&1; then :
+
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "free" >/dev/null 2>&1; then :
+
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+ if test "$cross_compiling" = yes; then :
+ :
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+ (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ return 2;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+
+else
+ ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
+$as_echo "$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+$as_echo "#define STDC_HEADERS 1" >>confdefs.h
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+ inttypes.h stdint.h unistd.h
+do :
+ as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
+ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
+"
+eval as_val=\$$as_ac_Header
+ if test "x$as_val" = x""yes; then :
+ cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+for ac_header in unistd.h sys/stat.h
+do :
+ as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
+ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
+eval as_val=\$$as_ac_Header
+ if test "x$as_val" = x""yes; then :
+ cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for extra programs to build requiring -lcuda" >&5
$as_echo_n "checking for extra programs to build requiring -lcuda... " >&6; }

3205
SPECS/gcc.spec Normal file

File diff suppressed because it is too large Load Diff