From 475bdd7d351c3ce80dbb38df719df315473c16d9 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 28 Nov 2017 08:48:49 -0800 Subject: [PATCH] FROMLIST: BACKPORT: kbuild: add support for clang LTO This change adds the configuration option CONFIG_LTO_CLANG, and build system support for clang's Link Time Optimization (LTO). In preparation for LTO support for other compilers, potentially common parts of the changes are gated behind CONFIG_LTO instead. With -flto, instead of object files, clang produces LLVM bitcode, which is compiled into a native object at link time, allowing the final binary to be optimized globally. For more details, see: https://llvm.org/docs/LinkTimeOptimization.html While the kernel normally uses GNU ld for linking, LLVM supports LTO only with lld or GNU gold linkers. This patch set assumes gold will be used with the LLVMgold plug-in to perform the LTO link step. Due to potential incompatibilities with GNU ld, this change also adds LDFINAL_vmlinux for using a different linker for the vmlinux_link step, and defaults to using GNU ld. Assuming LLVMgold.so is in LD_LIBRARY_PATH and CONFIG_LTO_CLANG has been selected, an LTO kernel can be built simply by running make CC=clang. LTO requires clang >= 5.0 and gold from binutils >= 2.27. Bug: 62093296 Bug: 67506682 Change-Id: Ibcd9fc7ec501b4f30b43b4877897615645f8655f (am from https://patchwork.kernel.org/patch/10060329/) Signed-off-by: Sami Tolvanen --- Makefile | 54 +++++++++++++++++++- arch/Kconfig | 30 +++++++++++ scripts/Makefile.build | 105 ++++++++++++++++++++++++++++++++++----- scripts/Makefile.modpost | 40 ++++++++++++--- scripts/link-vmlinux.sh | 81 ++++++++++++++++++++++++------ 5 files changed, 276 insertions(+), 34 deletions(-) diff --git a/Makefile b/Makefile index b8ec38eb8c4d..24b1f9259343 100644 --- a/Makefile +++ b/Makefile @@ -348,6 +348,7 @@ include scripts/Kbuild.include # Make variables (CC, etc...) AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld +LDGOLD = $(CROSS_COMPILE)ld.gold CC = $(CROSS_COMPILE)gcc CPP = $(CC) -E AR = $(CROSS_COMPILE)ar @@ -623,6 +624,20 @@ CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disabl CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) export CFLAGS_GCOV CFLAGS_KCOV +# Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure +# ar/cc/ld-* macros return correct values. +ifdef CONFIG_LTO_CLANG +# use GNU gold with LLVMgold for LTO linking, and LD for vmlinux_link +LDFINAL_vmlinux := $(LD) +LD := $(LDGOLD) +LDFLAGS += -plugin LLVMgold.so +# use llvm-ar for building symbol tables from IR files, and llvm-dis instead +# of objdump for processing symbol versions and exports +LLVM_AR := llvm-ar +LLVM_DIS := llvm-dis +export LLVM_AR LLVM_DIS +endif + # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default # values of the respective KBUILD_* variables ARCH_CPPFLAGS := @@ -641,6 +656,26 @@ KBUILD_CFLAGS += $(call cc-option,-ffunction-sections,) KBUILD_CFLAGS += $(call cc-option,-fdata-sections,) endif +ifdef CONFIG_LTO_CLANG +lto-clang-flags := -flto -fvisibility=hidden + +# allow disabling only clang LTO where needed +DISABLE_LTO_CLANG := -fno-lto -fvisibility=default +export DISABLE_LTO_CLANG +endif + +ifdef CONFIG_LTO +lto-flags := $(lto-clang-flags) +KBUILD_CFLAGS += $(lto-flags) + +DISABLE_LTO := $(DISABLE_LTO_CLANG) +export DISABLE_LTO + +# LDFINAL_vmlinux and LDFLAGS_FINAL_vmlinux can be set to override +# the linker and flags for vmlinux_link. +export LDFINAL_vmlinux LDFLAGS_FINAL_vmlinux +endif + ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += $(call cc-option,-Oz,-Os) KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,) @@ -1083,6 +1118,22 @@ prepare-objtool: $(objtool_target) # CC_STACKPROTECTOR_STRONG! Why did it build with _REGULAR?!") PHONY += prepare-compiler-check prepare-compiler-check: FORCE +# Make sure we're using a supported toolchain with LTO_CLANG +ifdef CONFIG_LTO_CLANG + ifneq ($(call clang-ifversion, -ge, 0500, y), y) + @echo Cannot use CONFIG_LTO_CLANG: requires clang 5.0 or later >&2 && exit 1 + endif + ifneq ($(call gold-ifversion, -ge, 112000000, y), y) + @echo Cannot use CONFIG_LTO_CLANG: requires GNU gold 1.12 or later >&2 && exit 1 + endif +endif +# Make sure compiler supports LTO flags +ifdef lto-flags + ifeq ($(call cc-option, $(lto-flags)),) + @echo Cannot use CONFIG_LTO: $(lto-flags) not supported by compiler \ + >&2 && exit 1 + endif +endif # Make sure compiler supports requested stack protector flag. ifdef stackp-name ifeq ($(call cc-option, $(stackp-flag)),) @@ -1569,7 +1620,8 @@ clean: $(clean-dirs) -o -name modules.builtin -o -name '.tmp_*.o.*' \ -o -name '*.c.[012]*.*' \ -o -name '*.ll' \ - -o -name '*.gcno' \) -type f -print | xargs rm -f + -o -name '*.gcno' \ + -o -name '*.*.symversions' \) -type f -print | xargs rm -f # Generate tags for editors # --------------------------------------------------------------------------- diff --git a/arch/Kconfig b/arch/Kconfig index 659bdd079277..5f96c8fb2f09 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -489,6 +489,36 @@ config LD_DEAD_CODE_DATA_ELIMINATION sections (e.g., '.text.init'). Typically '.' in section names is used to distinguish them from label names / C identifiers. +config LTO + bool + +config ARCH_SUPPORTS_LTO_CLANG + bool + help + An architecture should select this option it supports: + - compiling with clang, + - compiling inline assembly with clang's integrated assembler, + - and linking with either lld or GNU gold w/ LLVMgold. + +config LTO_CLANG + bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)" + depends on ARCH_SUPPORTS_LTO_CLANG + depends on !FTRACE_MCOUNT_RECORD + select LTO + select THIN_ARCHIVES + select LD_DEAD_CODE_DATA_ELIMINATION + help + This option enables clang's Link Time Optimization (LTO), which allows + the compiler to optimize the kernel globally at link time. If you + enable this option, the compiler generates LLVM IR instead of object + files, and the actual compilation from IR occurs at the LTO link step, + which may take several minutes. + + If you select this option, you must compile the kernel with clang >= + 5.0 (make CC=clang) and GNU gold from binutils >= 2.27, and have the + LLVMgold plug-in in LD_LIBRARY_PATH. + + config HAVE_ARCH_WITHIN_STACK_FRAMES bool help diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 3e3efd37ba47..4ce5e1adec58 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -208,6 +208,23 @@ else cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $< +ifdef CONFIG_LTO_CLANG +# Generate .o.symversions files for each .o with exported symbols, and link these +# to the kernel and/or modules at the end. +cmd_modversions_c = \ + if $(OBJDUMP) -h $(@D)/.tmp_$(@F) >/dev/null 2>/dev/null; then \ + if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ + > $(@D)/$(@F).symversions; \ + fi; \ + else \ + if $(LLVM_DIS) -o=- $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ + > $(@D)/$(@F).symversions; \ + fi; \ + fi; \ + mv -f $(@D)/.tmp_$(@F) $@; +else cmd_modversions_c = \ if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ @@ -220,6 +237,7 @@ cmd_modversions_c = \ mv -f $(@D)/.tmp_$(@F) $@; \ fi; endif +endif ifdef CONFIG_FTRACE_MCOUNT_RECORD ifdef BUILD_C_RECORDMCOUNT @@ -434,9 +452,30 @@ $(sort $(subdir-obj-y)): $(subdir-ym) ; # ifdef builtin-target +ifdef CONFIG_LTO_CLANG + ifdef CONFIG_MODVERSIONS + # combine symversions for later processing + update_lto_symversions = \ + rm -f $@.symversions; \ + for i in $(filter-out FORCE,$^); do \ + if [ -f $$i.symversions ]; then \ + cat $$i.symversions \ + >> $@.symversions; \ + fi; \ + done; + endif + # rebuild the symbol table with llvm-ar to include IR files + update_lto_symtable = ; \ + mv -f $@ $@.tmp; \ + $(LLVM_AR) rcsT$(KBUILD_ARFLAGS) $@ \ + $$($(AR) t $@.tmp); \ + rm -f $@.tmp +endif + ifdef CONFIG_THIN_ARCHIVES - cmd_make_builtin = rm -f $@; $(AR) rcST$(KBUILD_ARFLAGS) - cmd_make_empty_builtin = rm -f $@; $(AR) rcST$(KBUILD_ARFLAGS) + cmd_make_builtin = $(update_lto_symversions) \ + rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) + cmd_make_empty_builtin = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) quiet_cmd_link_o_target = AR $@ else cmd_make_builtin = $(LD) $(ld_flags) -r -o @@ -476,7 +515,11 @@ ifdef lib-target quiet_cmd_link_l_target = AR $@ ifdef CONFIG_THIN_ARCHIVES - cmd_link_l_target = rm -f $@; $(AR) rcsT$(KBUILD_ARFLAGS) $@ $(lib-y) + cmd_link_l_target = \ + $(update_lto_symversions) \ + rm -f $@; \ + $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y) \ + $(update_lto_symtable) else cmd_link_l_target = rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS) $@ $(lib-y) endif @@ -494,14 +537,36 @@ else ref_prefix = EXTERN( endif -quiet_cmd_export_list = EXPORTS $@ -cmd_export_list = $(OBJDUMP) -h $< | \ - sed -ne '/___ksymtab/{s/.*+/$(ref_prefix)/;s/ .*/)/;p}' >$(ksyms-lds);\ - rm -f $(dummy-object);\ - $(AR) rcs$(KBUILD_ARFLAGS) $(dummy-object);\ +filter_export_list = sed -ne '/___ksymtab/s/.*+\([^ "]*\).*/$(ref_prefix)\1)/p' +link_export_list = rm -f $(dummy-object);\ + echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\ $(LD) $(ld_flags) -r -o $@ -T $(ksyms-lds) $(dummy-object);\ rm $(dummy-object) $(ksyms-lds) +quiet_cmd_export_list = EXPORTS $@ + +ifdef CONFIG_LTO_CLANG +# objdump doesn't understand IR files and llvm-dis doesn't support archives, +# so we'll walk through each file in the archive separately +cmd_export_list = \ + rm -f $(ksyms-lds); \ + for o in $$($(AR) t $<); do \ + if $(OBJDUMP) -h $$o >/dev/null 2>/dev/null; then \ + $(OBJDUMP) -h $$o | \ + $(filter_export_list) \ + >>$(ksyms-lds); \ + else \ + $(LLVM_DIS) -o=- $$o | \ + $(filter_export_list) \ + >>$(ksyms-lds); \ + fi; \ + done; \ + $(link_export_list) +else +cmd_export_list = $(OBJDUMP) -h $< | $(filter_export_list) >$(ksyms-lds); \ + $(link_export_list) +endif + $(obj)/lib-ksyms.o: $(lib-target) FORCE $(call if_changed,export_list) @@ -525,20 +590,36 @@ $($(subst $(obj)/,,$(@:.o=-objs))) \ $($(subst $(obj)/,,$(@:.o=-y))) \ $($(subst $(obj)/,,$(@:.o=-m)))), $^) -quiet_cmd_link_multi-y = LD $@ -cmd_link_multi-y = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis) +cmd_link_multi-link = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis) + +ifdef CONFIG_THIN_ARCHIVES + quiet_cmd_link_multi-y = AR $@ + cmd_link_multi-y = $(update_lto_symversions) \ + rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(link_multi_deps) \ + $(update_lto_symtable) +else + quiet_cmd_link_multi-y = LD $@ + cmd_link_multi-y = $(cmd_link_multi-link) +endif quiet_cmd_link_multi-m = LD [M] $@ -cmd_link_multi-m = $(cmd_link_multi-y) + +ifdef CONFIG_LTO_CLANG + # don't compile IR until needed + cmd_link_multi-m = $(cmd_link_multi-y) +else + cmd_link_multi-m = $(cmd_link_multi-link) +endif $(multi-used-y): FORCE $(call if_changed,link_multi-y) -$(call multi_depend, $(multi-used-y), .o, -objs -y) $(multi-used-m): FORCE $(call if_changed,link_multi-m) @{ echo $(@:.o=.ko); echo $(link_multi_deps); \ $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod) + +$(call multi_depend, $(multi-used-y), .o, -objs -y) $(call multi_depend, $(multi-used-m), .o, -objs -y -m) targets += $(multi-used-y) $(multi-used-m) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 16923ba4b5b1..d26169b58a74 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -82,12 +82,28 @@ modpost = scripts/mod/modpost \ MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS))) +# If CONFIG_LTO_CLANG is enabled, .o files are either LLVM IR, or empty, so we +# need to link them into actual objects before passing them to modpost +modpost-ext = $(if $(CONFIG_LTO_CLANG),.lto,) + +ifdef CONFIG_LTO_CLANG +quiet_cmd_cc_lto_link_modules = LD [M] $@ +cmd_cc_lto_link_modules = \ + $(LD) $(ld_flags) -r -o $(@) \ + $(shell [ -s $(@:$(modpost-ext).o=.o.symversions) ] && \ + echo -T $(@:$(modpost-ext).o=.o.symversions)) \ + --whole-archive $(filter-out FORCE,$^) + +$(modules:.ko=$(modpost-ext).o): %$(modpost-ext).o: %.o FORCE + $(call if_changed,cc_lto_link_modules) +endif + # We can go over command line length here, so be careful. quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules - cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/.o/' | $(modpost) $(MODPOST_OPT) -s -T - + cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/$(modpost-ext)\.o/' | $(modpost) $(MODPOST_OPT) -s -T - PHONY += __modpost -__modpost: $(modules:.ko=.o) FORCE +__modpost: $(modules:.ko=$(modpost-ext).o) FORCE $(call cmd,modpost) $(wildcard vmlinux) quiet_cmd_kernel-mod = MODPOST $@ @@ -98,8 +114,7 @@ vmlinux.o: FORCE # Declare generated files as targets for modpost $(symverfile): __modpost ; -$(modules:.ko=.mod.c): __modpost ; - +$(modules:.ko=$(modpost-ext).mod.c): __modpost ; # Step 5), compile all *.mod.c files @@ -110,22 +125,33 @@ quiet_cmd_cc_o_c = CC $@ cmd_cc_o_c = $(CC) $(c_flags) $(KBUILD_CFLAGS_MODULE) $(CFLAGS_MODULE) \ -c -o $@ $< -$(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE +$(modules:.ko=.mod.o): %.mod.o: %$(modpost-ext).mod.c FORCE $(call if_changed_dep,cc_o_c) -targets += $(modules:.ko=.mod.o) +targets += $(modules:.ko=$(modpost-ext).mod.o) ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) # Step 6), final link of the modules with optional arch pass after final link quiet_cmd_ld_ko_o = LD [M] $@ + +ifdef CONFIG_LTO_CLANG + cmd_ld_ko_o = \ + $(LD) -r $(LDFLAGS) \ + $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ + $(shell [ -s $(@:.ko=.o.symversions) ] && \ + echo -T $(@:.ko=.o.symversions)) \ + -o $@ --whole-archive \ + $(filter-out FORCE,$(^:$(modpost-ext).o=.o)) +else cmd_ld_ko_o = \ $(LD) -r $(LDFLAGS) \ $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ -o $@ $(filter-out FORCE,$^) ; \ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) +endif -$(modules): %.ko :%.o %.mod.o FORCE +$(modules): %.ko: %$(modpost-ext).o %.mod.o FORCE +$(call if_changed,ld_ko_o) targets += $(modules) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index f742c65108b9..d16dc7c0b5d1 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -53,9 +53,40 @@ archive_builtin() ${AR} rcsT${KBUILD_ARFLAGS} built-in.o \ ${KBUILD_VMLINUX_INIT} \ ${KBUILD_VMLINUX_MAIN} + + if [ -n "${CONFIG_LTO_CLANG}" ]; then + mv -f built-in.o built-in.o.tmp + ${LLVM_AR} rcsT${KBUILD_ARFLAGS} built-in.o $(${AR} t built-in.o.tmp) + rm -f built-in.o.tmp + fi fi } +# If CONFIG_LTO_CLANG is selected, collect generated symbol versions into +# .tmp_symversions +modversions() +{ + if [ -z "${CONFIG_LTO_CLANG}" ]; then + return + fi + + if [ -z "${CONFIG_MODVERSIONS}" ]; then + return + fi + + rm -f .tmp_symversions + + for a in built-in.o ${KBUILD_VMLINUX_LIBS}; do + for o in $(${AR} t $a); do + if [ -f ${o}.symversions ]; then + cat ${o}.symversions >> .tmp_symversions + fi + done + done + + echo "-T .tmp_symversions" +} + # Link of vmlinux.o used for section mismatch analysis # ${1} output file modpost_link() @@ -70,7 +101,16 @@ modpost_link() ${KBUILD_VMLINUX_MAIN} \ --end-group" fi - ${LD} ${LDFLAGS} -r -o ${1} ${objects} + + if [ -n "${CONFIG_LTO_CLANG}" ]; then + # This might take a while, so indicate that we're doing + # an LTO link + info LTO vmlinux.o + else + info LD vmlinux.o + fi + + ${LD} ${LDFLAGS} -r -o ${1} $(modversions) ${objects} } # Link of vmlinux @@ -82,7 +122,15 @@ vmlinux_link() local objects if [ "${SRCARCH}" != "um" ]; then - if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then + local ld=${LD} + local ldflags="${LDFLAGS} ${LDFLAGS_vmlinux}" + + if [ -n "${LDFINAL_vmlinux}" ]; then + ld=${LDFINAL_vmlinux} + ldflags="${LDFLAGS_FINAL_vmlinux} ${LDFLAGS_vmlinux}" + fi + + if [[ -n "${CONFIG_THIN_ARCHIVES}" && -z "${CONFIG_LTO_CLANG}" ]]; then objects="--whole-archive built-in.o ${1}" else objects="${KBUILD_VMLINUX_INIT} \ @@ -92,8 +140,7 @@ vmlinux_link() ${1}" fi - ${LD} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \ - -T ${lds} ${objects} + ${ld} ${ldflags} -o ${2} -T ${lds} ${objects} else if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then objects="-Wl,--whole-archive built-in.o ${1}" @@ -113,7 +160,6 @@ vmlinux_link() fi } - # Create ${2} .o file with all symbols from the ${1} object file kallsyms() { @@ -164,6 +210,7 @@ cleanup() rm -f .tmp_System.map rm -f .tmp_kallsyms* rm -f .tmp_version + rm -f .tmp_symversions rm -f .tmp_vmlinux* rm -f built-in.o rm -f System.map @@ -209,15 +256,6 @@ case "${KCONFIG_CONFIG}" in . "./${KCONFIG_CONFIG}" esac -archive_builtin - -#link vmlinux.o -info LD vmlinux.o -modpost_link vmlinux.o - -# modpost vmlinux.o to check for section mismatches -${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o - # Update version info GEN .version if [ ! -r .version ]; then @@ -228,9 +266,24 @@ else expr 0$(cat .old_version) + 1 >.version; fi; +archive_builtin + +#link vmlinux.o +modpost_link vmlinux.o + +# modpost vmlinux.o to check for section mismatches +${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o + # final build of init/ ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init GCC_PLUGINS_CFLAGS="${GCC_PLUGINS_CFLAGS}" +if [ -n "${CONFIG_LTO_CLANG}" ]; then + # Re-use vmlinux.o, so we can avoid the slow LTO link step in + # vmlinux_link + KBUILD_VMLINUX_INIT= + KBUILD_VMLINUX_MAIN=vmlinux.o +fi + kallsymso="" kallsyms_vmlinux="" if [ -n "${CONFIG_KALLSYMS}" ]; then