freebsd-ports/math/openblas/Makefile

146 lines
3.8 KiB
Makefile
Raw Normal View History

# Created by: Eijiro Shibusawa <ej-sib@ice.uec.ac.jp>

PORTNAME=		openblas
DISTVERSIONPREFIX=	v
DISTVERSION=		0.3.15
PORTEPOCH=		1
CATEGORIES=		math
# The two archives in the lapack_tmg group are fetched from this site.
MASTER_SITES=		NL/lapack/timing/:lapack_tmg
DISTFILES=		large.tgz:lapack_tmg \
			timing.tgz:lapack_tmg
DIST_SUBDIR=		openblas

# Upstream commits applied as -p1 patches.
PATCH_SITES=		https://github.com/xianyi/OpenBLAS/commit/
PATCHFILES=		e1911b2e6056efcafe77beebc80c8e8bb8c8a5a9.patch:-p1 \
			42f048cf6c04e51de6c7a3ade5af71b0c4dcea0d.patch:-p1

MAINTAINER=		phd_kimberlite@yahoo.co.jp
COMMENT=		Optimized BLAS library based on GotoBLAS2

LICENSE=		BSD3CLAUSE
LICENSE_FILE=		${WRKSRC}/LICENSE

USES=			compiler:c11 fortran gmake perl5 pkgconfig
CONFLICTS_INSTALL=	cblas lapacke

USE_GITHUB=		yes
GH_ACCOUNT=		xianyi
GH_PROJECT=		OpenBLAS

# Names of the archives that post-extract copies into ${WRKSRC}.
LARGE_FILE=		large.tgz
TIMING_FILE=		timing.tgz

USE_LDCONFIG=		yes
USE_PERL5=		build
OPENBLAS_SVER=		0
TEST_TARGET=		tests
PLIST_SUB+=		PORTVERSION=${PORTVERSION}
# Options offered everywhere, plus AVX/AVX2 on the x86 architectures.
OPTIONS_DEFINE=			DYNAMIC_ARCH INTERFACE64 OPENMP
OPTIONS_DEFINE_amd64=		AVX AVX2
OPTIONS_DEFINE_i386=		AVX AVX2
OPTIONS_DEFAULT=		OPENMP
# OPENMP is excluded on powerpc, powerpc64 and powerpc64le.
OPTIONS_EXCLUDE_powerpc=	OPENMP
OPTIONS_EXCLUDE_powerpc64=	OPENMP
OPTIONS_EXCLUDE_powerpc64le=	OPENMP
math/openblas: update to 0.3.10, add POWER8 option Changelog: common: Improved thread locking behaviour in blas_server and parallel getrf Imported bugfix 394 from LAPACK (spurious reference to "XERBL" due to overlong lines) Imported bugfix 403 from LAPACK (compile option "recursive" required for correctness with Intel and PGI) Imported bugfix 408 from LAPACK (wrong scaling in ZHEEQUB) Imported bugfix 411 from LAPACK (infinite loop in LARGV/LARTG/LARTGP) Fixed mismatches between BUFFERSIZE and GEMM_UNROLL parameters that could lead to crashes at large matrix sizes Restored internal soname in dynamic libraries on FreeBSD and Dragonfly Added API (openblas_setaffinity) to set thread affinity programmatically on Linux Added initial infrastructure for half-precision floating point (bfloat16) support with a generic implementation of SHGEMM Added CMAKE build system support for building the cblas_Xgemm3m functions Fixed CMAKE support for building in a path with embedded spaces Fixed CMAKE (non)handling of NO_EXPRECISION and MAX_STACK_ALLOC Fixed GCC version detection in the Makefiles Allowed overriding the names of AR, AS and LD in Makefile builds POWER: fixed big-endian POWER8 ELFv2 builds on FreeBSD Fixed GCC version checks and DYNAMIC_ARCH builds on POWER9 Fixed CMAKE build support for POWER9 fixed a potential race condition in the thread buffer allocation Worked around LAPACK test failures on PPC G4 MIPS: fixed a potential race condition in the thread buffer allocation Added support for MIPS 24K/24KE family based on P5600 kernels MIPS64: fixed a potential race condition in the thread buffer allocation Added TARGET=GENERIC ARMV7: fixed a race condition in the thread buffer allocation ARMV8: Fixed a race condition in the thread buffer allocation Fixed zero initialisation in the assembly for SGEMM and DGEMM BETA Improved performance of the ThunderX2 DAXPY kernel Added an optimized SGEMM kernel for Cortex A53 Fixed Makefile support for INTERFACE64 (8-byte integer) x86_64: 
Fixed a syntax error in the CMAKE setup for SkylakeX Improved performance of STRSM on Haswell, SkylakeX and Ryzen Improved SGEMM performance on SGEMM for workloads with ldc a multiple of 1024 Improved DGEMM performance on Skylake X Fixed unwanted AVX512-dependency of SGEMM in DYNAMIC_ARCH builds created on SkylakeX Removed data alignment requirement in the SSE2 copy kernels that could cause spurious crashes Added a workaround for an optimizer bug in AppleClang 11.0.3 Fixed LAPACK-TEST failures with Intel Fortran Fixed compilation and LAPACK test results with recent Flang and AMD AOCC Fixed DYNAMIC_ARCH builds with CMAKE on OS X Fixed missing exports of cblas_i?amin, cblas_i?min, cblas_i?max, cblas_?sum, cblas_?gemm3m in the shared library on OS X Fixed reporting of cpu name in DYNAMIC_ARCH builds (would sometimes show the name of an older generation chip supported by the same kernels) IBM Z: Improved performance of SGEMM/STRMM and DGEMM/DTRMM on Z14 PR: 249120 Approved by: phd_kimberlite@yahoo.co.jp (maintainer)
2020-09-06 19:52:25 +02:00
# The radio group list is taken from OPTIONS_RADIO_<arch>; only powerpc64
# defines one here (POWER, a choice between POWER6 and POWER8).
OPTIONS_RADIO=			${OPTIONS_RADIO_${ARCH}}
OPTIONS_RADIO_powerpc64=	POWER
OPTIONS_RADIO_POWER=		POWER6 POWER8
OPTIONS_SUB=			yes

# When BATCH or PACKAGE_BUILDING is defined, DYNAMIC_ARCH becomes a default
# option on i386, amd64 and powerpc64le.
.if defined(BATCH) || defined(PACKAGE_BUILDING)
OPTIONS_DEFAULT_amd64=		DYNAMIC_ARCH
OPTIONS_DEFAULT_i386=		DYNAMIC_ARCH
OPTIONS_DEFAULT_powerpc64le=	DYNAMIC_ARCH
.endif
# Descriptions for the port's options.
AVX_DESC=		Support Advanced Vector Extensions (AVX)
AVX2_DESC=		Support Advanced Vector Extensions 2 (AVX2)
DYNAMIC_ARCH_DESC=	Optimize for multiple CPU types, otherwise for this CPU
INTERFACE64_DESC=	Use 8 byte integers on 64-bit architectures
OPENMP_DESC=		Use OpenMP for threading
POWER6_DESC=		Optimize for POWER6, instead of the default PPC970
math/openblas: update to 0.3.10, add POWER8 option Changelog: common: Improved thread locking behaviour in blas_server and parallel getrf Imported bugfix 394 from LAPACK (spurious reference to "XERBL" due to overlong lines) Imported bugfix 403 from LAPACK (compile option "recursive" required for correctness with Intel and PGI) Imported bugfix 408 from LAPACK (wrong scaling in ZHEEQUB) Imported bugfix 411 from LAPACK (infinite loop in LARGV/LARTG/LARTGP) Fixed mismatches between BUFFERSIZE and GEMM_UNROLL parameters that could lead to crashes at large matrix sizes Restored internal soname in dynamic libraries on FreeBSD and Dragonfly Added API (openblas_setaffinity) to set thread affinity programmatically on Linux Added initial infrastructure for half-precision floating point (bfloat16) support with a generic implementation of SHGEMM Added CMAKE build system support for building the cblas_Xgemm3m functions Fixed CMAKE support for building in a path with embedded spaces Fixed CMAKE (non)handling of NO_EXPRECISION and MAX_STACK_ALLOC Fixed GCC version detection in the Makefiles Allowed overriding the names of AR, AS and LD in Makefile builds POWER: fixed big-endian POWER8 ELFv2 builds on FreeBSD Fixed GCC version checks and DYNAMIC_ARCH builds on POWER9 Fixed CMAKE build support for POWER9 fixed a potential race condition in the thread buffer allocation Worked around LAPACK test failures on PPC G4 MIPS: fixed a potential race condition in the thread buffer allocation Added support for MIPS 24K/24KE family based on P5600 kernels MIPS64: fixed a potential race condition in the thread buffer allocation Added TARGET=GENERIC ARMV7: fixed a race condition in the thread buffer allocation ARMV8: Fixed a race condition in the thread buffer allocation Fixed zero initialisation in the assembly for SGEMM and DGEMM BETA Improved performance of the ThunderX2 DAXPY kernel Added an optimized SGEMM kernel for Cortex A53 Fixed Makefile support for INTERFACE64 (8-byte integer) x86_64: 
Fixed a syntax error in the CMAKE setup for SkylakeX Improved performance of STRSM on Haswell, SkylakeX and Ryzen Improved SGEMM performance on SGEMM for workloads with ldc a multiple of 1024 Improved DGEMM performance on Skylake X Fixed unwanted AVX512-dependency of SGEMM in DYNAMIC_ARCH builds created on SkylakeX Removed data alignment requirement in the SSE2 copy kernels that could cause spurious crashes Added a workaround for an optimizer bug in AppleClang 11.0.3 Fixed LAPACK-TEST failures with Intel Fortran Fixed compilation and LAPACK test results with recent Flang and AMD AOCC Fixed DYNAMIC_ARCH builds with CMAKE on OS X Fixed missing exports of cblas_i?amin, cblas_i?min, cblas_i?max, cblas_?sum, cblas_?gemm3m in the shared library on OS X Fixed reporting of cpu name in DYNAMIC_ARCH builds (would sometimes show the name of an older generation chip supported by the same kernels) IBM Z: Improved performance of SGEMM/STRMM and DGEMM/DTRMM on Z14 PR: 249120 Approved by: phd_kimberlite@yahoo.co.jp (maintainer)
2020-09-06 19:52:25 +02:00
POWER8_DESC=		Optimize for POWER8, instead of the default PPC970

# Load the options framework ahead of the PORT_OPTIONS checks that follow.
.include <bsd.port.options.mk>
# powerpc64: set USE_GCC and choose TARGET_CPU_ARCH from the POWER options.
.if ${ARCH} == "powerpc64"
USE_GCC=		yes
. if ${PORT_OPTIONS:MPOWER6}
TARGET_CPU_ARCH=	POWER6
math/openblas: update to 0.3.10, add POWER8 option Changelog: common: Improved thread locking behaviour in blas_server and parallel getrf Imported bugfix 394 from LAPACK (spurious reference to "XERBL" due to overlong lines) Imported bugfix 403 from LAPACK (compile option "recursive" required for correctness with Intel and PGI) Imported bugfix 408 from LAPACK (wrong scaling in ZHEEQUB) Imported bugfix 411 from LAPACK (infinite loop in LARGV/LARTG/LARTGP) Fixed mismatches between BUFFERSIZE and GEMM_UNROLL parameters that could lead to crashes at large matrix sizes Restored internal soname in dynamic libraries on FreeBSD and Dragonfly Added API (openblas_setaffinity) to set thread affinity programmatically on Linux Added initial infrastructure for half-precision floating point (bfloat16) support with a generic implementation of SHGEMM Added CMAKE build system support for building the cblas_Xgemm3m functions Fixed CMAKE support for building in a path with embedded spaces Fixed CMAKE (non)handling of NO_EXPRECISION and MAX_STACK_ALLOC Fixed GCC version detection in the Makefiles Allowed overriding the names of AR, AS and LD in Makefile builds POWER: fixed big-endian POWER8 ELFv2 builds on FreeBSD Fixed GCC version checks and DYNAMIC_ARCH builds on POWER9 Fixed CMAKE build support for POWER9 fixed a potential race condition in the thread buffer allocation Worked around LAPACK test failures on PPC G4 MIPS: fixed a potential race condition in the thread buffer allocation Added support for MIPS 24K/24KE family based on P5600 kernels MIPS64: fixed a potential race condition in the thread buffer allocation Added TARGET=GENERIC ARMV7: fixed a race condition in the thread buffer allocation ARMV8: Fixed a race condition in the thread buffer allocation Fixed zero initialisation in the assembly for SGEMM and DGEMM BETA Improved performance of the ThunderX2 DAXPY kernel Added an optimized SGEMM kernel for Cortex A53 Fixed Makefile support for INTERFACE64 (8-byte integer) x86_64: 
Fixed a syntax error in the CMAKE setup for SkylakeX Improved performance of STRSM on Haswell, SkylakeX and Ryzen Improved SGEMM performance on SGEMM for workloads with ldc a multiple of 1024 Improved DGEMM performance on Skylake X Fixed unwanted AVX512-dependency of SGEMM in DYNAMIC_ARCH builds created on SkylakeX Removed data alignment requirement in the SSE2 copy kernels that could cause spurious crashes Added a workaround for an optimizer bug in AppleClang 11.0.3 Fixed LAPACK-TEST failures with Intel Fortran Fixed compilation and LAPACK test results with recent Flang and AMD AOCC Fixed DYNAMIC_ARCH builds with CMAKE on OS X Fixed missing exports of cblas_i?amin, cblas_i?min, cblas_i?max, cblas_?sum, cblas_?gemm3m in the shared library on OS X Fixed reporting of cpu name in DYNAMIC_ARCH builds (would sometimes show the name of an older generation chip supported by the same kernels) IBM Z: Improved performance of SGEMM/STRMM and DGEMM/DTRMM on Z14 PR: 249120 Approved by: phd_kimberlite@yahoo.co.jp (maintainer)
2020-09-06 19:52:25 +02:00
. elif ${PORT_OPTIONS:MPOWER8}
TARGET_CPU_ARCH=	POWER8
. else
# Neither POWER6 nor POWER8 is selected: fall back to PPC970. This branch
# belongs to the nested POWER6/POWER8 test, not to the outer ARCH check, so
# it is indented like its sibling directives.
TARGET_CPU_ARCH=	PPC970
. endif
.endif
# powerpc and powerpc64le both set USE_GCC; only powerpc also fixes the
# target CPU (PPCG4).
.if ${ARCH} == "powerpc"
USE_GCC=		yes
TARGET_CPU_ARCH=	PPCG4
.elif ${ARCH} == "powerpc64le"
USE_GCC=		yes
.endif

# Written to Makefile.rule as NUM_THREADS by post-patch; may be overridden
# by the user because of the ?= assignment.
MAXTHREADS?=		64

.include <bsd.port.pre.mk>
# Copy the large.tgz and timing.tgz archives from the distfiles
# subdirectory into the extracted source tree.
# The cd and the copy are joined with && so the copy is never attempted
# from the wrong directory if the cd fails.
post-extract:
	cd ${DISTDIR}/${DIST_SUBDIR} && \
		${CP} ${LARGE_FILE} ${TIMING_FILE} ${WRKSRC}
# Append the port's build settings to ${WRKSRC}/Makefile.rule, one
# assignment per line. Which lines are written depends on the selected
# options and on ${ARCH}; the .if directives are evaluated by make when the
# Makefile is parsed, not by the shell.
post-patch:
	@${ECHO_CMD} MAKE_NB_JOBS=-1 >> ${WRKSRC}/Makefile.rule
	@${ECHO_CMD} NUM_THREADS=${MAXTHREADS} >> ${WRKSRC}/Makefile.rule
	@${ECHO_CMD} USE_THREAD=1 >> ${WRKSRC}/Makefile.rule
# AVX/AVX2 are explicitly disabled unless the matching option is on.
.if ! ${PORT_OPTIONS:MAVX}
	@${ECHO_CMD} NO_AVX=1 >> ${WRKSRC}/Makefile.rule
.endif
.if ! ${PORT_OPTIONS:MAVX2}
	@${ECHO_CMD} NO_AVX2=1 >> ${WRKSRC}/Makefile.rule
.endif
.if ${PORT_OPTIONS:MDYNAMIC_ARCH}
	@${ECHO_CMD} DYNAMIC_ARCH=1 >> ${WRKSRC}/Makefile.rule
	@${ECHO_CMD} DYNAMIC_OLDER=1 >> ${WRKSRC}/Makefile.rule
.endif
# TARGET is only written when an architecture block above set
# TARGET_CPU_ARCH.
.if defined(TARGET_CPU_ARCH)
	@${ECHO_CMD} TARGET=${TARGET_CPU_ARCH} >> ${WRKSRC}/Makefile.rule
.endif
.if ${PORT_OPTIONS:MINTERFACE64}
	@${ECHO_CMD} INTERFACE64=1 >> ${WRKSRC}/Makefile.rule
.endif
.if ${PORT_OPTIONS:MOPENMP}
	@${ECHO_CMD} USE_OPENMP=1 >> ${WRKSRC}/Makefile.rule
.endif
# BINARY is 64 when the architecture name contains "64", otherwise 32.
.if ${ARCH:M*64*} == ""
	@${ECHO_CMD} BINARY=32 >> ${WRKSRC}/Makefile.rule
.else
	@${ECHO_CMD} BINARY=64 >> ${WRKSRC}/Makefile.rule
.endif
# For the OPENMP option: replace every OPENBLAS_NUM_THREADS with
# OMP_NUM_THREADS in the test and ctest Makefiles.
post-patch-OPENMP-on:
	@${REINPLACE_CMD} -e "s+OPENBLAS_NUM_THREADS+OMP_NUM_THREADS+g" \
		${WRKSRC}/test/Makefile \
		${WRKSRC}/ctest/Makefile
# Thread count for the benchmark target; defaults to MAXTHREADS.
BENCHMARK_MAXTHREADS?=	${MAXTHREADS}

# Environment assignment passed to the benchmark run: OMP_NUM_THREADS when
# the OPENMP option is on, OPENBLAS_NUM_THREADS otherwise.
.if empty(PORT_OPTIONS:MOPENMP)
BENCHMARK_THREADS_FLAG=	OPENBLAS_NUM_THREADS=${BENCHMARK_MAXTHREADS}
.else
BENCHMARK_THREADS_FLAG=	OMP_NUM_THREADS=${BENCHMARK_MAXTHREADS}
.endif
# After the port is built, run the "hpl" target in ${WRKSRC}, then run the
# default target in ${WRKSRC}/benchmark with the thread-count variable from
# BENCHMARK_THREADS_FLAG in the environment.
# Each cd is joined to its command with && so nothing runs in the wrong
# directory if the cd fails.
benchmark: build
	cd ${WRKSRC} && ${SETENV} ${MAKE_CMD} ${MAKE_ARGS} hpl
	cd ${WRKSRC}/benchmark && ${SETENV} ${BENCHMARK_THREADS_FLAG} ${MAKE_CMD} ${MAKE_ARGS}
# For the DYNAMIC_ARCH-off case: append the staged libraries matching
# lib/libopenblas_*p-r${PORTVERSION}.* to the temporary packing list
# (paths relative to ${PREFIX}).
post-install-DYNAMIC_ARCH-off:
	cd ${STAGEDIR}${PREFIX} && ls lib/libopenblas_*p-r${PORTVERSION}.* >> ${TMPPLIST}

.include <bsd.port.post.mk>