This change adds TILE-Gx SIMD instructions to the software raid (md), modeling the Altivec implementation. This is only for syndrome generation; there is more that could be done to improve recovery, as in the recent Intel SSE3 recovery implementation. The code unrolls 8 times; this turns out to be the best on tilegx hardware among the set 1, 2, 4, 8 or 16. The code reads one cache-line of data from each disk, stores P and Q, then goes to the next cache-line. The test code in sys/linux/lib/raid6/test reports a 2008 MB/s data read rate for syndrome generation using 18 disks (16 data and 2 parity). It was 1512 MB/s before these SIMD optimizations. This is running on 1 core with all the data in cache. This is based on the paper "The Mathematics of RAID-6" (http://kernel.org/pub/linux/kernel/people/hpa/raid6.pdf). Signed-off-by: Ken Steele <ken@tilera.com> Signed-off-by: Chris Metcalf <cmetcalf@tilera.com> Signed-off-by: NeilBrown <neilb@suse.de>
102 lines
2.4 KiB
Makefile
102 lines
2.4 KiB
Makefile
#
# This is a simple Makefile to test some of the RAID-6 code
# from userspace.
#

# The generated sources in this file are written by redirecting a command's
# output into the target.  If such a command fails, delete the partial
# target so it is not mistaken for an up-to-date file on the next run.
.DELETE_ON_ERROR:

CC	 = gcc
# Optimisation flags; adjust as desired.  (Kept on its own line so that no
# trailing whitespace ends up in the value.)
OPTFLAGS = -O2
# Simply expanded: anything appended later with += (including $(shell ...)
# probes) is evaluated once at parse time, not on every compile.
CFLAGS	:= -I.. -I ../../../include -g $(OPTFLAGS)
LD	 = ld
# Includes -f, so the word following $(AWK) is the awk script file.
AWK	 = awk -f
AR	 = ar
RANLIB	 = ranlib
# Objects that go into raid6.a regardless of architecture.
OBJS	 = int1.o int2.o int4.o int8.o int16.o int32.o recov.o algos.o tables.o
|
|
|
|
# A literal '#' for use inside $(shell ...).  GNU make 4.3 changed how '\#'
# is handled inside function calls, so go through a variable, which behaves
# the same before and after that change.
pound := \#

# Machine name as reported by uname, with iX86 variants folded into "i386".
# (The sed script must be a plain s/// command; a leading '/' turns it into
# an address and the substitution never happens.)
ARCH := $(shell uname -m 2>/dev/null | sed -e 's/i.86/i386/')
ifeq ($(ARCH),i386)
        CFLAGS += -DCONFIG_X86_32
        IS_X86 = yes
endif
ifeq ($(ARCH),x86_64)
        CFLAGS += -DCONFIG_X86_64
        IS_X86 = yes
endif

ifeq ($(IS_X86),yes)
        OBJS   += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o
        # Probe whether the assembler accepts an AVX2 instruction; if so,
        # define CONFIG_AS_AVX2.  Uses $(CC) so the probe matches the compiler
        # that builds the objects, and POSIX redirection because make runs
        # recipes and $(shell) through /bin/sh, which need not be bash.
        CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" |	\
                    $(CC) -c -x assembler - >/dev/null 2>&1 &&	\
                    rm ./-.o && echo -DCONFIG_AS_AVX2=1)
else
        # Probe whether <altivec.h> and the 'vector' keyword compile.
        # printf is used instead of the non-portable 'echo -e'.
        HAS_ALTIVEC := $(shell printf '$(pound)include <altivec.h>\nvector int a;\n' |\
                         $(CC) -c -x c - >/dev/null 2>&1 && \
                         rm ./-.o && echo yes)
        ifeq ($(HAS_ALTIVEC),yes)
                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
        endif
endif
# The TILE-Gx implementation is added only when uname reports "tilegx".
ifeq ($(ARCH),tilegx)
OBJS += tilegx8.o
endif
|
|
|
|
# Build an object file from the C source of the same name.
%.o: %.c
	$(CC) $(CFLAGS) -c -o $@ $<

# Unrolled-code templates (.uc) are copied in from the parent directory.
%.uc: ../%.uc
	cp -f $< $@

# Plain C sources are likewise copied in from the parent directory.
%.c: ../%.c
	cp -f $< $@
|
|
|
|
# 'all' names no file; declare it phony so that a stray file called "all"
# in this directory cannot make the default goal look up to date.
.PHONY: all
all: raid6.a raid6test

# Static library holding every object selected in $(OBJS).  The old archive
# is removed first so that 'ar cq' always starts from an empty archive.
raid6.a: $(OBJS)
	rm -f $@
	$(AR) cq $@ $^
	$(RANLIB) $@

# Test program: test.c compiled and linked against the library in one step.
raid6test: test.c raid6.a
	$(CC) $(CFLAGS) -o $@ $^
|
|
|
|
# altivecN.c is produced by running unroll.awk over altivec.uc with N set to
# the number embedded in the target name (the stem, $*).
altivec1.c altivec2.c altivec4.c altivec8.c: altivec%.c: altivec.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=$* < $< > $@
|
|
|
|
# intN.c is produced by running unroll.awk over int.uc with N set to the
# number embedded in the target name (the stem, $*).
int1.c int2.c int4.c int8.c int16.c int32.c: int%.c: int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=$* < $< > $@
|
|
|
|
# tilegx8.c is produced by running unroll.awk over tilegx.uc with N=8
# (the stem of the target name).
tilegx8.c: tilegx%.c: tilegx.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=$* < $< > $@
|
|
|
|
# tables.c is whatever the mktables helper program prints on stdout.
tables.c: mktables
	./mktables > $@
|
|
|
|
# Neither target names a file; declare them phony so that a file called
# "clean" or "spotless" cannot stop the recipes from running.
.PHONY: clean spotless

# Remove objects, archives, the mktables helper, and every copied or
# generated source file.
clean:
	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test
	rm -f tilegx*.c

# Everything 'clean' removes, plus editor backup files.
spotless: clean
	rm -f *~
|