biology/subread: Read alignment, quantification and mutation discovery

The Subread package comprises a suite of software programs for processing
next-gen sequencing read data including:

    Subread: a general-purpose read aligner
    Subjunc: a read aligner developed for aligning RNA-seq reads
    featureCounts: a software program developed for counting reads to genomic
    features such as genes, exons, promoters and genomic bins.
    Sublong: a long-read aligner that is designed based on seed-and-vote.
    exactSNP: discovers SNPs by testing signals against local background noise.
This commit is contained in:
Jason W. Bacon 2020-06-05 01:55:49 +00:00
parent 6220495836
commit a156fdcf52
Notes: svn2git 2021-03-31 03:12:20 +00:00
svn path=/head/; revision=537982
18 changed files with 367 additions and 0 deletions

View file

@ -160,6 +160,7 @@
SUBDIR += stacks
SUBDIR += star
SUBDIR += stringtie
SUBDIR += subread
SUBDIR += tRNAscan-SE
SUBDIR += tabixpp
SUBDIR += tophat

23
biology/subread/Makefile Normal file
View file

@ -0,0 +1,23 @@
# $FreeBSD$
PORTNAME= subread
DISTVERSION= 2.0.1
# The distfile recorded in distinfo is subread-2.0.1-source.tar.gz, hence the
# "-source" suffix appended to the version.
DISTVERSIONSUFFIX= -source
CATEGORIES= biology
MASTER_SITES= SF/${PORTNAME}/${PORTNAME}-${PORTVERSION}
MAINTAINER= jwb@FreeBSD.org
COMMENT= High-performance read alignment, quantification and mutation discovery
LICENSE= GPLv3
LICENSE_FILE= ${WRKDIR}/${DISTNAME}/LICENSE
# Makefile.pkg links every program with -lsysinfo.
LIB_DEPENDS= libsysinfo.so:devel/libsysinfo
# Makefile.pkg uses GNU make functions such as $(addsuffix ...), so gmake is
# required.
# NOTE(review): localbase:ldflags presumably supplies the include/library
# search paths needed to find libsysinfo -- confirm against the ports docs.
USES= gmake localbase:ldflags
# Build with the port-supplied makefile instead of the upstream build files.
MAKEFILE= ${FILESDIR}/Makefile.pkg
# Names a subdirectory ("src") of the extracted work source.
WRKSRC_SUBDIR= src
# Makefile.pkg provides an install-strip target that installs the programs and
# then strips them.
INSTALL_TARGET= install-strip
.include <bsd.port.mk>

3
biology/subread/distinfo Normal file
View file

@ -0,0 +1,3 @@
TIMESTAMP = 1591305316
SHA256 (subread-2.0.1-source.tar.gz) = d808eb5b1823c572cb45a97c95a3c5acb3d8e29aa47ec74e3ca1eb345787c17b
SIZE (subread-2.0.1-source.tar.gz) = 23260381

View file

@ -0,0 +1,104 @@
# Configuration for the package build of subread (GNU make syntax).
# NOTE(review): makefile.version presumably defines SUBREAD_VERSION, which is
# baked into the programs below -- confirm against the upstream file.
include makefile.version

# Preprocessor definitions that every translation unit must be built with.
CCFLAGS = -DMAKE_FOR_EXON -DMAKE_STANDALONE \
          -DSUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64

# Appended (not assigned) so that flags supplied by the caller are preserved.
LDFLAGS += -DMAKE_FOR_EXON -DMAKE_STANDALONE -pthread -lsysinfo -lz -lm

# The mandatory definitions are attached to CC itself so that every command
# invoking ${CC}, including the implicit .c -> .o rule, receives them.
CC ?= gcc
CC += ${CCFLAGS}

# Programs produced by this makefile.  The list must NOT end with a trailing
# backslash: a continuation on the last entry folds the next assignment
# (MKDIR) into BINS and leaves MKDIR undefined, which breaks "install".
BINS = genRandomReads detectionCall sublong repair txUnique featureCounts \
       removeDup exactSNP subread-buildindex subindel subread-align subjunc \
       qualityScores subread-fullscan propmapped flattenGTF

# Overridable tools and installation locations.
MKDIR ?= mkdir -p
INSTALL ?= install
# NOTE(review): with both defaults in effect ${DESTDIR}${PREFIX} expands to
# ".local" (no path separator); callers are presumably expected to override
# DESTDIR and PREFIX -- confirm before relying on the defaults.
DESTDIR ?= .
PREFIX ?= local
STRIP ?= strip

# Modules shared by every program; object, header and source names are
# derived from the same base list.
ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge long-hashtable core-bigtable seek-zlib input-blc
ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
# Build every program, then gather the results: the main tools go to ../bin
# and the helper tools to ../bin/utilities.
# Not built here: samMappedBases mergeVCF testZlib
all: genRandomReads detectionCall sublong repair txUnique featureCounts \
     removeDup exactSNP subread-buildindex subindel subread-align subjunc \
     qualityScores subread-fullscan propmapped flattenGTF
	mkdir -p ../bin/utilities
	mv longread-one/LRM longread-one/sublong
	mv longread-one/sublong subread-align subjunc featureCounts subindel \
	    exactSNP subread-buildindex ../bin/
	mv detectionCall genRandomReads repair propmapped qualityScores \
	    removeDup subread-fullscan txUnique flattenGTF ../bin/utilities
# sublong is built by the makefile in longread-one/, which produces the
# executable "LRM" (renamed to sublong by the "all" target).  That makefile
# includes make.version, so a placeholder file is written first; stale objects
# are removed so the sub-build starts from a clean state.
sublong: longread-one/longread-mapping.c ${ALL_OBJECTS}
	echo " " > longread-one/make.version
	rm -f longread-one/*.o
	cd longread-one && $(MAKE)

# Shared recipe for every single-source program: compile the first
# prerequisite (the program's main .c file) and link it with the common
# objects.  CPPFLAGS and CFLAGS are passed explicitly because the main source
# is compiled on this command line rather than through the implicit .c -> .o
# rule, so it would otherwise be built without the caller's compiler flags.
# Expanded lazily (=) so that $@ and $< refer to the rule being run.
BUILD_TOOL = ${CC} ${CPPFLAGS} ${CFLAGS} -o $@ $< ${ALL_OBJECTS} ${LDFLAGS}

# In each rule below the main .c file must stay the FIRST prerequisite,
# because BUILD_TOOL compiles $<.

genRandomReads: gen_rand_reads.c ${ALL_OBJECTS}
	${BUILD_TOOL}

flattenGTF: flattenAnnotations.c ${ALL_OBJECTS}
	${BUILD_TOOL}

detectionCall: detection-calls.c ${ALL_OBJECTS}
	${BUILD_TOOL}

repair: read-repair.c ${ALL_OBJECTS}
	${BUILD_TOOL}

txUnique: tx-unique.c tx-unique.h ${ALL_OBJECTS}
	${BUILD_TOOL}

globalReassembly: global-reassembly.c ${ALL_OBJECTS}
	${BUILD_TOOL}

propmapped: propmapped.c ${ALL_OBJECTS}
	${BUILD_TOOL}

exactSNP: SNPCalling.c SNPCalling.h ${ALL_OBJECTS}
	${BUILD_TOOL}

subread-buildindex: index-builder.c subread.h ${ALL_OBJECTS}
	${BUILD_TOOL}

removeDup: removeDupReads.c removeDupReads.h subread.h ${ALL_OBJECTS}
	${BUILD_TOOL}

subindel: SUBindel.c core.h subread.h ${ALL_OBJECTS}
	${BUILD_TOOL}

featureCounts: readSummary.c subread.h ${ALL_OBJECTS}
	${BUILD_TOOL}

subread-align: core-interface-aligner.c ${ALL_OBJECTS}
	${BUILD_TOOL}

subjunc: core-interface-subjunc.c ${ALL_OBJECTS}
	${BUILD_TOOL}

subtools: subtools.c ${ALL_OBJECTS}
	${BUILD_TOOL}

qualityScores: qualityScores.c ${ALL_OBJECTS}
	${BUILD_TOOL}

subread-fullscan: fullscan.c ${ALL_OBJECTS}
	${BUILD_TOOL}
# These targets name actions, not files, so they must run even if a file with
# the same name exists.
.PHONY: all clean install install-strip

# Install, then strip only the programs this makefile installed.  Stripping
# ${DESTDIR}${PREFIX}/bin/* would also touch unrelated files whenever the
# destination is a shared directory rather than an empty staging area.
# $${f##*/} is the shell's basename of each staged file; the first failing
# strip aborts the recipe.
install-strip: install
	for f in ../bin/exactSNP ../bin/featureCounts ../bin/sub* \
	    ../bin/utilities/*; do \
	    ${STRIP} "${DESTDIR}${PREFIX}/bin/$${f##*/}" || exit 1; \
	done

# Copy the programs collected by "all" (main tools and utilities alike) into a
# single bin directory.
install:
	${MKDIR} ${DESTDIR}${PREFIX}/bin
	${INSTALL} -c \
	    ../bin/exactSNP \
	    ../bin/featureCounts \
	    ../bin/sub* \
	    ../bin/utilities/* \
	    ${DESTDIR}${PREFIX}/bin

# Remove the shared objects and a few build leftovers from the source
# directory.
clean:
	rm -f core featureCounts exactSNP removeDup subread-buildindex \
	    ${ALL_OBJECTS}

View file

@ -0,0 +1,20 @@
--- HelperFunctions.c.orig 2020-06-04 23:58:40 UTC
+++ HelperFunctions.c
@@ -845,7 +845,7 @@ int strcmp_number(char * s1, char * s2)
int mac_str(char * str_buff)
{
-#if defined(FREEBSD) || defined(__MINGW32__)
+#if defined(__FreeBSD__) || defined(__MINGW32__)
return 1;
#else
#ifdef MACOS
@@ -2703,7 +2703,7 @@ void main(){
int get_free_total_mem(size_t * total, size_t * free_mem){
-#ifdef FREEBSD
+#ifdef __FreeBSD__
return -1;
#endif

View file

@ -0,0 +1,10 @@
--- core-junction.c.orig 2018-11-08 15:19:02 UTC
+++ core-junction.c
@@ -22,6 +22,7 @@
#include <string.h>
#include <ctype.h>
#include <assert.h>
+#include <unistd.h>
#include "subread.h"
#include "sublog.h"
#include "gene-value-index.h"

View file

@ -0,0 +1,20 @@
--- gene-algorithms.c.orig 2020-06-04 23:57:31 UTC
+++ gene-algorithms.c
@@ -28,7 +28,7 @@
#include <sys/stat.h>
#include <sys/types.h>
-#ifndef FREEBSD
+#ifndef __FreeBSD__
#include <sys/timeb.h>
#endif
@@ -1563,7 +1563,7 @@ int load_offsets(gene_offset_t* offsets , const char i
double miltime(){
double ret;
- #ifdef FREEBSD
+ #ifdef __FreeBSD__
struct timeval tp;
struct timezone tz;
tz.tz_minuteswest=0;

View file

@ -0,0 +1,11 @@
--- input-files.c.orig 2020-06-05 00:33:03 UTC
+++ input-files.c
@@ -48,7 +48,7 @@ FILE * f_subr_open(const char * fname, const char * mo
#ifdef __MINGW32__
return fopen64(fname, mode);
#else
-#if defined(__LP64__) || defined(_LP64) || defined(MACOS)
+#if defined(__LP64__) || defined(_LP64) || defined(MACOS) || defined(__FreeBSD__)
return fopen(fname, mode);
#else
return fopen64(fname, mode);

View file

@ -0,0 +1,11 @@
--- long-hashtable.c.orig 2020-06-04 23:59:33 UTC
+++ long-hashtable.c
@@ -22,7 +22,7 @@
#include <string.h>
#ifndef MACOS
-#ifndef FREEBSD
+#ifndef __FreeBSD__
#include <malloc.h>
#endif
#endif

View file

@ -0,0 +1,15 @@
--- longread-one/LRMsorted-hashtable.c.orig 2018-11-08 14:47:43 UTC
+++ longread-one/LRMsorted-hashtable.c
@@ -22,11 +22,9 @@
#include <string.h>
#include "LRMsorted-hashtable.h"
-#ifndef MACOS
-#ifndef FREEBSD
+#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif
-#endif
#include<math.h>
#include "LRMfile-io.h"

View file

@ -0,0 +1,30 @@
--- longread-one/Makefile.orig 2019-09-04 04:22:49 UTC
+++ longread-one/Makefile
@@ -1,12 +1,12 @@
-CC_EXEC = gcc
OPT_LEVEL = 3
include ../makefile.version
include make.version
-CCFLAGS = -mtune=core2 ${MACOS} -O${OPT_LEVEL} -Wall -DMAKE_FOR_EXON -D MAKE_STANDALONE -D_FILE_OFFSET_BITS=64 -DSUBREAD_VERSION=\"${SUBREAD_VERSION}\" ${WARNING_LEVEL} ${MINGW32}
-LDFLAGS = -lpthread -lz -lm -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE # -DREPORT_ALL_THE_BEST
-CC = ${CC_EXEC} ${CCFLAGS} -fmessage-length=0 -ggdb
+CC ?= gcc
+CFLAGS += -DMAKE_FOR_EXON -D MAKE_STANDALONE -D_FILE_OFFSET_BITS=64 \
+ -DSUBREAD_VERSION=\"${SUBREAD_VERSION}\" ${WARNING_LEVEL} ${MINGW32}
+LDFLAGS += -lpthread -lz -lm -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE # -DREPORT_ALL_THE_BEST
ALL_LIBS=LRMsorted-hashtable LRMbase-index LRMchro-event LRMhelper LRMseek-zlib LRMfile-io LRMhashtable
ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
@@ -20,8 +20,8 @@ clean:
rm -f *.o LRM
LRM: longread-mapping.c ${ALL_OBJECTS}
- ${CC} -o LRM longread-mapping.c ${ALL_OBJECTS} ${LDFLAGS}
+ ${CC} -o LRM ${CFLAGS} longread-mapping.c ${ALL_OBJECTS} ${LDFLAGS}
$(ALL_OBJECTS): $(ALL_C) $(ALL_H)
- $(CC) -o $@ -c $(subst .o,.c,$@)
+ $(CC) -o $@ -c ${CFLAGS} $(subst .o,.c,$@)

View file

@ -0,0 +1,37 @@
--- longread-one/longread-mapping.c.orig 2019-09-04 04:22:49 UTC
+++ longread-one/longread-mapping.c
@@ -30,7 +30,9 @@
#ifndef __MINGW32__
#include <sys/resource.h>
#endif
+#ifndef __FreeBSD__ // Deprecated on FreeBSD
#include <sys/timeb.h>
+#endif
#include <sys/stat.h>
#include <locale.h>
#include <ctype.h>
@@ -223,9 +225,24 @@ int LRMvalidate_and_init_context(LRMcontext_t ** conte
double LRMmiltime(){
double ret;
+
+/* Why not use gettimeofday() on all platforms? */
+#ifdef __FreeBSD__
+
+ struct timeval tp;
+ struct timezone tz;
+ tz.tz_minuteswest=0;
+ tz.tz_dsttime=0;
+ gettimeofday(&tp,&tz);
+ ret = tp.tv_sec+ 0.001*0.001* tp.tv_usec;
+
+#else
+
struct timeb trp;
ftime(&trp);
ret = trp.time*1.0+(trp.millitm*1.0/1000.0);
+
+#endif
return ret;
}

View file

@ -0,0 +1,10 @@
--- read-repair.c.orig 2018-11-08 15:20:06 UTC
+++ read-repair.c
@@ -3,6 +3,7 @@
#include <string.h>
#include <getopt.h>
#include <zlib.h>
+#include <unistd.h>
#include "subread.h"
#include "core.h"
#include "input-files.h"

View file

@ -0,0 +1,10 @@
--- sambam-file.c.orig 2018-11-08 15:20:35 UTC
+++ sambam-file.c
@@ -30,6 +30,7 @@
#include <string.h>
#include <assert.h>
#include <ctype.h>
+#include <unistd.h>
#include "subread.h"
#include "core.h"
#include "gene-algorithms.h"

View file

@ -0,0 +1,15 @@
--- sorted-hashtable.c.orig 2018-11-08 03:33:50 UTC
+++ sorted-hashtable.c
@@ -23,11 +23,9 @@
#include <stdlib.h>
#include <string.h>
-#ifndef MACOS
-#ifndef FREEBSD
+#if !defined(__FreeBSD__) && !defined(__APPLE__)
#include <malloc.h>
#endif
-#endif
#include<math.h>
#include"core.h"

View file

@ -0,0 +1,11 @@
--- subread.h.orig 2018-11-08 14:46:06 UTC
+++ subread.h
@@ -102,7 +102,7 @@
#define USE_POSIX_MUTEX_LOCK
-#if defined(MACOS) || defined(FREEBSD) || defined(USE_POSIX_MUTEX_LOCK)
+#if defined(__APPLE__) || defined(__FreeBSD__) || defined(USE_POSIX_MUTEX_LOCK)
typedef pthread_mutex_t subread_lock_t;
#define pthread_spinlock_t pthread_mutex_t
#define pthread_spin_lock pthread_mutex_lock

20
biology/subread/pkg-descr Normal file
View file

@ -0,0 +1,20 @@
The Subread package comprises a suite of software programs for processing
next-gen sequencing read data including:
Subread: a general-purpose read aligner which can align both genomic
DNA-seq and RNA-seq reads. It can also be used to discover genomic
mutations including short indels and structural variants.
Subjunc: a read aligner developed for aligning RNA-seq reads and for the
detection of exon-exon junctions. Gene fusion events can be detected as
well.
featureCounts: a software program developed for counting reads to genomic
features such as genes, exons, promoters and genomic bins.
Sublong: a long-read aligner that is designed based on seed-and-vote.
exactSNP: a SNP caller that discovers SNPs by testing signals against local
background noise.
WWW: http://bioinf.wehi.edu.au/subread-package/

16
biology/subread/pkg-plist Normal file
View file

@ -0,0 +1,16 @@
bin/detectionCall
bin/exactSNP
bin/featureCounts
bin/flattenGTF
bin/genRandomReads
bin/propmapped
bin/qualityScores
bin/removeDup
bin/repair
bin/subindel
bin/subjunc
bin/sublong
bin/subread-align
bin/subread-buildindex
bin/subread-fullscan
bin/txUnique