biology/canu: import canu-1.8
Canu is a fork of the Celera Assembler, designed for high-noise single-molecule sequencing (such as the PacBio RS II/Sequel or Oxford Nanopore MinION). Canu is a hierarchical assembly pipeline which runs in four steps: (1) detect overlaps in high-noise sequences using MHAP; (2) generate corrected sequence consensus; (3) trim corrected sequences; (4) assemble trimmed corrected sequences.
This commit is contained in:
parent
37d4115594
commit
94e269c682
8 changed files with 356 additions and 0 deletions
12
biology/canu/DESCR
Normal file
12
biology/canu/DESCR
Normal file
|
@ -0,0 +1,12 @@
|
|||
Canu is a fork of the Celera Assembler, designed for high-noise single-molecule
|
||||
sequencing (such as the PacBio RS II/Sequel or Oxford Nanopore MinION).
|
||||
|
||||
Canu is a hierarchical assembly pipeline which runs in four steps:
|
||||
|
||||
Detect overlaps in high-noise sequences using MHAP
|
||||
|
||||
Generate corrected sequence consensus
|
||||
|
||||
Trim corrected sequences
|
||||
|
||||
Assemble trimmed corrected sequences
|
80
biology/canu/Makefile
Normal file
80
biology/canu/Makefile
Normal file
|
@ -0,0 +1,80 @@
|
|||
# $NetBSD: Makefile,v 1.1 2019/01/07 02:33:17 bacon Exp $
|
||||
|
||||
DISTNAME= canu-1.8
|
||||
CATEGORIES= biology java
|
||||
MASTER_SITES= ${MASTER_SITE_GITHUB:=marbl/}
|
||||
GITHUB_TAG= v${PKGVERSION_NOREV}
|
||||
|
||||
MAINTAINER= bacon@NetBSD.org
|
||||
HOMEPAGE= https://canu.readthedocs.io/
|
||||
COMMENT= Single molecule sequence assembler for genomes large and small
|
||||
LICENSE= gnu-gpl-v2
|
||||
|
||||
DEPENDS+= gnuplot>0:../../graphics/gnuplot
|
||||
|
||||
# The upstream Makefile builds directly into ${DESTDIR}${PREFIX}/<os>-<arch> (see MAKE_ENV and TMP_INST below)
|
||||
SUBST_CLASSES+= optimize
|
||||
SUBST_STAGE.optimize= pre-configure
|
||||
SUBST_SED.optimize+= -e 's|-g3||g'
|
||||
SUBST_SED.optimize+= -e 's|-O3||g'
|
||||
SUBST_SED.optimize+= -e 's|-O4||g'
|
||||
SUBST_SED.optimize+= -e 's|-funroll-loops||g'
|
||||
SUBST_SED.optimize+= -e 's|-fexpensive-optimizations||g'
|
||||
SUBST_SED.optimize+= -e 's|-finline-functions||g'
|
||||
SUBST_FILES.optimize+= ${WRKSRC}/Makefile
|
||||
|
||||
SUBST_CLASSES+= jar
|
||||
SUBST_STAGE.jar= pre-configure
|
||||
SUBST_SED.jar+= -e 's|\\$$bin/mhap-|${PREFIX}/${JAVAJARDIR}/mhap-|g'
|
||||
SUBST_FILES.jar+= ${WRKSRC}/pipelines/canu/OverlapMhap.pm
|
||||
|
||||
SUBST_CLASSES+= perl
|
||||
SUBST_STAGE.perl= pre-configure
|
||||
SUBST_SED.perl+= -e 's|$$FindBin::RealBin/../lib/site_perl|${PREFIX}/${SITE_PERL_REL}|g'
|
||||
SUBST_FILES.perl+= ${WRKSRC}/pipelines/canu.pl
|
||||
|
||||
REPLACE_PERL+= bogart/*.pl
|
||||
REPLACE_PERL+= pipelines/*.pl
|
||||
REPLACE_PERL+= bogus/*.pl
|
||||
REPLACE_PERL+= bogart-analysis/*.pl
|
||||
REPLACE_PERL+= overlapBasedTrimming/*.pl
|
||||
REPLACE_PERL+= overlapInCore-analysis/*.pl
|
||||
REPLACE_PERL+= merTrim/*.pl
|
||||
REPLACE_PERL+= erateEstimate/*.pl
|
||||
REPLACE_PERL+= meryl/*.pl
|
||||
REPLACE_PERL+= fastq-utilities/*.pl
|
||||
REPLACE_PERL+= *.pl
|
||||
|
||||
# May work on other 64-bit processors, but untested
|
||||
ONLY_FOR_PLATFORM= *-*-x86_64
|
||||
|
||||
USE_LANGUAGES= c c++
|
||||
USE_JAVA= run
|
||||
USE_JAVA2= 8
|
||||
USE_TOOLS+= gmake pax perl
|
||||
GCC_REQD+= 4.8
|
||||
|
||||
WRKSRC= ${WRKDIR}/canu-${PKGVERSION_NOREV}/src
|
||||
MAKE_ENV+= DESTDIR=${WRKSRC}
|
||||
|
||||
TMP_INST= ${WRKSRC}${PREFIX}/${OPSYS}-${MACHINE_ARCH:S/x86_64/amd64/}
|
||||
SITE_PERL_REL= lib/perl5/site_perl
|
||||
JAVAJARDIR= share/java/classes
|
||||
INSTALLATION_DIRS= bin ${JAVAJARDIR}
|
||||
|
||||
post-extract:
|
||||
${CHMOD} -R g-w ${WRKDIR}
|
||||
|
||||
post-build:
|
||||
${MKDIR} ${TMP_INST}/lib/perl5
|
||||
${MV} ${TMP_INST}/lib/site_perl ${TMP_INST}/lib/perl5
|
||||
${RM} -f ${TMP_INST}/bin/canu.defaults
|
||||
|
||||
do-install:
|
||||
cd ${TMP_INST}/bin && ${PAX} -wr * ${DESTDIR}${PREFIX}/bin
|
||||
cd ${TMP_INST}/lib/perl5 && ${PAX} -wr * ${DESTDIR}${PREFIX}/lib/perl5
|
||||
cd ${TMP_INST}/share && ${PAX} -wr * ${DESTDIR}${PREFIX}/share
|
||||
|
||||
.include "../../devel/boost-libs/buildlink3.mk"
|
||||
.include "../../lang/perl5/module.mk"
|
||||
.include "../../mk/bsd.pkg.mk"
|
80
biology/canu/PLIST
Normal file
80
biology/canu/PLIST
Normal file
|
@ -0,0 +1,80 @@
|
|||
@comment $NetBSD: PLIST,v 1.1 2019/01/07 02:33:17 bacon Exp $
|
||||
bin/alignGFA
|
||||
bin/bogart
|
||||
bin/bogus
|
||||
bin/canu
|
||||
bin/correctOverlaps
|
||||
bin/dumpBlob
|
||||
bin/edalign
|
||||
bin/errorEstimate
|
||||
bin/falconsense
|
||||
bin/fastqAnalyze
|
||||
bin/fastqSample
|
||||
bin/fastqSimulate
|
||||
bin/fastqSimulate-sort
|
||||
bin/filterCorrectionLayouts
|
||||
bin/filterCorrectionOverlaps
|
||||
bin/findErrors
|
||||
bin/findErrors-Dump
|
||||
bin/generateCorrectionLayouts
|
||||
bin/loadCorrectedReads
|
||||
bin/loadErates
|
||||
bin/loadTrimmedReads
|
||||
bin/meryl
|
||||
bin/mhapConvert
|
||||
bin/mmapConvert
|
||||
bin/ovStoreBucketizer
|
||||
bin/ovStoreBuild
|
||||
bin/ovStoreConfig
|
||||
bin/ovStoreDump
|
||||
bin/ovStoreIndexer
|
||||
bin/ovStoreSorter
|
||||
bin/ovStoreStats
|
||||
bin/overlapConvert
|
||||
bin/overlapImport
|
||||
bin/overlapInCore
|
||||
bin/overlapInCorePartition
|
||||
bin/overlapPair
|
||||
bin/prefixEditDistance-matchLimitGenerate
|
||||
bin/readConsensus
|
||||
bin/sequence
|
||||
bin/splitHaplotype
|
||||
bin/splitReads
|
||||
bin/sqStoreCreate
|
||||
bin/sqStoreCreatePartition
|
||||
bin/sqStoreDumpFASTQ
|
||||
bin/sqStoreDumpMetaData
|
||||
bin/tgStoreCompress
|
||||
bin/tgStoreCoverageStat
|
||||
bin/tgStoreDump
|
||||
bin/tgStoreFilter
|
||||
bin/tgStoreLoad
|
||||
bin/tgTigDisplay
|
||||
bin/trimReads
|
||||
bin/utgcns
|
||||
bin/wtdbgConvert
|
||||
lib/perl5/site_perl/canu/Configure.pm
|
||||
lib/perl5/site_perl/canu/Consensus.pm
|
||||
lib/perl5/site_perl/canu/CorrectReads.pm
|
||||
lib/perl5/site_perl/canu/Defaults.pm
|
||||
lib/perl5/site_perl/canu/Execution.pm
|
||||
lib/perl5/site_perl/canu/Grid.pm
|
||||
lib/perl5/site_perl/canu/Grid_Cloud.pm
|
||||
lib/perl5/site_perl/canu/Grid_DNANexus.pm
|
||||
lib/perl5/site_perl/canu/Grid_LSF.pm
|
||||
lib/perl5/site_perl/canu/Grid_PBSTorque.pm
|
||||
lib/perl5/site_perl/canu/Grid_SGE.pm
|
||||
lib/perl5/site_perl/canu/Grid_Slurm.pm
|
||||
lib/perl5/site_perl/canu/HaplotypeReads.pm
|
||||
lib/perl5/site_perl/canu/Meryl.pm
|
||||
lib/perl5/site_perl/canu/Output.pm
|
||||
lib/perl5/site_perl/canu/OverlapBasedTrimming.pm
|
||||
lib/perl5/site_perl/canu/OverlapErrorAdjustment.pm
|
||||
lib/perl5/site_perl/canu/OverlapInCore.pm
|
||||
lib/perl5/site_perl/canu/OverlapMMap.pm
|
||||
lib/perl5/site_perl/canu/OverlapMhap.pm
|
||||
lib/perl5/site_perl/canu/OverlapStore.pm
|
||||
lib/perl5/site_perl/canu/Report.pm
|
||||
lib/perl5/site_perl/canu/SequenceStore.pm
|
||||
lib/perl5/site_perl/canu/Unitig.pm
|
||||
share/java/classes/mhap-2.1.3.jar
|
10
biology/canu/distinfo
Normal file
10
biology/canu/distinfo
Normal file
|
@ -0,0 +1,10 @@
|
|||
$NetBSD: distinfo,v 1.1 2019/01/07 02:33:17 bacon Exp $
|
||||
|
||||
SHA1 (canu-1.8.tar.gz) = 7dd79415aa5ecb95f05109f0d8d58f7cbfc336e9
|
||||
RMD160 (canu-1.8.tar.gz) = 78d4872b4034f526037ce225c699debd910bd586
|
||||
SHA512 (canu-1.8.tar.gz) = 650bc96675f371596f8e7748d4ab2d229f0262bf84cee8fed59af43d534d76095a72e4ba0b4a5ce9f561992268c317964cda2f6c89ee514f4920e2ba47fbc86c
|
||||
Size (canu-1.8.tar.gz) = 2465314 bytes
|
||||
SHA1 (patch-Makefile) = aa83003677cbb12558e438c776402ec48df0598d
|
||||
SHA1 (patch-pipelines_canu_Defaults.pm) = 55a4631d86abb1881b0cc997514d44c536209ea6
|
||||
SHA1 (patch-pipelines_canu_Execution.pm) = fbb080c06ea5d2393d1835a61771715d2aef0274
|
||||
SHA1 (patch-utility_objectStore.C) = c8407de79abbaf296f027f704080cc7e878b85f4
|
48
biology/canu/patches/patch-Makefile
Normal file
48
biology/canu/patches/patch-Makefile
Normal file
|
@ -0,0 +1,48 @@
|
|||
$NetBSD: patch-Makefile,v 1.1 2019/01/07 02:33:17 bacon Exp $
|
||||
|
||||
# Template Makefile requires platform-specific defaults for each OS
|
||||
# To be sent upstream following commit
|
||||
|
||||
--- Makefile.orig 2018-10-22 16:47:31.000000000 +0000
|
||||
+++ Makefile
|
||||
@@ -545,6 +545,40 @@ endif
|
||||
endif
|
||||
|
||||
|
||||
+ifeq (${OSTYPE}, NetBSD)
|
||||
+ CC ?= gcc
|
||||
+ CXX ?= g++
|
||||
+
|
||||
+ # GCC
|
||||
+ CXXFLAGS += -pthread -fopenmp -fPIC
|
||||
+ LDFLAGS += -pthread -fopenmp -lm -lexecinfo
|
||||
+
|
||||
+ #CXXFLAGS += -Wall -Wextra -Wformat -Wno-unused -Wno-parentheses
|
||||
+ CXXFLAGS += -Wall -Wextra -Wformat -Wno-unused-function -Wno-unused-parameter -Wno-unused-variable -Wno-char-subscripts -Wno-write-strings -Wno-sign-compare -Wno-format-truncation
|
||||
+
|
||||
+ # Google Performance Tools malloc and heapchecker (HEAPCHECK=normal)
|
||||
+ #CXXFLAGS +=
|
||||
+ #LDFLAGS += -ltcmalloc
|
||||
+
|
||||
+ # Google Performance Tools cpu profiler (CPUPROFILE=/path)
|
||||
+ #CXXFLAGS +=
|
||||
+ #LDFLAGS += -lprofiler
|
||||
+
|
||||
+ # callgrind
|
||||
+ #CXXFLAGS += -g3 -Wa,--gstabs -save-temps
|
||||
+
|
||||
+ ifeq ($(BUILDOPTIMIZED), 1)
|
||||
+ else
|
||||
+ CXXFLAGS += -g3
|
||||
+ endif
|
||||
+
|
||||
+ ifeq ($(BUILDDEBUG), 1)
|
||||
+ else
|
||||
+ CXXFLAGS += -O3 -finline-functions -fomit-frame-pointer
|
||||
+ endif
|
||||
+endif
|
||||
+
|
||||
+
|
||||
ifneq (,$(findstring CYGWIN, ${OSTYPE}))
|
||||
CC ?= gcc
|
||||
CXX ?= g++
|
24
biology/canu/patches/patch-pipelines_canu_Defaults.pm
Normal file
24
biology/canu/patches/patch-pipelines_canu_Defaults.pm
Normal file
|
@ -0,0 +1,24 @@
|
|||
$NetBSD: patch-pipelines_canu_Defaults.pm,v 1.1 2019/01/07 02:33:17 bacon Exp $
|
||||
|
||||
# Add resource limits for SLURM
|
||||
# Upstream is considering a scheduler-independent approach to this feature
|
||||
|
||||
--- pipelines/canu/Defaults.pm.orig 2018-06-22 08:20:52.000000000 +0000
|
||||
+++ pipelines/canu/Defaults.pm
|
||||
@@ -812,6 +812,16 @@ sub setDefaults () {
|
||||
setDefault("gridEngineArraySubmitID", undef, "Grid engine configuration, not documented");
|
||||
setDefault("gridEngineJobID", undef, "Grid engine configuration, not documented");
|
||||
|
||||
+ ##### Slurm-specific parameters for controlling the number of
|
||||
+ ##### cores / tasks dispatched per step or globally (WIP)
|
||||
+
|
||||
+ setDefault( 'slurmCormhapCoreLimit', undef, 'Maximum number of cores allocated for MHAP pre-computing and alignment within the correction phase' );
|
||||
+ setDefault( 'slurmOvbCoreLimit', undef, 'Maximum number of single-core tasks dispatched for the ovlStore bucketizing step within the trimming phase' );
|
||||
+ setDefault( 'slurmOvsCoreLimit', undef, 'Maximum number of single-core tasks dispatched for the ovlStore sorting step within the trimming phase' );
|
||||
+ setDefault( 'slurmRedCoreLimit', undef, 'Maximum number of cores allocated for read error detection within the unitigging phase' );
|
||||
+ setDefault( 'slurmArrayTaskLimit', undef, 'Maximum number of tasks permitted for each step throughout assembly' );
|
||||
+ setDefault( 'slurmArrayCoreLimit', undef, 'Maximum number of cores allocated for each step throughout assembly' );
|
||||
+
|
||||
##### Grid Engine Pipeline
|
||||
|
||||
setDefault("useGrid", 1, "If 'true', enable grid-based execution; if 'false', run all jobs on the local machine; if 'remote', create jobs for grid execution but do not submit; default 'true'");
|
83
biology/canu/patches/patch-pipelines_canu_Execution.pm
Normal file
83
biology/canu/patches/patch-pipelines_canu_Execution.pm
Normal file
|
@ -0,0 +1,83 @@
|
|||
$NetBSD: patch-pipelines_canu_Execution.pm,v 1.1 2019/01/07 02:33:17 bacon Exp $
|
||||
|
||||
# Add resource limits for SLURM
|
||||
# Upstream is considering a scheduler-independent approach to this feature
|
||||
|
||||
--- pipelines/canu/Execution.pm.orig 2018-06-22 08:20:52.000000000 +0000
|
||||
+++ pipelines/canu/Execution.pm
|
||||
@@ -303,10 +303,6 @@ sub skipStage ($$@) {
|
||||
sub getInstallDirectory () {
|
||||
my $installDir = $FindBin::RealBin;
|
||||
|
||||
- if ($installDir =~ m!^(.*)/\w+-\w+/bin$!) {
|
||||
- $installDir = $1;
|
||||
- }
|
||||
-
|
||||
return($installDir);
|
||||
}
|
||||
|
||||
@@ -694,8 +690,8 @@ sub submitScript ($$) {
|
||||
|
||||
|
||||
|
||||
-sub buildGridArray ($$$$) {
|
||||
- my ($name, $bgn, $end, $opt) = @_;
|
||||
+sub buildGridArray (@) {
|
||||
+ my ( $name, $bgn, $end, $opt, $thr ) = @_;
|
||||
my $off = 0;
|
||||
|
||||
# In some grids (SGE) this is the maximum size of an array job.
|
||||
@@ -725,8 +721,42 @@ sub buildGridArray ($$$$) {
|
||||
$off = "-F \"$off\"";
|
||||
}
|
||||
|
||||
- $opt =~ s/ARRAY_NAME/$name/g; # Replace ARRAY_NAME with 'job name'
|
||||
- $opt =~ s/ARRAY_JOBS/$bgn-$end/g; # Replace ARRAY_JOBS with 'bgn-end'
|
||||
+ if( $opt =~ m/ARRAY_NAME/ ) # Checked independently of ARRAY_JOBS; a template may contain both tokens
|
||||
+ {
|
||||
+ $opt =~ s/ARRAY_NAME/$name/g; # Replace every ARRAY_NAME with 'job name'
|
||||
+ }
|
||||
+ if( $opt =~ m/ARRAY_JOBS/ )
|
||||
+ {
|
||||
+ $opt =~ s/ARRAY_JOBS/$bgn-$end/g; # Replace every ARRAY_JOBS with 'bgn-end'
|
||||
+ # Slurm only: append '%<limit>' to the option string; the first matching limit below wins
|
||||
+ if( lc( getGlobal( 'gridEngine' ) ) eq 'slurm' && $end > 1 )
|
||||
+ {
|
||||
+ if( $name =~ m/^cormhap_/i && defined getGlobal( 'slurmCormhapCoreLimit' ) )
|
||||
+ {
|
||||
+ $opt .= '%' . int( getGlobal( 'slurmCormhapCoreLimit' ) / ( $thr || 1 ) ); # $thr is optional; treat undef/0 as 1 thread
|
||||
+ }
|
||||
+ elsif( $name =~ m/^ovb_/i && defined getGlobal( 'slurmOvbCoreLimit' ) )
|
||||
+ {
|
||||
+ $opt .= '%' . getGlobal( 'slurmOvbCoreLimit' );
|
||||
+ }
|
||||
+ elsif( $name =~ m/^ovs_/i && defined getGlobal( 'slurmOvsCoreLimit' ) )
|
||||
+ {
|
||||
+ $opt .= '%' . getGlobal( 'slurmOvsCoreLimit' );
|
||||
+ }
|
||||
+ elsif( $name =~ m/^red_/i && defined getGlobal( 'slurmRedCoreLimit' ) )
|
||||
+ {
|
||||
+ $opt .= '%' . int( getGlobal( 'slurmRedCoreLimit' ) / ( $thr || 1 ) );
|
||||
+ }
|
||||
+ elsif( defined getGlobal( 'slurmArrayTaskLimit' ) )
|
||||
+ {
|
||||
+ $opt .= '%' . getGlobal( 'slurmArrayTaskLimit' );
|
||||
+ }
|
||||
+ elsif( defined getGlobal( 'slurmArrayCoreLimit' ) )
|
||||
+ {
|
||||
+ $opt .= '%' . int( getGlobal( 'slurmArrayCoreLimit' ) / ( $thr || 1 ) );
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
|
||||
return($opt, $off);
|
||||
}
|
||||
@@ -870,7 +900,7 @@ sub buildGridJob ($$$$$$$$$) {
|
||||
my $jobNameT = makeUniqueJobName($jobType, $asm);
|
||||
|
||||
my ($jobName, $jobOff) = buildGridArray($jobNameT, $bgnJob, $endJob, getGlobal("gridEngineArrayName"));
|
||||
- my ($arrayOpt, $arrayOff) = buildGridArray($jobNameT, $bgnJob, $endJob, getGlobal("gridEngineArrayOption"));
|
||||
+ my ( $arrayOpt, $arrayOff ) = buildGridArray( $jobNameT, $bgnJob, $endJob, getGlobal( "gridEngineArrayOption" ), $thr );
|
||||
|
||||
my $outputOption = buildOutputOption($path, $script);
|
||||
|
19
biology/canu/patches/patch-utility_objectStore.C
Normal file
19
biology/canu/patches/patch-utility_objectStore.C
Normal file
|
@ -0,0 +1,19 @@
|
|||
$NetBSD: patch-utility_objectStore.C,v 1.1 2019/01/07 02:33:17 bacon Exp $
|
||||
|
||||
# NetBSD does not provide WEXITED
|
||||
# Empty bitmask should suffice in this case
|
||||
|
||||
--- utility/objectStore.C.orig 2018-12-23 01:57:06.000000000 +0000
|
||||
+++ utility/objectStore.C
|
||||
@@ -286,6 +286,11 @@ fetchFromObjectStore(char *requested) {
|
||||
// Otherwise, we're still the parent, so wait for the (-1 == any) child
|
||||
// process to terminate.
|
||||
|
||||
+// NetBSD does not provide WEXITED so send empty bitmask
|
||||
+#ifdef __NetBSD__
|
||||
+#define WEXITED 0
|
||||
+#endif
|
||||
+
|
||||
waitpid(-1, &err, WEXITED);
|
||||
|
||||
if ((WIFEXITED(err)) &&
|
Loading…
Reference in a new issue