biology/ddocent: Bash pipeline for RAD sequencing

Approved by:    jrm (mentor)
Differential Revision:  https://reviews.freebsd.org/D15139
This commit is contained in:
Jason W. Bacon 2018-04-20 01:34:05 +00:00
parent 236b50251f
commit d06281fc0f
Notes: svn2git 2021-03-31 03:12:20 +00:00
svn path=/head/; revision=467808
8 changed files with 617 additions and 0 deletions

View file

@ -22,6 +22,7 @@
SUBDIR += clustalw
SUBDIR += consed
SUBDIR += crux
SUBDIR += ddocent
SUBDIR += diamond
SUBDIR += emboss
SUBDIR += fasta

67
biology/ddocent/Makefile Normal file
View file

@ -0,0 +1,67 @@
# $FreeBSD$
PORTNAME= dDocent
DISTVERSIONPREFIX= v
DISTVERSION= 2.2.25
CATEGORIES= biology java
MAINTAINER= jwb@FreeBSD.org
COMMENT= Bash pipeline for RAD sequencing
LICENSE= MIT
# ddocent test data do not unpack with FreeBSD 11.1 /usr/bin/unzip
RUN_DEPENDS= unzip>=0:archivers/unzip \
mawk>=0:lang/mawk \
gawk>=0:lang/gawk \
coreutils>=0:sysutils/coreutils \
gnuplot>=0:math/gnuplot \
parallel>=0:sysutils/parallel \
bash:shells/bash \
bwa>=0.7.13:biology/bwa \
cd-hit>=0:biology/cd-hit \
samtools>=1.3:biology/samtools \
vcftools>=0.1.15:biology/vcftools \
trimmomatic>=0:biology/trimmomatic \
bamtools>=0:biology/bamtools \
stacks>=0:biology/stacks \
rainbow>=0:biology/rainbow \
trimadap>=0:biology/trimadap \
seqtk>=0:biology/seqtk \
bedtools>=2.26.0:biology/bedtools \
pear-merger>=0:biology/pear-merger \
vcflib>=0:biology/vcflib \
freebayes:biology/freebayes
USES= perl5 python shebangfix
SHEBANG_FILES= dDocent scripts/*.sh scripts/*.pl scripts/dDocent_filters
USE_JAVA= yes
USE_GITHUB= yes
GH_ACCOUNT= jpuritz
NO_BUILD= yes
NO_ARCH= yes
# These are on top of patch-dDocent, so don't apply them within the source
# tree, or they'll get picked up by patch generators, and hard-code PREFIX.
post-install:
${REINPLACE_CMD} -i '' \
-e 's|%%PREFIX%%|${PREFIX}|g' \
-e 's|%%JAVAJARDIR%%|${JAVAJARDIR}|g' \
-e 's|%%BASH%%|${LOCALBASE}/bin/bash|g' \
-e 's|python|${PYTHON_CMD}|g' \
${STAGEDIR}${PREFIX}/bin/dDocent
do-install:
${MKDIR} ${STAGEDIR}${PREFIX}/bin
${INSTALL_SCRIPT} \
${WRKSRC}/dDocent \
${WRKSRC}/*.sh \
${FILESDIR}/ddocent-assembly-test \
${FILESDIR}/ddocent-assembly-test-cleanup \
${WRKSRC}/scripts/*.sh \
${WRKSRC}/scripts/*.pl \
${WRKSRC}/scripts/dDocent_filters \
${STAGEDIR}${PREFIX}/bin
.include <bsd.port.mk>

3
biology/ddocent/distinfo Normal file
View file

@ -0,0 +1,3 @@
TIMESTAMP = 1520345850
SHA256 (jpuritz-dDocent-v2.2.25_GH0.tar.gz) = 903c3010b29b2ca95f7fe6099925948e4d3f21655668caff653df97dfa7ecf44
SIZE (jpuritz-dDocent-v2.2.25_GH0.tar.gz) = 336804

View file

@ -0,0 +1,380 @@
#!/usr/bin/env bash
set -e
##########################################################################
# Function description:
# Pause until user presses return
##########################################################################
pause()
{
local junk
printf "Press return to continue..."
read junk
}
##########################################################################
# Not necessary if invoking scripts with "bash script-name" as shown
# in the dDocent tutorial, but supplied for completeness. Tutorial
# scripts have also been patched upstream to usr /usr/bin/env, but
# we won't assume they'll all stay that way.
##########################################################################
fix_bash_path()
{
# Don't echo these commands
{ set +x; } 2>/dev/null
if [ $# != 1 ]; then
printf "Usage: $0 script\n"
exit 1
fi
sed -i.bak -e "s|#!/bin/bash|#!/usr/bin/env bash|g" $1
set -x
}
##########################################################################
# Main
##########################################################################
##########################################################################
# Workarounds for running dDocent from FreeBSD ports and Pkgsrc.
# Include this section in any scripts running dDocent commands.
##########################################################################
# Hack to allow remake_reference.sh, etc. to find trimmomatic.
# trimmomatic.jar and adapter files are not an executables and should not
# be in PATH according to filesystem hierarchy standards, but the dDocent
# scripts are coded to look for them there, because that's how dDocent
# installs the bundled Trimmomatic.
if [ -e /usr/local/share/java/classes/trimmomatic.jar ]; then
export PATH=${PATH}:/usr/local/share/java/classes:/usr/local/share/trimmomatic/adapters
elif [ -e $PKGSRC/lib/java/trimmomatic.jar ]; then
export PATH=${PATH}:$PKGSRC/lib/java:$PKGSRC/share/trimmomatic/adapters
else
printf "Error: Trimmomatic is not installed in a known location.\n" >> /dev/stderr
fi
if ! pwd | fgrep dDocent-test; then
mkdir -p dDocent-test
cd dDocent-test
fi
# remake_reference.sh and some other scripts use GNU extensions in awk
# commands. Trick systems into using gawk to get around this without
# having to patch every script.
mkdir -p ddocent-bin
ln -sf `which gawk` ddocent-bin/awk
ln -sf `which python2.7` ddocent-bin/python
export PATH=`pwd`/ddocent-bin:$PATH
##########################################################################
# Actual dDocent commands below
##########################################################################
##########################################################################
# Command sequence from the dDocent tutorial on Github. See link below.
##########################################################################
cat << EOM
This script runs the test commands described at
http://ddocent.com/assembly
You may want to follow along on this web page as the script runs. It will
pause after each step to allow checking the output.
EOM
pause
mkdir -p Data
cd Data
printf "Downloading and unpacking data.zip...\n"
if [ -e data.zip ]; then
mv data.zip /tmp/dDocent-data.zip
rm -rf *
mv /tmp/dDocent-data.zip data.zip
else
rm -rf *
curl --insecure -L -o data.zip \
'https://www.dropbox.com/s/t09xjuudev4de72/data.zip?dl=0'
fi
# Hack: current data.zip contains data.zip. ??!
if [ ! -e simRRLs2.py ]; then
yes | unzip data.zip || true
fi
ls -l
pause
set -x
head SimRAD.barcodes
{ set +x; } 2>/dev/null
pause
set -x
cut -f2 SimRAD.barcodes > barcodes
head barcodes
{ set +x; } 2>/dev/null
pause
set -x
process_radtags -1 SimRAD_R1.fastq.gz -2 SimRAD_R2.fastq.gz -b barcodes \
-e ecoRI --renz_2 mspI -r -i gzfastq
ls | fgrep sample_ | head
{ set +x; } 2>/dev/null
pause
set -x
rm *rem*
{ set +x; } 2>/dev/null
pause
rm -f Rename_for_dDocent.sh # Always get the latest
set -x
curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/Rename_for_dDocent.sh
more Rename_for_dDocent.sh
{ set +x; } 2>/dev/null
pause
set -x
bash Rename_for_dDocent.sh SimRAD.barcodes
{ set +x; } 2>/dev/null
set -x
ls *.fq.gz
{ set +x; } 2>/dev/null
pause
set -x
ls *.F.fq.gz > namelist
sed -i'' -e 's/.F.fq.gz//g' namelist
AWK1='BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}'
AWK2='!/>/'
AWK3='!/NNN/'
PERLT='while (<>) {chomp; $z{$_}++;} while(($k,$v) = each(%z)) {print "$v\t$k\n";}'
cat namelist | parallel --no-notice -j 8 "zcat {}.F.fq.gz | mawk '$AWK1' | mawk '$AWK2' > {}.forward"
cat namelist | parallel --no-notice -j 8 "zcat {}.R.fq.gz | mawk '$AWK1' | mawk '$AWK2' > {}.reverse"
cat namelist | parallel --no-notice -j 8 "paste -d '-' {}.forward {}.reverse | mawk '$AWK3' | sed 's/-/NNNNNNNNNN/' | perl -e '$PERLT' > {}.uniq.seqs"
{ set +x; } 2>/dev/null
pause
set -x
cat *.uniq.seqs > uniq.seqs
for i in {2..20};
do
echo $i >> pfile
done
cat pfile | parallel --no-notice \
"echo -n {}xxx && mawk -v x={} '\$1 >= x' uniq.seqs | wc -l" \
| mawk '{gsub("xxx","\t",$0); print;}'| sort -g > uniqseq.data
rm pfile
{ set +x; } 2>/dev/null
pause
set -x
more uniqseq.data
{ set +x; } 2>/dev/null
pause
set -x
gnuplot << \EOF
set terminal dumb size 80, 30
set autoscale
set xrange [2:20]
unset label
set title "Number of Unique Sequences with More than X Coverage (Counted within individuals)"
set xlabel "Coverage"
set ylabel "Number of Unique Sequences"
plot 'uniqseq.data' with lines notitle
pause -1
EOF
{ set +x; } 2>/dev/null
pause
set -x
parallel --no-notice -j 8 mawk -v x=4 \''$1 >= x'\' ::: *.uniq.seqs | cut -f2 | perl -e 'while (<>) {chomp; $z{$_}++;} while(($k,$v) = each(%z)) {print "$v\t$k\n";}' > uniqCperindv
wc -l uniqCperindv
{ set +x; } 2>/dev/null
pause
set -x
for ((i = 2; i <= 10; i++));
do
echo $i >> ufile
done
cat ufile | parallel --no-notice "echo -n {}xxx && mawk -v x={} '\$1 >= x' uniqCperindv | wc -l" | mawk '{gsub("xxx","\t",$0); print;}'| sort -g > uniqseq.peri.data
rm ufile
{ set +x; } 2>/dev/null
pause
set -x
gnuplot << \EOF
set terminal dumb size 80, 30
set autoscale
unset label
set title "Number of Unique Sequences present in more than X Individuals"
set xlabel "Number of Individuals"
set ylabel "Number of Unique Sequences"
plot 'uniqseq.peri.data' with lines notitle
pause -1
EOF
{ set +x; } 2>/dev/null
pause
set -x
mawk -v x=4 '$1 >= x' uniqCperindv > uniq.k.4.c.4.seqs
wc -l uniq.k.4.c.4.seqs
{ set +x; } 2>/dev/null
pause
set -x
cut -f2 uniq.k.4.c.4.seqs > totaluniqseq
mawk '{c= c + 1; print ">Contig_" c "\n" $1}' totaluniqseq > uniq.fasta
{ set +x; } 2>/dev/null
pause
set -x
sed -e 's/NNNNNNNNNN/\t/g' uniq.fasta | cut -f1 > uniq.F.fasta
{ set +x; } 2>/dev/null
pause
set -x
cd-hit-est -i uniq.F.fasta -o xxx -c 0.8 -T 0 -M 0 -g 1
{ set +x; } 2>/dev/null
pause
set -x
mawk '{if ($1 ~ /Cl/) clus = clus + 1; else print $3 "\t" clus}' xxx.clstr | sed 's/[>Contig_,...]//g' | sort -g -k1 > sort.contig.cluster.ids
paste sort.contig.cluster.ids totaluniqseq > contig.cluster.totaluniqseq
sort -k2,2 -g contig.cluster.totaluniqseq | sed -e 's/NNNNNNNNNN/\t/g' > rcluster
{ set +x; } 2>/dev/null
pause
set -x
head rcluster
{ set +x; } 2>/dev/null
pause
set -x
cut -f2 rcluster | uniq | wc -l
{ set +x; } 2>/dev/null
pause
set -x
rainbow div -i rcluster -o rbdiv.out
{ set +x; } 2>/dev/null
pause
set -x
rainbow div -i rcluster -o rbdiv.out -f 0.5 -K 10
{ set +x; } 2>/dev/null
pause
set -x
rainbow merge -o rbasm.out -a -i rbdiv.out
{ set +x; } 2>/dev/null
pause
set -x
rainbow merge -o rbasm.out -a -i rbdiv.out -r 2
{ set +x; } 2>/dev/null
pause
# If missing fdesc mount for bash, <(echo "E") will not work
# cat rbasm.out <(echo "E") |sed 's/[0-9]*:[0-9]*://g' | mawk ' {
set -x
echo "E" > endfile
cat rbasm.out endfile |sed 's/[0-9]*:[0-9]*://g' | mawk ' {
if (NR == 1) e=$2;
else if ($1 ~/E/ && lenp > len1) {c=c+1; print ">dDocent_Contig_" e "\n" seq2 "NNNNNNNNNN" seq1; seq1=0; seq2=0;lenp=0;e=$2;fclus=0;len1=0;freqp=0;lenf=0}
else if ($1 ~/E/ && lenp <= len1) {c=c+1; print ">dDocent_Contig_" e "\n" seq1; seq1=0; seq2=0;lenp=0;e=$2;fclus=0;len1=0;freqp=0;lenf=0}
else if ($1 ~/C/) clus=$2;
else if ($1 ~/L/) len=$2;
else if ($1 ~/S/) seq=$2;
else if ($1 ~/N/) freq=$2;
else if ($1 ~/R/ && $0 ~/0/ && $0 !~/1/ && len > lenf) {seq1 = seq; fclus=clus;lenf=len}
else if ($1 ~/R/ && $0 ~/0/ && $0 ~/1/) {seq1 = seq; fclus=clus; len1=len}
else if ($1 ~/R/ && $0 ~!/0/ && freq > freqp && len >= lenp || $1 ~/R/ && $0 ~!/0/ && freq == freqp && len > lenp) {seq2 = seq; lenp = len; freqp=freq}
}' > rainbow.fasta
{ set +x; } 2>/dev/null
pause
set -x
cd-hit-est -i rainbow.fasta -o referenceRC.fasta -M 0 -T 0 -c 0.9
{ set +x; } 2>/dev/null
pause
rm -f remake_reference.sh
set -x
curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/scripts/remake_reference.sh
more remake_reference.sh
#fix_bash_path remake_reference.sh
bash remake_reference.sh 4 4 0.90 PE 2
{ set +x; } 2>/dev/null
pause
rm -f ReferenceOpt.sh
set -x
curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/scripts/ReferenceOpt.sh
more ReferenceOpt.sh
bash ReferenceOpt.sh 4 8 4 8 PE 16
{ set +x; } 2>/dev/null
pause
set -x
bash remake_reference.sh 4 4 0.90 PE 2
head reference.fasta
{ set +x; } 2>/dev/null
pause
set -x
wc -l reference.fasta
{ set +x; } 2>/dev/null
pause
set -x
mawk '/>/' reference.fasta | wc -l
{ set +x; } 2>/dev/null
pause
set -x
mawk '/^NAATT.*N*.*CCGN$/' reference.fasta | wc -l
grep '^NAATT.*N*.*CCGN$' reference.fasta | wc -l
{ set +x; } 2>/dev/null
pause
printf "Bonus Section: Optimize reference assemblies? (takes a long time) y/[n] "
read bonus
if [ 0$bonus = 0y ]; then
set -x
curl -L -O https://raw.githubusercontent.com/jpuritz/dDocent/master/scripts/RefMapOpt.sh
{ set +x; } 2>/dev/null
printf "Running dDocent to trim reads.\n"
pause
set -x
cat << EOM | dDocent || true
yes
8
10g
yes
no
no
no
bacon@uwm.edu
EOM
bash RefMapOpt.sh 4 8 4 8 0.9 64 PE
{ set +x; } 2>/dev/null
pause
more mapping.results
pause
fi

View file

@ -0,0 +1,8 @@
#!/bin/sh -e
if pwd | fgrep dDocent-test; then
rm -rf .*.bak Data ddocent-bin
else
printf "$0 can only be run in a dDocent-test directory.\n"
exit 1
fi

View file

@ -0,0 +1,134 @@
--- dDocent.orig 2018-04-20 00:10:34 UTC
+++ dDocent
@@ -1,6 +1,9 @@
#!/usr/local/bin/bash
export LC_ALL=en_US.UTF-8
+# GNU Parallel uses $SHELL and has issues with [t]csh
+export SHELL=%%BASH%%
+
##########dDocent##########
VERSION='2.2.25'
#This script serves as an interactive bash wrapper to QC, assemble, map, and call SNPs from double digest RAD (SE or PE), ezRAD (SE or PE) data, or SE RAD data.
@@ -27,15 +30,15 @@ do
fi
done
-if find ${PATH//:/ } -maxdepth 1 -name trimmomatic*jar 2> /dev/null| grep -q 'trim' ; then
- TRIMMOMATIC=$(find ${PATH//:/ } -maxdepth 1 -name trimmomatic*jar 2> /dev/null | head -1)
+if [ -e %%JAVAJARDIR%%/trimmomatic.jar ]; then
+ TRIMMOMATIC=%%JAVAJARDIR%%/trimmomatic.jar
else
echo "The dependency trimmomatic is not installed or is not in your" '$PATH'"."
NUMDEP=$((NUMDEP + 1))
fi
-if find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | grep -q 'Tru' ; then
- ADAPTERS=$(find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | head -1)
+if [ -e %%PREFIX%%/share/trimmomatic/adapters/TruSeq2-PE.fa ]; then
+ ADAPTERS=%%PREFIX%%/share/trimmomatic/adapters/TruSeq2-PE.fa
else
echo "The file listing adapters (included with trimmomatic) is not installed or is not in your" '$PATH'"."
NUMDEP=$((NUMDEP + 1))
@@ -80,6 +83,7 @@ FREEB=(`freebayes | grep -oh 'v[0-9].*'
exit 1
fi
VCFTV=$(vcftools | grep VCF | grep -oh '[0-9]*[a-z]*)$' | sed 's/[a-z)]//')
+ echo $VCFTV
if [ "$VCFTV" -lt "10" ]; then
echo "The version of VCFtools installed in your" '$PATH' "is not optimized for dDocent."
echo "Please install at least version 0.1.11"
@@ -89,7 +93,7 @@ VCFTV=$(vcftools | grep VCF | grep -oh '
elif [ "$VCFTV" -ge "12" ]; then
VCFGTFLAG="--max-missing"
fi
-BWAV=$(bwa 2>&1 | mawk '/Versi/' | sed 's/Version: //g' | sed 's/0.7.//g' | sed 's/-.*//g' | cut -c 1-2)
+BWAV=$(bwa 2>&1 | mawk '/Versi/' | sed 's/Version: //g' | sed 's/0.7.//g' | sed 's/a*-.*//g')
if [ "$BWAV" -lt "13" ]; then
echo "The version of bwa installed in your" '$PATH' "is not optimized for dDocent."
echo "Please install at least version 0.7.13"
@@ -107,13 +111,12 @@ BTC=$( bedtools --version | mawk '{print
exit 1
fi
-if ! awk --version | fgrep -v GNU &>/dev/null; then
+if ! awk --version | fgrep GNU &>/dev/null; then
awk=gawk
else
awk=awk
fi
-
if [ $NUMDEP -gt 0 ]; then
echo -e "\nPlease install all required software before running dDocent again."
exit 1
@@ -291,9 +294,9 @@ echo "Using BWA to map reads."
for i in "${NAMES[@]}"
do
if [ -f "$i.R2.fq.gz" ]; then
- bwa mem reference.fasta $i.R1.fq.gz $i.R2.fq.gz -L 20,5 -I $INSERT,$SD,$INSERTH,$INSERTL -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
+ bwa mem -L 20,5 -I $INSERT,$SD,$INSERTH,$INSERTL -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" reference.fasta $i.R1.fq.gz $i.R2.fq.gz 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
else
- bwa mem reference.fasta $i.R1.fq.gz -L 20,5 -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
+ bwa mem -L 20,5 -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" reference.fasta $i.R1.fq.gz 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
fi
samtools sort -@$NUMProc $i.bam -o $i.bam
mv $i.bam $i-RG.bam
@@ -388,10 +391,10 @@ if [ "$SNP" != "no" ]; then
}
export -f call_genos
- ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --env call_genos --memfree $MAXMemory -j $NUMProc --no-notice call_genos {}
+ ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --env call_genos --memfree $MAXMemory -j $NUMProc --no-notice call_genos {}
####
- #ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --memfree $MAXMemory -j $FB1 --no-notice --delay 1 freebayes -L bamlist.list -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10
- #ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --memfree $MAXMemory -j $FB1 --no-notice "samtools view -b -L mapped.{}.bed | freebayes -c -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10"
+ #ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --memfree $MAXMemory -j $FB1 --no-notice --delay 1 freebayes -L bamlist.list -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10
+ #ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --memfree $MAXMemory -j $FB1 --no-notice "samtools view -b -L mapped.{}.bed | freebayes -c -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10"
rm mapped.*.bed
@@ -447,8 +450,8 @@ fi
#Function for trimming reads using trimmomatic
trim_reads(){
- TRIMMOMATIC=$(find ${PATH//:/ } -maxdepth 1 -name trimmomatic*jar 2> /dev/null | head -1)
- ADAPTERS=$(find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | head -1)
+ TRIMMOMATIC=%%JAVAJARDIR%%/trimmomatic.jar
+ ADAPTERS=%%PREFIX%%/share/trimmomatic/adapters/TruSeq2-PE.fa
if [ -f $1.R.fq.gz ]; then
java -Xmx2g -jar $TRIMMOMATIC PE -threads 2 -phred33 $1.F.fq.gz $1.R.fq.gz $1.R1.fq.gz $1.unpairedF.fq.gz $1.R2.fq.gz $1.unpairedR.fq.gz ILLUMINACLIP:$ADAPTERS:2:30:10 LEADING:20 TRAILING:20 SLIDINGWINDOW:5:10 $TW &> $1.trim.log
@@ -747,7 +750,14 @@ else
fi
#Tries to get number of processors, if not asks user
-NUMProc=( `grep -c ^processor /proc/cpuinfo 2> /dev/null` )
+if [ `uname` = Linux ]; then
+ NUMProc=( `grep -c ^processor /proc/cpuinfo 2> /dev/null` )
+elif [ `uname` = FreeBSD ]; then
+ NUMProc=( `sysctl -n hw.ncpu` )
+else
+ printf "Unsupported platform: `uname`\n"
+ exit 1
+fi
NUMProc=$(($NUMProc + 0))
echo "dDocent detects $NUMProc processors available on this system."
@@ -764,7 +774,15 @@ if [ $NUMProc -lt 1 ]; then
fi
#Tries to get maximum system memory, if not asks user
-MAXMemory=$(($(grep -Po '(?<=^MemTotal:)\s*[0-9]+' /proc/meminfo | tr -d " ") / 1048576))G
+if [ `uname` = Linux ]; then
+ MAXMemory=$(($(grep -Po '(?<=^MemTotal:)\s*[0-9]+' /proc/meminfo | tr -d " ") / 1048576))G
+elif [ `uname` = FreeBSD ]; then
+ MAXMemory=`sysctl -n hw.realmem`
+ MAXMemory=$((MAXMemory / 1073741824))G
+else
+ printf "Unsupported platform: `uname`\n"
+ exit 1
+fi
echo "dDocent detects $MAXMemory maximum memory available on this system."
echo "Please enter the maximum memory to use for this analysis. The size can be postfixed with

View file

@ -0,0 +1,7 @@
dDocent is simple bash wrapper to QC, assemble, map, and call SNPs from almost
any kind of RAD sequencing. If you have a reference already, dDocent can be
used to call SNPs from almost any type of NGS data set. It is designed to run
on Linux based machines with large memory capacity and multiple processing
cores, and it can be modified for use on HPC.
WWW: http://ddocent.com

17
biology/ddocent/pkg-plist Normal file
View file

@ -0,0 +1,17 @@
bin/ErrorCount.sh
bin/RefMapOpt.sh
bin/ReferenceOpt.hyb.sh
bin/ReferenceOpt.sh
bin/Rename_SequenceFiles.sh
bin/Rename_for_dDocent.sh
bin/Rename_for_dDocent_with-INDEX.sh
bin/dDocent
bin/dDocent_filters
bin/dDocent_ngs.sh
bin/ddocent-assembly-test
bin/ddocent-assembly-test-cleanup
bin/filter_hwe_by_pop.pl
bin/filter_missing_ind.sh
bin/pop_missing_filter.sh
bin/remake_reference.sh
bin/remove.bad.hap.loci.sh