biology/haplohseq: Resurrect and fix python2 dependency

Move upstream to Github fork
This commit is contained in:
Jason W. Bacon 2021-01-14 15:51:19 +00:00
parent 8ae978198b
commit eb0d9154a7
Notes: svn2git 2021-03-31 03:12:20 +00:00
svn path=/head/; revision=561567
13 changed files with 308 additions and 0 deletions

View file

@ -0,0 +1,36 @@
# $FreeBSD$

PORTNAME=	haplohseq
DISTVERSION=	0.1.2
PORTREVISION=	2
CATEGORIES=	biology

MAINTAINER=	jwb@FreeBSD.org
COMMENT=	Identify regions of allelic imbalance

LICENSE=	MIT

LIB_DEPENDS=	libboost_system.so:devel/boost-libs
RUN_DEPENDS=	R-cran-optparse>0:devel/R-cran-optparse

# The port's patches make the bundled example and helper scripts invoke
# "python2.7" explicitly, so depend on Python 2.7 rather than on the
# default Python version, which would not provide that interpreter.
USES=		compiler:c++11-lang gmake localbase:ldflags python:2.7
USE_GITHUB=	yes
GH_ACCOUNT=	outpaddling

OPTIONS_DEFINE=	EXAMPLES

# Upstream's build file is named "makefile" (lower case).
MAKEFILE=	makefile
# The patched makefile appends $(LDFLAGS) to every link command.
LDFLAGS+=	-lpthread
# The patched makefile's install target passes $(STRIP) to install(1).
MAKE_ENV=	STRIP=${STRIP}

# The haplohseq-example template contains %%EXAMPLESDIR%%; the substituted
# script is generated in ${WRKDIR} and installed from there below.
SUB_FILES=	haplohseq-example

# Install the example runner plus the data, LD map and scripts it needs.
post-install-EXAMPLES-on:
	${INSTALL_SCRIPT} ${WRKDIR}/haplohseq-example ${STAGEDIR}${PREFIX}/bin
	(cd ${WRKSRC} && ${COPYTREE_SHARE} "example ldmap scripts" ${STAGEDIR}${EXAMPLESDIR})

# run-tests executes the unit-test binaries relative to the current
# directory, hence the cd into the test build directory first.
do-test:
	@(cd ${WRKSRC}/build/test && ${FILESDIR}/run-tests)

.include <bsd.port.mk>

View file

@ -0,0 +1,3 @@
TIMESTAMP = 1589733633
SHA256 (outpaddling-haplohseq-0.1.2_GH0.tar.gz) = 64f61023a6795decb680c23d142b1a349988d4c0e3bef8c7d52ca33bf42f29f0
SIZE (outpaddling-haplohseq-0.1.2_GH0.tar.gz) = 12084603

View file

@ -0,0 +1,37 @@
#!/bin/sh -e
##########################################################################
# Script description:
# Run haplohseq example
#
# https://sites.google.com/site/integrativecancergenomics/software/haplohseq
#
# History:
# Date Name Modification
# 2019-11-13 Jason Bacon Begin
##########################################################################
# Print a one-line usage message on stderr and terminate with status 1.
usage()
{
    # Pass $0 as a printf argument instead of embedding it in the format
    # string, so a '%' or backslash in the script path cannot garble the
    # message.
    printf "Usage: %s directory\n" "$0" >&2
    exit 1
}
##########################################################################
# Main
##########################################################################
# Exactly one argument is required: the directory to create.
if [ $# != 1 ]; then
    usage
fi

dir="$1"

# Refuse to touch anything that already exists at the destination.
if [ -e "$dir" ]; then
    # The name comes from the user, so pass it as a printf argument rather
    # than as part of the format string; a '%' or backslash in it would
    # otherwise be interpreted by printf.  Diagnostics go to stderr.
    printf "'%s' already exists. Please remove it or specify another.\n" "$dir" >&2
    exit 1
fi

# %%EXAMPLESDIR%% is a placeholder replaced by the port when this script is
# generated.  Copy the whole examples tree to the new directory, then run
# the example from inside its "example" subdirectory.
cp -R %%EXAMPLESDIR%% "$dir"
cd "$dir/example"
sh ./example_run.sh

View file

@ -0,0 +1,24 @@
--- example/example_run.sh.orig 2019-11-13 15:45:57 UTC
+++ example/example_run.sh
@@ -1,4 +1,4 @@
-#! /bin/bash
+#!/bin/sh
# Example:
# Identify allelic imbalance (AI) given a tumor
@@ -6,13 +6,13 @@
# of the GATK. This involves the following 3 steps.
printf "STEP 1: PHASING 1KG HET SITES ...\n"
-python ../scripts/simple_phaser.py \
+python2.7 ../scripts/simple_phaser.py \
--ldmap ../ldmap/hg19.exome.ldmap \
--vcf example_input/tumor_exome.vcf \
-o example_output/tumor_exome
printf "\nSTEP 2: IDENTIFYING REGIONS OF AI ...\n"
-../haplohseq \
+haplohseq \
--vcf example_output/tumor_exome.hap.vcf \
--phased example_output/tumor_exome.hap \
--event_prevalence 0.1 \

View file

@ -0,0 +1,105 @@
--- makefile.orig 2019-08-29 18:03:23 UTC
+++ makefile
@@ -7,23 +7,32 @@
#
# Constants
-CXX=g++
+# Default to g++ if not set by make args or environment
+CXX?=g++
#-O0 -g will turn on debugging
#The rule of thumb:
#When you need to debug, use -O0 (and -g to generate debugging symbols.)
#When you are preparing to ship it, use -O2.
#When you use gentoo, use -O3...!
#When you need to put it on an embedded system, use -Os (optimize for size, not for efficiency.)
-CXX_FLAGS=-Wall -g -stdlib=libstdc++
+# Use canonical compiler variables, which may be provided by build env
+CXXFLAGS?=-Wall -g -stdlib=libstdc++
SRC=src
CONF=conf
HMM_SRC=$(SRC)/hmm
UTIL_SRC=$(SRC)/util
-BOOST=/usr/local/boost_1_52_0
+BOOST?=/usr/local/boost_1_52_0
INCLUDES=-I./$(SRC) -I./$(HMM_SRC) -I./$(UTIL_SRC) -isystem$(BOOST)
-LIBRARY_PATHS=-Llib/macosx
+LIBRARY_PATHS?=-Llib/macosx
LIBRARIES=-lm -lboost_program_options -lboost_system -lboost_filesystem -lboost_thread
+# Installation target with destdir support
+DESTDIR?=.
+PREFIX?=/usr/local
+MKDIR?=mkdir
+INSTALL?=install
+STRIP?= # empty, set to -s to install stripped binary
+
# Generated directories which are generated in this script and cleaned up with 'make clean'
BUILD=build
OBJ=$(BUILD)/obj
@@ -35,18 +44,21 @@ directories:
# Create object files into the OBJ directory from cpp files in the SRC directory.
$(OBJ)/%.o: $(SRC)/%.cpp directories
- $(CXX) $(CXX_FLAGS) $(INCLUDES) -c -o $@ $<
+ $(CXX) $(CXXFLAGS) $(INCLUDES) -c -o $@ $<
$(OBJ)/%.o: $(HMM_SRC)/%.cpp directories
- $(CXX) $(CXX_FLAGS) $(INCLUDES) -c -o $@ $<
+ $(CXX) $(CXXFLAGS) $(INCLUDES) -c -o $@ $<
$(OBJ)/%.o: $(UTIL_SRC)/%.cpp directories
- $(CXX) $(CXX_FLAGS) $(INCLUDES) -c -o $@ $<
+ $(CXX) $(CXXFLAGS) $(INCLUDES) -c -o $@ $<
-all: clean haplohseq
+all: haplohseq test
haplohseq: $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(OBJ)/InputProcessor.o $(OBJ)/Reporter.o $(OBJ)/FreqPhase.o $(OBJ)/VcfUtil.o $(OBJ)/HaplohSeq.o
- $(CXX) -o $(BIN)/$@ $(CXX_FLAGS) $(INCLUDES) $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(OBJ)/InputProcessor.o $(OBJ)/Reporter.o $(OBJ)/FreqPhase.o $(OBJ)/VcfUtil.o $(OBJ)/HaplohSeq.o $(LIBRARY_PATHS) $(LIBRARIES)
- cp -r $(CONF) $(BIN)/.
-
+ $(CXX) -o $(BIN)/$@ $(CXXFLAGS) $(INCLUDES) $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(OBJ)/InputProcessor.o $(OBJ)/Reporter.o $(OBJ)/FreqPhase.o $(OBJ)/VcfUtil.o $(OBJ)/HaplohSeq.o $(LIBRARY_PATHS) $(LIBRARIES) $(LDFLAGS)
+
+install:
+ $(MKDIR) -p $(DESTDIR)$(PREFIX)/bin
+ $(INSTALL) -c $(STRIP) $(BIN)/* $(DESTDIR)$(PREFIX)/bin
+
clean:
rm -rf $(BUILD)
@@ -74,28 +86,28 @@ test: FreqPhaseTest HaplohSeqTest HmmTest InputProcess
# Create test object files into the OBJ directory from cpp files in the SRC directory.
$(TEST_BIN)/%.o: $(TEST_SRC)/%.cpp directories test_directory
- $(CXX) $(CXX_FLAGS) $(INCLUDES) $(TEST_INCLUDES) -c -o $@ $<
+ $(CXX) $(CXXFLAGS) $(INCLUDES) $(TEST_INCLUDES) -c -o $@ $<
HaplohSeqTest: $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(OBJ)/InputProcessor.o $(TEST_BIN)/HaplohSeqTest.o
- $(CXX) -o $(TEST_BIN)/$@ $(CXX_FLAGS) $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(OBJ)/InputProcessor.o $(TEST_BIN)/HaplohSeqTest.o $(LIBRARY_PATHS) $(LIBRARIES)
+ $(CXX) -o $(TEST_BIN)/$@ $(CXXFLAGS) $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(OBJ)/InputProcessor.o $(TEST_BIN)/HaplohSeqTest.o $(LIBRARY_PATHS) $(LIBRARIES) $(LDFLAGS)
FreqPhaseTest: $(OBJ)/StringUtil.o $(OBJ)/InputProcessor.o $(OBJ)/FreqPhase.o $(TEST_BIN)/FreqPhaseTest.o
- $(CXX) -o $(TEST_BIN)/$@ $(CXX_FLAGS) $(OBJ)/StringUtil.o $(OBJ)/InputProcessor.o $(OBJ)/FreqPhase.o $(TEST_BIN)/FreqPhaseTest.o $(LIBRARY_PATHS) $(LIBRARIES)
+ $(CXX) -o $(TEST_BIN)/$@ $(CXXFLAGS) $(OBJ)/StringUtil.o $(OBJ)/InputProcessor.o $(OBJ)/FreqPhase.o $(TEST_BIN)/FreqPhaseTest.o $(LIBRARY_PATHS) $(LIBRARIES) $(LDFLAGS)
HmmTest: $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(TEST_BIN)/HmmTest.o
- $(CXX) -o $(TEST_BIN)/$@ $(CXX_FLAGS) $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(TEST_BIN)/HmmTest.o $(LIBRARY_PATHS) $(LIBRARIES)
+ $(CXX) -o $(TEST_BIN)/$@ $(CXXFLAGS) $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(TEST_BIN)/HmmTest.o $(LIBRARY_PATHS) $(LIBRARIES) $(LDFLAGS)
InputProcessorTest: $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(OBJ)/InputProcessor.o $(TEST_BIN)/InputProcessorTest.o
- $(CXX) -o $(TEST_BIN)/$@ $(CXX_FLAGS) $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(OBJ)/InputProcessor.o $(TEST_BIN)/InputProcessorTest.o $(LIBRARY_PATHS) $(LIBRARIES)
+ $(CXX) -o $(TEST_BIN)/$@ $(CXXFLAGS) $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(OBJ)/InputProcessor.o $(TEST_BIN)/InputProcessorTest.o $(LIBRARY_PATHS) $(LIBRARIES) $(LDFLAGS)
MathUtilTest: $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(TEST_BIN)/MathUtilTest.o
- $(CXX) -o $(TEST_BIN)/$@ $(CXX_FLAGS) $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(TEST_BIN)/MathUtilTest.o $(LIBRARY_PATHS) $(LIBRARIES)
+ $(CXX) -o $(TEST_BIN)/$@ $(CXXFLAGS) $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(TEST_BIN)/MathUtilTest.o $(LIBRARY_PATHS) $(LIBRARIES) $(LDFLAGS)
ReporterTest: $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(TEST_BIN)/ReporterTest.o
- $(CXX) -o $(TEST_BIN)/$@ $(CXX_FLAGS) $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(TEST_BIN)/ReporterTest.o $(LIBRARY_PATHS) $(LIBRARIES)
+ $(CXX) -o $(TEST_BIN)/$@ $(CXXFLAGS) $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(TEST_BIN)/ReporterTest.o $(LIBRARY_PATHS) $(LIBRARIES) $(LDFLAGS)
ThreadPoolTest: $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(OBJ)/ThreadPool.o $(TEST_BIN)/ThreadPoolTest.o
- $(CXX) -o $(TEST_BIN)/$@ $(CXX_FLAGS) $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(OBJ)/ThreadPool.o $(TEST_BIN)/ThreadPoolTest.o $(LIBRARY_PATHS) $(LIBRARIES)
+ $(CXX) -o $(TEST_BIN)/$@ $(CXXFLAGS) $(OBJ)/Hmm.o $(OBJ)/DataStructures.o $(OBJ)/MathUtil.o $(OBJ)/StringUtil.o $(OBJ)/ThreadPool.o $(TEST_BIN)/ThreadPoolTest.o $(LIBRARY_PATHS) $(LIBRARIES) $(LDFLAGS)
############################## END TEST LOGIC ##############################

View file

@ -0,0 +1,8 @@
--- scripts/ldmap.py.orig 2019-11-13 15:51:02 UTC
+++ scripts/ldmap.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python2.7
+
import argparse
import sys
import random

View file

@ -0,0 +1,8 @@
--- scripts/simple_phaser.py.orig 2019-11-13 15:52:02 UTC
+++ scripts/simple_phaser.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python2.7
+
import argparse
import random
import sys

View file

@ -0,0 +1,29 @@
--- src/FreqPhase.cpp.orig 2019-11-13 14:19:06 UTC
+++ src/FreqPhase.cpp
@@ -5,6 +5,7 @@
* Email: sanlucas@gmail.com
*/
+#include <sysexits.h>
#include "FreqPhase.h"
namespace haplohseq {
@@ -180,10 +181,17 @@ double FreqPhase::meanValue(const std::v
double FreqPhase::medianValue(const std::vector<double>& values) {
double median;
size_t size = values.size();
+
+ // We can probably detect this condition earlier while loading the VCF
+ if ( size == 0 ) {
+ std::cerr << "FreqPhase::medianValue(): values vector is empty." << std::endl;
+ std::cerr << "Make sure your VCF has all of GT:AD:DP in the FORMAT column." << std::endl;
+ exit(EX_DATAERR);
+ }
std::vector<double> tempFreqs(values);
sort(tempFreqs.begin(), tempFreqs.end());
- if (size % 2 == 0) {
+ if (size % 2 == 0) {
median = (tempFreqs[size / 2 - 1] + tempFreqs[size / 2]) / 2;
}
else {

View file

@ -0,0 +1,20 @@
--- src/HaplohSeq.cpp.orig 2019-09-01 14:58:30 UTC
+++ src/HaplohSeq.cpp
@@ -202,7 +202,7 @@ void HaplohSeq::runBafHaplohseq( std::string& obsType,
// Spawn worker threads
for (std::size_t t = 0; t < numThreads; t++) {
- threadPool.create_thread(boost::bind((unsigned long int (boost::asio::io_service::*)())&boost::asio::io_service::run, &ioService));
+ threadPool.create_thread(boost::bind((boost::asio::io_context::count_type (boost::asio::io_service::*)())&boost::asio::io_service::run, &ioService));
}
std::map<std::string, boost::shared_ptr<Hmm> > chrHmms;
@@ -420,7 +420,7 @@ void HaplohSeq::runVcfHaplohseq( std::string& obsType,
// Spawn worker threads
for (std::size_t t = 0; t < numThreads; t++) {
- threadPool.create_thread(boost::bind((unsigned long int (boost::asio::io_service::*)())&boost::asio::io_service::run, &ioService));
+ threadPool.create_thread(boost::bind((boost::asio::io_context::count_type (boost::asio::io_service::*)())&boost::asio::io_service::run, &ioService));
}
std::map<std::string, boost::shared_ptr<Hmm> > chrHmms;

View file

@ -0,0 +1,11 @@
--- src/test/InputProcessorTest.cpp.orig 2019-09-01 12:33:22 UTC
+++ src/test/InputProcessorTest.cpp
@@ -82,7 +82,7 @@ BOOST_AUTO_TEST_CASE(isHetTest) {
BOOST_REQUIRE_EQUAL(inputProc.isHet('A','?'), false);
BOOST_REQUIRE_EQUAL(inputProc.isHet('A','.'), false);
BOOST_REQUIRE_EQUAL(inputProc.isHet('A',' '), false);
- BOOST_REQUIRE_EQUAL(inputProc.isHet('C', NULL), true);
+ BOOST_REQUIRE_EQUAL(inputProc.isHet('C','\0'), true);
}
BOOST_AUTO_TEST_CASE(readAllelesTest) {

View file

@ -0,0 +1,10 @@
#!/bin/sh -e

# Run each unit-test binary found in the current directory, in a fixed
# order, printing a banner before each one.  Because of -e, the first
# failing test aborts the run with a non-zero status.
for unit_test in FreqPhaseTest InputProcessorTest HmmTest HaplohSeqTest; do
    printf "\n=== %s ===\n\n" "$unit_test"
    "./$unit_test"
done

View file

@ -0,0 +1,6 @@
Haplohseq identifies regions of allelic imbalance (AI) in sequencing data
obtained from impure samples where AI events exist in a potentially low
proportion of cells in the sample. Input to the software includes a VCF file
of genotypes and estimated phased genotypes.
WWW: https://sites.google.com/site/integrativecancergenomics/software/haplohseq

View file

@ -0,0 +1,11 @@
bin/haplohseq
%%PORTEXAMPLES%%bin/haplohseq-example
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/example/example_input/tumor_exome.vcf
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/example/example_run.sh
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/ldmap/hg19.exome.ldmap
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/scripts/haplohseq_plot.R
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/scripts/ldmap.py
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/scripts/ldmap.py.orig
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/scripts/simple_phaser.py
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/scripts/simple_phaser.py.orig
%%PORTEXAMPLES%%@dir %%EXAMPLESDIR%%/example/example_output