Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10", "3.11", "3.12"]
python-version: ["3.11", "3.12"]
steps:
- uses: actions/checkout@v3

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ cython_debug/
.idea/

*.pickle
.vscode

# mapping data/output
notebooks/analysis/analysis_files
Expand Down
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.11.4
51 changes: 51 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Development image for dcd_mapping: Python 3.11 plus BLAT, htslib/samtools/bcftools
# and the hg38 reference file, with the project installed in editable mode.
# Pinned to amd64 because the BLAT executable downloaded below is the linux.x86_64 build.
FROM --platform=linux/amd64 python:3.11

# Make a RUN step fail when any command of a pipeline fails (e.g. `curl ... | tar xj`);
# the default `/bin/sh -c` only reports the exit status of the last command.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install the PostgreSQL client and the toolchain/libraries needed to build
# htslib, samtools and bcftools (used to configure fasta files for genomic assembly).
# `update` and `install` share one layer so a cached, stale package index is never reused.
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    git \
    libbz2-dev \
    libcurl4-openssl-dev \
    libgsl0-dev \
    liblzma-dev \
    libncurses5-dev \
    libperl-dev \
    libssl-dev \
    postgresql-client \
    zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Download and install the blat executable.
# -f makes curl exit non-zero on an HTTP error instead of saving the error page as the binary.
RUN curl -fsSL -o /usr/bin/blat https://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/blat/blat \
    && chmod +x /usr/bin/blat

# Set the dcd_mapping resources directory and download the reference file into it.
ENV DCD_MAPPING_RESOURCES_DIR=/home/.local/share/dcd_mapping
WORKDIR ${DCD_MAPPING_RESOURCES_DIR}
RUN curl -fLJO https://hgdownload.cse.ucsc.edu/goldenpath/hg38/bigZips/hg38.2bit

# Build and install htslib, samtools and bcftools (all at the same release version).
# The sources are unpacked in a scratch directory and removed in the same layer so
# they neither bloat the image nor end up inside the resources directory.
ARG htsversion=1.19
WORKDIR /tmp/hts-build
RUN curl -fL https://github.com/samtools/htslib/releases/download/${htsversion}/htslib-${htsversion}.tar.bz2 | tar xj && \
    (cd htslib-${htsversion} && ./configure --enable-plugins --with-plugin-path='$(libexecdir)/htslib:/usr/libexec/htslib' && make install) && \
    ldconfig && \
    curl -fL https://github.com/samtools/samtools/releases/download/${htsversion}/samtools-${htsversion}.tar.bz2 | tar xj && \
    (cd samtools-${htsversion} && ./configure --with-htslib=system && make install) && \
    curl -fL https://github.com/samtools/bcftools/releases/download/${htsversion}/bcftools-${htsversion}.tar.bz2 | tar xj && \
    (cd bcftools-${htsversion} && ./configure --enable-libgsl --enable-perl-filters --with-htslib=system && make install) && \
    rm -rf /tmp/hts-build/*

# WORKDIR creates the directory if it does not exist, so no separate mkdir is needed.
WORKDIR /usr/src/app
COPY . .

# Editable install of the project with its dev and tests extras.
RUN pip install --no-cache-dir -e '.[dev,tests]'
# use polars-lts-cpu to avoid issues with x86 emulation on arm machine
RUN pip install --no-cache-dir -U polars-lts-cpu
# install gene normalizer with pg dependencies. TODO: can the pg dependencies be specified in pyproject.toml?
#RUN pip install 'gene-normalizer[pg]'

# Emit Python stdout/stderr immediately instead of buffering it.
ENV PYTHONUNBUFFERED=1

# Make the project's src/ directory importable. Set directly rather than appended to
# ${PYTHONPATH}: no earlier instruction in this file defines PYTHONPATH, so the old
# "${PYTHONPATH}:..." form produced a value starting with an empty path entry.
ENV PYTHONPATH=/usr/src/app/src
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ Use `dcd-map --help` to see other available options.

Notebooks for manuscript data analysis and figure generation are provided within `notebooks/analysis`. See [`notebooks/analysis/README.md`](notebooks/analysis/README.md) for more information.

Following the installation instructions for [CoolSeqTool](https://coolseqtool.readthedocs.io/latest/install.html) and [Gene Normalizer](https://gene-normalizer.readthedocs.io/latest/install.html) should take care of the external data dependencies.

Note that Gene Normalizer's `pg` dependency group must be installed to make use of the PostgreSQL-based backend:

```shell
python3 -m pip install 'gene-normalizer[pg]'
```

## Development

Clone the repo
Expand Down
39 changes: 39 additions & 0 deletions docker-compose-dev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Development stack: the application container, a PostgreSQL database,
# and a seqrepo container that shares a named volume with the application.
version: "3"

services:
  app:
    build: .
    # Keep the container running without starting a service, so commands can be
    # run inside it (e.g. via `docker compose exec`).
    command: bash -c "tail -f /dev/null"
    depends_on:
      - db
      - seqrepo
    env_file:
      - settings/.env.dev
    environment:
      # The database is reached by its service name on the compose network.
      DB_HOST: db
      DB_PORT: 5432
    ports:
      # host 8002 -> container 8000
      - "8002:8000"
    volumes:
      # Bind-mount the working tree over the copy baked into the image.
      - .:/usr/src/app
      - vrs-mapping-seqrepo-dev:/usr/local/share/seqrepo

  db:
    image: postgres:14
    env_file:
      - settings/.env.dev
    ports:
      # host 5434 -> container 5432
      - "5434:5432"
    expose:
      - 5432
    volumes:
      - vrs-mapping-data-dev:/var/lib/postgresql/data

  seqrepo:
    image: biocommons/seqrepo:2021-01-29
    volumes:
      # Same named volume that the app service mounts at this path.
      - vrs-mapping-seqrepo-dev:/usr/local/share/seqrepo

volumes:
  vrs-mapping-data-dev:
  vrs-mapping-seqrepo-dev:
215 changes: 215 additions & 0 deletions notebooks/analysis/analysis_files/mave_dat.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion notebooks/analysis/mave_mapping_fig_3b.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ df <- data.frame('Experiment Cellular Context' = names, 'value' = context_counts

ggplot(df, aes(x = factor(Experiment.Cellular.Context, levels = c('Human', 'Yeast', 'Bacteria', 'Mouse', 'Bacteriophage', 'N/A')), y = value, fill = rownames(df))) +
geom_bar(stat = 'identity', fill = c("#F8766D","#B79F00","#90ee90","#00BFC4","#619CFF","#F564E3")) +
geom_text(aes(label = value), vjust = ifelse(df$value != 92, -1, 3), size = 10, colour = ifelse(df$value == 97, 'white', 'black')) +
geom_text(aes(label = value), vjust = ifelse(df$value != 86, -1, 3), size = 10, colour = ifelse(df$value == 97, 'white', 'black')) +
xlab('MAVE Experiment Cellular Context') +
ylab('Number of Experiments') +
scale_y_continuous(expand = c(0, 0)) +
Expand Down
Loading