diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml index da5f4a6..3656ac6 100644 --- a/.github/workflows/checks.yaml +++ b/.github/workflows/checks.yaml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.11", "3.12"] steps: - uses: actions/checkout@v3 diff --git a/.gitignore b/.gitignore index ac63811..fd06d06 100644 --- a/.gitignore +++ b/.gitignore @@ -161,6 +161,7 @@ cython_debug/ .idea/ *.pickle +.vscode # mapping data/output notebooks/analysis/analysis_files diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..0c7d5f5 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11.4
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..22a982b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,56 @@
+# Force linux/amd64 because the prebuilt UCSC blat binary downloaded below is x86_64-only.
+FROM --platform=linux/amd64 python:3.11
+
+# Fail a RUN step when any command in a pipeline fails (needed for the curl | tar steps below).
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Install the tools and headers needed to build htslib, samtools and bcftools,
+# so we can configure fasta files for genomic assembly.
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    libbz2-dev \
+    libcurl4-openssl-dev \
+    libgsl0-dev \
+    liblzma-dev \
+    libncurses5-dev \
+    libperl-dev \
+    libssl-dev \
+    postgresql-client \
+    zlib1g-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# download and install blat executable (over HTTPS; -f makes an HTTP error fail the build)
+RUN curl -fsSL -o /usr/bin/blat https://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/blat/blat \
+    && chmod +x /usr/bin/blat
+
+# set dcd_mapping resources directory and download reference file
+WORKDIR /home/.local/share/dcd_mapping
+ENV DCD_MAPPING_RESOURCES_DIR=/home/.local/share/dcd_mapping
+RUN curl -fLJO https://hgdownload.cse.ucsc.edu/goldenpath/hg38/bigZips/hg38.2bit
+
+# Install htslib, samtools and bcftools from the upstream release tarballs.
+# The extracted source trees are removed in the same layer so they do not stay in the resources directory.
+ARG htsversion=1.19
+RUN curl -fL https://github.com/samtools/htslib/releases/download/${htsversion}/htslib-${htsversion}.tar.bz2 | tar xj && \
+    (cd htslib-${htsversion} && ./configure --enable-plugins --with-plugin-path='$(libexecdir)/htslib:/usr/libexec/htslib' && make install) && \
+    ldconfig && \
+    curl -fL https://github.com/samtools/samtools/releases/download/${htsversion}/samtools-${htsversion}.tar.bz2 | tar xj && \
+    (cd samtools-${htsversion} && ./configure --with-htslib=system && make install) && \
+    curl -fL https://github.com/samtools/bcftools/releases/download/${htsversion}/bcftools-${htsversion}.tar.bz2 | tar xj && \
+    (cd bcftools-${htsversion} && ./configure --enable-libgsl --enable-perl-filters --with-htslib=system && make install) && \
+    rm -rf htslib-${htsversion} samtools-${htsversion} bcftools-${htsversion}
+
+# WORKDIR creates the directory when it is missing, so no separate mkdir is needed.
+WORKDIR /usr/src/app
+COPY . .
+
+RUN pip install --no-cache-dir -e '.[dev,tests]'
+# use polars-lts-cpu to avoid issues with x86 emulation on arm machine
+RUN pip install --no-cache-dir -U polars-lts-cpu
+# install gene normalizer with pg dependencies. TODO: can the pg dependencies be specified in pyproject.toml?
+#RUN pip install 'gene-normalizer[pg]'
+
+ENV PYTHONUNBUFFERED=1
+ENV PYTHONPATH="${PYTHONPATH}:/usr/src/app/src"
diff --git a/README.md b/README.md index abfbadc..da3cb1e 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,14 @@ Use `dcd-map --help` to see other available options. Notebooks for manuscript data analysis and figure generation are provided within `notebooks/analysis`. See [`notebooks/analysis/README.md`](notebooks/analysis/README.md) for more information. +Following installation instructions for [CoolSeqTool](https://coolseqtool.readthedocs.io/latest/install.html) and [Gene Normalizer](https://gene-normalizer.readthedocs.io/latest/install.html) should take care of the external data dependencies.
+ +Note that Gene Normalizer's `pg` dependency group must be installed to make use of the PostgreSQL-based backend: + +```shell +python3 -m pip install 'gene-normalizer[pg]' +``` + ## Development Clone the repo
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
new file mode 100644
index 0000000..e2edbe4
--- /dev/null
+++ b/docker-compose-dev.yml
@@ -0,0 +1,40 @@
+version: "3"
+
+services:
+  # Development application container, built from the Dockerfile in this directory.
+  app:
+    build: .
+    # Idle command: keeps the container running without starting any service.
+    command: bash -c "tail -f /dev/null"
+    depends_on: [db, seqrepo]
+    env_file: [settings/.env.dev]
+    environment:
+      DB_HOST: db
+      DB_PORT: 5432
+    # Host port 8002 is mapped to container port 8000.
+    ports: ["8002:8000"]
+    volumes:
+      # Bind-mount the project directory over /usr/src/app in the container.
+      - .:/usr/src/app
+      # Named volume that is also mounted by the seqrepo service.
+      - vrs-mapping-seqrepo-dev:/usr/local/share/seqrepo
+
+  # PostgreSQL 14 database; host port 5434 is mapped to container port 5432.
+  db:
+    image: postgres:14
+    env_file: [settings/.env.dev]
+    ports: ["5434:5432"]
+    expose: [5432]
+    volumes:
+      - vrs-mapping-data-dev:/var/lib/postgresql/data
+
+  # SeqRepo image; mounts the same named volume as the app service.
+  seqrepo:
+    image: biocommons/seqrepo:2021-01-29
+    volumes:
+      - vrs-mapping-seqrepo-dev:/usr/local/share/seqrepo
+
+# Named volumes referenced by the services above.
+volumes:
+  vrs-mapping-data-dev:
+  vrs-mapping-seqrepo-dev:
diff --git a/notebooks/analysis/analysis_files/mave_dat.csv b/notebooks/analysis/analysis_files/mave_dat.csv new file mode 100644 index 0000000..7c809e3 --- /dev/null +++ b/notebooks/analysis/analysis_files/mave_dat.csv @@ -0,0 +1,215 @@ +,urn,target_sequence,target_sequence_type,target,assembly_id,uniprot_id,target_type
+0,urn:mavedb:00000041-a-1,CTGCGGCTGGAGGTCAAGCTGGGCCAGGGCTGCTTTGGCGAGGTGTGGATGGGGACCTGGAACGGTACCACCAGGGTGGCCATCAAAACCCTGAAGCCTGGCACGATGTCTCCAGAGGCCTTCCTGCAGGAGGCCCAGGTCATGAAGAAGCTGAGGCATGAGAAGCTGGTGCAGTTGTATGCTGTGGTTTCAGAGGAGCCCATTTACATCGTCACGGAGTACATGAGCAAGGGGAGTTTGCTGGACTTTCTCAAGGGGGAGACAGGCAAGTACCTGCGGCTGCCTCAGCTGGTGGACATGGCTGCTCAGATCGCCTCAGGCATGGCGTACGTGGAGCGGATGAACTACGTCCACCGGGACCTTCGTGCAGCCAACATCCTGGTGGGAGAGAACCTGGTGTGCAAAGTGGCCGACTTTGGGCTGGCTCGGCTCATTGAAGACAATGAGTACACGGCGCGGCAAGGTGCCAAATTCCCCATCAAGTGGACGGCTCCAGAAGCTGCCCTCTATGGCCGCTTCACCATCAAGTCGGACGTGTGGTCCTTCGGGATCCTGCTGACTGAGCTCACCACAAAGGGACGGGTGCCCTACCCTGGGATGGTGAACCGCGAGGTGCTGGACCAGGTGGAGCGGGGCTACCGGATGCCCTGCCCGCCGGAGTGTCCCGAGTCCCTGCACGACCTCATGTGCCAGTGCTGGCGGAAGGAGCCTGAGGAGCGGCCCACCTTCGAGTACCTGCAGGCCTTCCTG,dna,Src catalytic domain,GCF_000001405.26,P12931,Protein coding +1,urn:mavedb:00000048-a-1,GAGGGGATCAGTATATACACTTCAGATAACTACACCGAGGAAATGGGCTCAGGGGACTATGACTCCATGAAGGAACCCTGTTTCCGTGAAGAAAATGCTAATTTCAATAAAATCTTCCTGCCCACCATCTACTCCATCATCTTCTTAACTGGCATTGTGGGCAATGGATTGGTCATCCTGGTCATGGGTTACCAGAAGAAACTGAGAAGCATGACGGACAAGTACAGGCTGCACCTGTCAGTGGCCGACCTCCTCTTTGTCATCACGCTTCCCTTCTGGGCAGTTGATGCCGTGGCAAACTGGTACTTTGGGAACTTCCTATGCAAGGCAGTCCATGTCATCTACACAGTCAACCTCTACAGCAGTGTCCTCATCCTGGCCTTCATCAGTCTGGACCGCTACCTGGCCATCGTCCACGCCACCAACAGTCAGAGGCCAAGGAAGCTGTTGGCTGAAAAGGTGGTCTATGTTGGCGTCTGGATCCCTGCCCTCCTGCTGACTATTCCCGACTTCATCTTTGCCAACGTCAGTGAGGCAGATGACAGATATATCTGTGACCGCTTCTACCCCAATGACTTGTGGGTGGTTGTGTTCCAGTTTCAGCACATCATGGTTGGCCTTATCCTGCCTGGTATTGTCATCCTGTCCTGCTATTGCATTATCATCTCCAAGCTGTCACACTCCAAGGGCCACCAGAAGCGCAAGGCCCTCAAGACCACAGTCATCCTCATCCTGGCTTTCTTCGCCTGTTGGCTGCCTTACTACATTGGGATCAGCATCGACTCCTTCATCCTCCTGGAAATCATCAAGCAAGGGTGTGAGTTTGAGAACACTGTGCACAAGTGGATTTCCATCACCGAGGCCCTAGCTTTCTTCCACTGTTGTCTGAACCCCATCCTCTATGCTTTCCTTGGAGCCAAATTTAAAACCTCTGCCCAGCACGCACTCACCTCTGTGAGCAGAGGGTCCAGCCTCAAGATCCTCTCCAAAGGAAAGCGAGGTGGACATTCATCTGTTTCCACTGAGTCTGAGTCTTCAAGTTTTCACTCCAGC,dna,CXCR4,GCF_000001405.26,P61073,Protein coding 
+2,urn:mavedb:00000068-b-1,ATGGAGGAGCCGCAGTCAGATCCTAGCGTCGAGCCCCCTCTGAGTCAGGAAACATTTTCAGACCTATGGAAACTACTTCCTGAAAACAACGTTCTGTCCCCCTTGCCGTCCCAAGCAATGGATGATTTGATGCTGTCCCCGGACGATATTGAACAATGGTTCACTGAAGACCCAGGTCCAGATGAAGCTCCCAGAATGCCAGAGGCTGCTCCCCGCGTGGCCCCTGCACCAGCAGCTCCTACACCGGCGGCCCCTGCACCAGCCCCCTCCTGGCCCCTGTCATCTTCTGTCCCTTCCCAGAAAACCTACCAGGGCAGCTACGGTTTCCGTCTGGGCTTCTTGCATTCTGGGACAGCCAAGTCTGTGACTTGCACGTACTCCCCTGCCCTCAACAAGATGTTTTGCCAACTGGCCAAGACCTGCCCTGTGCAGCTGTGGGTTGATTCCACACCCCCGCCCGGCACCCGCGTCCGCGCCATGGCCATCTACAAGCAGTCACAGCACATGACGGAGGTTGTGAGGCGCTGCCCCCACCATGAGCGCTGCTCAGATAGCGATGGTCTGGCCCCTCCTCAGCATCTTATCCGAGTGGAAGGAAATTTGCGTGTGGAGTATTTGGATGACAGAAACACTTTTCGACATAGTGTGGTGGTGCCCTATGAGCCGCCTGAGGTTGGCTCTGACTGTACCACCATCCACTACAACTACATGTGTAACAGTTCCTGCATGGGCGGCATGAACCGGAGGCCCATCCTCACCATCATCACACTGGAAGACTCCAGTGGTAATCTACTGGGACGGAACAGCTTTGAGGTGCGTGTTTGTGCCTGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAAGGGGAGCCTCACCACGAGCTGCCCCCAGGGAGCACTAAGCGAGCACTGCCCAACAACACCAGCTCCTCTCCCCAGCCAAAGAAGAAACCACTGGATGGAGAATATTTCACCCTTCAGATCCGTGGGCGTGAGCGCTTCGAGATGTTCCGAGAGCTGAATGAGGCCTTGGAACTCAAGGATGCCCAGGCTGGGAAGGAGCCAGGGGGGAGCAGGGCTCACTCCAGCCACCTGAAGTCCAAAAAGGGTCAGTCTACCTCCCGCCATAAAAAACTCATGTTCAAGACAGAAGGGCCTGACTCAGACTAG,dna,TP53 (P72R),GCF_000001405.26,,Protein coding +3,urn:mavedb:00000045-c-1,ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA,dna,alpha-synuclein,GCF_000001405.10,P37840,Protein coding +4,urn:mavedb:00000018-a-1,GGTGTCTGTTTGAGGTTGCTAGTGAACACAGTTGTGTCAGAAGCAAATGTAAGCAATAGATGGCTCTGCCCTGACTTTTATGCCCAGCCCTGGCTCCTGCCCTCCCTGCTCCTGGGAGTAGATTGGCCAACCCTAGGGTGTGGCTCCACAGGGTGAGGTCTAAGTGATGACAGCCGTACCTGTCCTT,dna,HBB 
promoter,GCF_000001405.26,,Regulatory +5,urn:mavedb:00000099-a-1,ATGAATGGCACAGAAGGCCCTAACTTCTACGTGCCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTCACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCACTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTACTACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAGGAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACCCGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCAGCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGATGAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAA,dna,RHO,GCF_000001405.26,,Protein coding +6,urn:mavedb:00000001-c-1,ATGGCTGATCAGCTGACCGAAGAACAGATTGCTGAATTCAAGGAAGCCTTCTCCCTATTTGATAAAGATGGCGATGGCACCATCACAACAAAGGAACTTGGAACTGTCATGAGGTCACTGGGTCAGAACCCAACAGAAGCTGAATTGCAGGATATGATCAATGAAGTGGATGCTGATGGTAATGGCACCATTGACTTCCCCGAATTTTTGACTATGATGGCTAGAAAAATGAAAGATACAGATAGTGAAGAAGAAATCCGTGAGGCATTCCGAGTCTTTGACAAGGATGGCAATGGTTATATCAGTGCAGCAGAACTACGTCACGTCATGACAAACTTAGGAGAAAAACTAACAGATGAAGAAGTAGATGAAATGATCAGAGAAGCAGATATTGATGGAGACGGACAAGTCAACTATGAAGAATTCGTACAGATGATGACTGCAAAATGA,dna,CALM1,GCF_000001405.26,P0DP23,Protein coding 
+7,urn:mavedb:00000049-a-3,ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA,d
na,MTHFR,GCF_000001405.26,P42898,Protein coding +8,urn:mavedb:00000050-a-1,ATGGCGGTGCAGCCGAAGGAGACGCTGCAGTTGGAGAGCGCGGCCGAGGTCGGCTTCGTGCGCTTCTTTCAGGGCATGCCGGAGAAGCCGACCACCACAGTGCGCCTTTTCGACCGGGGCGACTTCTATACGGCGCACGGCGAGGACGCGCTGCTGGCCGCCCGGGAGGTGTTCAAGACCCAGGGGGTGATCAAGTACATGGGGCCGGCAGGAGCAAAGAATCTGCAGAGTGTTGTGCTTAGTAAAATGAATTTTGAATCTTTTGTAAAAGATCTTCTTCTGGTTCGTCAGTATAGAGTTGAAGTTTATAAGAATAGAGCTGGAAATAAGGCATCCAAGGAGAATGATTGGTATTTGGCATATAAGGCTTCTCCTGGCAATCTCTCTCAGTTTGAAGACATTCTCTTTGGTAACAATGATATGTCAGCTTCCATTGGTGTTGTGGGTGTTAAAATGTCCGCAGTTGATGGCCAGAGACAGGTTGGAGTTGGGTATGTGGATTCCATACAGAGGAAACTAGGACTGTGTGAATTCCCTGATAATGATCAGTTCTCCAATCTTGAGGCTCTCCTCATCCAGATTGGACCAAAGGAATGTGTTTTACCCGGAGGAGAGACTGCTGGAGACATGGGGAAACTGAGACAGATAATTCAAAGAGGAGGAATTCTGATCACAGAAAGAAAAAAAGCTGACTTTTCCACAAAAGACATTTATCAGGACCTCAACCGGTTGTTGAAAGGCAAAAAGGGAGAGCAGATGAATAGTGCTGTATTGCCAGAAATGGAGAATCAGGTTGCAGTTTCATCACTGTCTGCGGTAATCAAGTTTTTAGAACTCTTATCAGATGATTCCAACTTTGGACAGTTTGAACTGACTACTTTTGACTTCAGCCAGTATATGAAATTGGATATTGCAGCAGTCAGAGCCCTTAACCTTTTTCAGGGTTCTGTTGAAGATACCACTGGCTCTCAGTCTCTGGCTGCCTTGCTGAATAAGTGTAAAACCCCTCAAGGACAAAGACTTGTTAACCAGTGGATTAAGCAGCCTCTCATGGATAAGAACAGAATAGAGGAGAGATTGAATTTAGTGGAAGCTTTTGTAGAAGATGCAGAATTGAGGCAGACTTTACAAGAAGATTTACTTCGTCGATTCCCAGATCTTAACCGACTTGCCAAGAAGTTTCAAAGACAAGCAGCAAACTTACAAGATTGTTACCGACTCTATCAGGGTATAAATCAACTACCTAATGTTATACAGGCTCTGGAAAAACATGAAGGAAAACACCAGAAATTATTGTTGGCAGTTTTTGTGACTCCTCTTACTGATCTTCGTTCTGACTTCTCCAAGTTTCAGGAAATGATAGAAACAACTTTAGATATGGATCAGGTGGAAAACCATGAATTCCTTGTAAAACCTTCATTTGATCCTAATCTCAGTGAATTAAGAGAAATAATGAATGACTTGGAAAAGAAGATGCAGTCAACATTAATAAGTGCAGCCAGAGATCTTGGCTTGGACCCTGGCAAACAGATTAAACTGGATTCCAGTGCACAGTTTGGATATTACTTTCGTGTAACCTGTAAGGAAGAAAAAGTCCTTCGTAACAATAAAAACTTTAGTACTGTAGATATCCAGAAGAATGGTGTTAAATTTACCAACAGCAAATTGACTTCTTTAAATGAAGAGTATACCAAAAATAAAACAGAATATGAAGAAGCCCAGGATGCCATTGTTAAAGAAATTGTCAATATTTCTTCAGGCTATGTAGAACCAATGCAGACACTCAATGATGTGTTAGCTCAGCTAGATGCTGTTGTCAGCTTTGCTCACGTGTCAAATGGAGCACCTGTTCCATATGTACGACCAGCCATTTTGGAGAAAGGACAAGGAAGAATTATATTAAAAGCATCCAGGCATGCTTGTGT
TGAAGTTCAAGATGAAATTGCATTTATTCCTAATGACGTATACTTTGAAAAAGATAAACAGATGTTCCACATCATTACTGGCCCCAATATGGGAGGTAAATCAACATATATTCGACAAACTGGGGTGATAGTACTCATGGCCCAAATTGGGTGTTTTGTGCCATGTGAGTCAGCAGAAGTGTCCATTGTGGACTGCATCTTAGCCCGAGTAGGGGCTGGTGACAGTCAATTGAAAGGAGTCTCCACGTTCATGGCTGAAATGTTGGAAACTGCTTCTATCCTCAGGTCTGCAACCAAAGATTCATTAATAATCATAGATGAATTGGGAAGAGGAACTTCTACCTACGATGGATTTGGGTTAGCATGGGCTATATCAGAATACATTGCAACAAAGATTGGTGCTTTTTGCATGTTTGCAACCCATTTTCATGAACTTACTGCCTTGGCCAATCAGATACCAACTGTTAATAATCTACATGTCACAGCACTCACCACTGAAGAGACCTTAACTATGCTTTATCAGGTGAAGAAAGGTGTCTGTGATCAAAGTTTTGGGATTCATGTTGCAGAGCTTGCTAATTTCCCTAAGCATGTAATAGAGTGTGCTAAACAGAAAGCCCTGGAACTTGAGGAGTTTCAGTATATTGGAGAATCGCAAGGATATGATATCATGGAACCAGCAGCAAAGAAGTGCTATCTGGAAAGAGAGCAAGGTGAAAAAATTATTCAGGAGTTCCTGTCCAAGGTGAAACAAATGCCCTTTACTGAAATGTCAGAAGAAAACATCACAATAAAGTTAAAACAGCTAAAAGCTGAAGTAATAGCAAAGAATAATAGCTTTGTAAATGAAATCATTTCACGAATAAAAGTTACTACGTGA,dna,MSH2,GCF_000001405.26,P43246,Protein coding +9,urn:mavedb:00000061-i-1,TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC,dna,RAF,GCF_000001405.26,P04049,Protein coding +10,urn:mavedb:00000083-b-1,AGTTGAAGAAGGGAGCTCCAGCACAGTGAAATGGACAGAAGGGCAGAGCAA,dna,Minigene exon - SRSF1 (ASF/SF2) binding site,GCF_000001405.13,,Other noncoding 
+11,urn:mavedb:00000094-a-5,MAEAPPRRLGLGPPPGDAPRAELVALTAVQSEQGEAGGGGSPRRLGLLGSPLPPGAPLPGPGSGSGSACGQRSSAAHKRYRRLQNWVYNVLERPRGWAFVYHVFIFLLVFSCLVLSVLSTIQEHQELANECLLILEFVMIVVFGLEYIVRVWSAGCCCRYRGWQGRFRFARKPFCVIDFIVFVASVAVIAAGTQGNIFATSALRSMRFLQILRMVRMDRRGGTWKLLGSVVYAHSKELITAWYIGFLVLIFASFLVYLAEKDANSDFSSYADSLWWGTITLTTIGYGDKTPHTWLGRVLAAGFALLGISFFALPAGILGSGFALKVQEQHRQKHFEKRRMPAANLIQAAWRLYSTDMSRAYLTATWYYYDSILPSFRELALLFEHVQRARNGGLRPLEVRRAPVPDGAPSRYPPVATCHRPGSTSFCPGESSRMGIKDRIRMGSSQRRTGPSKQHLAPPTMPTSPSSEQVGEATSPTKVQKSWSFNDRTRFRASLRLKPRTSAEDAPSEEVAEEKSYQCELTVDDIMPAVKTVIRSIRILKFLVAKRKFKETLRPYDVKDVIEQYSAGHLDMLGRIKSLQTRVDQIVGRGPGDRKAREKGDKGPSDAEVVDEISMMGRVVKVEKQVQSIEHKLDLLLGFYSRCLRSGTSASLGAVQVPLFDPDITSDYHSPVDHEDISVSAQTLSISRSVSTNMD,protein,KCNQ4,GCF_000001405.26,,Protein coding +12,urn:mavedb:00000043-a-2,ACCGAGACCGCCTGGATCTCCTTGGTGACCGCTCTGCATCTAGTGCTGGGCCTCAACGCCGTCCTGGGCCTGCTGCTGCTGAGGTGGCAGTTT,dna,S505N MPL,GCF_000001405.10,P40238,Protein coding +13,urn:mavedb:00000055-0-1,ATGACGGCCAGCGCACAGCCGCGCGGGCGGCGGCCAGGAGTCGGAGTCGGAGTCGTGGTGACCAGCTGCAAGCATCCGCGTTGCGTCCTCCTGGGGAAGAGGAAAGGCTCGGTTGGAGCTGGCAGTTTCCAACTCCCTGGAGGTCATCTGGAGTTCGGTGAAACCTGGGAAGAATGTGCTCAAAGGGAAACCTGGGAAGAAGCAGCTCTTCACCTGAAAAATGTTCACTTTGCCTCAGTTGTGAATTCTTTCATTGAGAAGGAGAATTACCATTATGTTACTATATTAATGAAAGGAGAAGTGGATGTGACTCATGATTCAGAACCAAAGAATGTAGAGCCTGAAAAAAATGAAAGTTGGGAGTGGGTTCCTTGGGAAGAACTACCTCCCCTGGACCAGCTTTTCTGGGGACTGCGTTGTTTAAAAGAACAAGGCTATGATCCATTTAAAGAAGATCTGAACCATCTGGTGGGATACAAAGGAAATCATCTCTAG,dna,NUDT15,GCF_000001405.26,Q9NV35,Protein coding 
+14,urn:mavedb:00000104-a-2,CGGTGGCCGGTGCGGCGTGTTCGGTGGCGGCTCTGGCCGCTCAGGCGCCTGCGGCTGGGTGAGCGCACGCGAGGCGGCGAGGCGGCAGCGTGTTTCTAGGTCGTGGCGTCGGGCTTCCGGAGCTTTGGCGGCAGCTAGGGGAGGATGGCGGAGTCTTCGGATAAGCTCTATCGAGTCGAGTACGCCAAGAGCGGGCGCGCCTCTTGCAAGAAATGCAGCGAGAGCATCCCCAAGGACTCGCTCCGGATGGCCATCATGGTGCAGTCGCCCATGTTTGATGGAAAAGTCCCACACTGGTACCACTTCTCCTGCTTCTGGAAGGTGGGCCACTCCATCCGGCACCCTGACGTTGAGGTGGATGGGTTCTCTGAGCTTCGGTGGGATGACCAGCAGAAAGTCAAGAAGACAGCGGAAGCTGGAGGAGTGACAGGCAAAGGCCAGGATGGAATTGGTAGCAAGGCAGAGAAGACTCTGGGTGACTTTGCAGCAGAGTATGCCAAGTCCAACAGAAGTACGTGCAAGGGGTGTATGGAGAAGATAGAAAAGGGCCAGGTGCGCCTGTCCAAGAAGATGGTGGACCCGGAGAAGCCACAGCTAGGCATGATTGACCGCTGGTACCATCCAGGCTGCTTTGTCAAGAACAGGGAGGAGCTGGGTTTCCGGCCCGAGTACAGTGCGAGTCAGCTCAAGGGCTTCAGCCTCCTTGCTACAGAGGATAAAGAAGCCCTGAAGAAGCAGCTCCCAGGAGTCAAGAGTGAAGGAAAGAGAAAAGGCGATGAGGTGGATGGAGTGGATGAAGTGGCGAAGAAGAAATCTAAAAAAGAAAAAGACAAGGATAGTAAGCTTGAAAAAGCCCTAAAGGCTCAGAACGACCTGATCTGGAACATCAAGGACGAGCTAAAGAAAGTGTGTTCAACTAATGACCTGAAGGAGCTACTCATCTTCAACAAGCAGCAAGTGCCTTCTGGGGAGTCGGCGATCTTGGACCGAGTAGCTGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGATGCCTATTACTGCACTGGGGACGTCACTGCCTGGACCAAGTGTATGGTCAAGACACAGACACCCAACCGGAAGGAGTGGGTAACCCCAAAGGAATTCCGAGAAATCTCTTACCTCAAGAAATTGAAGGTTAAAAAACAGGACCGTATATTCCCCCCAGAAACCAGCGCCTCCGTGGCGGCCACGCCTCCGCCCTCCACAGCCTCGGCTCCTGCTGCTGTGAACTCCTCTGCTTCAGCAGATAAGCCATTATCCAACATGAAGATCCTGACTCTCGGGAAGCTGTCCCGGAACAAGGATGAAGTGAAGGCCATGATTGAGAAACTCGGGGGGAAGTTGACGGGGACGGCCAACAAGGCTTCCCTGTGCATCAGCACCAAAAAGGAGGTGGAAAAGATGAATAAGAAGATGGAGGAAGTAAAGGAAGCCAACATCCGAGTTGTGTCTGAGGACTTCCTCCAGGACGTCTCCGCCTCCACCAAGAGCCTTCAGGAGTTGTTCTTAGCGCACATCTTGTCCCCTTGGGGGGCAGAGGTGAAGGCAGAGCCTGTTGAAGTTGTGGCCCCAAGAGGGAAGTCAGGGGCTGCGCTCTCCAAAAAAAGCAAGGGCCAGGTCAAGGAGGAAGGTATCAACAAATCTGAAAAGAGAATGAAATTAACTCTTAAAGGAGGAGCAGCTGTGGATCCTGATTCTGGACTGGAACACTCTGCGCATGTCCTGGAGAAAGGTGGGAAGGTCTTCAGTGCCACCCTTGGCCTGGTGGACATCGTTAAAGGAACCAACTCCTACTACAAGCTGCAGCTTCTGGAGGACGACAAGGAAAACAGGTATTGGATATTCAGGTCCTGGGGCCGTGTGGGTACGGTGATCGGTAGCAACAAACTGGAACAGATGCCGTCCAAGGAGGATG
CCATTGAGCACTTCATGAAATTATATGAAGAAAAAACCGGGAACGCTTGGCACTCCAAAAATTTCACGAAGTATCCCAAAAAGTTCTACCCCCTGGAGATTGACTATGGCCAGGATGAAGAGGCAGTGAAGAAGCTGACAGTAAATCCTGGCACCAAGTCCAAGCTCCCCAAGCCAGTTCAGGACCTCATCAAGATGATCTTTGATGTGGAAAGTATGAAGAAAGCCATGGTGGAGTATGAGATCGACCTTCAGAAGATGCCCTTGGGGAAGCTGAGCAAAAGGCAGATCCAGGCCGCATACTCCATCCTCAGTGAGGTCCAGCAGGCGGTGTCTCAGGGCAGCAGCGACTCTCAGATCCTGGATCTCTCAAATCGCTTTTACACCCTGATCCCCCACGACTTTGGGATGAAGAAGCCTCCGCTCCTGAACAATGCAGACAGTGTGCAGGCCAAGGTGGAAATGCTTGACAACCTGCTGGACATCGAGGTGGCCTACAGTCTGCTCAGGGGAGGGTCTGATGATAGCAGCAAGGATCCCATCGATGTCAACTATGAGAAGCTCAAAACTGACATTAAGGTGGTTGACAGAGATTCTGAAGAAGCCGAGATCATCAGGAAGTATGTTAAGAACACTCATGCAACCACACACAATGCGTATGACTTGGAAGTCATCGATATCTTTAAGATAGAGCGTGAAGGCGAATGCCAGCGTTACAAGCCCTTTAAGCAGCTTCATAACCGAAGATTGCTGTGGCACGGGTCCAGGACCACCAACTTTGCTGGGATCCTGTCCCAGGGTCTTCGGATAGCCCCGCCTGAAGCGCCCGTGACAGGCTACATGTTTGGTAAAGGGATCTATTTCGCTGACATGGTCTCCAAGAGTGCCAACTACTGCCATACGTCTCAGGGAGACCCAATAGGCTTAATCCTGTTGGGAGAAGTTGCCCTTGGAAACATGTATGAACTGAAGCACGCTTCACATATCAGCAAGTTACCCAAGGGCAAGCACAGTGTCAAAGGTTTGGGCAAAACTACCCCTGATCCTTCAGCTAACATTAGTCTGGATGGTGTAGACGTTCCTCTTGGGACCGGGATTTCATCTGGTGTGAATGACACCTCTCTACTATATAACGAGTACATTGTCTATGATATTGCTCAGGTAAATCTGAAGTATCTGCTGAAACTGAAATTCAATTTTAAGACCTCCCTGTGGTAATTGGGAGAGGTAGCCGAGTCACACCCGGTGGCTCTGGTATGAATTCACCCGAAGCGCTTCTGCACCAACTCACCTGGCCGCTAAGTTGCTGATGGGTAGTACCTGTACTAAACCACCTCAGAAAGGATTTTACAGAAACGTGTTAAAGGTTTTCTCTAACTTCTCAAGTCCCTTGTTTTGTGTTGTGTCTGTGGGGAGGGGTTGTTTTGGGGTTGTTTTTGTTTTTTCTTGCCAGGTAGATAAAACTGACATAGAGAAAAGGCTGGAGAGAGATTCTGTTGCATAGACTAGTCCTATGGAAAAAACCAAGCTTCGTTAGAATGTCTGCCTTACTGGTTTCCCCAGGGAAGGAAAAATACACTTCCACCCTTTTTTCTAAGTGTTCGTCTTTAGTTTTGATTTTGGAAAGATGTTAAGCATTTATTTTTAGTTAAAAATAAAAACTAATTTCATACTATTTAGATTTTCTTTTTTATCTTGCACTTATTGTCCCCTTTTTAGTTTTTTTTGTTTGCCTCTTGTGGTGAGGGGTGTGGGAAGACCAAAGGAAGGAACGCTAACAATTTCTCATACTTAGAAACAAAAAGAGCTTTCCTTCTCCAGGAATACTGAACATGGGAGCTCTTGAAATATGTAGTATTAAAAGTTGCATTTGAAATTCTTGACTTTCTTATGGGCACTTTTGTCTTCCAAATTAAAACTCTACCACAAATATACTTACCCAAGGGCTAATAGTAATACTCGATTAAAAATGCAGATGCCTTCTCTA,dna,PARP1,GCF
_000001405.26,,Protein coding +15,urn:mavedb:00000005-a-6,ATGCCTTCTGAGACCCCCCAGGCAGAAGTGGGGCCCACAGGCTGCCCCCACCGCTCAGGGCCACACTCGGCGAAGGGGAGCCTGGAGAAGGGGTCCCCAGAGGATAAGGAAGCCAAGGAGCCCCTGTGGATCCGGCCCGATGCTCCGAGCAGGTGCACCTGGCAGCTGGGCCGGCCTGCCTCCGAGTCCCCACATCACCACACTGCCCCGGCAAAATCTCCAAAAATCTTGCCAGATATTCTGAAGAAAATCGGGGACACCCCTATGGTCAGAATCAACAAGATTGGGAAGAAGTTCGGCCTGAAGTGTGAGCTCTTGGCCAAGTGTGAGTTCTTCAACGCGGGCGGGAGCGTGAAGGACCGCATCAGCCTGCGGATGATTGAGGATGCTGAGCGCGACGGGACGCTGAAGCCCGGGGACACGATTATCGAGCCGACATCCGGGAACACCGGGATCGGGCTGGCCCTGGCTGCGGCAGTGAGGGGCTATCGCTGCATCATCGTGATGCCAGAGAAGATGAGCTCCGAGAAGGTGGACGTGCTGCGGGCACTGGGGGCTGAGATTGTGAGGACGCCCACCAATGCCAGGTTCGACTCCCCGGAGTCACACGTGGGGGTGGCCTGGCGGCTGAAGAACGAAATCCCCAATTCTCACATCCTAGACCAGTACCGCAACGCCAGCAACCCCCTGGCTCACTACGACACCACCGCTGATGAGATCCTGCAGCAGTGTGATGGGAAGCTGGACATGCTGGTGGCTTCAGTGGGCACGGGCGGCACCATCACGGGCATTGCCAGGAAGCTGAAGGAGAAGTGTCCTGGATGCAGGATCATTGGGGTGGATCCCGAAGGGTCCATCCTCGCAGAGCCGGAGGAGCTGAACCAGACGGAGCAGACAACCTACGAGGTGGAAGGGATCGGCTACGACTTCATCCCCACGGTGCTGGACAGGACGGTGGTGGACAAGTGGTTCAAGAGCAACGATGAGGAGGCGTTCACCTTTGCCCGCATGCTGATCGCGCAAGAGGGGCTGCTGTGCGGTGGCAGTGCTGGCAGCACGGTGGCGGTGGCCGTGAAGGCCGCGCAGGAGCTGCAGGAGGGCCAGCGCTGCGTGGTCATTCTGCCCGACTCAGTGCGGAACTACATGACCAAGTTCCTGAGCGACAGGTGGATGCTGCAGAAGGGCTTTCTGAAGGAGGAGGACCTCACGGAGAAGAAGCCCTGGTGGTGGCACCTCCGTGTTCAGGAGCTGGGCCTGTCAGCCCCGCTGACCGTGCTCCCGACCATCACCTGTGGGCACACCATCGAGATCCTCCGGGAGAAGGGCTTCGACCAGGCGCCCGTGGTGGATGAGGCGGGGGTAATCCTGGGAATGGTGACGCTTGGGAACATGCTCTCGTCCCTGCTTGCCGGGAAGGTGCAGCCGTCAGACCAAGTTGGCAAAGTCATCTACAAGCAGTTCAAACAGATCCGCCTCACGGACACGCTGGGCAGGCTCTCGCACATCCTGGAGATGGACCACTTCGCCCTGGTGGTGCACGAGCAGATCCAGTACCACAGCACCGGGAAGTCCAGTCAGCGGCAGATGGTGTTCGGGGTGGTCACCGCCATTGACTTGCTGAACTTCGTGGCCGCCCAGGAGCGGGACCAGAAGTGA,dna,CBS,GCF_000001405.26,P35520,Protein coding +16,urn:mavedb:00000083-h-1,AGTTCGCGCCGGGAGCTCCAGCACAGTGAAATGGACAGAAGGGCAGAGCAA,dna,Minigene exon - CG-containing enhancer,GCF_000001405.13,,Other noncoding 
+17,urn:mavedb:00000098-a-1,LFRVIRLARIGR,protein,SCN5A,GCF_000001405.26,Q14524,Protein coding +18,urn:mavedb:00000108-a-2,ATGTCTGGTAACGGCAATGCGGCTGCAACGGCGGAAGAAAACAGCCCAAAGATGAGAGTGATTCGCGTGGGTACCCGCAAGAGCCAGCTTGCTCGCATACAGACGGACAGTGTGGTGGCAACATTGAAAGCCTCGTACCCTGGCCTGCAGTTTGAAATCATTGCTATGTCCACCACAGGGGACAAGATTCTTGATACTGCACTCTCTAAGATTGGAGAGAAAAGCCTGTTTACCAAGGAGCTTGAACATGCCCTGGAGAAGAATGAAGTGGACCTGGTTGTTCACTCCTTGAAGGACCTGCCCACTGTGCTTCCTCCTGGCTTCACCATCGGAGCCATCTGCAAGCGGGAAAACCCTCATGATGCTGTTGTCTTTCACCCAAAATTTGTTGGGAAGACCCTAGAAACCCTGCCAGAGAAGAGTGTGGTGGGAACCAGCTCCCTGCGAAGAGCAGCCCAGCTGCAGAGAAAGTTCCCGCATCTGGAGTTCAGGAGTATTCGGGGAAACCTCAACACCCGGCTTCGGAAGCTGGACGAGCAGCAGGAGTTCAGTGCCATCATCCTGGCAACAGCTGGCCTGCAGCGCATGGGCTGGCACAACCGGGTGGGGCAGATCCTGCACCCTGAGGAATGCATGTATGCTGTGGGCCAGGGGGCCTTGGGCGTGGAAGTGCGAGCCAAGGACCAGGACATCTTGGATCTGGTGGGTGTGCTGCACGATCCCGAGACTCTGCTTCGCTGCATCGCTGAAAGGGCCTTCCTGAGGCACCTGGAAGGAGGCTGCAGTGTGCCAGTAGCCGTGCATACAGCTATGAAGGATGGGCAACTGTACCTGACTGGAGGAGTCTGGAGTCTAGACGGCTCAGATAGCATACAAGAGACCATGCAGGCTACCATCCATGTCCCTGCCCAGCATGAAGATGGCCCTGAGGATGACCCACAGTTGGTAGGCATCACTGCTCGTAACATTCCACGAGGGCCCCAGTTGGCTGCCCAGAACTTGGGCATCAGCCTGGCCAACTTGTTGCTGAGCAAAGGAGCCAAAAACATCCTGGATGTTGCACGGCAGCTTAACGATGCCCATTAA,dna,HMBS,GCF_000001405.26,P08397,Protein coding +19,urn:mavedb:00000001-a-4,ATGTCGGGGATCGCCCTCAGCAGACTCGCCCAGGAGAGGAAAGCATGGAGGAAAGACCACCCATTTGGTTTCGTGGCTGTCCCAACAAAAAATCCCGATGGCACGATGAACCTCATGAACTGGGAGTGCGCCATTCCAGGAAAGAAAGGGACTCCGTGGGAAGGAGGCTTGTTTAAACTACGGATGCTTTTCAAAGATGATTATCCATCTTCGCCACCAAAATGTAAATTCGAACCACCATTATTTCACCCGAATGTGTACCCTTCGGGGACAGTGTGCCTGTCCATCTTAGAGGAGGACAAGGACTGGAGGCCAGCCATCACAATCAAACAGATCCTATTAGGAATACAGGAACTTCTAAATGAACCAAATATCCAAGACCCAGCTCAAGCAGAGGCCTACACGATTTACTGCCAAAACAGAGTGGAGTACGAGAAAAGGGTCCGAGCACAAGCCAAGAAGTTTGCGCCCTCATAA,dna,UBE2I,GCF_000001405.26,P63279,Protein coding 
+20,urn:mavedb:00000094-a-6,MAEAPPRRLGLGPPPGDAPRAELVALTAVQSEQGEAGGGGSPRRLGLLGSPLPPGAPLPGPGSGSGSACGQRSSAAHKRYRRLQNWVYNVLERPRGWAFVYHVFIFLLVFSCLVLSVLSTIQEHQELANECLLILEFVMIVVFGLEYIVRVWSAGCCCRYRGWQGRFRFARKPFCVIDFIVFVASVAVIAAGTQGNIFATSALRSMRFLQILRMVRMDRRGGTWKLLGSVVYAHSKELITAWYIGFLVLIFASFLVYLAEKDANSDFSSYADSLWWGTITLTTIGYGDKTPHTWLGRVLAAGFALLGISFFALPAGILGSGFALKVQEQHRQKHFEKRRMPAANLIQAAWRLYSTDMSRAYLTATWYYYDSILPSFRELALLFEHVQRARNGGLRPLEVRRAPVPDGAPSRYPPVATCHRPGSTSFCPGESSRMGIKDRIRMGSSQRRTGPSKQHLAPPTMPTSPSSEQVGEATSPTKVQKSWSFNDRTRFRASLRLKPRTSAEDAPSEEVAEEKSYQCELTVDDIMPAVKTVIRSIRILKFLVAKRKFKETLRPYDVKDVIEQYSAGHLDMLGRIKSLQTRVDQIVGRGPGDRKAREKGDKGPSDAEVVDEISMMGRVVKVEKQVQSIEHKLDLLLGFYSRCLRSGTSASLGAVQVPLFDPDITSDYHSPVDHEDISVSAQTLSISRSVSTNMD,protein,KCNQ4,GCF_000001405.26,,Protein coding +21,urn:mavedb:00000078-a-1,MGSTWGSPGWVRLALCLTGLVLSLYALHVKAARARDRDYRALCDVGTAISCSRVFSSRWGRGFGLVEHVLGQDSILNQSNSIFGCIFYTLQLLLGCLRTRWASVLMLLSSLVSLAGSVYLAWILFFVLYDFCIVCITTYAINVSLMWLSFRKVQEPQGKAKRH,protein,VKOR,GCF_000001405.26,,Protein coding +22,urn:mavedb:00000103-c-1,MAAAAAAGAGPEMVRGQVFDVGPRYTNLSYIGEGAYGMVCSAYDNVNKVRVAIKKISPFEHQTYCQRTLREIKILLRFRHENIIGINDIIRAPTIEQMKDVYIVQDLMETDLYKLLKTQHLSNDHICYFLYQILRGLKYIHSANVLHRDLKPSNLLLNTTCDLKICDFGLARVADPDHDHTGFLTEYVATRWYRAPEIMLNSKGYTKSIDIWSVGCILAEMLSNRPIFPGKHYLDQLNHILGILGSPSQEDLNCIINLKARNYLLSLPHKNKVPWNRLFPNADSKALDLLDKMLTFNPHKRIEVEQALAHPYLEQYYDPSDEPIAEAPFKFDMELDDLPKEKLKELIFEETARFQPGYRS,protein,MAPK1,GCF_000001405.26,,Protein coding +23,urn:mavedb:00000043-a-1,ACCGAGACCGCCTGGATCTCCTTGGTGACCGCTCTGCATCTAGTGCTGGGCCTCAGCGCCGTCCTGGGCCTGCTGCTGCTGAGGTGGCAGTTT,dna,MPL,GCF_000001405.10,P40238,Protein coding +24,urn:mavedb:00000061-d-1,TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC,dna,RAF,GCF_000001405.26,P04049,Protein coding +25,urn:mavedb:00000046-a-1,GACCACATTCCTTGGATTACAGCTGTACTTCCAACAGTTATTATATGTGTGATGGTTTTCTGTCTAATTCTATGGAAATGG,dna,CD86,GCF_000001405.10,P42081,Protein coding 
+26,urn:mavedb:00000081-a-1,MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRSLQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPENPSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEISLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENSSLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPCSENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLLASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKRKRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEKESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSEEIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKEFVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLGKAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVSKRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKDKPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSMSPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKLNAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDDLLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELPCFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLFSSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEAASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALEDLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHSCSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRAPESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSGLTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKMLNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTLGTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY,protein,BRCA1,GCF_000001405.13,P38398,Protein coding 
+27,urn:mavedb:00000097-r-1,TTTCTTTCAGCATGATTTTGAAGTCAGAGGAGATGTGGTCAATGGAAGAAACCACCAAGGTCCAAAGCGAGCAAGAGAATCCCAGGACAGAAAGGTAAAGCTCC,dna,BRCA1 Exon 19,GCF_000001405.13,,Protein coding +28,urn:mavedb:00000067-a-1,ATGAAGCATTACGAGGTGGAGATTCTGGACGCAAAGACAAGGGAGAAGCTGTGTTTCTTGGACAAGGTGGAGCCCCACGCCACCATTGCGGAGATCAAGAACCTCTTCACTAAGACCCATCCGCAGTGGTACCCCGCCCGCCAGTCCCTCCGCCTGGACCCCAAGGGCAAGTCCCTGAAGGATGAGGATGTTCTGCAGAAGCTGCCCGTGGGCACCACGGCCACACTGTACTTCCGGGACCTGGGGGCCCAGATCAGCTGGGTGACGGTCTTCCTAACAGAGTACGCGGGGCCCCTTTTCATCTACCTGCTCTTCTACTTCCGAGTGCCCTTCATCTATGGCCACAAATATGACTTTACGTCCAGTCGGCATACAGTGGTGCACCTCGCCTGCATCTGTCACTCATTCCACTACATCAAGCGCCTGCTGGAGACGCTCTTCGTGCACCGCTTCTCCCATGGCACTATGCCTTTGCGCAACATCTTCAAGAACTGCACCTACTACTGGGGCTTCGCCGCGTGGATGGCCTATTACATCAATCACCCTCTCTACACTCCCCCTACCTACGGAGCTCAGCAGGTGAAACTGGCGCTCGCCATCTTTGTGATCTGCCAGCTCGGCAACTTCTCCATCCACATGGCCCTGCGGGACCTGCGGCCCGCTGGGTCCAAGACGCGGAAGATCCCATACCCCACCAAGAACCCCTTCACGTGGCTCTTCCTGCTGGTGTCCTGCCCCAACTACACCTACGAGGTGGGGTCCTGGATCGGTTTCGCCATCATGACGCAGTGTCTCCCAGTGGCCCTGTTCTCCCTGGTGGGCTTCACCCAGATGACCATCTGGGCCAAGGGCAAGCACCGCAGCTACCTGAAGGAGTTCCGGGACTACCCGCCCCTGCGCATGCCCATCATCCCCTTCCTGCTCTGA,dna,TECR,GCF_000001405.26,Q9NZ01,Protein coding +29,urn:mavedb:00000001-c-2,ATGGCTGATCAGCTGACCGAAGAACAGATTGCTGAATTCAAGGAAGCCTTCTCCCTATTTGATAAAGATGGCGATGGCACCATCACAACAAAGGAACTTGGAACTGTCATGAGGTCACTGGGTCAGAACCCAACAGAAGCTGAATTGCAGGATATGATCAATGAAGTGGATGCTGATGGTAATGGCACCATTGACTTCCCCGAATTTTTGACTATGATGGCTAGAAAAATGAAAGATACAGATAGTGAAGAAGAAATCCGTGAGGCATTCCGAGTCTTTGACAAGGATGGCAATGGTTATATCAGTGCAGCAGAACTACGTCACGTCATGACAAACTTAGGAGAAAAACTAACAGATGAAGAAGTAGATGAAATGATCAGAGAAGCAGATATTGATGGAGACGGACAAGTCAACTATGAAGAATTCGTACAGATGATGACTGCAAAATGA,dna,CALM1,GCF_000001405.26,P0DP23,Protein coding 
+30,urn:mavedb:00000049-a-7,ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA,
dna,MTHFR,GCF_000001405.26,P42898,Protein coding +31,urn:mavedb:00000060-a-2,ATGTCTGAATATATTCGGGTAACCGAAGATGAGAACGATGAGCCCATTGAAATACCATCGGAAGACGATGGGACGGTGCTGCTCTCCACGGTTACAGCCCAGTTTCCAGGGGCGTGTGGGCTTCGCTACAGGAATCCAGTGTCTCAGTGTATGAGAGGTGTCCGGCTGGTAGAAGGAATTCTGCATGCCCCAGATGCTGGCTGGGGAAATCTGGTGTATGTTGTCAACTATCCAAAAGATAACAAAAGAAAAATGGATGAGACAGATGCTTCATCAGCAGTGAAAGTGAAAAGAGCAGTCCAGAAAACATCCGATTTAATAGTGTTGGGTCTCCCATGGAAAACAACCGAACAGGACCTGAAAGAGTATTTTAGTACCTTTGGAGAAGTTCTTATGGTGCAGGTCAAGAAAGATCTTAAGACTGGTCATTCAAAGGGGTTTGGCTTTGTTCGTTTTACGGAATATGAAACACAAGTGAAAGTAATGTCACAGCGACATATGATAGATGGACGATGGTGTGACTGCAAACTTCCTAATTCTAAGCAAAGCCAAGATGAGCCTTTGAGAAGCAGAAAAGTGTTTGTGGGGCGCTGTACAGAGGACATGACTGAGGATGAGCTGCGGGAGTTCTTCTCTCAGTACGGGGATGTGATGGATGTCTTCATCCCCAAGCCATTCAGGGCCTTTGCCTTTGTTACATTTGCAGATGATCAGATTGCGCAGTCTCTTTGTGGAGAGGACTTGATCATTAAAGGAATCAGCGTTCATATATCCAATGCCGAACCTAAGCACAATAGCAATAGACAGTTAGAAAGAAGTGGAAGATTTGGTGGTAATCCAGGTGGCTTTGGGAATCAGGGTGGATTTGGTAATAGCAGAGGGGGTGGAGCTGGTTTGGGAAACAATCAAGGTAGTAATATGGGTGGTGGGATGAACTTTGGTGCGTTCAGCATTAATCCAGCCATGATGGCTGCCGCCCAGGCAGCACTACAGAGCAGTTGGGGTATGATGGGCATGTTAGCCAGCCAGCAGAACCAGTCAGGCCCATCGGGTAATAACCAAAACCAAGGCAACATGCAGAGGGAGCCAAACCAGGCCTTCGGTTCTGGAAATAACTCTTATAGTGGCTCTAATTCTGGTGCAGCAATTGGTTGGGGATCAGCATCCAATGCAGGGTCGGGCAGTGGTTTTAATGGAGGCTTTGGCTCAAGCATGGATTCTAAGTCTTCTGGCTGGGGAATG,dna,TARDBP,GCF_000001405.13,Q13148,Protein coding +32,urn:mavedb:00000058-a-1,GATGCAGAATTCCGACATGACTCAGGATATGAAGTTCATCATCAAAAATTGGTGTTCTTTGCAGAAGATGTGGGTTCAAACAAAGGTGCAATCATTGGACTCATGGTGGGCGGTGTTGTCATAGCG,dna,Aβ42,GCF_000001405.26,P05067,Protein coding 
+33,urn:mavedb:00000045-k-1,ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA,dna,alpha-synuclein,GCF_000001405.10,P37840,Protein coding +34,urn:mavedb:00000094-a-14,MAAASSPPRAERKRWGWGRLPGARRGSAGLAKKCPFSLELAEGGPAGGALYAPIAPGAPGPAPPASPAAPAAPPVASDLGPRPPVSLDPRVSIYSTRRPVLARTHVQGRVYNFLERPTGWKCFVYHFAVFLIVLVCLIFSVLSTIEQYAALATGTLFWMEIVLVVFFGTEYVVRLWSAGCRSKYVGLWGRLRFARKPISIIDLIVVVASMVVLCVGSKGQVFATSAIRGIRFLQILRMLHVDRQGGTWRLLGSVVFIHRQELITTLYIGFLGLIFSSYFVYLAEKDAVNESGRVEFGSYADALWWGVVTVTTIGYGDKVPQTWVGKTIASCFSVFAISFFALPAGILGSGFALKVQQKQRQKHFNRQIPAAASLIQTAWRCYAAENPDSSTWKIYIRKAPRSHTLLSPSPKPKKSVVVKKKKFKLDKDNGVTPGEKMLTVPHITCDPPEERRLDHFSVDGYDSSVRKSPTLLEVSMPHFMRTNSFAEDLDLEGETLLTPITHISQLREHHRATIKVIRRMQYFVAKKKFQQARKPYDVRDVIEQYSQGHLNLMVRIKELQRRLDQSIGKPSLFISVSEKSKDRGSNTIGARLNRVEDKVTQLDQRLALITDMLHQLLSLHGGSTPGSGGPPREGGAHITQPCGSGGSVDPELFLPSNTLPTYEQLTVPRRGPDEGS,protein,KCNQ1,GCF_000001405.26,,Protein coding 
+35,urn:mavedb:00000069-a-2,MSSSSWLLLSLVAVTAAQSTIEEQAKTFLDKFNHEAEDLFYQSSLASWNYNTNITEENVQNMNNAGDKWSAFLKEQSTLAQMYPLQEIQNLTVKLQLQALQQNGSSVLSEDKSKRLNTILNTMSTIYSTGKVCNPDNPQECLLLEPGLNEIMANSLDYNERLWAWESWRSEVGKQLRPLYEEYVVLKNEMARANHYEDYGDYWRGDYEVNGVDGYDYSRGQLIEDVEHTFEEIKPLYEHLHAYVRAKLMNAYPSYISPIGCLPAHLLGDMWGRFWTNLYSLTVPFGQKPNIDVTDAMVDQAWDAQRIFKEAEKFFVSVGLPNMTQGFWENSMLTDPGNVQKAVCHPTAWDLGKGDFRILMCTKVTMDDFLTAHHEMGHIQYDMAYAAQPFLLRNGANEGFHEAVGEIMSLSAATPKHLKSIGLLSPDFQEDNETEINFLLKQALTIVGTLPFTYMLEKWRWMVFKGEIPKDQWMKKWWEMKREIVGVVEPVPHDETYCDPASLFHVSNDYSFIRYYTRTLYQFQFQEALCQAAKHEGPLHKCDISNSTEAGQKLFNMLRLGKSEPWTLALENVVGAKNMNVRPLLNYFEPLFTWLKDQNKNSFVGWSTDWSPYADQSIKVRISLKSALGDKAYEWNDNEMYLFRSSVAYAMRQYFLKVKNQMILFGEEDVRVANLKPRISFNFFVTAPKNVSDIIPRTEVEKAIRMSRSRINDAFRLNDNSLEFLGIQPTLGPPNQPPVSIWLIVFGVVMGVIVVGIVILIFTGIRDRKKKNKARSGENPYASIDISKGENNPGFQNTDDVQTSF,protein,ACE2,GCF_000001405.26,Q9BYF1,Protein coding +36,urn:mavedb:00000106-b-1,MDSLLMNRRKFLYQFKNVRWAKGRRETYLCYVVKRRDSATSFSLDFGYLRNKNGCHVELLFLRYISDWDLDPGRCYRVTWFTSWSPCYDCARHVADFLRGNPNLSLRIFTARLYFCEDRKAEPEGLRRLHRAGVQIAIMTFKDYFYCWNTFVENHERTFKAWEGLHENSVRLSRQLRRILLPLYEVDDLRDAFRTLGL,protein,AID,GCF_000001405.26,,Protein coding +37,urn:mavedb:00000046-a-2,GACCACATTCCTTGGATTACAGCTGTACTTCCAACAGTTATTATATGTGTGATGGTTTTCTGTCTAATTCTATGGAAATGG,dna,CD86,GCF_000001405.10,P42081,Protein coding 
+38,urn:mavedb:00000069-a-1,MSSSSWLLLSLVAVTAAQSTIEEQAKTFLDKFNHEAEDLFYQSSLASWNYNTNITEENVQNMNNAGDKWSAFLKEQSTLAQMYPLQEIQNLTVKLQLQALQQNGSSVLSEDKSKRLNTILNTMSTIYSTGKVCNPDNPQECLLLEPGLNEIMANSLDYNERLWAWESWRSEVGKQLRPLYEEYVVLKNEMARANHYEDYGDYWRGDYEVNGVDGYDYSRGQLIEDVEHTFEEIKPLYEHLHAYVRAKLMNAYPSYISPIGCLPAHLLGDMWGRFWTNLYSLTVPFGQKPNIDVTDAMVDQAWDAQRIFKEAEKFFVSVGLPNMTQGFWENSMLTDPGNVQKAVCHPTAWDLGKGDFRILMCTKVTMDDFLTAHHEMGHIQYDMAYAAQPFLLRNGANEGFHEAVGEIMSLSAATPKHLKSIGLLSPDFQEDNETEINFLLKQALTIVGTLPFTYMLEKWRWMVFKGEIPKDQWMKKWWEMKREIVGVVEPVPHDETYCDPASLFHVSNDYSFIRYYTRTLYQFQFQEALCQAAKHEGPLHKCDISNSTEAGQKLFNMLRLGKSEPWTLALENVVGAKNMNVRPLLNYFEPLFTWLKDQNKNSFVGWSTDWSPYADQSIKVRISLKSALGDKAYEWNDNEMYLFRSSVAYAMRQYFLKVKNQMILFGEEDVRVANLKPRISFNFFVTAPKNVSDIIPRTEVEKAIRMSRSRINDAFRLNDNSLEFLGIQPTLGPPNQPPVSIWLIVFGVVMGVIVVGIVILIFTGIRDRKKKNKARSGENPYASIDISKGENNPGFQNTDDVQTSF,protein,ACE2,GCF_000001405.26,,Protein coding +39,urn:mavedb:00000034-b-1,TGAGATATGGCTTCATTTTCTGTAATAAACACTAAGATCAAAACATGACCCAAGTTAAATTTCCTTGCAGGGTTCCCAGCAGGGGCTTCCCTTTTGTCTGTGATTTCCTCTCACCCACCAGAACCAGGCCAAATATGCGCATGTGCCACTAACACTAAGCAGCACTTCCTTAATCACTCATTTCCAACAATTTATGGATCATCAGTGGCAAAAAACGAGCAAAAATAATGAAAGAATGCAATGAAAGCTCGTGGAGACAGAGGCTGGACTTCCTACTCACTCTGTGTCTCTTTAAGATGGAGGCCTGATACAAATTAGCCACTGGGGGGAAAAAGTCATCTGGTCATAAAATACAGTACAAGGTCACTTTTATGTAAGTTTGCCAAAAGGGACATAAACCAGGACAATTTCAAACTGTGACACAGGATAGAAACATATTAAAAAAATCTTTGTTCCTCCTCTATTGTGCTGTCATGTTGCTCAGCA,dna,ZRS enhancer,GCF_000001405.26,,Regulatory 
+40,urn:mavedb:00000049-a-8,ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA,
dna,MTHFR,GCF_000001405.26,P42898,Protein coding +41,urn:mavedb:00000047-c-1,GATTATCAAGTGTCAAGTCCAATCTATGACATCAATTATTATACATCGGAGCCCTGCCAAAAAATCAATGTGAAGCAAATCGCAGCCCGCCTCCTGCCTCCGCTCTACTCACTGGTGTTCATCTTTGGTTTTGTGGGCAACATGCTGGTCATCCTCATCCTGATAAACTGCAAAAGGCTGAAGAGCATGACTGACATCTACCTGCTCAACCTGGCCATCTCTGACCTGTTTTTCCTTCTTACTGTCCCCTTCTGGGCTCACTATGCTGCCGCCCAGTGGGACTTTGGAAATACAATGTGTCAACTCTTGACAGGGCTCTATTTTATAGGCTTCTTCTCTGGAATCTTCTTCATCATCCTCCTGACAATCGATAGGTACCTGGCTGTCGTCCATGCTGTGTTTGCTTTAAAAGCCAGGACGGTCACCTTTGGGGTGGTGACAAGTGTGATCACTTGGGTGGTGGCTGTGTTTGCGTCTCTCCCAGGAATCATCTTTACCAGATCTCAAAAAGAAGGTCTTCATTACACCTGCAGCTCTCATTTTCCATACAGTCAGTATCAATTCTGGAAGAATTTCCAGACATTAAAGATAGTCATCTTGGGGCTGGTCCTGCCGCTGCTTGTCATGGTCATCTGCTACTCGGGAATCCTAAAAACTCTGCTTCGGTGTCGAAATGAGAAGAAGAGGCACAGGGCTGTGAGGCTTATCTTCACCATCATGATTGTTTATTTTCTCTTCTGGGCTCCCTACAACATTGTCCTTCTCCTGAACACCTTCCAGGAATTCTTTGGCCTGAATAATTGCAGTAGCTCTAACAGGTTGGACCAAGCTATGCAGGTGACAGAGACTCTTGGGATGACGCACTGCTGCATCAACCCCATCATCTATGCCTTTGTCGGGGAGAAGTTCAGAAACTACCTCTTAGTCTTCTTCCAAAAGCACATTGCCAAACGCTTCTGCAAATGCTGTTCTATTTTCCAGCAAGAGGCTCCCGAGCGAGCAAGCTCAGTTTACACCCGATCCACTGGGGAGCAGGAAATATCTGTGGGCTTG,dna,CCR5,GCF_000001405.26,P51681,Protein coding +42,urn:mavedb:00000094-a-8,MAEAPPRRLGLGPPPGDAPRAELVALTAVQSEQGEAGGGGSPRRLGLLGSPLPPGAPLPGPGSGSGSACGQRSSAAHKRYRRLQNWVYNVLERPRGWAFVYHVFIFLLVFSCLVLSVLSTIQEHQELANECLLILEFVMIVVFGLEYIVRVWSAGCCCRYRGWQGRFRFARKPFCVIDFIVFVASVAVIAAGTQGNIFATSALRSMRFLQILRMVRMDRRGGTWKLLGSVVYAHSKELITAWYIGFLVLIFASFLVYLAEKDANSDFSSYADSLWWGTITLTTIGYGDKTPHTWLGRVLAAGFALLGISFFALPAGILGSGFALKVQEQHRQKHFEKRRMPAANLIQAAWRLYSTDMSRAYLTATWYYYDSILPSFRELALLFEHVQRARNGGLRPLEVRRAPVPDGAPSRYPPVATCHRPGSTSFCPGESSRMGIKDRIRMGSSQRRTGPSKQHLAPPTMPTSPSSEQVGEATSPTKVQKSWSFNDRTRFRASLRLKPRTSAEDAPSEEVAEEKSYQCELTVDDIMPAVKTVIRSIRILKFLVAKRKFKETLRPYDVKDVIEQYSAGHLDMLGRIKSLQTRVDQIVGRGPGDRKAREKGDKGPSDAEVVDEISMMGRVVKVEKQVQSIEHKLDLLLGFYSRCLRSGTSASLGAVQVPLFDPDITSDYHSPVDHEDISVSAQTLSISRSVSTNMD,protein,KCNQ4,GCF_000001405.26,,Protein coding 
+43,urn:mavedb:00000041-b-1,GGTAGCAACAAGAGCAAGCCCAAGGATGCCAGCCAGCGGCGCCGCAGCCTGGAG,dna,Src SH4 domain,GCF_000001405.26,P12931,Protein coding +44,urn:mavedb:00000097-o-1,TGTAACCTGTCTTTTCTATGATCTCTTTAGGGGTGACCCAGTCTATTAAAGAAAGAAAAATGCTGAATGAGGTAAGTACTTGATGTTACAAACTAACCAGA,dna,BRCA1 Exon 18,GCF_000001405.13,,Protein coding +45,urn:mavedb:00000045-a-1,ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA,dna,alpha-synuclein,GCF_000001405.10,P37840,Protein coding +46,urn:mavedb:00000097-0-1,AAGTTCATTGGAACAGAAAGAAATGGATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAATCTTAGAGTGTCCCATCTGGTAAGTCAGCACAAGAGTGTATTAATTTGGGATTCCTATGATTATCTCCTATGCAAATGAACAGAATTGACCTTACATACTAGGGAAGAAAAGACATGTCTAGTAAGATTAGGCTATTGTAATTGCTGATTTTCTTAACTGAAGAACTTTAAAAATATAGAAAATGATTCCTTGTTCTCCATCCACTCTGCCTCTCCCACTCCTCTCCTTTTCAACACAAATCCTGTGGTCCGGGAAAGACAGGGACTCTGTCTTGATTGGTTCTGCACTGGGGCAGGAATCTAGTTTAGATTAACTGGCATTTTGGCTTTTCTTCCAGCTCTAAAACAAGCTCCATCACTTGAAATGGCAAAATAAAATCATGGATGAGGCCGAGGGCGGTGGCTTATGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGTGGTAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCCAACATGGTGAAACCCCCTCTCCACTAAAAATACAAAAATTAGCTGGGCGTAGTGGCATGTGCCTGTAATCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATCACTTGAACCAGGAGGCAGATGTTGCTGTGAGCCAATATGGCACCACTGAACTCCAGCGACAGAGCTAAACTCCATCTCAAAAAAAAAAAAAAAAAAAAAAAAACATGGATGATCGGTGTCGTTGAGAGGATAGGTATTTGGAAGAACCTTTGTTTGAAACTGGCTCTGTACATACAATGAAATTACATACTTATTTACATACAATGAAATGCAGAGGTTTTTTTTTTATATAGGATCTCTGTCGAGAGGCTGGAGTGCAGTGGTGCTATCACAGCTCACTGCAGCCTCAACCTCGTCAGGCTCAAGCAATCCTCCCACCTCAGCCTCCAGAGTAGCAGGGACGATAGGTGTGCACCACCATGCCCAGCTAATTTTTGTATTTTTTTTTCTTTTTTTGAGATGGAGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTGGCGCGATCTCAGCTCACTGCAAACTCTGCCTCCCGG
GTTCATGCCATTCTTCTGCCTGAGCCTCCTGAATAGCTGGGACTACAAGCACCCACTACCACGCCCGGCTAATTTTTTGTATTTTTTTTTCTTTTTTAGTAGAGGCGGGATTTCACCGTGTTAGCCAGGATAGTCTTGATCTCCTGACCTTGTGATCCACCCGCCTGGGCCTCCCAAAGTGCTAGGATTACAGGCATAAGCCACTGCGTCCAGCCATTCTTGTATTTTTCTGTTGTAGAGATAGGGTTTTGCTATGTTGGCCATGCTGGTCTCAAACTCCTGACCTCAAGTGATCTACCCTCCCTTGGCCTCTCAAGGTGCTGGGATTACAGGCCTGAGCCATTGCACCCAGCCATGGTCTAAAAATCTTGATTGAAATACCACCTTTTCATTTCCAGACACCCCTATTTAAAATTACCACACCCCCAGCACACACTTTATCTTCTATTCCTGCTGCTTCTCCATAACACTGATTACTAGCTGACATTCTATGTAATGTATCCATTTTTTATCTCTAGTCCCACAGAATGTAAACTCCAGGATGGGATTTTTGTTTTGTTTACATACATCTGTATGTTCAGTAGTTAGAACGGTACTTGGGACCTAGTTGCCACTCAATAAACATTTGTCAAATAAATAATAAACTAAACTAAATTAGTTCTTTAATTTTTTTAAATATGGTGATGGTTAGTAGTGAGTAACATTCAAAAAATAAGTTGAAAAGTTGTACCATTGCCTCTTACCCACAATAAAAAAGGGTAAATTCTTTTCTGCTTTATGAAAGTTGTTTTTCATATTTGAAGTCAAGTTAATCAGATTAAGGAAAATGTATGTTGTGTTTTCAGAGCGATACAAGATTTATAAATAACCATCCTCTCCCTTGCCCTTCAACATTATAGCTAAACAAAAATAAGAGGAAAACAGGATTCACAATTTATCAATTTATTGAAAATCAGAGCCAGAGAAGCAGGAAATGACATTGTAGGAAAAAACTGCTTTTGAAAAAGCACAAAACTTACTCATGACAATCAGTGATCAGGAAAATCCTCAATAGTGTGGCATTTGGATACATTTATGTTTCATTTCCATGGGAGAGAGTCATAAAAATAGGATGTTCTTTCTCATTCTGGCAAATTAAACCATCAATTAAAAACTCAGATACATAAAAATTAAAGATGTAAGAATGAAAATGCTAAATTGTTATTTTCAATCAACTATTATGTTTTCTAGCTTTTCATTGCTTTTTTCTGTTTCCTGTTAAGATTAATTTCTTTTTTTTTTTTTTTTTTTTTTTTTGAGACAGACTTTGGCTCTTGTTGCCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTACAACCTCCACCTCCCGGGTTCAAGCAATTCTGCTGCCTCAGCCTCCGGAGTACCTGGGATTGCAGGCATGTGCCATCACACCAGCTAATTTTGTATTTTTAGTAGAGACAGGGTTTCTCCATATTGGTCAGGTTGGTCTCGAACTCCTGACCTCAGGTGATCCTCCTGCCTTGGCCTCCGAAAGTGCTGGGATTACAGGCGTGAGCCACCGCTCCCAGACTTTTTGTTTTGTTTTGTTTTGTTTTTTTGAGACACGGTCTCGCTCTGCTGCCTAGGCTGGAGTGCAGTGGCACGATCTTGGCTCACTGCCAGCTCCGCCTCCCGGGTTCAGGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCACCACTATGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTAGCCAAGATGGTCTCGATCTCCTGACCTTGTGATCCACCCGCCTCAGCCTTCCAAAGTGCTGGGATTACAGTCCTGAGCCACTGCGCCCGGCCTGGACCTTTTTTTTTCGGGGTGGGGGGTTGGAGTCTGGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCCATCTTGGCTCACTGCAACCTCCGCCTGCCAGGTTCAAGTTCAAG
CGCTTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTATAGGCGCACGCCACCGTGGCCGGCTAATTTTGTATTTTTAGTAGAGATAGGGTTTCATCACGTTGGTCAGGCTGGTCTTGAAGTCCTGATCTCGTGATCCACCCGCCTCGGCCTTCCAAAGTGCTGGCGTGAGCCACTGCGCCTGGCTTAAGATTAATTTTTGTTTGTTTTGTTTTTGAGACGGAGTCTCGCTCTTTCACCCAGGCCGGAGTGCAGTGGCGCCATCTCGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCGGCCCCCCAAGTAGCTGGGACTACAGGCGTCCACCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCCACTTCCTGACCTCGTGATCCGCCCACCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTAAGATTAATTTTTATGGTGTTTTACATTCATTTGTATGGAAAGTTCTAGGATAGGGATCATATTTCACTTCCTTTTAATATAGTACAGTATAGCACAATTTGCAGTTATGTCTTAATATGTGATCAGGAATGATCATGACTGGAAACAGTGTTATTTGTGGTAGCTATAGGGTAGGTAAGGTTTTCAGCCTGTTTTAGGTTTCTTGAACTAAAATTCCTTCTGCTGTCTTCTAAGTCAATATTGGCAGCTATTTCTGACAATTGGTAGTTCTTTGTAACTTTTTACCTATGACTATAACATTTTTGACTTTCAGAAGAATTTGCTAAAATGTGTTCCCCGGTGGGTTGTTGTTTTTCAACCTAAACCTAGCTGCTTTTTCCAGTCACTTATCCGTATTGGAAGCTCAAAATGCAAATATACAGTAGGCCTAAAATATTGCCTGGTTTGAAAAGTGTTTAAAATATTTGAATCATTTTTATAGTAAACATTTACTCTCATCAGGACCTAGAAGGGGAACATTTTAATTTTTTTTCTTTTCCCTTTTCACAGTCTTCCTTCAACATTCATTACCTTTTTACATATCGGAGTTTTCATCTGTTCAAAGTTTGTGTTTACAGTGTGTTTATATAGTTTAGATTATAATTACCATACTGAAATATAATTGTTTCAGAATTGAGTCAGTGGTGAGAATGAAAGCCATCTGGTATGATAACTGAATCCAATTTTTCTTTTACGGAGAATTTCTTTGAAATGTAGCTTATCTCAGAAATAGGGATTTAGTAACCAATCAGAGTTTTCTTTGTCAAGGTTGTTTTTCTTTTTAAAGTCACATTTGGTCCCAGTAATAATACCAATGTTGGTACAAGTTATCTCAGGTTGTGAAGCATTTTTCCCAAGTCATCTCAGGTTGTGAAGCATTTTCCCAAGTAGCATTTAATTTTATTCTTGCAATAGCCCAAGGAGTCTGGCAGGGTGAATGGCAAGAGAAGGAAACAGGTTCAGGTAGAGTGGTTAGCCCAAGGTGGCTCTGCTTATATACACAACTGGTAGTAGAAACCCAGCCTCCTGACTTAGTTCATTGTTTTTCTTTTCACTGCCCTGTGCTATGTCAAAAACCCCATGATTACAAGAGTTGTATTACAACCCTTCACAATAAGGTTACTGTCCACAAGCTTTTCTTGTGATCCTTTTCTTTTTTTTTTTTCTTTTTTTGAGATGGATTCTCTGTCACCCAGGCTGGCCCGCCTTGGCCTCCCAAAATGCTGGGATTACAGCGTGAGCCACCGCACCTGGCCCTTGTGATCCTTTTCTAAAAAGTTAAATATTTAAGGAAAAAACCACATTCTTGTCACACTGCCAGGTTAGTCGTTCTTTGATATCTTGCCTGGACTTTATCCAAAAAATCCGTTTCAAAAATTCACATTTAGAGCTAAGTGTAGTGGCTCACGCCTGTAATCCCGGTCGAGGCAGATGGATCACTTGAGGTCAGGACTTCAAGACC
AGCCTGGGCAATATGGTGAAACCCCTTCCCTACCAAAAATACAAAAAAATTAGCCGGGTGTGGCAGCACGCGCCTGTAGTCCCAGCTACTTGGAATGCTGAGGCACAAGAATCACTTCAATCCGAGAGGCAGAGGTTGCAGTGAGCCAAGACCACACCACTGCACTCCAGCCTGAGCAGCAGAGTGAGTGAGACTCCATCTCCAAAAAAAAAAAAAAAGGTTCACATTCAGAAGAAAGCTAAAGGCCGGGTATAGTAGCTCACACCTGTAATCCCAGCACTTTGGGAAGCCGAAGCAGGAAGATTGCTTGATGCCAGGCATTCAAGACCAGCATGGGCATCATAGTGAGATCCTGTCTCTACAAAAATTAATTAACATTAAAAATTAAAAAGATGGCTGGCATGGTGGCTCACTCCTGTAATCCCAGTACTTTGGGAGGCCAAGGCATGGTGGTGCATGCCTTTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCACTTGAATTCAGGAGGCGGAGGTTACAGAGAGCCGAGATGGTGCCACTGCACTCCAGCCTGGGCGACAGAACGAGACTCTGTCTGAAAAAAAAAAAGAAAATTAAAAAGACCAGAATAAAGCTAAAGATTTAAAATAGCCTATAGGTTCCTACCAGAAGTTACCAGCTACCTCTCTGATAGTCTTTCCCTACAATATCCTCCTGGATTATTACATTTTAGCACCTTGACCTATCTGATGTCCTGCATACACAGGCATGGTCCTGCTCAGGGTTTGCCTTCTCTGCTCCCTCTTTCTTGGAATGCTCTTCCCCTAATTGTTGCATAGTGTGTTTCTTTACATTATTAAGCTATCCTCTAGTCTCACCTCAGTGAAACCTTTCCTGACTCCCCCCATGTACATCTCACCCCCACATAGATATTGAACTACCTGTTTCCCCTTACCCTGCTTAATTTTTCTCTTTAATGCACTTATTCCCATGTATTCTTTAATTCCGTATCAACTGTCTACCACACTAGAATATGAGCTCTATGAGAGCAGGCTTTATTTTGTAAACTGCTACATTTCTATCTCCTAGAATAGTACTTGAATATAGTAGTAGATACTTAATAAACACTTGTTATATTAGTATAATAAATGAACTAATCTCAGGAATGCCTTGGTTTTGTGGATAGACAGGTAGGGATGGGAACTTGGGTGATGTATTTTCTGAAGTTTTTATTTTTAAGCTTATTATTATTTTGAGATGGAGTCCAGCTCTGTCGCCCAGGTTGGAGTACAGTGGCGCGATCTTGGCTCACTGCAACGTGCACTTCCCCGGTTCAAGCGATTCTCCTGCCTTAGCCTCCCAAGTAGCTGGGATTACAGGCGCATGCCACCATGCCCAGTTAGTTTTGGTATTTTTAGTAGAGACAGCGTTTCACTGTGTTGGCCAGGCTGGTCTCGAAATCCTGACCTCATGATCCGCCCGCCTCGGCCTCCCAAGTGCTGGGATTACAAGCATGAGCCCCCGTGTCTGGCCTTATTTTCTTTTTTTTGAGACAGAGTCTTCCTCTGTCACCTAGGCTGGAGTGCAGTGGCACGATATTGGCTCACTCTGCAACCTCCACCTCCAGGATTCAAGTGATCCTTCTACCTTAGTCTCCAAAGTAGCTGAGACCACAGGCATGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGCGTAGACAGGGTTTCACCATATTGTCCAGGATGATCTGGAACTCCTGAGCTCAGGTGATCCACCCACCTCAGCCTCCCAAAGTGCTAGGATTACAGGCATGAGGCACCATGCCCGGCCTTAAGCTTATCATTTTCTAAATTTCCTTTAGTGAGTACTTATTACACTGTTTTTACAAAGTAATCACAAACCAAACATCATGCCTCTTCTGAAGTGATCTAATAAGAGTACACAGTACCATCTGTAAAGTGTTCTTGCCAGAAAGTTGAACCTGAATGATTAAGCCTGTAAGTCTAGTTT
ATAGGAAATAAGGCTAGAGGAACAAGTTAAACCTCACCATAGGGTTATACAATCAGCAAAATCCAGAATGGGGGAAACTCCACAGGTCAAATGACCTAATTTTAAAAATAAATGACAAGGGAGAAAAAGTAAGAGACACCTATAGATCAGAAGACACTTGGGGCTGGGCATGGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGCGGATCACCTGAGGTCAGGAGTTCAAGACCAGCCGGCCAACATGGTGAAACCCCAACTCTACTAAAAATACGAAAAATCAGCCGGGCGTGGTGGCGCACGCCTGTAGTTCCAACTACCTGGGAGGCTGAGGCAGGAGAATCACTTGAACTTGGGAGGCAGAGGTTGCAGTGAGCCGAGATCGCACCATTGCATGCCAGTCTGGGCTACAAAAGCAAAACCCCATCTCAAAAAAAAGAAGACACTTGGGTTTGGGTGTGTTGGCTCATGCCTGTAAACCCCGTGCTGGGAGGATTGCTTGAGCCCAGGAGTTCAAGGCTGCAGTGAGGTATGTTTGCACCACTGCACTCCAGCCTAGGTGACAGAGTGTGACCTTATCTTAAAAGTAATAATAATTAAAATAATCTGGGGTAGGGGTGGATATGGGTGAAACAGCTTGGCCATGAGTTGATGGTTGTTGGACCAGGGTGATGGTCCATATAGTTCATTTTATTATTTTATTTACTTGAAATTTTGAAATACTTGAAATTTTCCATATTAAGTTAAAAAGGCATTTACAGTAAACAAAAAAAAGTTCTAGGAAGGAATTCAAAAGAAATATAAGCAGAAAATTTTGTCTTTATGGAGCTTAAAGATGAGATGTGCACCCACAGTGATAGTGCAGAAAAATATATCACTGGAAATGAATTCGTACGAACTATTATCAACTAATCTTTTAAATGCTGATGATAGTATAGAGTATTGAAGGGATCAATATAATTCTGTTTTGATATCTGAAAGCTCACTGAAGGTAAGGATCGTATTCTCTGCTGTATTCTCAGTTCCTGACACAGCAGACATTTAATAAATATTGAACGAACTTGAGGCCTTATGTTGACTCAGTCATAACAGCTCAAAGTTGAACTTATTCACTAAGAATAGCTTTATTTTTAAATAAATTATTGAGCCTCATTTATTTTCTTTTTCTCCCCCCCTACCCTGCTAGTCTGGAGTTGATCAAGGAACCTGTCTCCACAAAGTGTGACCACATATTTTGCAAGTAAGTTTGAATGTGTTATGTGGCTCCATTATTAGCTTTTGTTTTTGTCCTTCATAACCCAGGAAACACCTAACTTTATAGAAGCTTTACTTTCTTCAATTAAGTGAGAACGAAAAATCCAACTCCATTTCATTCTTTCTCAGAGAGTATATAGTTATCAAAAGTTGGTTGTAATCATAGTTCCTGGTAAAGTTTTGACATATATTATCTTTTTTTTTTTTTTTGAGACAAAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCATGATCTTGGCTCACTGCAACCTCCGCCCCCCGAGTTCAAGCGATTCTTCTACCTCAGCCTCCCAGGTAGCTGGGACTACAGGCACCCGCCACCATGCTTGGCTAATTTTTGTACTTTTAGTAGAGATAAGGTTTCACCATATTGGCCAGGCTGGTCTCGAACTCCTGACCTTGTGATCCACCTGCCTCGGCCTCCCAAAGTTCTGGGATTACAGGCGTGAGCCACCACACCCGACTGACATATATTATCTATTAGGATGTAACATCATTTTGAACAGTGTTTTGTATTTTTTGTGTCCATCAGTGAAAGCAAACTGCAAGCAGTTTTGAAATAAGCACATTGTGTTTGAGCCTTCCCAGTTTCTCCTTTCTGTTCATTTCTGCATATCCTTATGCATTCCCCCTTCTAAGGGTCAGTGTTTGCCCGCTTTGTAATCATTGTGAAGACAGGAAAGGACCTGATACCAGTTTCTATTTAG
GCCAAAATTCATTTATAGCAGTGATTCAAGTTATATTTACGTATTTGATGATCTTGTCTTTTGAAATGAAAATGTTTGTTTCTTAATAAAAGAATTTCAGAAAAAGTAGAGTAGGTAATTTAGTAGAACAAGTGGGCTTTCTCCTTTTCTTTATGTTAAGCTATGGCTCACATCTTACCTTAAATGTCAACTAATTTGTTTTTAAGTATTTATGTACCTGGTACATAACCTGGTACCAGGTACAAACTATGTACTTGGTAAAAAGTTTATTAGCACAAAAAGGTATATGATGCAAAGTATACTTCCCTCTTACCCTACAACCCCTGCCTCCCTGTTCCCTCCCCAGACAACCACAATGATCAATTTCTTATGTATCCTTTGAGGAATTTTTAAATTCCAGAGTTCTTAACTTGGGGTTTATGAATAGTCTTTATGAATTTCCTAGAATTATATTTAAATTGTATTCAAAACTATGGCCATGTACATTTTTCTGGGAAGATAGTCCATAATTTTCATCTGAGTGAGCTAAGATCATGCCACTGCATTCCAGCCTGGGCGACAAGAGGGAGACTCAAAAAAAAAAAAAAAAGTCCCAGTATTTACTACAGAGAGCTAAAGATTAACCTTTAAAGCCCTGGGGCTTTCAATTTATCTGGATGAGAATCTTTCTGGAATGAACTGTATGTTTTCTTGTCAGCTTGAGTAACAAATGCTGAACATACTATACTATTATTACAGGGACTCAAGGGCCCAGTGTGGTAGCTCCTGCCCATAACCCCAGCACTTTGGGAGGCCAAGGCAGGAAGATCACTTAAGGCCAGGAGTTCGAAGCTGTAGTGAGCTATGATCACACCACTGCACTCCAGCCTAGATGACAGAGTGAGACCCTGTCTTTTTTTTTTTTTGAGATGGTGTTTCACTCTATTGCCCAGGCTGGAGTGCAGTGGTGTGATCTCGGCTCACTGCAACCTCCACCTCCTGGGTTCAAGCGATTCTCCTGCCTCAGCCTCTTGAGTAGCTGGGATTACAGGCATCTGCCACCACACCCAGCTAATTTTTGTATTTTTAGTCGAGACAGGGTTTTCACCATGTTGGCCAGGCTGCTCTCAAACTCCTGACTTCAGCTACCTTGGCCTTAAAAAGTGTTGGGATTACAGGTGTAAGCCACCGCGCCTGGCTGACCCTGTCTCTTAACAAAAAAAGAGAGATTAAGTTATGAATATAGTTGCTTTGAGAACTTGTGGAAGAAGGAAATTATAGGCTTATAGGCAGAGATAATAATACGAGCAAATGTACAAATAAAAGAAAATAGAGGACGGGCGCGGTGGCTCACGCCTATAATACCAGCACTTTGGGAGGTCGAGGTGGGCGGATCACGAGGTCAGGAAATTAAGACCATCCTGGCCAAAATGGCGAAACACTGTCTCTACTAAAACACACAAAAAACTAGCCTGGCATGGTGGCACGTACCTGTAGTCCCAGCTACTTGGTAGGCTGCGGCAGGGGTATCACTTGAACCTGGGAGGCAGAGGTTGCCCTGAGCCGAGATCATGCCAATGCACTCCAGCCTGACAACAGAGTGAGACTCTGTCTGAAAAAAAAGAAAAGAAAAGAAAATACATCCAGGAAAAATAAGCTAACTTTGCATATGTGTATAGGAGTTGTGTTAGAAAAGGAAGAAGCCCTCAAAGATGGGAAGCCATTTGCAAGAAAGAGAAGGTCCAAGAGGAGGCAGAAGGGATTGGAAATAGAAAAAGGATGTAAGAAAGAGTTGATTATTACTCATAAACAGTAATGAAGGAAAAGGAGAGTAATTCTACAGGAAGATGCTGAGGTGCTTTGAGCCCAGTGAAGTTGGAGGTAAAGACAGCTGTTGAGGCCGGGCACGGTGGCTCACGCCTCTAATCCTAGCACTTTTGGAGCCCAAGGCAGGTGGATCACCTGAGGTCAGGAGCTCAAGACCAGCCTGACCAACATAGAGAAACCCCATCTCTACTAAA
AATACAAAATTAGACGGGCGTGGAGGCGCATGCCTGTAATCCCAGTTACTTGGGAGGCTGAGGCAGGAGAATCACTTGAACCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATTGCGCCATTGCACTCCAGCCTGGGCGACAAGAGTGAAAACTGTCTCAAAAAAAAAAAACAACAAAAAACAGCTGTTGAGATTGAGAGGATTAGAGTTGGCAACTGGAGAAGAGTGAGAAGCTTGGTTTCAAGCTTGTGATAGTCAGGATTGTGATAGTCAGGAAAGAACCAGTCATAAAGATATATGTGTGTGTATACATATAAATATGTTATATATATGTGTGTGTGTGACACATATATATTTTTGTTTGTTTCTTTGAGACAGTGTCTCCCTCTGACACCCAGGCTGGAGTTCAGTGGTGTGATCATAGTTCACTTTTACCTTGCAATCTGGGTTCAAGCAATCTCTCATCTCAGCCCCTCAAGTAGCTAGGACTACAGGTACATGGCATTTGCCCAGCTAATTTTTAAGTTTCTTGTAGAGATGGGCCAGCCATATTTTAAATTGTGTTTTGAATGTTATATTAGAATTAAAAGTCCAAAGCCGGGTGTGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGTGGGCGGATCACGAGGTCAGGAGTTCGAGACCAGCCTGGCCAATATGGTAACACCATCTCTACTAAAAATACAAAAATTAGCTGGGTATGGGGGCACATGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGAGGAACCTCTTGAACCCAGGAGGCAGAGGCTGCAGTGAGTTGAGATCGTGCCACTGTACTCTAGCCTGGGCGACAGAGCAAGATTCCGTCTCAAAAAAAAAAAAAGTCCAGTATAATGCCCATGTGATAGATCGACTTTTTCATGAAATCTCTTCTGTAATATCAATATAATCTGAATAACACTTTGATCTATATGATGAGAAAGCTGGGAGCCTGGGAGCGATACCCCCATGCTTTTGTTGTATTAATTGTATTTTCTACGGATAAACTCTAATTGCTAAAAATAAAACAACTTTATTGACCCAAGCAAGCCTAAAGTTCTGAAATCTTTTTTTTATTTTTGTTTGTTTGTTTGTTTGTTTTTGTTTGTTTTGTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCGGTGGTGCAGTCTCGGCTCACTGCAAGCTCCACCTCCCGGGTTCACACCATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACTACAGACGCCTGCCACCACGCCCAGCTAATTTTTTTGTATTTTTAGTAGAGAAAGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCTGCCCGCCTTGGCCTCCCTAAGTTCTGGGATTACAAGTGTGAGCCACCACGCCCGGCTGTTTTTTTTTGTTTTGTTTTGAGACGGAGTCTCACTGTGTTGCCCAGACTGGAGTGCAGTGGCATGATCTCAGCTCACTGCCACCTCCATCTCCTGGGTTCAAGCAAATCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCATGTGCCACCACACCTGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACTATGTTGGCCAGGCTGGTCCAAAACTCCTGACCTCAGGTGATCTGCTCGCCTTGGCCTCCCACAGTGCCAGGATTACAGGCATGAGCCACCTTGCCCAGCCAGTTCTGAAATCTTTTATGAAGCCTATAAAAAAAGATAATAATACCAATCTAGAAAATATTTCTTAAGGCAGTCATGCATTAGTTTGAACTTTCCAAACAAAAAAATGCAATGTGTAATACTTTTTTTTTTTTTTTTGAGATGGAGTCTTGTTCTGTTGCCCAGGCTGGAGTGCAGTGGTACAATCTCGGCTCACTGCAGCCTCTGCCTCTCTGGTTCAAGTGATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGATT
ACAGGCGTGCACCACCATGCATGGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGACAAGGCTGATCTCGAACTCCTGACCTCAGGTGATCCGCCCACCTCAGCCTCCCAAAGTGCTGAGATTACAGGCATTAGCCACCACGCCCAGCCTTTTATTTTAGTAGAGACCATGTTTCACCATGTTGACCAAGCTGGTCTTGAGCTGACCTCAAGTGATCCGCCCACCTCCACCTCCCAAAATGGTGGGATTATAGGCATGAGCCACCGCACCCAGCCTGTAATACTTTTTTGAAGATCTAGAACCACATTGTTCAAAGAGATAGAATGTGAGCAATAAATGTAACTTAAATTTTTCAACAGCTACTTTTTTTTTTTTTTTTTGAGACAGGGTCTTACTCTGTTGTCCCAGCTGGAGTACAGTGGTGCGATCATGAGGCTTACTGTTGCCTTGACCTCCTAGGCTCAAGCGATCCTATCACCTCAGTCTCCCAAGTAGCTGGGACTGTAAGTGCACACCACCATATCCAGCTAAATTTTGTGTTTTCTGTAGAGACGGGGTTTCGCCATGTTTCCCAGGCTGGTCTTGAACTTTGGGCTTAACCCGTCTGCCCACCTAGGCATCCCAAAGTGCTAGGATTACAGGTGTGAGTCATCATGCCTGGCCAGTATTTTAGTTAGCTCTGTCTTTTCAAGTCATATACAAGTTCATTTTCTTTTAAGTTTAGTTAACAACCTTTATACATGTATTCTTTTTCTAGCATAAAGAAAGATTCGAGGCCGGGTGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCTGAGATGGGCACATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACATGGTGAAACCCCGCCTCTACTAAAATTACAAAAAGTTAGCCAGGCGTGGTAGCGGGCACCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATGGCGTGAACCCAGGAGGCAGAGCTTGCAGTGAGCAGAGATTGTGCCACTGCACTCCAGCCTGAGAGACAGAGCGAGACTCCGTCTAAAAAAAAAAAAAAAGATTCGAATCCTTATCTTGGTTGATTTTTGCGTATCTAGTTCCACTGAATTATTTATATAATTGTATAGACTACAGCACGAGACAGCTTAGCTTGTCACTCTACTGTACTATATTCTGCAGTACTATCATAAGGGAATTTCCTCCCTACCCCTGCTCTGAATTGTTCAATTGTACTATTTGCTGGAGTAATGCTTGATGCCTTCTTGATCCATTATACTAGAGTATATGTAGTATTTGTAGATTCTGAAGGAGTGGGAGCCTCTATTCTGAGTTTTAAAGGTACTTATGTACAGTGGAGGTAGCTTTTTGACAGCCTCATCTTCCAAACTATAGAGTCATTGTTTTGTTGAGTGCAATATGGTACTTGAAGCATCTATATCGGCGAAGAAGGACCCAAGTCTCCTTGACCTTACCTACCTACATTCACTTTCTCTGGTAGGAAGATTGTGGGTGCCTCTCTCCAGACTTAGTTTCCATGTCAAAAAAGAAAAAAGGAAGATTGTGGGCTTTGCTACAATCCAATTCTGGATCCAATATAACCTTCATTGCTTAATTACTGTGTGATCTGGGACAAGCCTCTACTCTATAAAAATGAAGATAAGGCCAGGCTTGATGGCTCATGCCTGTAATCCCAGCACGTTGGGATGCCAAGGCAGGAGGATCACTTGAGGTCAGGAGTTCGAGACCAGACTGGGCAATATAGTGAAACCACATCTGTACAAAAATAAAGATAGAAAGTAGCCCAGCGCAATGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCTGAAGCAGGCGATCACTTGAGGTCGGGAGTTCAAGACTGTAGACAGATAGATAGGTAGGTAGATAGATAGAGATATAGATATAGTTGGGGTTTTTTTGTTTTGTTTTGTTTTGTTTTTGAGATGGAGTTTCG
CTCTTGTTGCCCAGGCTGGAGTGCAATGGCGCGATCTCAGTTTACTGCAACCTCCGCCTCCCGGGTTCAAGAGATTCTCCTGCCTCAGCCTCCTGAGTAGCCAGGATTACAGGCATATGCCACCATGCCCGGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCTCCGTGTTGGTCAGGCTGGTCTTGAACTCCTGACCTCTCCCAAAGTGTTGGGATTACAGGCGTGAGCCACCGCTCCTGGCCTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACAGAGTCTTCCTCTGTTGCCCAGGGTGGAGTGCAGTGGCACTCTTCTCAGCTCATTGCAACCTCTGCCATCCTGGGTTCCAGTGATTCTCATGCCTCAGCCTCCCAAGTAGCTGGGACTCAGGCGTGTGCCCACCACGCCTGGCTAATTTTGTTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTAGCCAGGCTGGTCTCAAACTCCAGGCCTCAAGTGATCTGCCTGCCTCAGCCTCCTGGGATTGCAGACATGAGCCACTGCACCCGGCCAAGAGAGGGTAATAAATGTTAAATTACCTGGCTAGTAAAAAATATTCTCTAAGTGTCTTTTCTCACAATTCCCAATGCCTTTTTTTTTTTTTTGGCACAATCTCACTCTGTTGCCCAGGCTGGAATGCAATGGTGCAATATTGGCTCACTGTAACCCCCGCCTCACAGGTTCAACTTATTCTCATGCCTCAGCCTCCCGAGTAACTGGGACTACAGTGCACCACCACCACACCCAGCTAATTTTTGAATATTTAGTAGAGACAGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGGCCTCAAGTGATTCACCCACCCCGCAAGTGCTGGGATTACAGGTGTGGACCACCGTGCACAGCCCTAGTGACTTTTTTTTTAGCCCCTTAATCTTTTCTTTCCTGGGTCTCTTCATTGTCAGTGTCTGCTATTTACTCCCTACCTAGTCACCCCCTTCACCAGTATATTATGTCCTTTATGTTTTATTTTGCAGGATCTTATTTTGCTTTTCTATTGAATCCCCTCCATCTAGAATAGTACTAGACATAGTAAATATTGGTTGTATGAGTGAATCGCTGCTTTTAATTATCATCACCATTGCTCTCTCTACTTCTGGTCTATGATCCACTTTGAGTTAACTTTTGTTATTTGGTGTGAGATAGGAGTATAATTTCATTCTTTTACATGTGGTTATACTTTTGTCTCAACACTGTTTGTTAAAAACACAAAAAGTATTATTTTCCCATTTAATCATCTTTGGCCTGGGCACGGTGGCTCATGCCTGTAATCCCAGCACTCTGGAAGGCCAAGGCAGATGGATCAATTTGAGGCCAGGAGTTCAAGACTAGCCAACATGGTGAAACTAAAAATACAAAAAATTAGCTGGGTATGGTGGTGCATGTCTGTAATCCCAGCTACTCGGGAGGCTGAGGCACGAGAATTGCTTGAGCCTAGGAGGTGGAGGTTGTAGTGAGCTGAGATTGTGTCACTACCCTCCAGCCTGGGTGATAGAGTGAGTCTGTCTCAAAAAAAAAAAAAAAAAATTAAGAAAATAAAAATCGTCGGCCAGGCATGGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCAGAGGCGGGCAGATCACGAGGTCAGGAGATGGAGACCATCCTGGCTAACATGGTGAAACCCCGTCTCTACTAAAAATAAAAAAATTAGCCGGGCATGGTGCTGGGCGCCTGTAGTCCCAGCTGCTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCAGGAGGTGGAGCTTGCAGTGAGCCGAGATCGTGCCACTGCACTCCAGCCTGGGAGACAGAGCGAGACTCCGTCTCAAAAAAAAAAAAAAAAAAAATTGTCTTGGTATTTATTATTGTTGAAAATCGCTTGATCACAGATGTATGTATGAGTTTATTTCTGTACTGTCAATTCCATTTT
ATTGATGTATGTGTCTATTCTTATGCTATTACCACACTTTCTTGATTACTATAGCTTTGTGGTGAGGTGTTGAGATTTTAAACTAATTATAAGCATCTTACATGAACTACTTACCGTTTATATTTGATTATGCAGCATGAAATAATTATGAATATATCATTAAATATGCCATATTAACTTTTATTAAGTTTTATGTGATCATAACAGTAAGCCATATGCATGTAAGTTCAGTTTTCATAGATCATTGCTTATGTAGTTTAGGTTTTTGCTTATGCAGCATCCAAAAACAATTAGGAAACTATTGCTTGTAATTCACCTGCCATTACTTTTTAAATGGCTCTTAAGGGCAGTTGTGAGATTATCTTTTCATGGCTATTTGCCTTTTGAGTATTCTTTCTACAAAAGGAAGTAAATTAAATTGTTCTTTCTTTCTTTATAATTTATAGATTTTGCATGCTGAAACTTCTCAACCAGAAGAAAGGGCCTTCACAGTGTCCTTTATGTAAGAATGATATAACCAAAAGGTATATAATTTGGTAATGATGCTAGGTTGGAAGCAACCACAGTAGGAAAAAGTAGAAATTATTTAATAACATAGCGTTCCTATAAAACCATTCATCAGAAAAATTTATAAAAGAGTTTTTAGCACACAGTAAATTATTTCCAAAGTTATTTTCCTGAAAGTTTTATGGGACATCTGCCTTATACAGGTATTAGAAACTTACTGCCTTTCTCTAATGCTTCTAGTGTAAAAACTTGCAGACTTATGTAAAGTAGGGCTGTATCGCCGTGCCCCCATTGTCTGTTAATCTTGTTTTTATATTTTTGATTGTGTTTCCTTTTCTTTTTTTTTTTTTTTTTAAGACAGGGTCCTGCTCTGTCACTGAGGCTGGAGTGCAGTGGCGTGATCTCGGCTCACTGTAGCCTCTGTCTCCCAGCCTCTTCCTGCCTTAGCCTCCCAAATAGCTGGGACTACAGGCACACGCTACCATGCCCGGCCAATTTTTGTATTTTTTGTAGAGATGAGGTTTTACCATGTTGCCCAGGCTGGTAACTCCTGAGCTCAGGTGATCTGCCCACCTCGGCCTCCCAAAGTGCTGGGGTTCACAGGTGTGTGTTTATTTCTATCTAATTATTTACACAAACACAATGTATTTATATATTGTGTATCTCTTCTGCTACAATGTAAATTCTATGAGAGTAGTAATTTTGTCTGTCTCAACACTGTTTTTCCTAAGTTTGGTACATAGTAGGCACTCAGATGCTTAAAGGAATGAATGAATTGTGCTTTAATTCCACTTTACTAAACCCAAATCTCCCTTTGGACATTGTTATCTATGTGTTTTCAAAGAAGTATAATCATAATTTGACAGAAATCCTTGAGAGGCAGAACTAAGTGAGGGATTGGGCAGGGTTCAGATGTTAAGAACAGTAAGCTCAGCAGGGTGTGATTGCTCATGCCTATAACCCTAGCACTCTAGGAGGCTGAGGTGGGATGATTGCTTGAGGCCAGGAGTTTGAAATCAGCCTGGGCAACATAGTGAGACCCCATCACTACCAACAAAATAAATAAATAAATGTACATGGTGGCATATGCCCATAGTCCTAGCTACTTGGGAGGCTATAGTGGGAGGATAGCTTGAGTACAGAAGTCTGAGGCTGCAGTGAGCTATGATTGTGGCACTGCATGCTAGCCTGGGCAATAGAGCAAGACCCTGTCTCTAAATTAAACAAAAAAAAAAGTACTCTAGTTTTCTATGCAATGCATTATATCTGCTGTGGATTTAGGGCAGTATTATATCAGATAATTTTAGGCATTTGGTAGGCTTAAATGAATGACAAAAAGTTACTAAATCACTGCCATCACACGGTTTATACAGATGTCAATGATGTATTGATTATAGAGGTTTTCTACTGTTGCTGCATCTTATTTTTATTTGTTTACATGTCTTTTCTTATTTTAGTGTCCTTAAAAGGTTGATAATCACTTGCTGAGTGTG
TTTCTCAAACAATTTAATTTCAGGAGCCTACAAGAAAGTACGAGATTTAGTCAACTTGTTGAAGAGCTATTGAAAATCATTTGTGCTTTTCAGCTTGACACAGGTTTGGAGTGTAAGTGTTGAATATCCCAAGAATGACACTCAAGTGCTGTCCATGAAAACTCAGGAAGTTTGCACAATTACTTTCTATGACGTGGTGATAAGACCTTTTAGTCTAGGTTAATTTTAGTTCTGTATCTGTAATCTATTTTTAAAAAATTACTCCCACTGGTCTCACACCTTATTTTATCAATCGTAAGGTGCACATTTTTCACATCTTAACATCTCTGAAATTGGGAACATTTTACTATTGAGGGTGTGTCATTTGTTTAATTTGTGTGCTTTCTTTCTTAGTGATACAGAAAATAATAGTGCAACTTACATTGTTGGTGTCTTAGCTTTAGTGAAATACAGTATTGATAGGCAAATTTCTTAGTGTTAAGGTAGAAAACAAGGACTCTAAATAACTTTGATGGTCTGTGTATTTGTTTTTGTTTCCTAGGAGTAAAATTTCCAGTTGATTTTTTAAAATTTGATTTTTAAAAAAAATCACAGGTAACCTTAATGCATTGTCTTAACACAACAAAGAGCATACATAGGGTTTCTCTTGGTTTCTTTGATTATAATTCATACATTTTTCTCTAACTGCAAACATAATGTTTTCCCTTGTATTTTACAGATGCAAACAGCTATAATTTTGCAAAAAAGGAAAATAACTCTCCTGAACATCTAAAAGATGAAGTTTCTATCATCCAAAGTATGGGCTACAGAAACCGTGCCAAAAGACTTCTACAGAGTGAACCCGAAAATCCTTCCTTGGTAAAACCATTTGTTTTCTTCTTCTTCTTCTTCTTCTTTTCTTTTTTTTTTCTTTTTTTTTTTTGAGATGGAGTCTTGCTCTGTGGCCCAGGCTAGAAGCAGTCCTCCTGCCTTAGCCCCCTTAGTAGCTGGGATTACAGGCACGCGCCACCATGCCAGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCATCATGTTGGCCAGGCTGGTCTCGAACTCCTAACCTCAGGTGATCCACCCACCTCGGCTCCCCAAATTGCTGGGATTACAGGTGTGAGCCACTGTGCCCGGCCGGTAAAACCATTTTCATTTATTCTGGCAACATCTCTTTATTGAGCATTGTGAATATGTTAGTGAATGTGCTAGATGCTCATAGATTTATATAAAAAGTTAGTGAAGAAGGAAAGATGGTATATTAAGTGGTTAGACAAGTGTTCTAATCAGTTAGAGTTCAGAGAAGGTCAGGGTACCTGATATAATCAAGAGAGAGACCTTACAGCCAGGTGAGGTGAATGTACCTATAATCCCAGCTACTTAGGAGGCTGAAATGGGAGGATCACTTGAGTCCAGGTTTGAGACCAGCCCAGGCAACATAGCAAGATCCCCATCAGATACACCAAAAAGACAGATTTCTTTTTTTTTTTTTTTTTTGAGACAGAGTCTCGCTCTGTCGCCCAGGCTGGAGCGCAGTGACACGATGTCAGCTCACTGCAACCTCCGCCTCCCAGGTTCAAGTGATTCTCCTGCCTCAGCCTCCTGAGTAGTTGGGACTACAGGGGTACGACACCAGACCTGGCTAATTTTTGTAATTTTAGTAGAGTCGGGGTTTCACCATATTGGTCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCACCCTCCTTGGCCTCCCAGAGTGCTGGGATTACAGGCGTGAGCCACCAAGCCCGGCCAAAAAAGAGAGCTCTTATAGGCCCTTCCTTGCTTTGGAGCTTTATCTGCTCTGTGATGCTTATCTAAAATAGCCATAAGGTCACTGATATTTTTAAGCATTTGGAAATTACTTCAGCTGGGTGCCATGGCTCATGCCTATAATCCCAACCCTTTGGGAGGCTGAGGTAGGAGGTCCTTTGAGCCCAGCTTGGGCAACACAGTGAG
ACACTGTCTCTGCAATTAAAAAAAAAAAAAAGTAGCTGGGTGCCGTGGCTCACGCCTGTAATTCCAGCACTAGGAGGCTTGAGGATTGCCTGAGCTCAGGAGTTCAAGACCAGTTTGGGCAACATAGCAAGTCCTTGTCTATATTAAAAGTTTTTTTAAATTATCTGGGCATGGTGGTGTGTGCCTGTAGTCCCAGCTACTTGGGAAGCTGAGACAGAAGGATCACTTGAGTCCAGGAGATGTAGACTACAGTGAGCTATGATCACTCCACTGCACTTCAGCGTGGGCGGCAAAGCAAGATCTAGTTGCAAAAAAAAAAAGAACTGGCTGGGTGCGGCGGCTAACACCTGCAATCCCAGCACCTTGGGAGGCTGAGGCCAGTGGATCATGAGGTCAGGAGATTGAGACCACCCTGGCCAACATGGTGAAACCCGGTCTCTACTAAAAATACAAAAATTAGCTGGGTGTGGTGGCACGTGCCTGTAATCCCAGCTACTCCAGAGGCTGAGGATGGAGAATCACTTGAACCTGAGAGTCGGAGGTTGCAGTGAGCCGAGATTGCGCCACTGCACTCCAGCCTGGCGACAGAGCGAGACTCCGTCTCAAAAAAAAAAAAAAAAAAGCTTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGTCAAGTGGATCACGAGGTGTGGAGATCAAGACTATCCTGGCTCACATGGTGAAAGCCCGTCTCTACTAAAAACACAGAAAAATTAGCTGAGCGTGATGGCGGACTCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATAGCATGAACCCGGGAGGTGGAGCTTGCAGTGAGCCGAGATCCCGCCACTGCGATCCAGCCTGGGCGACAGAGTGAGACTCTGTCTCAAAAAAAAAACAAAAAAACTTAGCTGGGCGTGGTGGTATGCACCTGTGGTCCTAGCTACTTGGGAGGCTGAGGCTGGAGCATTGCTTTAACATAGAGAGTCAAGGCTGCAGTTGAGCTATGACTGTGCCACTGGACTCCAGCGCAGGTGACTGAGACCCTATCTTTTAAAAAAAGGGAAAATTACTTGAACTTAAAAGGTGTAATTGTTAAAGAAAATGTAGTGATTTGCTCTGTTGTTACTTATATGTGCATGAATGATGGAGATCTTAAAAAGTAATCATTCTGGGGCTGGGCGTAGTAGCTTGCACCTGTAATCCCAGCACTTCGGGAGGCTGAGGCAGGCAGATAATTTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACATGGTGAAACCCATCTCTACTAAAAATACAAAAATTAGCTGGGTGTGGTGGCACGTACCTGTAATCCCAGCTACTCGGGAGGCGGAGGCACAAGAATTGCTTGAACCTAGGACGCGGAGGTTGCAGCGAGCCAAGATCGCGCCACTGCACTCCAGCCTGGGCCGTAGAGTGAGACTCTGTCTCAAAAAAGAAAAAAAAGTAATTGTTCTAGCTGGGCGCAGTGGCTCTTGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGCGGGTGGATCTCGAGTCCTAGAGTTCAAGACCAGCCTAGGCAATGTGGTGAAACCCCATCGCTACAAAAAATACAAAAATTAGCCAGGCATGGTGGCGTGCGCATGTAGTCCCAGCTCCTTGGGAGGCTGAGGTGGGAGGATCACTTGAACCCAGGAGACAGAGGTTGCAGTGAACCGAGATCACGCCACCACGCTCCAGCCTGGGCAACAGAACAAGACTCTGTCTAAAAAAATACAAATAAAATAAAAGTAGTTCTCACAGTACCAGCATTCATTTTTCAAAAGATATAGAGCTAAAAAGGAAGGAAAAAAAAAGTAATGTTGGGCTTTTAAATACTCGTTCCTATACTAAATGTTCTTAGGAGTGCTGGGGTTTTATTGTCATCATTTATCCTTTTTAAAAATGTTATTGGCCAGGCACGGTGGCTCATGGCTGTAATCCCAGCACTTTGGGAGGCCGAG
GCAGGCAGATCACCTGAGGTCAGGAGTGTGAGACCAGCCTGGCCAACATGGCGAAACCTGTCTCTACTAAAAATACAAAAATTAACTAGGCGTGGTGGTGTACGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCAACTGAACCAGGGAGGTGGAGGTTGCAGTGTGCCGAGATCACGCCACTGCACTCTAGCCTGGCAACAGAGCAAGATTCTGTCTCAAAAAAAAAAAACATATATACACATATATCCCAAAGTGCTGGGATTACATATATATATATATATATATATATTATATATATATATATATATATGTTATATATATGTTATATATATATAACATATATATATGTTATATATATGTTATATATATATAATATATATATGTTATATATATGTTATATATATATATACACACACACACACATATATATGTATATATATATACACACACACACACAAATTAGCCAGGCATAGTTGCACACGCTTGTAGACCCAGCTACTCAGGAGGCTGAGGCAGGAGAATCTCTTGAACTTAGGAGGCGGAGGTTGCAGTGAGCTGAGATTGCGCCACTGCACTCCAGCCTGGGTGACAGAGCAGGACTCTGTACACCCCCCAAAACAAAAAAAAAAGTTATCAGATGTGATTGGAATGTATATCAAGTATCAGCTTCAAAATATGCTATATTAATACTTCAAAAATTACACAAATAATACATAATCAGGTTTGAAAAATTTAAGACAACAGAAAAAAAAATTCAAATCACACATATCCCACACATTTTATTATTACTACTACTATTATTTTGTAGAGACTGGGTCTCACTCTGTTGCTTATGCTGGTCTTGAACTCCTGGCCTCAAGCAGTCCTGCTCCAGCCTCCCAAAGTGCTGGGATTATAGGCATGAGCTACCGCTCCCAGCCCCAGACATTTTAGTGTGTAAATTCCTGGGCATTTTTTCCAGGCATCATACATGTTAGCTGACTGATGATGGTCAATTTATTTTGTCCATGGTGTCAAGTTTCTCTTCAGGAGGAAAAGCACAGAACTGGCCAACAATTGCTTGACTGTTCTTTACCATACTGTTTAGCAGGAAACCAGTCTCAGTGTCCAACTCTCTAACCTTGGAACTGTGAGAACTCTGAGGACAAAGCAGCGGATACAACCTCAAAAGACGTCTGTCTACATTGAATTGGGTAAGGGTCTCAGGTTTTTTAAGTATTTAATAATAATTGCTGGATTCCTTATCTTATAGTTTTGCCAAAAATCTTGGTCATAATTTGTATTTGTGGTAGGCAGCTTTGGGAAGTGAATTTTATGAGCCCTATGGTGAGTTATAAAAAATGTAAAAGACGCAGTTCCCACCTTGAAGAATCTTACTTTAAAAAGGGAGCAAAAGAGGCCAGGCATGGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAAGTGGGTGGATCACCTGAGGTCGGGAGTTCGAGACCAGCCTAGCCAACATGGAGAAACTCTGTCTGTACCAAAAAATAAAAAATTAGCCAGGTGTGGTGGCACATAACTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCACTTGAACCCGGGAGGTGGAGGTTGCGGTGAACCGAGATCGCACCATTGCACTCCAGCCTGGGCAAAAATAGCGAAACTCCATCTAAAAAAAAAAAAGAGAGCAAAAGAAAGAATATCTGGTTTTAAATATGTGTAAATATGTTTTGGAAAGATGGAGAGTAGCAATAAGAAAAAACATGATGGATTGCTACAGTATTTAGTTCCAAGATAAATTGTACTAGATGAGGAAGCCTTTTAAGAAGAGCTGAATTGCCAGGCGCAGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGTGGGCGGATCACCTGAGGTCGGGAGTTCAAGACCAGCCTGACCAACATGGAGAAACCCCATCTCTACTAAAAAAAAAAAAAAAAAAATTAG
CCGGGGTGGTGGCTTATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATCGCTTGAACCCAGGAAGCAGAGGTTGCAGTGAGCCAAGATCGCACCATTGCACTCCAGCCTAGGCAACAAGAGTGAAACTCCATCTCAAAAAAAAAAAAAAAGAGCTGAATCTTGGCTGGGCAGGATGGCTCGTGCCTGTAATCCTAACGCTTTGGAAGACCGAGGCAGAAGGATTGGTTGAGTCCACGAGTTTAAGACCAGCCTGGCCAACATAGGGGAACCCTGTCTCTATTTTTAAAATAATAATACATTTTTGGCCGGTGCGGTGGCTCATGCCTGTAATCCCAATACTTTGGGAGGCTGAGGCAGGTAGATCACCTGAGGTCAGAGTTCGAGACCAGCCTGGATAACCTGGTGAAACCCCTCTTTACTAAAAATACAAAAAAAAAAAAAAATTAGCTGGGTGTGGTAGCACATGCTTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCGCTTGAACCAGGGAGGCGGAGGTTACAATGAGCCAACACTACACCACTGCACTCCAGCCTGGGCAATAGAGTGAGACTGCATCTCAAAAAAATAATAATTTTTAAAAATAATAAATTTTTTTAAGCTTATAAAAAGAAAAGTTGAGGCCAGCATAGTAGCTCACATCTGTAATCTCAGCAGTGGCAGAGGATTGCTTGAAGCCAGGAGTTTGAGACCAGCCTGGGCAACATAGCAAGACCTCATCTCTACAAAAAAATTTCTTTTTTAAATTAGCTGGGTGTGGTGGTGTGCATCTGTAGTCCCAGCTACTCAGGAGGCAGAGGTGAGTGGATACATTGAACCCAGGAGTTTGAGGCTGTAGTGAGCTATGATCATGCCACTGCACTCCAACCTGGGTGACAGAGCAAGACCTCCAAAAAAAAAAAAAAAAGAGCTGCTGAGCTCAGAATTCAAACTGGGCTCTCAAATTGGATTTTCTTTTAGAATATATTTATAATTAAAAAGGATAGCCATCTTTTGAGCTCCCAGGCACCACCATCTATTTATCATAACACTTACTGTTTTCCCCCCTTATGATCATAAATTCCTAGACAACAGGCATTGTAAAAATAGTTATAGTAGTTGATATTTAGGAGCACTTAACTATATTCCAGGCACTATTGTGCTTTTCTTGTATAACTCATTAGATGCTTGTCAGACCTCTGAGATTGTTCCTATTATACTTATTTTACAGATGAGAAAATTAAGGCACAGAGAAGTTATGAAATTTTTCCAAGGTATTAAACCTAGTAAGTGGCTGAGCCATGATTCAAACCTAGGAAGTTAGATGTCAGAGCCTGTGCTTTTTTTTTGTTTTTGTTTTTGTTTTCAGTAGAAACGGGGGTCTCACTTTGTTGGCCAGGCTGGTCTTGAACTCCTAACCTCAAATAATCCACCCATCTCGGCCTCCTCAAGTGCTGGGATTACAGGTGAGAGCCACTGTGCCTGGCGAAGCCCATGCCTTTAACCACTTCTCTGTATTACATACTAGCTTAACTAGCATTGTACCTGCCACAGTAGATGCTCAGTAAATATTTCTAGTTGAATATCTGTTTTTCAACAAGTACATTTTTTTAACCCTTTTAATTAAGAAAACTTTTATTGATTTATTTTTTGGGGGGAAATTTTTTAGGATCTGATTCTTCTGAAGATACCGTTAATAAGGCAACTTATTGCAGGTGAGTCAAAGAGAACCTTTGTCTATGAAGCTGGTATTTTCCTATTTAGTTAATATTAAGGATTGATGTTTCTCTCTTTTTAAAAATATTTTAACTTTTATTTTAGGTTCAGGGATGTATGTGCAGTTTGTTATATAGGTAAACACACGACTTGGGATTTGGTGTATAGATTTTTTTCATCATCCGGGTACTAAGCATACCCCACAGTTTTTTGTTTGCTTTCTTTCTGAATTTCTCCCTCTTCCCACCTTCCTCCCTCAAGTA
GGCTGGTGTTTCTCCAGACTAGAATCATGGTATTGGAAGAAACCTTAGAGATCATCTAGTTTAGTTCTCTCATTTTATAGTGGAGGAAATACCCTTTTTGTTTGTTGGATTTAGTTATTAGCACTGTCCAAAGGAATTTAGGATAACAGTAGAACTCTGCACATGCTTGCTTCTAGCAGATTGTTCTCTAAGTTCCTCATATACAGTAATATTGACACAGCAGTAATTGTGACTGATGAAAATGTTCAAGGACTTCATTTTCAACTCTTTCTTTCCTCTGTTCCTTATTTCCACATATCTCTCAAGCTTTGTCTGTATGTTATATAATAAACTACAAGCAACCCCAACTATGTTACCTACCTTCCTTAGGAATTATTGCTTGACCCAGGTTTTTTTTTTTTTTTTTTTGGAGACGGGGTCTTGCCCTGTTGCCAGGATGGAGTGTAGTGGCGCCATCTCGGCTCACTGCAATCTCCAACTCCCTGGTTCAAGCGATTCTCCTGTCTCAATCTCACGAGTAGCTGGGACTACAGGTATACACCACCACGCCCGGTTAATTGACCATTCCATTTCTTTCTTTCTCTCTTTTTTTTTTTTTTTTTTGAGACAGAGTCTTGCTCTGTTGCCCAGGCTGGAGTACAGAGGTGTGATCTCACCTCTCCGCAACGTCTGCCTCCCAGGTTGAAGCCATACTCCTGCCTCAGCCTCTCTAGTAGCTGGGACTACAGGCGCGCGCCACCACACCCGGCTAATTTTTGTATTTTTAGTAGAGATGGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCATGACCTCAAGTGGTCCACCCGCCTCAGCCTCCCAAAGTGCTGGAATTACAGGCTTGAGCCACCGTGCCCAGCAACCATTTCATTTCAACTAGAAGTTTCTAAAGGAGAGAGCAGCTTTCACTAACTAAATAAGATTGGTCAGCTTTCTGTAATCGAAAGAGCTAAAATGTTTGATCTTGGTCATTTGACAGTTCTGCATACATGTAACTAGTGTTTCTTATTAGGACTCTGTCTTTTCCCTATAGTGTGGGAGATCAAGAATTGTTACAAATCACCCCTCAAGGAACCAGGGATGAAATCAGTTTGGATTCTGCAAAAAAGGGTAATGGCAAAGTTTGCCAACTTAACAGGCACTGAAAAGAGAGTGGGTAGATACAGTACTGTAATTAGATTATTCTGAAGACCATTTGGGACCTTTACAACCCACAAAATCTCTTGGCAGAGTTAGAGTATCATTCTCTGTCAAATGTCGTGGTATGGTCTGATAGATTTAAATGGTACTAGACTAATGTACCTATAATAAGACCTTCTGTAACTGATTGTTGCCCTTTCGTTTTTTTTTTTGTTTGTTTGTTTGTTTTTTTTTGAGATGGGGTCTCACTCTGTTGCCCAGGCTGGAGTGCAGTGATGCAATCTTGGCTCACTGCAACCTCCACCTCCAAGGCTCAAGCTATCCTCCCACTTCAGCCTCCTGAGTAGCTGGGACTACAGGCGCATGCCACCACACCCGGTTAATTTTTTGTGGTTTTATAGAGATGGGGTTTCACCATGTTACCGAGGCTGGTCTCAAACTCCTGGACTCAAGCAGTCTGCCCACTTCAGCCTCCCAAAGTGCTGCAGTTACAGGCTTGAGCCACTGTGCCTGGCCTGCCCTTTACTTTTAATTGGTGTATTTGTGTTTCATCTTTTACCTACTGGTTTTTAAATATAGGGAGTGGTAAGTCTGTAGATAGAACAGAGTATTAAGTAGACTTAATGGCCAGTAATCTTTAGAGTACATCAGAACCAGTTTTCTGATGGCCAATCTGCTTTTAATTCACTCTTAGACGTTAGAGAAATAGGTGTGGTTTCTGCATAGGGAAAATTCTGAAATTAAAAATTTAATGGATCCTAAGTGGAAATAATCTAGGTAAATAGGAATTAAATGAAAGAGTATGAGCTACATCTTCAGTATACTTGGTAGTTTAT
GAGGTTAGTTTCTCTAATATAGCCAGTTGGTTGATTTCCACCTCCAAGGTGTATGAAGTATGTATTTTTTTAATGACAATTCAGTTTTTGAGTACCTTGTTATTTTTGTATATTTTCAGCTGCTTGTGAATTTTCTGAGACGGATGTAACAAATACTGAACATCATCAACCCAGTAATAATGATTTGAACACCACTGAGAAGCGTGCAGCTGAGAGGCATCCAGAAAAGTATCAGGGTAGTTCTGTTTCAAACTTGCATGTGGAGCCATGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGAAAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAACATAACAGATGGGCTGGAAGTAAGGAAACATGTAATGATAGGCGGACTCCCAGCACAGAAAAAAAGGTAGATCTGAATGCTGATCCCCTGTGTGAGAGAAAAGAATGGAATAAGCAGAAACTGCCATGCTCAGAGAATCCTAGAGATACTGAAGATGTTCCTTGGATAACACTAAATAGCAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAACTGTTAGGTTCTGATGACTCACATGATGGGGAGTCTGAATCAAATGCCAAAGTAGCTGATGTATTGGACGTTCTAAATGAGGTAGATGAATATTCTGGTTCTTCAGAGAAAATAGACTTACTGGCCAGTGATCCTCATGAGGCTTTAATATGTAAAAGTGAAAGAGTTCACTCCAAATCAGTAGAGAGTAATATTGAAGACAAAATATTTGGGAAAACCTATCGGAAGAAGGCAAGCCTCCCCAACTTAAGCCATGTAACTGAAAATCTAATTATAGGAGCATTTGTTACTGAGCCACAGATAATACAAGAGCGTCCCCTCACAAATAAATTAAAGCGTAAAAGGAGACCTACATCAGGCCTTCATCCTGAGGATTTTATCAAGAAAGCAGATTTGGCAGTTCAAAAGACTCCTGAAATGATAAATCAGGGAACTAACCAAACGGAGCAGAATGGTCAAGTGATGAATATTACTAATAGTGGTCATGAGAATAAAACAAAAGGTGATTCTATTCAGAATGAGAAAAATCCTAACCCAATAGAATCACTCGAAAAAGAATCTGCTTTCAAAACGAAAGCTGAACCTATAAGCAGCAGTATAAGCAATATGGAACTCGAATTAAATATCCACAATTCAAAAGCACCTAAAAAGAATAGGCTGAGGAGGAAGTCTTCTACCAGGCATATTCATGCGCTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAATTGTACTGAATTGCAAATTGATAGTTGTTCTAGCAGTGAAGAGATAAAGAAAAAAAAGTACAACCAAATGCCAGTCAGGCACAGCAGAAACCTACAACTCATGGAAGGTAAAGAACCTGCAACTGGAGCCAAGAAGAGTAACAAGCCAAATGAACAGACAAGTAAAAGACATGACAGCGATACTTTCCCAGAGCTGAAGTTAACAAATGCACCTGGTTCTTTTACTAAGTGTTCAAATACCAGTGAACTTAAAGAATTTGTCAATCCTAGCCTTCCAAGAGAAGAAAAAGAAGAGAAACTAGAAACAGTTAAAGTGTCTAATAATGCTGAAGACCCCAAAGATCTCATGTTAAGTGGAGAAAGGGTTTTGCAAACTGAAAGATCTGTAGAGAGTAGCAGTATTTCATTGGTACCTGGTACTGATTATGGCACTCAGGAAAGTATCTCGTTACTGGAAGTTAGCACTCTAGGGAAGGCAAAAACAGAACCAAATAAATGTGTGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGGACTAATTCATGGTTGTTCCAAAGATAATAGAAATGACACAGAAGGCTTTAAGTATCCATTGGGACATGAAGTTAACCACAGTCGGGAAACAAGCATAGAAATGGAAGAAAGTG
AACTTGATGCTCAGTATTTGCAGAATACATTCAAGGTTTCAAAGCGCCAGTCATTTGCTCCGTTTTCAAATCCAGGAAATGCAGAAGAGGAATGTGCAACATTCTCTGCCCACTCTGGGTCCTTAAAGAAACAAAGTCCAAAAGTCACTTTTGAATGTGAACAAAAGGAAGAAAATCAAGGAAAGAATGAGTCTAATATCAAGCCTGTACAGACAGTTAATATCACTGCAGGCTTTCCTGTGGTTGGTCAGAAAGATAAGCCAGTTGATAATGCCAAATGTAGTATCAAAGGAGGCTCTAGGTTTTGTCTATCATCTCAGTTCAGAGGCAACGAAACTGGACTCATTACTCCAAATAAACATGGACTTTTACAAAACCCATATCGTATACCACCACTTTTTCCCATCAAGTCATTTGTTAAAACTAAATGTAAGAAAAATCTGCTAGAGGAAAACTTTGAGGAACATTCAATGTCACCTGAAAGAGAAATGGGAAATGAGAACATTCCAAGTACAGTGAGCACAATTAGCCGTAATAACATTAGAGAAAATGTTTTTAAAGAAGCCAGCTCAAGCAATATTAATGAAGTAGGTTCCAGTACTAATGAAGTGGGCTCCAGTATTAATGAAATAGGTTCCAGTGATGAAAACATTCAAGCAGAACTAGGTAGAAACAGAGGGCCAAAATTGAATGCTATGCTTAGATTAGGGGTTTTGCAACCTGAGGTCTATAAACAAAGTCTTCCTGGAAGTAATTGTAAGCATCCTGAAATAAAAAAGCAAGAATATGAAGAAGTAGTTCAGACTGTTAATACAGATTTCTCTCCATATCTGATTTCAGATAACTTAGAACAGCCTATGGGAAGTAGTCATGCATCTCAGGTTTGTTCTGAGACACCTGATGACCTGTTAGATGATGGTGAAATAAAGGAAGATACTAGTTTTGCTGAAAATGACATTAAGGAAAGTTCTGCTGTTTTTAGCAAAAGCGTCCAGAAAGGAGAGCTTAGCAGGAGTCCTAGCCCTTTCACCCATACACATTTGGCTCAGGGTTACCGAAGAGGGGCCAAGAAATTAGAGTCCTCAGAAGAGAACTTATCTAGTGAGGATGAAGAGCTTCCCTGCTTCCAACACTTGTTATTTGGTAAAGTAAACAATATACCTTCTCAGTCTACTAGGCATAGCACCGTTGCTACCGAGTGTCTGTCTAAGAACACAGAGGAGAATTTATTATCATTGAAGAATAGCTTAAATGACTGCAGTAACCAGGTAATATTGGCAAAGGCATCTCAGGAACATCACCTTAGTGAGGAAACAAAATGTTCTGCTAGCTTGTTTTCTTCACAGTGCAGTGAATTGGAAGACTTGACTGCAAATACAAACACCCAGGATCCTTTCTTGATTGGTTCTTCCAAACAAATGAGGCATCAGTCTGAAAGCCAGGGAGTTGGTCTGAGTGACAAGGAATTGGTTTCAGATGATGAAGAAAGAGGAACGGGCTTGGAAGAAAATAATCAAGAAGAGCAAAGCATGGATTCAAACTTAGGTATTGGAACCAGGTTTTTGTGTTTGCCCCAGTCTATTTATAGAAGTGAGCTAAATGTTTATGCTTTTGGGGAGCACATTTTACAAATTTCCAAGTATAGTTAAAGGAACTGCTTCTTAAACTTGAAACATGTTCCTCCTAAGGTGCTTTTCATAGAAAAAAGTCCTTCACACAGCTAGGACGTCATCTTTGACTGAATGAGCTTTAACATCCTAATTACTGGTGGACTTACTTCTGGTTTCATTTTATAAAAGCAAATCCAGGTGTCCCAAAGCAAGGAATTTAATCATTTTGTGTGACATGAAAGTAAATCCAGTCCTGCCAATGAGAAGAAAAAGACACAGCAAGTTGCAGCGTTTATAGTCTGCTTTTACATCTGAACCTCTGTTTTTGTTATTTAAGGTGAAGCAGCATCTGGGTGTGAGAGTGAAACAAGCGTCTCTGAAGACTGCTCA
GGGCTATCCTCTCAGAGTGACATTTTAACCACTCAGGTAAAAAGCGTGTGTGTGTGTGCACATGCGTGTGTGTGGTGTCCTTTGCATTCAGTAGTATGTATCCCACATTCTTAGGTTTGCTGACATCATCTCTTTGAATTAATGGCACAATTGTTTGTGGTTCATTGTCTCCTTAAATTAGACTGTAAGCACCTTGATGGAACTCATACTACCTTTTATTTCACACACACGCACACGCGCACACACAGCCTACACATACACTGCCTAGCTCATTGTAGCATACTAAATACTGATTTTAATGAATAAGCTAAACCTTCGAAACCCATTTGCTAATCCCAGCACTTTGGGAGGCCAAGGTGGGTGGATCACCTCAGGTCAGAAGTTTGAGACCAGCCTGGCCAACATGGTGAAACCCCACATCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGCCAATGCCTTGTAATCCCAGCTATTCTGGAGGCTGAGACAGGAGAATCGCCTGAACCTGGGAGGCGGAGGTTGCACTGAGCTGGGATTGTACCACTGCACTCCAGCCTGGGTGACAAAGTGAGACTCCATCTCAAAAACAAACAAACAAAAACACATCATTTCCCCTATAGCAAAAACATGACGGCACTTACTGTATCAAGAGAGGTGAGAAAAAGGAGCCACAGCAGGATGATTCAAGGGACTCTGCATAGCTCCATTTTAAGAATATGCCTACTGCAGGTCAGAGAAGGTAAGCAAACTGCCTAAGGCCACACAGCCAGGTACAGAACTCTCACCAATATTATTGCCAGCAATCGCAATTTTGGTGTTTATTCTTGGTACCAAGTTGGAGACTATAGGGTTCTCTTCCTAATAGAGACCATCTAGCCTTTCACTGTTTTGTGGATACTTCTTTCTCTTCTTCTTTTTTTTTTTCCCTTTTAAAATCTAGTTATTTTTTTCTTTTTGGTTTCTTTGACACAGGGTCTCTTACTCTGTTACCCAGGCTGGAATGGAGTAGTGCAGTCATGGTTCACTGTAGCTTTGACTTCCTGGGCTCAAGCGATCCTCCTACCTCAGCTTCCCGAGTAGCTGGGACCACAGGCGCCCACCAACACCTCCAGCTAATTTTTAAGTTTTTACTAGAGACAACATCTCACTATGTTGCCCAGGCTGGTCTCAAAATCCTGGGCTCAAGTGATCCCACCTCAGCCTCCCAAAATGCTGGGATTACAGGTGTGTGCAACCACGCCTGGCCTATTTTTTTTTTAATTGCTCATAAATCATCTTTTTTCTTTAAAAAAAAGAAAGATGGGAGGCTAAAGCAGGAGAATCACTTGAACCCAGGAGGCGGAGGTTGCAGTGAGCTGAGATCATGCTGCTGCTCTCCAGCCTGGGCAACAAGAGTGAAACTCCATCTCAAAAAAAAAAAAAAAGAAAGTACACAATTTTACTTTCTGGACCTAATGGTCAAGGCCAATAATTTGGTCACCTATGAAATAAATAAAAGCTTTACCATATATATGACCATTTGATAATGTAATATGAAATGTTTATGTACTAAAGGCAGAATAGTCTAGAAAAAACATTCTGTATCACAACGTCTAAAAATGAATATCATCTTCATCATAGAACCAGGCTCTTTCTCCTAATTTTTTTTTTTGAGATGGAGTTTTGCTCTGTCACCCAGGCTGGAATGCAGTGGCACAATTTTGGCTCACTGCAACCTTCAGCTCCCAGGTTCAGGATCAAGTGATTTTCGTGCTTCAGCCTTCTAAGTAGCTGGGATTACAGGTGACTGCCACCACACCCAGCTCATTTTTTTGTATTTTTTTAGTAGAGAGAGGGTTTCACCATGTTGGCCAGGCTCGTCTCGAACTCCTGACCTCAAATAATCCACCCGTCTCAGCCTCCCAAAGTGCTGAGATTACAGGCGTGAGCCACCAGGCCTGGCCTCCTAATTTTTATTTGTAGAAGTGGCACCAAAATTTTCCAAGTTCTCATGC
AAAAATTCAGGCTCATCTCAGTTTATTTTTTTCATTTATTTATCTCCCACTAAATTGACAACTTCTAATAATTAGGTTGGTTCTTTGTATTCCCAGCACAGGGTTCTATGCAGAATACACACACAGCAGTTGCTGGCAATAATATTGGTGAGAGTTCTGTACTGGGCTATGTGATCTTAGACAGTTTGCTTATGTTCTCTGACCTGCCGTAGGCACATTCTTAAAATGAAGCTGTTCAGACCCCCTCGATTCATCCTGCTGTGGCTTCTTTTTCCCACCTAAATCTTAAATACCCTTTTAGCTGCTAGTAAGTGAATGATGTTTTTTTATGAACTTTCTGAAGTCAGATTAGATGAAGTTGAGAAAAGCCTGATATTCTTATAAAGTTATATATGTGCATCATAGAAAACTTAGAAAATACAGATAAACAAAAATCATCCATGGACGAACCTTGAAGACATTGTGTTAACTGAAATAAACCGGACACCAAAGGACACATGTTATATGCTTCCACTTATATGAGATACCTAGAATAGTTACATTTGGTTACTCTGGGTACATTGCCTATAGATAAGCCTTGCTCCACAAGGAGCAGTTAAAAAAAAAAAAAAGATAAATTCATAGGATGGAAGGTAGAATAGTGGTTACTAGGGACTTGGGGAGGGGGAAATGGGGAGTTACTGTTTGATGAGTGCAGATTTCAGTTTGGGATGATGAAAAAGTTCTGGAGATAGATAGTGGCAATGGTAACACAACAGTGTGAAAATAATGCCACTGAACTGTACACTTAAAATGATTAAAATGATAAGTTAATTGTAATTTGTGTTATCCAGAAATGGTTAGCAATTTATTGGTGTATATTCTTTTAGTATTCCTGTGTGTGCACAGGGGTGCTTGTATATACTTTATCTTTAAAATATATCCAGGAAGCTAGGCACAGTGGCTTACACCTGTAATCCCAGCACTTTGGGAGGGTGAGGCAGGAAGATTGCCTGAGCCCCGGAGGTCAAGGCTGCAGTGAGTTGTGATCACGCTACTGCACTCTGTTCTGGGCAACCCCTGTCTGGGAAAAAAAAAAAAATTAGTGAGGCTTAGTGGTGCACACCTGTAGTCTCAGCTACTTGAGTGGCTGGGGTAGGATTGCTTGATCCCAGCAAGTTGAGGCCGTGGTGAGCCATGATGGTGCCACTGCACTCCATCCTGGGTGATATGGTGAGACCCTGTCTCAAAAACAAGAAATCCAGATAATTCTGTGCATTATAATCTAGCTTTTACTGGATCATTAAAATTCTTTTTTCTTTTTTTTTTTTTTTTTCTGAGATGGAGTTTCACTCTTGTTGCCCAGGCTGGAGTGCAGTGGTGTGACCTTGGCTCACCGCATCCTCTGCCTCCCGGGTTCATGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCATGTGCCACCATGCCCAGCTAACTTTGTATTTTTAGTAGAGACAGGGTTTCTCCATGTTGACCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATCCACCCACCTCGGCCTCCCAAAGTGCTGGGGTTACAGGCGTGAGCCACCGCACTCAGCCTGGGTCGTTAAAATTCTTAAGTGACTTCATTTTTAATTACTATATGGGATTCTATCTTTCCAGTGTATCATGATTTATTTGACCTATTGCTGAATGTTGGAGGTTTCAGGGTAAGAGGCACAGTTTGCTATTATGTACATCACTATAGTGGCATCCTGATAGCTAAATATTTGCCTACATCCCTGATTATTTCCTTAGTCTAAATTACTGGGACTAGGATTTTGGTGTTTGATACATGTTACTAAATTGTTTTTTAGAAAGATTAAACCAGTTTATGCTCTTCCAGCCCCTGTGGTATATGATAGTTCCCATTTTCCTGTACCTTGCCAACACTGGGTGATATCCAGTTTTAAAATCTAAATCTTGCATTGCTATGAGAACTACAATTAGAGAAGGCTTAT
CTTCTACTGCCCATTCTCTGTACAGAGCAAATCCCTCTAGACCTGAAGCCCCTTGGAGTTGTCAAGAAACCTTTGAGATGACTCCCCACTCTGTATCTGAGCTGTCACCAGTATTCTCCACTTCTTCAGGATTGCCATGGCAACTAAATTGATGAAAAGATTTAGGAGGCCTTTTCTCTCTTTGCAATTCCTATGATCCTTTTTGAATGTGGGTTTGGGACTCTGTCAATATACCCATCATCTAATTCTGTCCATTGTGTTTTAAAGTTTAAGGTTGCAATTTCTGATTACATCTGCCTTAGCCATACTGTATTATATTTGACATTCAATATACAATGTCCTTGTTTTTCTGTATTTCTAATCTTATTCCCAGAGATGTGTCTATTTGTTCAGGATTCATTTTGCAACGTGTTTTTACTAAGCATCTACCCAAAACCGTTGAAGTCAGATTTCAGGCTGTCTTACGTCTAAAGTAGCACAGGCAGGAAAAACTATTGAAGTGGGATTTTTTTTTCCCTTTTTGTACTGAACCGAGAAAAAGTATATAGATGATAGAGAATTCCTAATTTGGTATCATTGATATCTGGGTTTTTGTTTGTTTTTACAGAAGACTGATTAACTATACTTATTTATTAATTTATCTTCTCATTAATAAACACTTGCTGAGTGCTTACTGTCTGCTAGGCATTAGGGAGACAAATATGATTAAGGGAAGCTTCCTCCTATCAAGGTCATGTGTTCCATTTGGGTATACTAATGCATTAGCAATGTAAATCAAGTAGTGAGAGATCATCTGTTCCCGATAGGAGATGGATTATTGGTGGGGACTTCTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTATGTATGTGATAAAATAAATATAGGAAATGTTAATTATAGATTCTAAGTAGTAGATATATAAACACTCATTGCAAAGTTGCTTCAAGTTTTCTGTATATTTGAAAATATTCACAACATGTCGACAAAACTAGCATGATAAAGCCACTATTTGTGCTAAGACTTCAGCTTGTATCTGGATTAGGCTTATTATGTAGTAGTAGGAACATTAGAAATAGTTTTAACTCATTAAATACACATGTTTTATGGGAAGGTTTTATATATATATTTATATGTAATGAATGTGAACAAACAAGGGTCAGATATACACTCTGCTTCCCTCCAGACCAGTTCCGGCTGCTCTGCTGCACATTTCAGGAGTCTTATTAGAATTAGCCACATTCTGCCCACTTGCCCTTACTTCTCATATTTCACAACTCCTCCTGGTGGGGACTTAAGGAGACATTCAAACTAGGCCTTGAAAGATGAGAATTTTTCCAAGTGGAAAAAGAGGAGTGGCAGCAAGTAAGGTAAAGGTACAGAGTCATGGAATTCCCAGGAAACGTAAAGTTGTCATGTGTTATAGGAAAACAACTTGTGTGAGGGGTGTTGGGAGAAATGAGAGATAATACCAGGGTATAAAGGGCCTTTTGAATGCTATGTTGAGGAATTTTATCCTAATGGCAGTAATGACTAACAATTATATAGTGTTCAAAAAGTATAAATCAGCAGTGGTATACCACTAAGGGTTTTTTTCTTTTCTTTTTTTTTTTGAGACAGAGTTTTGCTCTGTTGCCCAGGCTGGAGTGCCGTGGCACGATCTCAGCTCACTGCACTTCCGCCACCTGGGTTCAAGTGATTCTTCTGCCTCAGCCAGTGTTTCACTGTGATGGCCAGGATGGAGCACTAAGGGTCTTTATGGAAGAAAAAGACATGATAAACAAGGCTTTTAGGGAACTTCTACAGTAATGTAGCTGTATTAAAAGTAGAGATCAGAGCAGCATAGTAGAAGTAGAAGGCTAGAGCTAATTGAAGGAGCACTTCAGAATTAGAATCAAGAAGTCTTAGAAACCTATTGGTTTTATTCTCCCTAATGTATTTGGCCACTTACCTGCTGGGGAATTTGTCTAAGTTATAAAAAATAATTCCTTTG
GGAAACCCAAAGGAAAGTTATCTATTAATAATTACCCCACTACTTTTTCTGATTTATGTAATGGCCACGTAGAGGTTAGATGTGATGGTTGTGACAGTAGTGACTAATACAGCCTGTGAAGCATTTTGGTCAGATATCTATGTGCTTTCATTCCAGGTTGACTGAGGCAAGACTTTGGCTAGGGTTTGATCAGTGATGTAACTACTCACGAGTACCACGTGGTGGCAATGGCATTGCTGCAGACCTTGGCAGCAAAGCAGTGTTAGAGTAGCAGTAGAAACCTTTGTGAAGCTAGGAATACATTTTCTGGTCATAAAAACCTCCTGAAAATTGTGAACTCAGTGTAGCAGGAGAAAGAAGATGGCTTGTTTTTAGTAAAGGGCAAAGTCATTTTTAAGGATCAGAAGAAGAAACGGAGAGTGAAACAATGTGTTCCTGCCCTACTCCCCCACTGGACTTTTTGGCAACCATTGCTGTTCCTTCTAAAAGTGATTTTTAAACATGTATATTTTGAAGCCAGGCACAGTGACTCACGTCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCAGATCACCTGAGGTCAGGAGTTCAAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGGCCAGGTGTGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGAGGATCATGTGGTCAGGAGATCCAGACCATCCTGGCTAACACGGTGAAACACCATTTCTACTAAAAATACAAAAAATTAGCTGGGCATGGTGGCGGGCGCCTGTAATCCCAGCTACTCAGGAGGCTGAAGCAGAAGAATGGCTTGAACCTGGGAGGCGGAGCTTGCAGTGAACCAAGATTGCGCCACTGCACTCCAGCCTGGGCAACAAAGTGAGACTCCGTCTCAAAAAAAAAAAAAAAAATTAGTCGGGCATGGTAACAGGTGCCTGTAATCCCAGCTACTTGAGAGGCTGAGGCAGGGAGAATTGCTTGAACCAGGTAGGCGGAGGTTGCAGTGAGCCAAGATCGCACCACTGCACTCCAGCCTGGGGCAACAGAGCAAGACTGTCTCAAAAAAAATAAATAAATAAAATAAATTCTTAAGAAGGATATTTTGGAAAACTCCTTACATACCTAAATTCTTTGTTTATCAAATACTTGGACTTAGCACACTCTTCTTTGAAATGGACCAATAAACAACAGGAGCCCATAAGCAAAAAGAACTCATTATTTTAAAAACAGTAACTATCCTTACAGGCTTTCTCAGGGCTCTTTCTGTTGGATCCTTCCCTCTCACAGGTCCTTGCTAATGATCTCTAGGTGGACACATTCTAGATGAGATGTCCCTGTCTAGAATGGCAGCACCATGAGGGCTATATCCTCAGTACTAGGACAGCGCCTGGTGCTTAATAGATAGTAAATAGTTGTCTAATTAACTGAGCAAACAGATAGATTCATGAATTAGCTTTTTGCTTTTTCTGTTAGAAACTAAAGGTTCAGGTCAGGCACAATGGCGCATGTCTCTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCTGATCACTTGAGGTCAGGAGTTCAAGACCAGCCTGGCCAACATAGTAAAACCCTGTTTCTACAAAAATTACCAAAATTAGCCGGGCGTCTTGGCAAGCACCTGTAATGCCAGCTACTTGAGAGGCTGAGGTGGGAGAATCGCTTGAACCTGGGAGGAAGAGGTTGCAGTGAGCCGAGATGGTGCCAACCTGGGTGACAGAGGGAGACTTAAAAAAAAAAAGAAAGAAAGAAAGAAAAGAAACTAAAGGTTCAAAGAATCCCAGAAAAGGAAGAGTCCTCACAAGCCAGTAATCTAGGCAGGATTACTGATAGTATTTTTATATTTGTTGTATTTTTATAAAATGCCATAGATAGAGGGCTTTTTTCAACATTACATCAGTCTAAAAATCACACATTTTTATATGAACTAACCTAAATGT
CTGATGAATCTCACAACACCAAGTCTTTGAAATGTGCCCATATAAATAAAATGTTAACAGATTCATGCTAATTTTAAATATCGATAGTGTTTAAATGCCTTAATTATTTTTTCACTCCCTAGCTTTAAAAGAAAATAACCAACTTCAAAAGGACATCACAATAACATCAAGTCTATTTGGGGGAATTTGAGGATTTTTTCCCTCACTAACATCATTTGGAAATAATTTCATGGGCATTAATTGCATGAATGTGGTTAGATTAAAAGGTGTTCAGCTAGAACTTGTAGTTCCATACTAGGTGATTTCAATTCCTGTGCTAAAATTAATTTGTATGATATATTTTCATTTAATGGAAAGCTTCTCAAAGTATTTCATTTTCTTGGTGCCATTTATCGTTTTTGAAGCAGAGGGATACCATGCAACATAACCTGATAAAGCTCCAGCAGGAAATGGCTGAACTAGAAGCTGTGTTAGAACAGCATGGGAGCCAGCCTTCTAACAGCTACCCTTCCATCATAAGTGACTCTTCTGCCCTTGAGGACCTGCGAAATCCAGAACAAAGCACATCAGAAAAAGGTGTGTATTGTTGGCCAAACACTGATATCTTAAGCAAAATTCTTTCCTTCCCCTTTATCTCCTTCTGAAGAGTAAGGACCTAGCTCCAACATTTTATGATCCTTGCTCAGCACATGGGTAATTATGGAGCCTTGGTTCTTGTCCCTGCTCACAACTAATATACCAGTCAGAGGGACCCAAGGCAGTCATTCATGTTGTCATCTGAGTACCTACAACAAGTAGATGCTATGGGGAGCCCATGGAAGATACATGGTATACAACATAGCTCTTGCTCTATTGGAAGCTAAGTGGAATGGGAGAAATTGGTGACAGGCAACCCCATAATTTCAGAAAGCTATGAAAAAGTACTCAGACATATTCCTTATAACACTGGTGTCACATCACAAAGACCTATTTAATGTGCTTCTGATTTATAGGGAGAGACATCCTATACTTCAGGAACTGCACTTTGATCCACAGAAAGCCTAGTGATGTAGAGCTCCTGTTAGTTCAAAAGGAAAAGAAAAGAACAACACAGAAAGCCTAATTATGCAATAGAGTCAAGTGCTTTATAGCAATGTTACAGTTATCAAAAAAAATCCAGATGGACCTCTGAGAGGATGCCATTGGAGTAACCAGGCAGATGCAGTTGATCAGAGCTGACTTCCTATAAGAAGTGAGCACTGAGCTGAGGAATAATGGCATAAATGAAGGAAAGTGAGATGGAAATTTGAGTTTTTAATTGGAAAGACAATACATCAGGCAGATTTTTAAATAGGGGCAAACAAACAGACACATAGGAGATGCTAGGCATGGGGTCCCCACTAGGATGCTGCTTAGAAACATGCAGGGGTGGTGAGTACTCCCAAAGTACACTTCATTCCTAGCTCAGTGATTCTTATCTGAGTGTTAAAGTTCCTTCTTCAGCACCCCGTTCCACAGTCCAACTGGGAACTTTAAGACCTTTCTTGGAGTCTTTCTAGGAACTCAAGTCTGCTACTTATACAGAACAGTGGCTTTGGTCCCCAGTTGTGCCTTGCAGTATTTTTGTGTTCAGGAAGAAACAGTAGCTCTTGGATAAAGAAGCTAGCTAGAAACTCTGTTGCTATGGCAGTGCTTCAAAATGTATTTCCTTAAATGCTTTCTTTGTAACTATCTTCATTTAGTTCATCTCTCAGATAATGAGAGATCAGAGTCCCATCCCCAGTATAATACTCTTCTTTAGGGTACTTTCACCATCTTCAGTCTAAACACAGACTAGACTTTCAATTATAATGTGTAAGATTTAAAATGTTATTATTGTGTGACTTTGAATATCTGTGTAAATCTACTATCTCCTCTTTGGTATATACGTGTGTTTATTTTTTTCTGGAGATCTGTAACTGAAATGCTTAATTTCTGAATTGTTTTGGATATCACAACTTAATACCAACATAAGTTTTGAG
CCTTTTTCTCCCTAAATCTGGTGTGAGTCTAACTGAAACTCAAATGAACTTTTTAAAAATAATTTTTTCTTTTCTTTAATTTTTTTTTTAAGTAGAGACAGGGACGCACTGTTAACTAGGCTGGTCTTGAACTCCTGATCTTGAGCCATCCTCCCCGACCTGAGCCTCACCTTATAGAGAGGGTCTTGCTCTGTTGCCCAAGCTGGAGGGCAGTGGCATAATCACAGCTCACTGCAGCCTCTCGACCTCCTCAAGCGATCCTCCTGCCTTAGCCTCCCAAGTAGCTGGGACTATAGGCGTCCACCACCATACCCAGCTAATTTTTTTTTTTATTTTTTGTAGAGACAAGGTCTCCCTATGTTGCCCAAGTTGGTCTCAAACTCCTGGACTCAAGCAGTCCTCTCACCTCAGCCTCCCAAAGTGCTGGGGTTACAGGTGTGAGCCATGGCACCTGGCCAGAACTTCTAGTAAAAAGAATATTGTTGCCGGGTACGGTGGCTCACGCCTGTAACCCCAGCACTTTGGGAGGCCAAGGCAGGCGAATCACCTGAGGTCGGGAGCTCGAGACCAGCCTGACCAACATGGAGAAACCACATCTCTACTAAAACTACAAAAAATTAGCCGGGCGTGGTGGCACATGCCTGTAATCCCACCTACTTGGGAGCTACGGTGCCTGGCCTAGTTTATTATTTCTTAATATCTGTTGTCTTCCAGTGTCTTCCTTAATTCTTCACAATACCCTGTACAATGCTTAGCACACAGTGGGCAGTCTGTAAGTTTATTAAATGTTTGGTGTGGCCCATACTTCCTATCCACAAAGAATGTAACATGTTAAGACATCTAGATGAGGGAATGATTTAAGAGGAACTACAATAATATTCTGAAACTTGGACTCTGGATCTCTGCATTTAGACTTTCCTAAACCAGCCAGCAAGTAGATCATCATGTCACAAGGCTTAGGTTGGGCTTGCTGTTCAGAGAATGAATTAAGGATTAAGGAGAAAAAAAAGCAGAAAGGTTTTGCTCTGTTTTTCAGGTTCTATTGAGTTGTTAACTTCTAACAAGTTATCTTATTTGCTTCATTGCATGAGGCCCATTGTAGTAAGAAGAGGAATTTATATGCTAAATGTTCTGGTGATAGAATGACTTTTCTTTTTTTTTACAGTCCAAAGGTCTTTTTTTTTTTTTTTTAACACCTATTATGCCATGAATTCATAGGGAATAGGTTCCAGCTGCTCAGGCTCCTTCCCATTGGTTCTCACAAAGTGTGCTTCTCTGGGTGGAGCAGGCTGGTGCTTCAGTTGAACCCACGTACCTTTCTCTTTGGCTTCTTTCTTTTTCTGATCATTTTCCTTCACGCGTTTCAGGAAGCTGTCTTGGCTCTTAGAGTGTTTAATGTGCTCAATACGCACATTAATTCTCTTGGCAAGAATCTTGCCCTTAACTTGTTTACAGCGATGCCAACAGCATGCTGGGTCACGTTGTAGACTTTTCCAGTTTTGCCATGGTAACACTTGTGGGGCATTCCTTTTTGAACAGTACCCGTTCCCTTGATGTCTACAATTTCACCTTTCTTACAGATTCGCATATACATGGCCAAAGGAACAACTCCATGTTTTCTAAAAGGCCTAGAGAACATATATCAGGTGCCTCTCCTCTTTCCCTTTGTGTTCGTCATTTTGGCAAATTACTGAAAGATGGTGGTTCTGGCCAAAAGGAGGAATGACTTTTTAATAGCTGTGTTTGTATCTGAGCCTTCCCTCTGCCTTTCATTTTTTTTGTTTTGTTTTGTTTTGTTTTTGTTTGAGATGAAGTTTCACTTTTGTTGCCCAGGCTGGAGTGCAATGGTGTGATTTCGGCTCATTACAATGTCCGCCTCAGCCTCCTGGGTAGCTGGGATTACAGGCACCCGCCACCACGCCCAGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGCCGGGCTGGTCTCAAACTCCTGACCTCAGGT
GATCTGTCCACCTCGGCCTCTCAAAGTGCTGGGATTGTAGGCGTGAGCCACATCACCTGGCCACTTTTTTAACTCTTTCCAATGGTTAATTCCGTTTGATATGGTTCCTTGGAACTTGCACATTACCCTTTATCAATTATCACCCTGTATTGGGGGTGGGGAGGATGATACCTCTCTTCATAGTTAGATCCTACTTACTTTCAACAGAGTTCTTAACAATCCTAGAAACTCACAGGTCCAGAAAAGACAAGCATAAAGGAAACTATAAATAATGCATTTGAAGACTAACTCAGGAAATCAATGATTATTTCCCCCCAGGCTACCCAGTGTCTTAAAAAAACAGTTTAATTAATACAATCTTTTGTTTCAATTTTCTACCTATATTTATGGCTTTTAGCTTTTCTAATAAAAGCTCAAAATGAATTACAGTCATCAGTGACTTTTTAATGAATAGAAGACTTTTGCAATTTTTAACTATTTGTTTTTACTTATTAAATATTTCCGCCTTGGCCAGGCATGGTGGCTCACGCCTATAATCCCAGCACTGTGAGATGCCAAGGCAGGAGGATCACTTGAGTTTAAGAGTTCTAGACCAGGCTGGGTATGGTGGCTCATGCCTATAATCCCAGCACTTTGTGAGGCCAAGGTTGGCGGATCACCTGAGGTCAGGAGTTTAAGACCAGCCTGGCCAACATGGTAAAACCCCATCTCTACAAAAAATACAAAAATTAGCCAAGGGGTGGTGGTGGGCACCTATAATCCCATCTTCTTGGGAGGCTAAGGCAGGAGAATCGCTTGAACCTGGAGGCAGAGGTTGCAGTGAGCCGAGATCATGCCACTGTATTCCAGCCTGGGTAACAGAGCAAGACTCTGTCTCAAAAAAAAAAAAAAGTTTGAAACCAGCCTGGTCAACACAGCAAGACACCCATCTCGTTGAAAAATAACGGTCGGGCGCAGTGGCTCACGCCTGTAATCCCATCACTTTGGGAGGCCGAGGCAGGCAGATCACCTGAGGTCGGGAGTTCGAGACCAGCGTGACCAACATGGAGAAACCCCATCTCTACTAAAAATACAAAATTAGTTGGGCGAGGTGGTGCATACCTGTAATCCCAACTACTTGGGAGGCTGAGGCAGGAGAACAGCTTGAACCTGGGAGGCAGAGAGGTTGTGGTGAGCCAAGATCATGCCATTGCACTGCAGCCTGGGCAACAAGAGCAAACTCCATCTCAAAAAAAATAAATAAATAAAAATAAATAAATAAGTACTTCTGCCTTTAAGCCACTTCCTAGAAGGCAGTGGCACAAAGTGATACATTTGGAGGAGTAAATATATTACAAAATGAATTAGGCTGGGCGCAGTGGCTCATGTCTGTAATCCCCGCACTTTGGGAGGCCAAGGCGGGTGGATCACTTGAGGTCAGGAGTTCGAGACTAGCCTGATCAACAGGGTAAAATCCCATCTCTACTAAAAATACCAAAAAAACTAGCTGGGCGTGGTGGCAGGCACCTGTAATGTCAGCTACTAGGAAGGCTGAGGCAGGAGAATCGCTTGAACCCAGGAGGTGGAGGTTGCAGTGAGCCAAGATTGCACCATTGCACTTCAGCTTGGGCAACAGAGTGAGACTCCGTCTCAAAAAAAAAAAAAGAACTAACATGCCAGAACTTTGCCTTCAGTATGTTTTGTGATTTTTCCCTTCTTGTGCCATTTCATCATTAGTTCCATGTATTATTTAAGATTTCTTATCAACCAGCACCTTGGGATTTTTTTGTGTATGTGTTGGTTTAGGGGGTTTATTTGTTTTTTTCTTTTTTTTCGGTAATTGAAAATGTGAAGCAAAATGTCACCTGTTTTTTCTTTCATGTCTGACACTCATGTCTTGTTTACCCCCGACATGCAGAAGCTGAAATCCCCATTTCATACAGTCTTCAATGTGGAGGCAGTAGGGATGGAGAAAATAATGTACTTTGTGCTCTCCGGTACTCTTTCTTTCCTATTGTCTG
AGGGGATTTGGGCATAATTTATTTTGCTGCAGAGATAAAAATTTGTTATATATATTTTTTATCATTCAGGGCCAAGGAATATAGATTTTTTTTTTCAGCCTTGTCTCAGCTGGGTGTCTTTATTTACTCTGTCTTAAAGTGTTCCTTTTATTATCATTATTATTTTTTAATCATTGAATTCCATTTGGTGCTAGCATCTGTCTGTTGCATTGCTTGTGTTTATAAAATTCTGCCTGATATACTTGTTTAAAAACCAATTTGTGTATCATAGATTGATGCTTTTGAAAAAAATCAGTATTCTAACCTGAATTATCACTATCAGAACAAAGCAGTAAAGTAGATTTGTTTTCTCATTCCATTTAAAGCAGTATTAACTTCACAGAAAAGTAGTGAATACCCTATAAGCCAGAATCCAGAAGGCCTTTCTGCTGACAAGTTTGAGGTGTCTGCAGATAGTTCTACCAGTAAAAATAAAGAACCAGGAGTGGAAAGGTAAGAAACATCAATGTAAAGATGCTGTGGTATCTGACATCTTTATTTATATTGAACTCTGATTGTTAATTTTTTTCACCATACTTTCTCCAGTTTTTTGCATACAGGCATTTATACACTTTTATTGCTCTAGGATACTTCTTTTGTTTAATCCTATATAGGTTTTTTGAACCTATAACATAAGCTACAACATGAGAAATGTGCGGTTAGATAGATATGTCCCTTCTGAAGGTCAGAAAAAAATATAATGGAGGTAAAACCTGAACAAGCTTGGAAACTGATGGTAGACTTCTTCAAGGCAGCCCTTGCCCTAATTAAAATTCTTGTCTTTCTAGAAAAAGTCTAGCTGTTGATTTACCACAGAAAATAATAATAATAATTACTATTATTATTATTTTTTGAGACAGGGTCGCCCTGTGTCACCTAGATTGCAGTGGTGCAGTCATGGCTCACTGCATCCTCCGTTTTTCAGGCTCAAGCAATCCTCCCACCTTAGCCTCCTGAGTAGCTGGGTCCACAAGCATGCGCCACCCACACCCACTAAGTTTTTGTATTTTTGGTAGAGATGGAGTTTTACCTTGTTGCCCAGGCTGGTCTCAAATTCCTGGACTCAAGTAGTCCGCCCGCCTTGCCCTCCCAAAGCCAGAAAACATTTAGAATATCTTTCAGAGATGTGTATTTACACCACTATTAACACAGGGCTGTATAGCAGTCCAGTACTGGACTATGTAGTCCAGTACTATTCTTTTCCTTACTGGAGGGCCAGGCGTGGTGGCAGGTGCCTGTAATCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATTGCTTGAACCTGGGAGGCAGAGGTTGCAGTGAGCTGGGACCGTGCCATTGCACTCCAGCCTGGGCGACAGAGCAAGACTCCGTCTCAAAACAAAAAAAAAAAGAGAGAGAGAGCAGTAATTCAGGTCTCACCCATCTTCAATCCAGGGGGCCTAGCCTTAGTATTTGACCCATAGTAAGCACCCAATAATTGTTTAAATTAATTAACCTCTGAGGCCCTTTAAATCTGTTGATAAGTATCTTATTTTGCAAAGTCCTAAGCACTTGGAAGAGCAGAGGAACTATTTACTGGGTGTGTATGCTTTTCTAACAATATTTTATAGCTGGCTTTTGTTTTTAGAATGAATTTGAACATTGAAAAGGCAGGCAATAGGGATGATTCTGTGAATTCTGCTAAAACTGAGTAGAAAGAATGAGTGTAGAGATGTCGACATTGATCAACTTTCTATCTTCATAAGAGATCTGATTCTAACATATCCATTTAGACTCAAGTAGAATATTGTGTATAGAGTGAGTGGCAGTGAGTAATTTGGTAAAAATTTGCTGACCTGCTTTTATTCTTTCCTCCTTTCTTTCTTCCTTTCCTTCCTTCCTTCCTTCCGTCCTTTCCTTTCCTTTCCCTCCCTTCCTTCCTTCTTTCCTTCTTTCTTTCCTTTCTTTCCTTTCTTCCTTTCTTTCCTTCCTCCCT
TCCTTTTCTTTTCTTTCTTTCCTTTCCTTTTCTTTCCTTTCTTTCCTTTCCTTTCTTTCTTGACAGAGTCTTGCTCTGTCACTCAGGCTGGAGTGCAGTGGCGTGATCTCGGCTCACTGCAACCTCTGTCTCCCAGGTTCAAGCAATTTTCCTGCCTCAGCCTCCCGAGTAGCTGAGATTACAGGCGCCAGCCACCACACCCAGCTACTGACCTGCTTTTAAACAGCTGGGAGATATGGTGCCTCAGACCAACCCAACCCCATGTTATATGTCAACCCTGACATATTGGCAGGCAACATGAATCCAGACTTCTAGGCTGTCTTGCGGGCTCTTTTTTGCCAGTCATTTCTGATCTCTCTGACATGAGCTGTTTCATTTATGCTTTGGCTGCCCAGCAAGTATGATTTGTCCTTTCACAATTGGTGGCGATGGTTTTCTCCTTCCATTTATCTTTCTAGGTCATCCCCTTCTAAATGCCCATCATTAGATGATAGGTGGTACATGCACAGTTGCTCTGGGAGTCTTCAGAATAGAAACTACCCATCTCAAGAGGAGCTCATTAAGGTTGTTGATGTGGAGGAGCAACAGCTGGAAGAGTCTGGGCCACACGATTTGACGGAAACATCTTACTTGCCAAGGCAAGATCTAGGTAATATTTCATCTGCTGTATTGGAACAAACACTTTGATTTTACTCTGAATCCTACATAAAGATATTCTGGTTAACCAACTTTTAGATGTACTAGTCTATCATGGACACTTTTGTTATACTTAATTAAGCCCACTTTAGAAAAATAGCTCAAGTGTTAATCAAGGTTTACTTGAAAATTATTGAAACTGTTAATCCATCTATATTTTAATTAATGGTTTAACTAATGATTTTGAGGATGAGGGAGTCTTGGTGTACTCTAAATGTATTATTTCAGGCCAGGCATAGTGGCTCACGCCTGTAATCCCAGTACTCCAGGAGGCCGAGGCAGGTGGATCAGCTGAGGTCAGGAGTTCAAGACCTGTCTGGCCAACATGGTGAAACCCTGTCTCTACTAAAAATACAAAAAAATTAACTGGGTGTGCTAGTGCATGCCCGTAATCCTAGCTACTCTGGAGGCTGAGGCAGCAGAATCACTTGAACCCGGGAGGCGGAGGTTGCGGTGAGCCAAGATCACACCACTGCACTCCAGTCTGGGTGACAGAGCAAGACTCCATCTCAAAAAATATATATATATATATATACACACATATATTTTATTTCAACTGTTAGACAAGAGTCCAAAGGCCAAAGAATAAAGTTTTAGGCCAGTCCTTTATTAGAAAATGAGTCAAATCCCAAAGCAAGTTTTTTTTATGAGTTAATGAATATAAATGACTACATATTTTATGCCTTAAAAATCACTTTTAATGAATGGTGTTTTATGGCTTGTAAATCAGAGTTTTAATCAGTAAAGAAAGTTTTTAATCCTCAAAAACACGTTATCATAAAAGACACTGTTTGGCATCAAATGTGGTATTTGGCCATGTTCATTAGGGTCATTTTAGGAATCTCATACATTCTACTTAGCTATGCTTAATTCCTGATACCATGGCATTTTCTGAAATGTTTCAAGGATGACATCTCTGCTGTTTTTAATTTGGTAATGATATCTGCTGATTTATTAAGTGAAAAAAGTAATGGTGTCATTACCTTGGATGAAGAAACAAAAATAAAGCATTTGCCACATTTTTCAACTTTTTTTTCCTTTCTTACAAAATTGCTATAAGCTCATTGCCCCCAAATTGGACAATATAGGGAATAAAAAAGATAATTTGGGGTGGGGTTAGACACGGGTCTTGTTATGTTGCCGAGGCTGGTCTCTAACTCCTGGCCTCATGCAATCTTCCTACCTTGGCCTCCCAAAGTGCTGGGATTATAGGTGTGAGCCACTTCACCAAGCTGAGATGCCACCTCTTAAAAGATAAAATAAGGACAGATTACAGCCACTGCTCATGCCTGTAATCTCAGTAC
TTTGGGAGGCCAAGGTGGGAGAATTGCTCGAGGCCAAGAGTTCAAGACCAGCCTGGGCAATGTAGCGAGACCTGATCTCTATAAAAAGTAAAAAAAAAAAAAAACTAGCTGGGGCCAGGCGTGGTGGGTGGCTTACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCAGATCACCTGAGGGCAGGAGTTCAGGACCAACCTGACCAATATGGAGAAACCCTGTCTCTACTAAAAATACAAAATTAGCCAGGCTTGGTGGCTTATGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCTGGGAGGCAGAGGTTTCAGTGAGCTGAGATCGCGCCATTGCACTCTAGCCTGGGCAACAAGAATGAAACTCCATCTCAAAAAAAAAAAAAATCAGCTGGAAGGTGGCAAACACCTGTGGTCCCAGCTACTCAGGAGGCTGAGACAGGAAGATCACTTGAGTCCAGGAGGTCAAGGCTGCAGGTGAGCCATGTTTGTGCCACTGCACTGCAGCCTGGATGACAGACCGAGACCCTTCTCAAAAAAAAAAATATTACCCGTACTCTTTTTTGAGACGGAGTTTCACTCTTGTTGCCCAGGCTGGAGTGCAATGGCGCAATCTCGGCTCAGCGCAACCTCCGCCTCCTGGGTTCGAGCGATTCTCCTGCTTCAGCCTCCCCAGTAGCTGGGATTACAGGCATGCACCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACAGGGTTTCTCCATGTTGGTCAGACTGGTCTCGACCTCTTGACCTCAGGTGATCCGCCCACCTTGGCCTCCCAAAGTGCTGGGATTACAGGCCTGAGCCACCGCACCCGGCCTGTACTCTTATTCTTTAATAATAAAATATTTCTGTGTTTCTTTAGTCATTTTACATAAACTTTTATTTATTTATTTATTTTTATTTATTTATTTTTTTGAGACGGAGTCTCGCTCTGTTGCCCAGGCTGGAATGCAATGGCTCAATCTCAGCTCACTGCAAGCTCTGCCTCCCGGGTACACGCCATTCCCCTGCCTCAGCCTCCCTAGTAGCCGGGACTACAGGCGCCCGCCACCACGCCCAGCTAATTTTTTTTTTTGTATTTTCAGTAGAGACAGGGTTTCACTGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTCGTGATCCACCCGTCTCGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACCGTGCTCGGCCCATAAACTTTTATTTTTAAAATAATGTCATGATAAATAATATTGCTTAGGTGTCTTTAATATATTAGTAACATTTCTGTTTTATTGTACATCAACATTTATATTCAAATTAATGGGTGAAGAGTACTCCATTGGACTAGGTATATCGTAATTTAATCTCCTATTATTGGACAACTACATTGTTTCTAAAATTATACTATTCCTATGACTAAACCTTTGCATATATCTTTTATCTCCCTAGGATATATTTCTAAAACTAGCATTGTTGACTGAAAGTGTAAATACGTGTTAAGGTGTTTGCTACATAATGCCATATTTCCTTTTTAGGAAACTAAGCTACTTTGGATTTCCACCAACACTGTATTCATGTACCCATTTTTCTCTTAACCTAACTTTATTGGTCTTTTTAATTCTTAACAGAGACCAGAACTTTGTAATTCAACATTCATCGTTGTGTAAATTAAACTTCTCCCATTCCTTTCAGAGGGAACCCCTTACCTGGAATCTGGAATCAGCCTCTTCTCTGATGACCCTGAATCTGATCCTTCTGAAGACAGAGCCCCAGAGTCAGCTCGTGTTGGCAACATACCATCTTCAACCTCTGCATTGAAAGTTCCCCAATTGAAAGTTGCAGAATCTGCCCAGAGTCCAGCTGCTGCTCATACTACTGATACTGCTGGGTATAATGCAATGGAAGAAAGTGTGAGCAGGGAGAAGCCAGAATTGACAGCTTCAACAGAAAG
GGTCAACAAAAGAATGTCCATGGTGGTGTCTGGCCTGACCCCAGAAGAATTTGTGAGTGTATCCATATGTATCTCCCTAATGACTAAGACTTAACAACATTCTGGAAAGAGTTTTATGTAGGTATTGTCAATTAATAACCTAGAGGAAGAAATCTAGAAAACAATCACAGTTCTGTGTAATTTAATTTCGATTACTAATTTCTGAAAATTTAGATCTAGATAAAGCTATAGTGTGGATTATTTTATGTATATTTACTTGAGAAAATAATTATTAAATATTAGTGGAAAAGCTATACTTTGGGTATGATATAGGACTTTCGAATTGGAATTTTCCTTTCTATCTGTAAAAGCAAGTAGGTATAGTTTTATTCCCCAGAAGGCATCTTTTTCTCCCCCTTGTCTCACATGGGTGAATTTACCAGCATATTTAACTAAATTCAGACTGGTTCCAAATGTACTGCCAGATAGTAGCATTTCTCTAGTGTTTGTTTTCATCCTGGCTTGTAAGAATGCCCTGCCACTTCTGCCCTGCAATATCCCTTGCTATTAGGATTTTGGCATCACCTTGGGTCCTTAATGCCAGAAATGGGAATTGCTTCATACTGTGGAAAAATACCCATTAAAATATTAAGACCAGTAAAACCTCGTTTCTGCTTGGGCTATTTGTGGATTTCAGACATCCTGAGAAGTTTACCACCCCTGTAATTAATTGTCATTGTCATCACTTCATAATAAAAATAATTGCATGGCCGGGCATGGTGGCTCAAGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGTGGTCAGATCACCTAAGGTCAGGAGATCAAGACCAGCCTGACCAACATGAAGAAACCCCATCTTTACTAAAAATACACAATTAGCCGGGCGTGGTGGCGCATGCCTATAATCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATTGCTTGAACCCGGGAGGCGGAGGTTGCGGTGAGCCGAGATTGCACCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCTGTCTCAATAATAAGAAGAAGAATTGCGTGAATATTTCTTTAAAACTATGATGAGATAACATACCAGATTATCAAATGGATTCAGTAGTGGGTGTGCCATTTATTGCACACTGAGAGATGACCAAGTCATTCTGAAATATCTTTATTAATATATCCTTCCTAGGATTTTTCATCCTAACTTCTCCATAGGTAGTTACTTAGCATAACATCTCTGTGGCCAGATGTATCCCACTACTAAAAGGGCAAAGTAAGCTGTGGCTGCCCTGGTAGATACAATGAGTAAGTGCACAGTGATGGCTATAAATGTTTTCATCTCATAATCCCATGTCCAGACCAGCAATTTGCTCTGAAAGCTCTTACCTGTGTCTGTTTCAATGGCTCTTGATCACTTGCCTGCACGTCCAGAATTCCTTATTTATTCATTGAAAATTAGCGTTCTTTATCCCTTTGTTTTGCAAGTTCAGCTTTTTAGAGATGGCTAAAATGGTCTAATCTTTCTTGGCAAAGGCAATTCTGAGCTGCAGATTAGACTACAAGTGGCTTGGGTACATGTTGTCTTTAAACAAGCGAAGAGGAAAACTTTGAGCTCTATTCAGACTTGGTGAAGTGTGGTAAATTTATGATGAAAGCTACTGACTGTATTACACATGATTAATTCTGAAGCCCATATTAAGATGATCTTTTCAGCAGTTCAGCATTGCTCTTCTAACTGAACAGTTTCAAGGCTGGGATTTCAGCAATTAATCAGTTCAGAATTGCTAATGATCCTGGCGGAGGGTGGTAGCAAAAGGGGGAGGATGTCATTAGCTTCTCTAGCCTGCCTTTTTTCAGTGCCCTGTGGCAGTATGGAGTGAGGCAACATGAAAGAAAGATGGCCTGACCTTCATGGCAGTATTGTGCAACACGTAAATACTGGTGTGAGTGGCTGTGGCTATGGCTAGTAAATGATGGCCCTTGGTAAACAAAGTTATTTATCA
GACAATACCTACCAGCTAGGTCAACTGTGCCCATAATTGATCTGGTTAATTTCTTTTGCTGCCTATTGATTTTTATTTGGTTGATAGATAATAGCTAGAGGACTCTAAATTTCTTTGGGGAAGAACATGAACCCCTTCTAAGCCTTCTTACGAGAGAATTGATCGCTTTTGCACTGACCTTTAGTAACATCCTGATTTCAGTGTTTTGTAACTATCAGAGGGTTGAGTCTTGGTTTTAAGCCATGTATATCTGTAGCATAACTTTCTGTGTAGGCTAGTTACCTCTCAGCTTATAAAGTGTAGGCTGATAAATTTATAGTACAGTAGAGTGTCACTATGCAAAGAAACGATCTTAGGGAATCGAATGATATCTGCTATTAAAGCAAAATTAATATATATTTTTTCTTTTTACTTTTTTTTTTTTTTAAAGACATGAAATCTCACTGTATTGCCCAGGCTGGTCTTGGTCTCAGACTCTTGAGCTCAAGCAGTCCTCCCACCTCAGCTTCCCAAAGTGCTGGGATTATAGGCATGAGCTGCCGTGTCTGGCCCAGTATATATTTTTTAAGTTTTAAGTTTTGTGGTACGTAGTAGGTTTATAATATTATTTTGAATCCTTAGTTGTAATTTTATGTCTGCTGATGTGTACATAATTTTTATTAAACTATTTATTTGAGACTTCAGGTATCTTTTTTTTTTTTTTGAGACGGAGTCTCGCACTCTCGCCCAGGCTAGAGTGCAGTGGCGCCATCTCGGCTTACTGCAAGCTCTGCTTCCTGGGTTCACGCCATTCTCCTGCCTCAGCCTCCTGAGTAGCTGAGACTACAGGTGCCCGCCACCACGCCTGGCTAATTTTTTGTATTTTTAGTAGAGACAGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTTGTGATCTGCCCGCCTCAGCCTCCCAAAGTGCTGAGATTACAGGCGTGAGCCACCGCGCCCAGCCGAGACTTCAGGTGTCTTAGAATTTTTTAAATGTACCCTTTCTGAGAAAAACAGAGACTTAAAGCTAGGATAACTGGTATTCTATTTTTTTTTTTTTTTTTTTTTTACCTCCAGCCTGGGTGACAGAGCAAGACTCTGTCTAAAAAAAAAAAAAAAAAAATTCACTTTAAATAGTTCCAGGACACGTGTAGAACGTGCAGGATTGCTACATAGGTAAACATATGCCATGGTGGAATAACTAGTATTCTGAGCTGTGTGCTAGAGGTAACTCATGATAATGGAATATTTGATTTAATTTCAGATGCTCGTGTACAAGTTTGCCAGAAAACACCACATCACTTTAACTAATCTAATTACTGAAGAGACTACTCATGTTGTTATGAAAACAGGTATACCAAGAACCTTTACAGAATACCTTGCATCTGCTGCATAAAACCACATGAGGCGAGGCACGGTGGCGCATGCCTGTAATCGCAGCACTTTGGGAGGCCGAGGCGGGCAGATCACGAGATTAGGAGATCGAGACCATCCTGGCCAGCATGGTGAAACCCCGTCTCTACTAAAAAATAAAAAAATTAGCTGGGTGTGGTCGCGTGCGCCTGTAGTCCCAGCTACTCGTGAGGCTGAGGCAGGAGAATCACTTGAACCGGGGAGATGGAGGTTGCAGTGAGCCGAGATCATGCCACTGCATTCCAGCCTGGCGACAGAGCAAGGCTCCGTCTCAAAAAAAAAAAAAAAAAACGTGAAAAAATAAGAATATTTGTTGAGCATAGCATGGATGATAGTCTTCTAATAGTCAATCAATTACTTTATGAAAGACAAATAATAGTTTTGCTGCTTCCTTACCTCCTTTTGTTTTGGGTTAAGATTTGGAGTGTGGGCCAGGCACGGTGGCTCACACCTGTAATCTCAGCACTTTGGGAGGCCGAGGCGGGTGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACGTGTTGAAACCCCGTCTCTACTAAAAATATAAAAATTAGG
TGGGCGTGGTGGCAGGCACCTGTAATCCCAGCTACTCAGGAGGCTGAGGCAGCAGAATCGCTTGAACCCAGGAGGTGGAGGTTGCAGTGACCCAAGATCGCACCATTGCACTCCAGCCTGGGGACAAGAGCGAGATTCTTGTCTCAAAAAAAAAAAAAAAAAAAAAAGGTTTGGAGGGTGGTGAGCTGAGATAGTCAACTATTAACTCCTATCTACCTGCTGGGACTACACTGGTGAGGTGGAGCCTAAGTCCTAAAACAACAAGTGAGGCAGCTGGACGCGGTGGCTCGCATCAGTAATCCCAGCACTTTGGGAGCCTGAGGCGGGCAGATCACAAGGTCAGGAGTTCGAGACCAGCCTGGCCAATATGGTAAAACCCAGTCTCTACTAAAAATACATAAATTGGCTGGGCGTGGTGGTGTGCACCTGTAATCCCAGCTACTTGGGAGGCTGACACAGAAGAATTGCTTGAACTCTGGAGGCTGAGGTTGCAGTCAGCTGAGATCCTGCCACTGCACTCCAGCCTGGCGACAGAGTGAGACTCTGTCTCAACAACAACAAAAGAAAGAACAAGTGAGGCAAAACCTGGAGACCCCAGCTTCATGTAACACCTAGTTTGAGTATTGTTGAGAGTTTTTCAGGAAAAAAGTCTGATAACAGCTCCGAGATAGTCTTAACATATGAAAAAGCAAAAAAGGGAGGAGACAGATCATTTGTCCTATACCTTTCTCTTTTAAGGTTTTAATTATAACTTGTGTAATACAGGAGACCTCTGGGTGTTTTTAGTTGACTATAAACTAAATCTGAGTACACATTTCAGGGCTGCTAAAAATGCTTATTTGAAACTGGGCCGTATTAACACAAGCAGAGGCTCTGGAGCAAGTGAAGTACAGATCCAGAGCCCCACTGTATTCTCCAATGGAGTGATTGCCTGAAAGATGATGTCAGTTTTAAGCACCGTGCTTGGTTTTTAACATGGTCACTGACAAATTGGAGAGTGTTTATCCAGAGGTAGATGGTAAAGATACATAAAAGTAACTTGAAATACTGTCTTTTGAAGAAGAAATGAGAAGATTTAAGGAAATAAGACACTGTCTTCAAGTATCTGAAGAACCGTTACCCGGAAGAGAACTGTTATCTGGAACAGGATTAAGACTCACTCATGGGGCTCCAGAAAGCAGACGAGTGCATGGAGGACGCAGAAGATGCAGATTGTGTGGCTCAACTCTAAAATCTTTCTAACAAAATTAGTTCTCTGGATGTGTTCCAGTTCACTTGATGATGATTCTTTTGTTTTTGTTTTTGTTTTTGAGGTGTAGTTTTTCACTCTTGTTGCCCAGGCTGCTGGAGTGCAATGGCACGATCTTGGCTTGCTGCAACCTCCCCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGAATGCACCACCATACCTAATTTTGTATTTTTAGTAGAGACAGGGTTTTTCCATGTCAGTCAGGCTGGTCTTGGACTCCCGACCTCAGGTGATCCACCTACCTCGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCATCGCGCCTAGCCTATGATGATTCTTTTCACAGAGATACAGGCACTTAAGGAGAGGATCTAAACCCCTTGGACACATTGCCGTTGAACTTCTAAGATCTTAGGTTTCCACTTACTCATGAAAATTATACCACAGGGTCAGAGGGTAGTGTTCATTGGAGCCAGGTGCCAGAACAAGTTATTACAAACTACTATTTTAGAGAAAAATGTCATTAAAGTTTAAGATACCTTAAGCTATAGGTTTGCATCAAAGTTAATGAAAGGTAAAAAGATGCCAAGCGTGGTGGCTCAGGCCTGTAATCCCAGCGCTTTGGGGGGCCAAGGCGGGCAGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTAGTAAAAATACAAAAAATTAGCCGGGCATGGTGGC
GGGCATCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATGGCATGAACCCAGGAGGCAGAGCTTGCCGTGAGCTGAGATCCAGCCACTGCACTCCAGCCTGGCTGACAGAGCAAGACTGCATCTCAAAAAAAAAAAAAAAAAAAAATGCAAATCAAATCTAAAGTAGTTCAGTCTTTAAACTCAAAGCCAATACATTTGCTTTGAACTACAAATGAACTGAAGTTTTTAAGTGTAATAAATGTTACTAAATCGGCTTTTGTAGCAGTTAAACAAAAAACTTCAAAAATTGTAAGGATTCTGTGAGGGAGCATGGCTGCTGCTGCTGCTGCTGCTTGCAGATAGCCTGCTGTGTTTAGGATTTAGTTAAATACATTTCTCCTGTTTAAAACTAAATGGTCTTTCCTTAGTTTGCTTAGTTCTTCAGAAGGGCCTTTGAAACACTGGGAAATAAACAAGTGATTCTTTAGCTACTGCTTTCTGAAATACTTATATAAAAGCTCTGCACTGTATTCTCCCATCCCTCTCAGGGGAATATTAGAGGGTTAGGACTCCCCAGGTAGACATTCTAGGGGTGAAAATTTGTCATTACATTGACATTTCAGATTTAGGTTTTCAACAATACTGTTTTCTTCTTTCACATATTGCCATCTAGTAATATAGATGTTCTCCGTCCACATTAATCAAAACTATTGACATGGATAATTCCTAATTCCTTGAACACTATAATGGAGATCTATAGCTAGCCTTGGCGTCTAGAAGATGGGTGTTGAGAAGAGGGAGTGGACAGATATTTCCTCTGGTCTTAACTTCATATCAGCCTCCCCTAGACTTCCAAATATCCATACCTGCTGGTTATAATTAGTGGTGTTTTCAGCCTCTGATTCTGTCACCAGGGGTTTTAGAATCATAAATCCAGATTGATCTTGGGAGTGTAAAAAACTGAGGCTCTTTAGCTTCTTAGGACAGCACTTCCTGATTTTGTTTTCAACTTCTAATCCTTTGAGTGTTTTTCATTCTGCAGATGCTGAGTTTGTGTGTGAACGGACACTGAAATATTTTCTAGGAATTGCGGGAGGAAAATGGGTAGTTAGCTATTTCTGTAAGTATAATACTATTTCTCCCCTCCTCCCTTTAACACCTCAGAATTGCATTTTTACACCTAACGTTTAACACCTAAGGTTTTTGCTGATGCTGAGTCTGAGTTACCAAAAGGTCTTTAATTGTAATACTAAACTACTTTTATCTTTAATATCACTTTGTTCAGATAAGCTGGTGATGCTGGGAAAATGGGTCTCTTTTATAACTAATAGGACCTAATCTGCTCCTAGCAATGTTAGCATATGAGCTAGGGATTTATTTAATAGTCGGCAGGAATCCATGTGCAGCAGGCAAACTTATAATGTTTAAATTAAACATCAACTCTGTCTCCAGAAGGAAACTGCTGCTACAAGCCTTATTAAAGGGCTGTGGCTTTAGAGGGAAGGACCTCTCCTCTGTCATTCTTCCTGTGCTCTTTTGTGAATCGCTGACCTCTCTATCTCCGTGAAAAGAGCACGTTCTTCTGCTGTATGTAACCTGTCTTTTCTATGATCTCTTTAGGGGTGACCCAGTCTATTAAAGAAAGAAAAATGCTGAATGAGGTAAGTACTTGATGTTACAAACTAACCAGAGATATTCATTCAGTCATATAGTTAAAAATGTATTTGCTTCCTTCCATCAATGCACCACTTTCCTTAACAATGCACAAATTTTCCATGATAATGAGGATCATCAAGAATTATGCAGGCCTGCACTGTGGCTCATACCTATAATCCCAGCGCTTTGGGAGGCTGAGGCGCTTGGATCACCTGATGTCGGGAGTTCAAGACCAGCCTGACCAACATGGAGAAACCCCGTTTCTACTAAAAATACAAAATTAGCCGGGCTTGGTGGCACTTGCCTGTAATTCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCACTTGAACCTGGGA
GGCGGGGGTTGCAGTGAGCTGAGATCGCATCATTGCACTCTAACCTGGGCAACAAGAGCAAAACTCCATCAAAAGAAAAAAAAAATCGGGTGCAGTGGCTCATGCCTGTAATCCTAACACTGTGGGAGGCCAAGACAGGCAGATTGCCTGAGCTCAGGAGTTCGAGATCAGCCTGGGCAACATGGTGAAACCCTGTCTCTACTAAAATACAAAAAATTACTCAGCGTGGTGGCATGCGCCTTTAGTTCCAGCTACTCAGGAGGCTGAGGCAGGAGAATCTCTTGAACCCGGGAGGTGGAGGTTGCAATGAGCCAAGATCGTGCCACTGCACTCCAACCTGGCAACAGAGCGAGACTCCGTCTTAAAAAAAAAAAAAATTTTGCAGCGCAAACCAGGATATCCTCTGTTCTCATTTGTTCTAGATTTCAAAAGAAACAGTCCTTTCTTTGGGGAAAAGAGAAAGGAAAAGGAGTTTTATAAAAGGAAAGAAAAGATTCATAAGAACAAGAAGTGGGCCCACTTGCATATACCTTTGTAGAAAACTGTTCACTGTTGTTGAAGAAAAGCTCTTCATATTAATATGCAGTCCAGATGCAGTGGCTCACACTTATAATCTCAGCCCTTTGGGAGGCTGAGACAGGAAGATTACTTGAGGCCAGGAGTTTGAAACCAGCCTGGGCAACATAGTGAGACTCTGTCTCCACAAAATTTTTTTTTAATTAGCCGGGCATGGCAGTGTGCTTCTGTAGTCTTAGCTACTGAGGAAGCTAAGCCAGAAGAATCACTTGAGCCCAGGAGTTCAAGGCTGCAGTGAGCTATGATCATACCATTGCACTCTTGCACTTGCACAGAGCAAGACCCTGTCTCTTAAAAAAAAAAAAGTGTGTGTGTGCATATGCATATATACATATATATACATGCAAATGTATCTGTTTATAATTCAGATTGCTTCAAAAAGATGTTGCACTTTATGATACTGAGAACAGTGAGAAGTAAATAAGATAGAGTGTAGGAGGAGGAATAATTTCAGAACAGCCATCTGAGAACTTCTGTGACAACAGATCAGGCAAAATGAAATGTGAAAGTAATTTTATAGGCCAGGCGTGGTGGCTCATGCCTATAATCCCAGCACTTTGAGTGGCCAAGGCAGGTGGATCACTTGAGGTCAGGAGTTCGAGACCAGCCTGGTCAACATGGTGAAACCTTGTCTCTACTAAAAACACAAAAAAATTAGTCGAGCGTGGTGGCATGTGCCTGTAATCCTAGCTGCTGGGGAGGCTGAGGCAGGAGAATCACTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCTAGATTGCACCACTGCACTCCAGCCTGTGAGACAGAATGAGACCCTGTCTTAAAAAAAAAAAAAAAGTAATTTTATAAACTATTGTGCACAATTCGATGTATTCATAATTAATTAAATGATTATTTTTGTTGGTTTTAACTTTTATTCAGTGGCTATTTATTGGGAGCCTACTGTGTTCTGGGCACTAGGAATGCAACAGTAAATAAGACTAACTAAGTCCCTGGTAGGATTCAGGTTCTGTCGAGGGGAGATACACAATAAAGATGAATTTAAGATAACAATAAATGCTATGGAGAAATATACAGAACAGTGGAATAGTATTAGCTGTCAAAGGTTGTTGATTACTTTCGTTTAAGGAGGCCAGGGAAAGCCTTTCTGAAAAAATTGAGCTGAGACCTAAATAACAAGAAATAATTGTCCTTGAAAAATGAAGGGAATGCATCTTATAGGCAGAGGAATAGCAAACATAAAGGTCTTGAGGTAATAATGAGTGTGGTTTTTTGATTTCTGTATTTTGGTTTTTTTGAGATGGTGTCTCCCTCTATCCCCCAGGCTGGAGTGCAGTGGCACAATCTTGGCTCACTGCAAACTCTGTCTCCTGGGTTCAAGCAATTCTCCTGCCTTGGCCTCCTGAGTAGCTGGTATTACAGGCACGCGTGCTACCACACCCG
ACTAGTTTTTATTTTTAGTAGAGATGGGGTTTTACCACGTTGGTCAGGCTGGTCTCAAACTCCTGAACTCAAGTGATCCAACCACCTCAACCTCCCAAAGTGCTGGGATCACAGGTGTGAGCCACTGTGCCCGGCCAGAGCTTGGTTTATTTTTTAAAAGATAGGCCAATGTTGGTCGTGTGTGGTGGCTCGTGCCTATAATCCCAGCACTTTGGGAAGCCAAGGCAGGCAAATCACTTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCATCTCTACTAAAAATACAAAAAACTAGCATGGTGTGGTGGTGTGTGCCTGTAATCCCAGTGCCTGTAATCCCAGCTACTCCAGAGGCTGAGGCAGGAGAATCACTTGAACCGAAAGGTAGGAGTTACAGTGAGCCAAGATCGCATCACTGCACTCCAGCCTGAACGACAGAGCAAGACTCCTGTCTCAAGAAATAATAATGATAAAAGGTTCGGGCACAGTGGCTCACACCTGTAATTCCAGCACTCTAGGAGGCCGAGGCAGGCAGATCCCCTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACGTGGCAAAACCCCATCTCTACTAAAAAATGCAAAAATTAGCTGGGCACGGCTGGGTGTGGTGGCTCATTCCTGTAATCCCAGCACTTTGGGAGGTCAAGGCGGACAGATCACTGAGGTAGAAACCCTGTCTCTACTAAAAATACAAAAATTTGCCCAGCGTGGTGGCGCGTGCCTCTAATCCCAGCTACACGGGAGGCTGAGACAAGAGAATCACTTCATCAACCCGGGAGGTGGAGGTTGTGGTGAGCTGAGATCGCACCATTGCACTCCAGCCTGGGCAACAAGAGTGAAACTCCATCTCAAAAACAAAAAAAAATTAGCTGGGAATGGTGGCATGTGCCTGTAATCACAGCTACTTGGGAGGCTGGGGCAGGAGAATCGCTTGAACCCAGGAGGCGGAGATTGCAGTGAGCTGAGATTGCGCCACTGCACTCCAGGCTGGGCGAAAGAGCAAGACTCCGTCTCAAAAATAATAATAATAATAATAATAGGCCAGTGTAGCTGGAGTAATTTGCAAATTATGTGTGGAGGCAGAGATTACACAAGGAATGGGAGAAGGTCATAGATGAGGGCCAGATCACATAGTATTTGGTGGTAAGGAATTCAGATTTTATCCTTGTGGTAATTGGTGGTGTGGAGATGGTTAAAAACAAGGTTGGTTTGGGATGGGTTTGAAGAGAGGACTTGCTAATGGATTAAATTTGGAGGATAAGGTAAAGAGAAATTGAAGGAGTGACACTTGGGTTTTGGCTTGAACAATAGATCTTGTTAGTAATATTAAATTAGATGAAGAAGGCATGGTAGGGAATATGGGGGAGTGGGAAAGGCAGGAAGCAGGAATGGAACCAGGAACTCTGTTTTAGATGTGAGAATTTGTTGTTGTTGTTGTTGTTGTTGTTGTTGTTGTTGTTGTTGTTGTGACAGCATCTCGTTCTGTTGCCCAGGCTAGAGTGCATGGAGTGCGGTAGCACGATCTCAGCTCACTCCAACCTCCGCCTCCCGGTTCAAGTGATTTTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCACCTGCCACAATGCCTGGCTAATACTTGTATTTTTAGTAGAGATGGGGTTTTACCATGTTGGCCAGGCTGGTCTTAAACTCCTGACCTCAGGTAATCCACCCACCTCGGCCTCCCAAAGTGCTGGGATTGCAGGTGTGAGCCACTGTGCCCGGCCAGATGCATGAATTTTGAGATGTATACTAGACTTCTGGATAGAGAAGTTAAGTAGGCAGTTGGACACATTGTATGAAGCTCAGGGGTACAAGGAGGACTATGAACATGGGAGTCTTCTGACAAATTTATCACTAGACTCCTCATTCAAGTAACTAGGAAATGTCAGATATTCTTCCCCTAGTAATAGCCAGTGGTTATACTCT
TGCCTTTAGTTTTCTTCACAATACTCTTGGCAACACATAAGGCCTTCCCTACAATCTGAGTTTCAGTCAGAATTGTTTCTGAGCGTTCTTCCTCAAATTTCTCCCCAGTCTCATTATTCTTTATTCTCATGTCCATGACCAGTCATAATAGTAATTATGAAAAACCTCTAACTTTCTTTAGTGCATTGAATGTATATTTTATCATTTTGGTTGTGTTAACTGTAAATCTCTCAGTGGAAATCTGAAAAGCCTTTATTTCCTTAGATGATAATATACAATTGATTTAGGAGATAGGGAATTTTTCAGTTACCTTTATAACAGCACAGTATTAGCAGTCTAATCTAAATGCTAAGTGAATGTTTTGAGAGGAGATAGATGTTGAAAATTAAAATACATTAAGTCCCAGTGAGGTGAAAAGCCGATTGTTAAGTTCTGCACACAAAAGATTTGCTTCAGTGAATTGATTTCAACAGCTGAGATCCTAGTCATTTCACCTGGTCTACCAAAAAGAATGATTTTACTTGCTTTTGGTCAAATCTCTGCCCAGCAATTCTTTTTCTTTCTTTCTTTTTTTTGTTTTATGTGTGTGTGTGTGTGTGTTTTTTTTTAGCAGAGTCTCACTTTGTCACCCAGGCGGGAGTGTGGTGGTATGATCACAGTTCACTGCAGCCTCCAACTCCTGGGCTCAAGTGATCCTCCAGCTTCAGCTTTTCAAGAAATTGGGACTGCAGGCACATGCAACTATGCCTGGCTGAGGTTTTATGTATCTTTTTTCTAGAGAAGGGGTCTCACTGTGTTGCCCAGCTGGGTCTCCAGCTCCTGGTCTCAAGCTGTCCTCCTGCCTCAGCCTCCCAAAGTGCCAAAGTGCTAGGGTTATAGGTGTGAGCCATTGGTGCCCAGCTACTGCCTGCCTGGCAATTCTGAATGCCTTAAATTTTTTTTTTTTTTTTTTTTTTTTTTGAGACAGAGTTTCACTCTGTCACCCAGGCTGGAGTGCAGTGGCATGATCGTGGCTCACAGCAACCTCTGCCTCCTGGATTCCAGCAATTCTCATGCCTCAGCTTCCCGAGTAGCTGGGACTACAGGTGCATGCCACCACGCCCAGCTAATTTTTGGTTTTTTTGTTTGTTTGTTTGTTTGTTTTGAGACGGAGTCTCGCTCAGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTCCGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCTGCCACTACACCCGGCTAATTTTTTTGTATTTTAAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCTGTCTCGGCCTCCCAAAGTCCTGGGATTACAGGCGTGAGCCACCACACCCGGCCTAATTTTTTTTTTTTTAATTTTATTTTTAATTTTTTGAGATGCGAGATGGAGTCTCGCTCTGTTACCCAGGCTGGAGTGCAGTGGCACCATCTCAGCTCACTGCAACCTCCACCTCCTGCATTCAAAAGATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGATTACAGGTGCCTGCCACCACGCCCAACTAATTTTTTGTATTTTTAGTAGAGATGAGGTTTCACCATGTTGGTCAGACTGGTGTCGAACTCCTGACCTCAAGTGATCTGCCTGCCTCAGTCTCCCAAAGTGCTAGGATTACAGGGGTGAGCCACTGCGCCTGGCCTGAATGCCTTAAATATGACGTGTCTGCTCCACTTCCATTGAAGGAAGCTTCTCTTTCTCTTATCCTGATGGGTTGTGTTTGGTTTCTTTCAGCATGATTTTGAAGTCAGAGGAGATGTGGTCAATGGAAGAAACCACCAAGGTCCAAAGCGAGCAAGAGAATCCCAGGACAGAAAGGTAAAGCTCCCTCCCTCAAGTTGACAAAAATCTCACCCCACCACTCTGTATTCCACTCCCCTTTGCAGAGAT
GGGCCGCTTCATTTTGTAAGACTTATTACATACATACACAGTGCTAGATACTTTCACACAGGTTCTTTTTTCACTCTTCCATCCCAACCACATAAATAAGTATTGTCTCTACTTTATGAATGATAAAACTAAGAGATTTAGAGAGGCTGTGTAATTTGGATTCCCGTCTCGGGTTCAGATCTTAGCTGATAAGTGGAAGAGCTGGGACTTTAAGCAGATGAGAATCTAAAGACTTTGCTCTTTTCACTTCACTGGGGTGTCTTTCTCTCTCTCTCTCTTGCTCTCTCTCTCTCTTTTTTTTTTTCCCAAGACGGAGTCTCACTCCATTGCCCAGGCCAGAGTGCAGTGGTGCGATCTCAGCTCACTGAAAACTCATCTTGCCCAGGCTGGTCTTGAACCCCTGACCTTGTGATCCTCCCGCCTTGGCCTCCCCAAGTGCTGGGATAGGCGTGAGCCACCGTGCCCAGCCAATAATAGCTAAAATTTATATAATGTTCACTGGGCCAGGCACAGCGGCTCGTTCCTGTTATCCCAGCACTTTGGGAAGCTGAGGCAGGCAGATCGCTTGAGCCAAGGAGTTCGATACCAGCCTGGGCAACATGGCAAAACCCCATCTCTACCAAAAAAAATATACAAAAATTAGCCAGGCGTGGTGGCATGTACTTGTAGTTCCAGCTACTCGGAAGGCTGAGTTGAGAGTATCTCTTGAGCCCAAGAAGAGGGGACTACAGTGAACGGAGATTGCGCCACTGCACTCCAGCCTAGACGACAGACAGAAGATCTCAAAAGAAAAAAAAAAAAAAAAGATCACTTTATGCTGGGACTGCTCTAAAGGCCCAACCATGTTTTAACTAATTAACAATTTTATGACAACTCTATGAGCTATGTACTGTAATTATGCCTATATTACAGATGTGAAAATTGAGGCTCAGAGAGGTTGAATAAGTTGCTCAAAGTCACACAGGTAATAAGTGATGGAACTAGAAGTTGAACTCAGGAAGTCTAGCTCCAAGTCTAAATTCTTTGTTAATTTATTTTTCGGGCCAGAGTCTTACTCTGTCACCCAGGCTGGAGTGCAGTGCCACTATCTCTGCTCACTGCAACCTTCACCTCCCAAGTTCAAACCTTGTTCAATTCTTGTGCCTTGGCCTCCCAAGTGGCTAGGATTACAGGCATGTGCCACAACAACTAGCTAATTTTTTGTCTGATTCTGTTGGCCAGTCTGGAGTGCAGTGGCGCAATCTCAGCTCACTGCAGTCTCCAGCTCCCAGGTTCAAGTGATTCTCGTGCCTTAGCCTCCCAAATAGCTGGGATTACAGGCACGTGCCACCACACCGAGATAGTTTTTTGTATTTTTAATAGAAACAAGGTTTCAACATGTTGGCCAGGCTGGTCTCAAATTCCAGACCTCAGATCATCTGCCCGCCTCAGGCTCCCAAAGTGCTGGGATTACAGGCATGAGCCACTGCACCCGGCCTTAATTTTTATATTTTTATTAGAGATGGGGTTTTGCCATGTTGGCCAGGCTGGCCTTGATCTCCTGGCCTCCAGTGATCCACCCGCCTTGGCTTCCCAAAGTGCTGGGATTACAAGCATGAGCCACTGCACCCGGCCTCCAATTCTAAACTCTTAACAACAATACTATAGTTTCTTGAAAAGTTGTTGAAGGCTTCACGGAGGGAAAAAAAATGGAGCATTCTAACAACTTTGCAGATGAGACCCAAGAAGACTCAATGACTTTCTCCTGATCATATTGTAGCAGATGACTTAGCCAGAACTCTGACTTCCTCACAGGGAGAAAGTCTGCAAGATTTCACACTTACCTGTCAGGCCTGAGCTGGCTGCTTTCTCAGCTCCCTAAGTGCTATGTTCCCAGTCTGCTTTTCTTCCTTTTTCAAGTGTGCACTACCAGGCATTTCAGAACATCCCAGGCTGGTCGCGGTGGCTCACACCTGTGATCCCAGCACTTTGGGAGCCCAAGGCGGGTGGATCACCTGAGG
TCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCATCTCTACTAAAAATACAAAAGTTAACTGGGCGTGGTGGTAGGCACCTGTAATCCTAGCTCAGGATTACTCGGGAGGCTGAGGCTAGAGAATCGGTTGAACCCAGGAGGCGGAGGTTGCAGTGAGCCAAGATTGCGCCACTGCACTCTAGCCTGGGGACAAGAGGGAGACTTCATCTCAAAAAAAAAAAAAAAATCCCAGCTGGGCACAGCGGCTCACTTCTGTAATCCCAGCACTTTAGGAGGCCAAGGCAGGAGGATCACTTGAGCCCAGGAGTTCAAGACTAGCCTGGGCAACATAGTAAGACCCTGTCTCTACAAAAAAATTTAAAAATTAATTGGGTGTCGTAGCACACTCTTGTATTCCCAGCTACTCAGGAGGCTGAGGTGAGAAGAATGCTTGAGTCTGGGAGGTCGAGGCTGCAGTGAGCCATGATGGTGCTACTGCACTCCAGCCTGGCCAACATTGTGAGACCTTGTCTCAAAACAAAACAAAACATCCTTCTACTGAGCACTTTCTGTCCCTTTATAGAAACTTAAGAGGGAACCAGTAGAGGTAATTTCCTAAGGAAAACTGCTTTGGGACATGATCACAAATGAAGCCTGGAGTTTTGAACTGCTGAGGTCAGCCTGTTTTTACCTTCTGAGCCTATCAAGTAATTGTTCCAGATGCCAAGAAAAGCTGCTGGCCTTATTTCTGCTTCTGCCTTTACCACAGGGGAGCGCCATGTGAGCCAGTCCTCTGTTTTTCCTCCACTGTATGCTAGGCAGTATTAGCACCAGATTCTTCCCCTCTTTAAAAAGAAATTCTAGTGCTTTGGATTTTTTCCTCCATGCAGAATAGCAATGATGGAAAGTATGTGGTCAAAGTAATGACATTCTGAAAATACTAAATGTCACCATAGTATTTTTCTCTGGAAGAGAAATGTATATGTAGAGGTGAAACTTCAAATTTCTTTTTTTTTTTTTTTAAGACGAAGCTTTGCTCTTCTTGCCCAGGCTGAAGTACAATGGCGTGATCTTGGCTCACCGCAATCTCTGCCTCCAGGGTTCAAGTGATTCTCCTGCCTCAGCCTCCTAAGTAGCTAGGATTACAGGCATGTGCCACCACGCCCAGCTGATTTTGTATTTTTAGTAGAGATGGGGTTTCTCCATGTTGGTCACGCTGGTCTTGAACTCCCGACCCCAAGTGATCCACCCACCTCGGCCTCCCAAAGTGCTAGGATTACAGGCCACCGCGCCCGGCCTGAAACTTCAAATTTCTTTTTTTTTTTGAGACAGAGTCTCGCTATGTCACCCAGGCTGGAGTGCAGTGGCGCCGTCTCGGCTCACTACCAGCTCCACTCCACCTCCTGGGTTCACACCATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACTACAGGTGCCCGCCACCATGCCCAGCTAATTTTTTGTATTTTTAGTAGAGACGGGTTTTCACTGTGTTAGACGGGATGGTCTCCATCTCTTGACCTCGTGATCCGCCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACTACGCCAAGCCCGAAACTTCAAATTTCTTATCTCATAACTAGGCATCCTCATCACTGAGTGTTAGCCTGGATATAAACATTCCTAATCTTTTGTACTTTTCATGTCAGCATTTGGCTCCACTTGGCTGCCTGGGGAGAACTTCTAGCATTATGAGCATGCAGGTCCTATCAACAGGTTGGGGGTGCGGTTTATTCATACAGGTAGTGAGAGTGGCACAGATGGATGCTGTCCCTTAAAACAAACAGACTTGTCTTTGGGAGCCTGAGGCGGGTGGATCATGAGGTCAGGAGTTCAAGACCAGCCTGGCCAACATAGTGAAACCCCGTTTCTACTAAAAATACAAAAAATTAGCCGGGTGTGGTGGTGTGCACCTGTAATCCCAGCTACTAGGGAGGCTGAGGCAGGAGAATC
ACTTGAACCCAGGAGGTGGAGGTTGCAGTGAGCCGAGATGGCACCATTGCACTCCAGCCCAGGCGACAGTGCAAGACTGCGTCTCAAAAAAAAAAAAAAAACACACAGACTTGTCCTACTGCCATTTCTTTTCACTCTGGCGGTAAAGTAAGAGAGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTTTCTGTCCGTCTGTCTGTCAAGGGGGAGGGTGACCACTTTCTAAAAGGCCATCCGTGTATTTTTAGCTTCCTGATTTTTTTCTCTATCGCAGTCTCTTTGAAGCCAGGTGAATTTTAGGCCTTGGCAATTTTCTTTTTATTGCAATGGGAAGGTCAAGACACTGAGAGTCACCCAAAACATATCCATCCAAAATGATACAATTTTAGGGTTTATTTTTAAGTGATACCCAAGTTATTTGCTAAGAACCTATGCCAGTGTGTTTATGAGAATTTGCACTGTCCCACACTGTTGCCACCAGCCACATGGGACTGTTTAAATTTAAATTTTACAAATTAGCCAGTCATGGTGGTGTGCACTTGTAGTCCCAGCTACTTAGGAGGCTGAGGCAAGAGGATTGCTTGAGCCCAGAAGTTCAATACTGCAGCAAGCTATGATCGTGCCACTGTACTCCAGCCTGAGTGACAGAATGAGACCTCATCTCTTCAAAAAAAAAGAAAAAAATTAAAATATGAAGTTTAGTTCTTCATTCACCCTAACCACATCTCCAGTGCTCAATAACTATATGTGACTCATGGCTACCTTATTAGCATAGATATAGAACATTGTGACTATCACAGAAAGTTGTTTTGAACAGTGTTGCCAAGCCCTGTAAGTGGAAGAGGCAGTGCAGTGTGATCTGTGTCTTCAGGAAACCAGGTAGTCAGACTAGTTCAATGAGGAGAGGCAGAACCTGGCTTCACTTCTAGATTAAAAACTGCTTAGGTGGCCTAAAGATACAATGGCCATTCTCAGAGTAGTGAGAAGGAAGGAACAGATGTTTAGGGGGCTAGAAGAAAGTCAGAGAGGGCCGGGCGCAGTGGCTTATGCCTGTAATCCCAGCACTTTGGAAGGCCAAGACAGGCAGATCACGAGGTCAGGAGTTCGAGACCAGCCTGACCAACATGGTGAAACCCTGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGTGCGCGCCTGTAATCCCAGCTACTCAGGAGGCTAAGGCAGGAGAATCGCTTGAACCCTGGAGGCAGAGGTTGCAGTGAGCCCAGATCGCACCACTACGCTCCAGCCTAGGTGACAGAGAGAGACTCCGTCTCAAAAAAAAAAAAAAAAGTCAGAGGAGACAAGGAGCATGTACACCTAAAATCAACATAGACCCCTCTGTTGATGGGGTCATAGTGAGTACTTGAGGTACCAAGTCTGGATAAACATCAAACTTCAGCCAATAACTTTGAGTTTCTAGCCATCCAAGCCTCTTATTAAACATACAGAAGGACCTTTTTTCCCTTGCATCTAACAAGTTAAAGCACCTGCAGAGATCATTAGGGAGGAGCCTTGGCCTGATTGGTGACAAAAGTGAGATGCTCAGTCCTTGAATGACAAAGAATGCCTGTAGAGTGCAGGTCAACTACATATGCACTTCAAGAAGATCTTCTGAAATCCAGTAGTGTTCTGGACATTGGACTGCTTGTCCCTGGGAAGTAGCAGCAGAAATCATCAGGTGGTGAACAGAAGAAAAAGAAAAGCTCTTCCTTTTTGAAAGTCTGTTTTTTGAATAAAAGCCAATATTCTTTTATAACTAGATTTTCCTTCTCTCCATTCCCCTGTCCCTCTCTCTTCCTCTCTTCTTCCAGATCTTCAGGGGGCTAGAAATCTGTTGCTATGGGCCCTTCACCAACATGCCCACAGGTAAGAGCCTGGGAGAACCCCAGAGTTCCAGCACCAGCCTTTGTCTTACATAGTGGAGTATTATAAGCAAGATCCCACGATGG
GGGTTCCTCAGATTGCTGAAATGTTCTAGAGGCTATTCTATTTCTCTACCACTCTCCAAACAAAACAGCACCTAAATGTTATCCTATGGCAAAAAAAAACTATACCTTGTCCCCCTTCTCAAGAGCATGAAGGTGGTTAATAGTTAGGATTCAGTATGTTATGTGTTCAGATGGCGTTGAGCTGCTGTTAGTGCCAACATGTTAGTGAGAAAATATCTTTGGATAGGTAAAAATCAAGGAGGAGTTCTCCTCTTCCTAAACCATCTTAATTTACTTACATAGAAGAAAGCACAGCAGCTGGCCCACCACGGACGGGCCCAGAGCAGGGGAAGATTCTCGGTGAACATTTCTTTTTTTTTTTCTTTTTTTTTGAGGTCGAGTCTCTGTTGCCCAGGCCAGAGTGCAATGGCGCGATCTCGGCTCACTGCAACCTCCACCTCCCGGGTTCAAGTGATTCTCCTGCCTCAGCCTCCCAAATAGCTGAGACTACAGGCGTGTGCCACCACGCCCGACTAATTTTTTGTATTTTTTTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAATATGGTCCCGATCTCCTGACCTCGTGATCCACCCGCCTCAGCCTCCCAAAGTGCTAGGATTACAGGCATGAGCCACTGTGCCCAGCCCTCTCCATGAACATTTTCTAATTAAACTTGACACTTAATACAATGTTATGCTTAGGACTGCTATAAAGCTTACCTCTGGAGTTGCGCAGCACAAAGGCCTTGGTGTGTGTATAAATTTGGTTTGTTCTTTTCACAGCAAAAGCTACCCACCTTTGCCTCCTGTGCCTGCTTCTGCCCAGGGACTTAGGTCCTCTTACACCTTAGAGAAAGGCCTTAGCATCTGGTCACAGGCAGATGAGTGACAGCAAGAAAACCTGGCTGCAATGTAATTTTGTTTCCATCCTCTTTATTAGTTATCAATTGGATTTTTATGAAATTTCCAAGTTCCACTCAAGGATTTCTCAGTGTTTTTTTACTTTGGTATAGTGGAAACCAGGGTTGCCAGAAAGTATTATTTTGGGGGTGAGTTAGTCAACCTTCGTTCAGTCAGACAGACAGGAGCACCTCAGCAATTCCCAGAAACGGGCTGATGGGAAAGAGCAACATACATGAATGTCTTGAAGAACACAGCCAACAGAGCCCATTGGGCAGTTCTGATTTTCCAGGTACACAGCATCTCCACAGTCTCTTCTGATTTTTATTCCCCTGAGTATATGGATTCCAGCTCAGCATGTAGCCTTTCCCTGCTGAGTCTCTAACCAGGATAACATGTATTTTTTTGACTGGATGAATTATCTTCCCATCTCTTGACATTTACAGTAATTACCACCAAGTATGGTATTTTCAGTGGCCGTGATTATCAGTTACCAACACAGAATTAGGATGAAGGGAGGAAGGGAGGGAAGGAAGGTGGGTGTTTTTTCACACAGTGTCTTAGCCAGCAATTTAGCAAATTAATGGAAATTAGATCTTTGATTTTTTTTTCTTTCAAGCATTTTATTTGAGAGACTATCAAACCTTATACCAAGTGGCCTTATGGAGACTGATAACCAGAGTACATGGCATATCAGTGGCAAATTGACTTAAAATCCATACCCCTACTATTTTAAGACCATTGTCCTTTGGAGCAGAGAGACAGACTCTCCCATTGAGAGGTCTTGCTATAAGCCTTCATCCGGAGAGTGTAGGGTAGAGGGCCTGGGTTAAGTATGCAGATTACTGCAGTGATTTTACATCTAAATGTCCATTTTAGATCAACTGGAATGGATGGTACAGCTGTGTGGTGCTTCTGTGGTGAAGGAGCTTTCATCATTCACCCTTGGCACAGTAAGTATTGGGTGCCCTGTCAGAGAGGGAGGACACAATATTCTCTCCTGTGAGCAAGACTGGCACCTGTCAGTCCCTATGGATGCCCCTACTGTAGCCTCAGAAGTCTTCTCTGCCCACATACCTGTGCCAAAAGACTCC
ATCTGTAAGGGATGGGTAAGGATTTGAGAACTGCACATATTAAATATACTGAGGGAAGACTTTTTCCCTCTAACTCTTTTTCCCATATGTCCCTCCCCCTCCTCTCTGTGACTGCCCCAGCATACTGTGTTTCAACAAATCATCAAGAAATGATGGGCTGGAGGCTGGGCATGGTGGCTCATGTCTGTAATCCCAGCACTTTGGGAGGCCGAGGCAGGTGGATCACTTGTCAGGAGTTTGAGACCAGCCTGGCCAACATGGTGAAACCCCATCTGTACTAAAAAAAAAAAAACAAAAAGTAGCCAGGCCTGGTGGAGCATGCCTGTAATGCCAGCTATTTGGGAAGTTGAGGTGTGAGCATCGCTTGAACGTGGGAGGCAGAGGTTGCAGTGAGCCAAGATTGCACCACTGCACTCCAGACTGGGTGACAGAGTGAGACTTTGTCTAAAAAAAAAAAAAAAGAGAGAGAGAGAAAAGCTAGGTGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGTGGGCAGATCACGAGGTCAAGAGATCGAGACCATCCTGGCCAACCAACATGGCGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCTGGGCGTAGGGGCGCACGCCTGTAGTCCCAGCTACTTGAGGGGCTGGGGCAGGAGAATCGCTTGAACCCCGGAGGCGGAGGTTGCAGTGAGCCAAGATCGCGCCACTGCACTCCAGCCCGGGCGACAGAGCGAGACTCCGTCTCAAAAAAAAAGAGAGAGAGAGAAATGATGGGCTGGGCCAGTGCCCCACCCCTGTAATCACAACACTGGGAGGCCAAGGTGGGAGAATCGCTTGAGCCTGGGAGCTGAAGACCAGCCTGGGCAATACAGTAGGACCTCATGTCTACAAAAAAATTATTAAAAATTAGCCAAGGCTGGGTGCGGTGGCTCATGCCTATAATCCCGGGGGTGAAGTTGAGCCCAGGAGTTTGAGACCAGCCTGGGCAACATGGCAAAACCCTGTCTCTACCAAAAATACAAAAAAATTAGCCAGGGGTGGTGGTACGTGTCTGTAGTTCCAGCTACTTAGGAGGCTGAGATGGAAGGATTGCTTGAGCCCAGGAGGCAGAGGTGGCAGTGAGCTGAGATCACACCACTGCACTCCAGCCTGGGTGACAGAGCAAGACCCTGTCTCAAAAACAAACAAAAAAAATGATGAAGTGACAGTTCCAGTAGTCCTACTTTGACACTTTGAATGCTCTTTCCTTCCTGGGGATCCAGGGTGTCCACCCAATTGTGGTTGTGCAGCCAGATGCCTGGACAGAGGACAATGGCTTCCATGGTAAGGTGCCTGCATGTACCTGTGCTATATGGGGTCCTTTTGCATGGGTTTGGTTTATCACTCATTACCTGGTGCTTGAGTAGCACAGTTCTTGGCACATTTTAAATATTTGTTGAATGAATGGCTAAAATGTCTTTTTGATGTTTTTATTGTTATTTGTTTTATATTGTAAAAGTAATACATGAACTGTTTCCATGGGGTGGGAGTAAGATATGAATGTTCATCACAAAAACATAAATCAAGGCCGGGCATGGTGGCTCATGCCTATAATTCCAGCACTTTGGGAGGTCAAGATGGAGGTCAAGGTGGGAGCCTAGAAGTTCGAGACCAGCCTGGGCAACATAAGGAGACTTCATCTGTACAACAAATTTAAAAAGTAGCTGGGTGTGGTGGCAGATGCCTGTAGTCGCAGCTACTTGGGAAGCTGAGGTGGGAGGATCACTTGAGCTCAGGAGGTTGATGCTTCAGTGAGCCACGATCACACCACTGTACTCCAGCCTGGGCGACAGAGCGAGACCGTGTCTCAAAAAGAAAAAAGAAAGTATAAATTTACACAAAAACAATAAAATAATCCCAGTAATTCCACCACTTGGAGATGATCACCATAAAACTCCACCAGGCATATGTGCGTATATATACACGTGTATTTTATAAAATGTGATC
ATAATTACACTGTTTTGCTTTTTTCCTTAAGATATTACATACATTTTTCCACATCGTTAAATTACAGTGCTGTTTTCCTGGTGGCTTTCCTTTAACAGATTGAAGTTCATGTTAATACAGTTGCCAGAGGCTGTGGGCTTTCACTGTCACCAGGAGTCACTCCTAGGGCCTCTTCAGAGCAAGGCCTTATGTCCTGAAGCATTGCCTTTTTTTTTTTTTTTTGAGGTGGAGTCTCACTCTGTCACTTAGCAGGCTGGAGTGCAGTGGCCCAGTCTTGGCTCACTGCAACCTCCGCCTCCTGGGTTTAAATGATTCTCCTGCCTCAGCCTCAGGGCGGATCACCTGACATCAGGAGTTTGAGACCAGCCTGGCCAATATGGCGAAACCCCATCTCTACTAAAAATACTAAAAAAAATTAGCCAGGCATGGTGGCACGCACTTGTAGTCCCAGCTACTTGGGAGACTGAGGCAGGAGAATCGCTTGAACCCAGGATGTTGAGGTTGCAGTGAGCTGAGATCACACCATCACAATCCAGCCTGAGTGACAGAGTGAGACTCCATCTGAAAAAAAAGAAAAAACAATTAGCCTGGCATGGTGGCAGGCACCTGTAATCCCTGCTACTTGGGAGGCTGAGGCAGGAGAATTGCTTGAACCCGGGAGGTGGAGGTTGCAGTGAGCTGAGATCGTGCCATTGCATTCCAGGCTGAGCAACAAGAGCAAGACTCCGTCTCAAAAAAAAAAAAAAAAAAAAAAAAAGGCCAGGTGCAGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGTGGGTGGATCACCTGAGGTCAGGAGTTCCAGAGCAGCCTGGCCAACATTGTGAAACCCCCGTCTCTACTAAAAATACAAAAATTAGCTGGGTGTGATGGCATGTGCCTGTAATTCCAGCTACTCAGGAGGCAGAGACAGGAGAATTGCTTGAACCCAGGAGGCGGAGGTTGAATGAGCCGAGATTGCGCCATCACACTCTAGCCTCGGCGACAGAGCAAGACTCCGTCTCAAAAAAAAAAAAAAAAAATTAGCTTCTACCTCATTAATCCTAAGAACTCATACAACCAGGACCCTGGAGTCGATTGATTAGAGCCTAGTCCAGGAGAATGAATTGACACTAATCTCTGCTTGTGTTCTCTGTCTCCAGCAATTGGGCAGATGTGTGAGGCACCTGTGGTGACCCGAGAGTGGGTGTTGGACAGTGTAGCACTCTACCAGTGCCAGGAGCTGGACACCTACCTGATA,dna,BRCA1 Locus,GCF_000001405.13,,Protein coding +47,urn:mavedb:00000097-h-1,TTAATTTCAGGAGCCTACAAGAAAGTACGAGATTTAGTCAACTTGTTGAAGAGCTATTGAAAATCATTTGTGCTTTTCAGCTTGACACAGGTTTGGAGTGTAAGTGTTG,dna,BRCA1 Exon 5,GCF_000001405.13,,Protein coding +48,urn:mavedb:00000061-h-1,TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC,dna,RAF,GCF_000001405.26,P04049,Protein coding 
+49,urn:mavedb:00000066-a-1,ATGGACGAGGAATACGATGTGATCGTGCTGGGGACCGGTCTCACCGAATGCATCCTGTCGGGCATCATGTCTGTGAACGGGAAGAAGGTGCTGCACATGGACCGGAACCCCTACTACGGGGGCGAGAGCTCCTCCATCACACCCCTGGAGGAGCTGTATAAGCGTTTTCAGTTGCTGGAGGGGCCCCCTGAGTCGATGGGCCGAGGCCGAGACTGGAATGTTGACCTGATTCCCAAATTCCTCATGGCTAACGGGCAGCTGGTAAAGATGCTACTGTATACAGAGGTGACTCGCTACCTGGACTTCAAGGTGGTGGAGGGCAGCTTTGTCTACAAGGGGGGCAAGATCTACAAAGTGCCGTCCACTGAGACTGAGGCCTTGGCTTCCAATCTGATGGGCATGTTTGAGAAACGGCGCTTCCGCAAGTTCCTGGTGTTTGTGGCAAACTTCGATGAGAATGACCCCAAGACCTTTGAGGGCGTTGACCCCCAGACTACCAGCATGCGTGACGTCTACCGGAAGTTTGATCTGGGCCAGGATGTCATCGATTTCACTGGCCATGCCCTGGCGCTCTACCGCACTGATGACTACCTGGACCAGCCCTGCCTTGAGACCGTCAACCGCATCAAGTTGTACAGTGAGTCCCTGGCCCGGTATGGCAAGAGCCCATATTTATACCCGCTCTACGGCTTGGGCGAGCTGCCCCAGGGTTTTGCAAGATTGAGTGCCATCTATGGGGGGACATATATGCTGAACAAACCTGTGGATGACATCATCATGGAGAACGGCAAGGTGGTGGGCGTGAAGTCTGAGGGAGAGGTGGCCCGCTGCAAGCAGCTGATCTGTGACCCCAGCTACATCCCGGACCGTGTGCGGAAGGCTGGCCAGGTTATCCGCATCATCTGTATCCTTAGCCACCCCATCAAGAACACCAACGACGCCAACTCCTGCCAAATAATCATCCCCCAGAACCAGGTCAACAGGAAGTCAGACATCTACGTGTGCATGATCTCCTATGCACACAACGTGGCGGCCCAGGGCAAGTACATAGCTATTGCCAGCACTACTGTGGAGACCACGGACCCTGAAAAGGAGGTGGAGCCGGCTCTGGAGCTGTTGGAGCCCATTGACCAGAAGTTTGTGGCTATCAGTGACTTGTATGAGCCCATTGATGATGGTTGTGAGAGCCAGGTGTTCTGTTCCTGCTCCTACGATGCCACCACACACTTTGAGACAACCTGCAACGACATCAAAGACATCTACAAACGCATGGCTGGCACGGCCTTTGACTTTGAGAACATGAAGCGCAAACAGAACGACGTCTTTGGAGAAGCTGAGCAGTGA,dna,GDI1,GCF_000001405.26,P31150,Protein coding +50,urn:mavedb:00000021-a-1,GGCGTGTCCGCCTGTTGGAATATGCTTCTCAGGTCTTCTGGGAAACAGATGTTTTGTGGAAGTGGAAGATTTTGGAAGTAGTGCCTTATCATGTGAAACCACAGGGCAGCTGATCTCTTCAGGCTTTCTTGATGTGAATGACAGCTTTGTTTCATCCACTTTGGTGGGTAAAAGAAGGCAAATTCCCCTGTGGTACTTTTGGTGCCAGGTTTAGCCATATGACGAAGCTTTACATAAAACAGTACAAGTATCTCCATTGTCCTTTATGATCCTCCATGAGTGTTTTCACTTAGTCTGATGAAGGGTTCACTCCAGTCTTTTCGGATGATAAAATGCTTCGGCTGTCAGTCTAATAAGGGATTCCCTGAGGAGTTTGGAGGCTGTAAGAGCACCCCCCGTCTCAATGCCAGTGCTTCTTATCTCAGCCTCTCCTGCACTCCTTTACCCCCGT,dna,IRF4 enhancer,GCF_000001405.26,,Regulatory 
+51,urn:mavedb:00000108-a-3,ATGTCTGGTAACGGCAATGCGGCTGCAACGGCGGAAGAAAACAGCCCAAAGATGAGAGTGATTCGCGTGGGTACCCGCAAGAGCCAGCTTGCTCGCATACAGACGGACAGTGTGGTGGCAACATTGAAAGCCTCGTACCCTGGCCTGCAGTTTGAAATCATTGCTATGTCCACCACAGGGGACAAGATTCTTGATACTGCACTCTCTAAGATTGGAGAGAAAAGCCTGTTTACCAAGGAGCTTGAACATGCCCTGGAGAAGAATGAAGTGGACCTGGTTGTTCACTCCTTGAAGGACCTGCCCACTGTGCTTCCTCCTGGCTTCACCATCGGAGCCATCTGCAAGCGGGAAAACCCTCATGATGCTGTTGTCTTTCACCCAAAATTTGTTGGGAAGACCCTAGAAACCCTGCCAGAGAAGAGTGTGGTGGGAACCAGCTCCCTGCGAAGAGCAGCCCAGCTGCAGAGAAAGTTCCCGCATCTGGAGTTCAGGAGTATTCGGGGAAACCTCAACACCCGGCTTCGGAAGCTGGACGAGCAGCAGGAGTTCAGTGCCATCATCCTGGCAACAGCTGGCCTGCAGCGCATGGGCTGGCACAACCGGGTGGGGCAGATCCTGCACCCTGAGGAATGCATGTATGCTGTGGGCCAGGGGGCCTTGGGCGTGGAAGTGCGAGCCAAGGACCAGGACATCTTGGATCTGGTGGGTGTGCTGCACGATCCCGAGACTCTGCTTCGCTGCATCGCTGAAAGGGCCTTCCTGAGGCACCTGGAAGGAGGCTGCAGTGTGCCAGTAGCCGTGCATACAGCTATGAAGGATGGGCAACTGTACCTGACTGGAGGAGTCTGGAGTCTAGACGGCTCAGATAGCATACAAGAGACCATGCAGGCTACCATCCATGTCCCTGCCCAGCATGAAGATGGCCCTGAGGATGACCCACAGTTGGTAGGCATCACTGCTCGTAACATTCCACGAGGGCCCCAGTTGGCTGCCCAGAACTTGGGCATCAGCCTGGCCAACTTGTTGCTGAGCAAAGGAGCCAAAAACATCCTGGATGTTGCACGGCAGCTTAACGATGCCCATTAA,dna,HMBS,GCF_000001405.26,P08397,Protein coding +52,urn:mavedb:00000094-a-15,MAAASSPPRAERKRWGWGRLPGARRGSAGLAKKCPFSLELAEGGPAGGALYAPIAPGAPGPAPPASPAAPAAPPVASDLGPRPPVSLDPRVSIYSTRRPVLARTHVQGRVYNFLERPTGWKCFVYHFAVFLIVLVCLIFSVLSTIEQYAALATGTLFWMEIVLVVFFGTEYVVRLWSAGCRSKYVGLWGRLRFARKPISIIDLIVVVASMVVLCVGSKGQVFATSAIRGIRFLQILRMLHVDRQGGTWRLLGSVVFIHRQELITTLYIGFLGLIFSSYFVYLAEKDAVNESGRVEFGSYADALWWGVVTVTTIGYGDKVPQTWVGKTIASCFSVFAISFFALPAGILGSGFALKVQQKQRQKHFNRQIPAAASLIQTAWRCYAAENPDSSTWKIYIRKAPRSHTLLSPSPKPKKSVVVKKKKFKLDKDNGVTPGEKMLTVPHITCDPPEERRLDHFSVDGYDSSVRKSPTLLEVSMPHFMRTNSFAEDLDLEGETLLTPITHISQLREHHRATIKVIRRMQYFVAKKKFQQARKPYDVRDVIEQYSQGHLNLMVRIKELQRRLDQSIGKPSLFISVSEKSKDRGSNTIGARLNRVEDKVTQLDQRLALITDMLHQLLSLHGGSTPGSGGPPREGGAHITQPCGSGGSVDPELFLPSNTLPTYEQLTVPRRGPDEGS,protein,KCNQ1,GCF_000001405.26,,Protein coding 
+53,urn:mavedb:00000031-a-1,TCGCGGGGGTGGCCGGGGCCAGGGCTTCCCACGTGCGCAGCAGGACGCAGCGCTGCCTGAAACTCGCGCCGCGAGGAGAGGGCGGGGCCGCGGAAAGGAAGGGGAGGGGCTGGGAGGGCCCGGAGGGGGCTGGGCCGGGGACCCGGGAGGGGTCGGGACGGGGCGGGGTCCGCGCGGAGGAGGCGGAGCTGGAAGGTGAAGGGGCAGGACGGGTGCCCGGGTCCCCAGTCCCTCCGCCACGTGGGAAGCGCGGTCCTGG,dna,TERT promoter,GCF_000001405.26,,Regulatory +54,urn:mavedb:00000094-a-2,MAEAPPRRLGLGPPPGDAPRAELVALTAVQSEQGEAGGGGSPRRLGLLGSPLPPGAPLPGPGSGSGSACGQRSSAAHKRYRRLQNWVYNVLERPRGWAFVYHVFIFLLVFSCLVLSVLSTIQEHQELANECLLILEFVMIVVFGLEYIVRVWSAGCCCRYRGWQGRFRFARKPFCVIDFIVFVASVAVIAAGTQGNIFATSALRSMRFLQILRMVRMDRRGGTWKLLGSVVYAHSKELITAWYIGFLVLIFASFLVYLAEKDANSDFSSYADSLWWGTITLTTIGYGDKTPHTWLGRVLAAGFALLGISFFALPAGILGSGFALKVQEQHRQKHFEKRRMPAANLIQAAWRLYSTDMSRAYLTATWYYYDSILPSFRELALLFEHVQRARNGGLRPLEVRRAPVPDGAPSRYPPVATCHRPGSTSFCPGESSRMGIKDRIRMGSSQRRTGPSKQHLAPPTMPTSPSSEQVGEATSPTKVQKSWSFNDRTRFRASLRLKPRTSAEDAPSEEVAEEKSYQCELTVDDIMPAVKTVIRSIRILKFLVAKRKFKETLRPYDVKDVIEQYSAGHLDMLGRIKSLQTRVDQIVGRGPGDRKAREKGDKGPSDAEVVDEISMMGRVVKVEKQVQSIEHKLDLLLGFYSRCLRSGTSASLGAVQVPLFDPDITSDYHSPVDHEDISVSAQTLSISRSVSTNMD,protein,KCNQ4,GCF_000001405.26,,Protein coding +55,urn:mavedb:00000094-a-4,MAEAPPRRLGLGPPPGDAPRAELVALTAVQSEQGEAGGGGSPRRLGLLGSPLPPGAPLPGPGSGSGSACGQRSSAAHKRYRRLQNWVYNVLERPRGWAFVYHVFIFLLVFSCLVLSVLSTIQEHQELANECLLILEFVMIVVFGLEYIVRVWSAGCCCRYRGWQGRFRFARKPFCVIDFIVFVASVAVIAAGTQGNIFATSALRSMRFLQILRMVRMDRRGGTWKLLGSVVYAHSKELITAWYIGFLVLIFASFLVYLAEKDANSDFSSYADSLWWGTITLTTIGYGDKTPHTWLGRVLAAGFALLGISFFALPAGILGSGFALKVQEQHRQKHFEKRRMPAANLIQAAWRLYSTDMSRAYLTATWYYYDSILPSFRELALLFEHVQRARNGGLRPLEVRRAPVPDGAPSRYPPVATCHRPGSTSFCPGESSRMGIKDRIRMGSSQRRTGPSKQHLAPPTMPTSPSSEQVGEATSPTKVQKSWSFNDRTRFRASLRLKPRTSAEDAPSEEVAEEKSYQCELTVDDIMPAVKTVIRSIRILKFLVAKRKFKETLRPYDVKDVIEQYSAGHLDMLGRIKSLQTRVDQIVGRGPGDRKAREKGDKGPSDAEVVDEISMMGRVVKVEKQVQSIEHKLDLLLGFYSRCLRSGTSASLGAVQVPLFDPDITSDYHSPVDHEDISVSAQTLSISRSVSTNMD,protein,KCNQ4,GCF_000001405.26,,Protein coding 
+56,urn:mavedb:00000059-a-1,ACCTACCAGGGCAGCTACGGTTTCCGTCTGGGCTTCTTGCATTCTGGGACAGCCAAGTCTGTGACTTGCACGTACTCCCCTGCCCTCAACAAGATGTTTTGCCAACTGGCCAAGACCTGCCCTGTGCAGCTGTGGGTTGATTCCACACCCCCGCCCGGCACCCGCGTCCGCGCCATGGCCATCTACAAGCAGTCACAGCACATGACGGAGGTTGTGAGGCGCTGCCCCCACCATGAGCGCTGCTCAGATAGCGATGGTCTGGCCCCTCCTCAGCATCTTATCCGAGTGGAAGGAAATTTGCGTGTGGAGTATTTGGATGACAGAAACACTTTTCGGCATAGTGTGGTGGTGCCCTATGAGCCGCCTGAGGTTGGCTCTGACTGTACCACCATCCACTACAACTACATGTGTAACAGTTCCTGCATGGGCGGCATGAACCGGAGGCCCATCCTCACCATCATCACACTGGAAGACTCCAGTGGTAATCTACTGGGACGGAACAGCTTTGAGGTGCGTGTTTGTGCCTGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAA,dna,p53,GCF_000001405.26,P04637,Protein coding +57,urn:mavedb:00000094-a-1,MAEAPPRRLGLGPPPGDAPRAELVALTAVQSEQGEAGGGGSPRRLGLLGSPLPPGAPLPGPGSGSGSACGQRSSAAHKRYRRLQNWVYNVLERPRGWAFVYHVFIFLLVFSCLVLSVLSTIQEHQELANECLLILEFVMIVVFGLEYIVRVWSAGCCCRYRGWQGRFRFARKPFCVIDFIVFVASVAVIAAGTQGNIFATSALRSMRFLQILRMVRMDRRGGTWKLLGSVVYAHSKELITAWYIGFLVLIFASFLVYLAEKDANSDFSSYADSLWWGTITLTTIGYGDKTPHTWLGRVLAAGFALLGISFFALPAGILGSGFALKVQEQHRQKHFEKRRMPAANLIQAAWRLYSTDMSRAYLTATWYYYDSILPSFRELALLFEHVQRARNGGLRPLEVRRAPVPDGAPSRYPPVATCHRPGSTSFCPGESSRMGIKDRIRMGSSQRRTGPSKQHLAPPTMPTSPSSEQVGEATSPTKVQKSWSFNDRTRFRASLRLKPRTSAEDAPSEEVAEEKSYQCELTVDDIMPAVKTVIRSIRILKFLVAKRKFKETLRPYDVKDVIEQYSAGHLDMLGRIKSLQTRVDQIVGRGPGDRKAREKGDKGPSDAEVVDEISMMGRVVKVEKQVQSIEHKLDLLLGFYSRCLRSGTSASLGAVQVPLFDPDITSDYHSPVDHEDISVSAQTLSISRSVSTNMD,protein,KCNQ4,GCF_000001405.26,,Protein coding +58,urn:mavedb:00000083-i-1,AGTTACCACCGGGAGCTCCAGCACAGTGAAATGGACAGAAGGGCAGAGCAA,dna,Minigene exon - AC-rich enhancer,GCF_000001405.13,,Other noncoding 
+59,urn:mavedb:00000045-g-1,ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA,dna,alpha-synuclein,GCF_000001405.10,P37840,Protein coding +60,urn:mavedb:00000097-i-1,AGTGTGAGCAGGGAGAAGCCAGAATTGACAGCTTCAACAGAAAGGGTCAACAAAAGAATGTCCATGGTGGTGTCTGGCCTGACCCCAGAAGAATTTGTGAGTGTAT,dna,BRCA1 Exon 15,GCF_000001405.13,,Protein coding +61,urn:mavedb:00000097-w-1,TCCTGGGGATCCAGGGTGTCCACCCAATTGTGGTTGTGCAGCCAGATGCCTGGACAGAGGACAATGGCTTCCATGGTAAGGTGCCTGCATGTACCTGTGC,dna,BRCA1 Exon 22,GCF_000001405.13,,Protein coding +62,urn:mavedb:00000045-j-1,ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA,dna,alpha-synuclein,GCF_000001405.10,P37840,Protein coding +63,urn:mavedb:00000103-b-1,MAAAAAAGAGPEMVRGQVFDVGPRYTNLSYIGEGAYGMVCSAYDNVNKVRVAIKKISPFEHQTYCQRTLREIKILLRFRHENIIGINDIIRAPTIEQMKDVYIVQDLMETDLYKLLKTQHLSNDHICYFLYQILRGLKYIHSANVLHRDLKPSNLLLNTTCDLKICDFGLARVADPDHDHTGFLTEYVATRWYRAPEIMLNSKGYTKSIDIWSVGCILAEMLSNRPIFPGKHYLDQLNHILGILGSPSQEDLNCIINLKARNYLLSLPHKNKVPWNRLFPNADSKALDLLDKMLTFNPHKRIEVEQALAHPYLEQYYDPSDEPIAEAPFKFDMELDDLPKEKLKELIFEETARFQPGYRS,protein,MAPK1,GCF_000001405.26,,Protein coding +64,urn:mavedb:00000097-g-1,TTAATTTCAGGAGCCTACAAGAAAGTACGAGATTTAGTCAACTTGTTGAAGAGCTATTGAAAATCATTTGTGCTTTTCAGCTTGACACAGGTTTGGAGTGTAAGTGTTG,dna,BRCA1 Exon 
5,GCF_000001405.13,,Protein coding +65,urn:mavedb:00000026-a-1,GGTAAGTCAACATGAAATTATAAACCATGTTTAACTAACCCACAAGAAAAACAGAAAAAGAAAACAGATACATGAAAATCTGAGAGGAAAAAAAAAAAACAGAGAACACAATGGGAAGCTTCATTCAATGTAAGGGTACTAGAAGTTCTAGCCAGTGCAATTAAGAGGAAAAAAATAAATAAAAAGGCATATGTGTTGAAAGGAAGAAATTAAACTGTCTTTATTTGCAAATGACATGATTATCAGCACAGATAATCAAGATAAATATATAAAAAGATTTCTGAAACTAATAAGTTAGTTCAGTAAGGTCGTAAGCTATAAGACAAACAAAGGAAAATCAATTGTATTTGAATGTATCGACAGTAAACATATGGACATTAAAATTAACAATACAATATAATTTATATTTATTAAAAATATAAAATGCTTAGGCATAAATCTAACAAAACCCCCACAGTACTTG,dna,MYC enhancer (rs11986220),GCF_000001405.26,,Regulatory +66,urn:mavedb:00000083-j-1,AGTTTCTTTTGGGAGCTCCAGCACAGTGAAATGGACAGAAGGGCAGAGCAA,dna,Minigene exon - pyrimidine sequence,GCF_000001405.13,,Other noncoding +67,urn:mavedb:00000049-a-4,ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAA
GTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA,dna,MTHFR,GCF_000001405.26,P42898,Protein coding +68,urn:mavedb:00000083-e-1,AGTTATATATGGGAGCTCCAGCACAGTGAAATGGACAGAAGGGCAGAGCAA,dna,Minigene exon - hnRNP D binding site,GCF_000001405.13,,Other noncoding +69,urn:mavedb:00000014-a-1,CCTAACACAGTAGCTGGTACCTGATAGGTGCCTATATGTGATGGATGGGTGGACAGCCCGACAGATGAAAAATGGACAATTATGAGGAGGGGAGAGTGCAGACAGGGGAAGCTTCACCTCCTTTACAATTTTGGGAGTCCACACGGCATGGCATACAAATTATTTCATTCCCATTGAGAAATAAAATCCAATTCTCCATCACCAAGAGAGCCTTCCGAAAGAGGCCCCCCTGGGCAAACGGCCACCGATGGAGAGGTCTGCCAGTCCTCTTCTACCCCACCCACGCCCCCACCCTAATCAGAGGCCAAACCCTTCCTGGAGCCTGTGATAAAAGCAACTGTTAGCTTGCACTAGACTAGCTTCAAAGTTGTATTGACCCTGGTGTGTTATGTCTAAGAGTAGATGCCATATCTCTTTTCTGGCCTATGTTATTACCTGTATGGACTTTGCACTGGAATCAGCTATCTGCTCTTACTTATGCACACCTGGGGCATAGAGCCAGCCCTGTATCGCTTTTCAGCCATCTCACTACAGATAACTCCCAAGTCCTGTCTAGCTGCCTTCCTTATCACAGGAATAGCACCCAAGGTCCATCAGTAC,dna,BCL11A enhancer,GCF_000001405.26,,Regulatory +70,urn:mavedb:00000097-b-1,AAGTTCATTGGAACAGAAAGAAATGGATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAATCTTAGAGTGTCCCATCTGGTAA,dna,BRCA1 Exon 2,GCF_000001405.13,,Protein coding +71,urn:mavedb:00000097-k-1,TTAATTTCAGATGCTCGTGTACAAGTTTGCCAGAAAACACCACATCACTTTAACTAATCTAATTACTGAAGAGACTACTCATGTTGTTATGAAAACAGGTATACCAAG,dna,BRCA1 Exon 16,GCF_000001405.13,,Protein coding 
+72,urn:mavedb:00000102-0-1,ATGACAGCCATCATCAAAGAGATCGTTAGCAGAAACAAAAGGAGATATCAAGAGGATGGATTCGACTTAGACTTGACCTATATTTATCCAAACATTATTGCTATGGGATTTCCTGCAGAAAGACTTGAAGGCGTATACAGGAACAATATTGATGATGTAGTAAGGTTTTTGGATTCAAAGCATAAAAACCATTACAAGATATACAATCTTTGTGCTGAAAGACATTATGACACCGCCAAATTTAATTGCAGAGTTGCACAATATCCTTTTGAAGACCATAACCCACCACAGCTAGAACTTATCAAACCCTTTTGTGAAGATCTTGACCAATGGCTAAGTGAAGATGACAATCATGTTGCAGCAATTCACTGTAAAGCTGGAAAGGGACGAACTGGTGTAATGATATGTGCATATTTATTACATCGGGGCAAATTTTTAAAGGCACAAGAGGCCCTAGATTTCTATGGGGAAGTAAGGACCAGAGACAAAAAGGGAGTAACTATTCCCAGTCAGAGGCGCTATGTGTATTATTATAGCTACCTGTTAAAGAATCATCTGGATTATAGACCAGTGGCACTGTTGTTTCACAAGATGATGTTTGAAACTATTCCAATGTTCAGTGGCGGAACTTGCAATCCTCAGTTTGTGGTCTGCCAGCTAAAGGTGAAGATATATTCCTCCAATTCAGGACCCACACGACGGGAAGACAAGTTCATGTACTTTGAGTTCCCTCAGCCGTTACCTGTGTGTGGTGATATCAAAGTAGAGTTCTTCCACAAACAGAACAAGATGCTAAAAAAGGACAAAATGTTTCACTTTTGGGTAAATACATTCTTCATACCAGGACCAGAGGAAACCTCAGAAAAAGTAGAAAATGGAAGTCTATGTGATCAAGAAATCGATAGCATTTGCAGTATAGAGCGTGCAGATAATGACAAGGAATATCTAGTACTTACTTTAACAAAAAATGATCTTGACAAAGCAAATAAAGACAAAGCCAACCGATACTTTTCTCCAAATTTTAAGGTGAAGCTGTACTTCACAAAAACAGTAGAGGAGCCGTCAAATCCAGAGGCTAGCAGTTCAACTTCTGTAACACCAGATGTTAGTGACAATGAACCTGATCATTATAGATATTCTGACACCACTGACTCTGATCCAGAGAATGAACCTTTTGATGAAGATCAGCATACACAAATTACAAAAGTCTGA,dna,PTEN,GCF_000001405.26,P60484,Protein coding +73,urn:mavedb:00000001-b-2,ATGTCTGACCAGGAGGCAAAACCTTCAACTGAGGACTTGGGGGATAAGAAGGAAGGTGAATATATTAAACTCAAAGTCATTGGACAGGATAGCAGTGAGATTCACTTCAAAGTGAAAATGACAACACATCTCAAGAAACTCAAAGAATCATACTGTCAAAGACAGGGTGTTCCAATGAATTCACTCAGGTTTCTCTTTGAGGGTCAGAGAATTGCTGATAATCATACTCCAAAAGAACTGGGAATGGAGGAAGAAGATGTGATTGAAGTTTATCAGGAACAAACGGGGGGTCATTCAACAGTTTAG,dna,SUMO1,GCF_000001405.26,P63165,Protein coding 
+74,urn:mavedb:00000094-a-3,MAEAPPRRLGLGPPPGDAPRAELVALTAVQSEQGEAGGGGSPRRLGLLGSPLPPGAPLPGPGSGSGSACGQRSSAAHKRYRRLQNWVYNVLERPRGWAFVYHVFIFLLVFSCLVLSVLSTIQEHQELANECLLILEFVMIVVFGLEYIVRVWSAGCCCRYRGWQGRFRFARKPFCVIDFIVFVASVAVIAAGTQGNIFATSALRSMRFLQILRMVRMDRRGGTWKLLGSVVYAHSKELITAWYIGFLVLIFASFLVYLAEKDANSDFSSYADSLWWGTITLTTIGYGDKTPHTWLGRVLAAGFALLGISFFALPAGILGSGFALKVQEQHRQKHFEKRRMPAANLIQAAWRLYSTDMSRAYLTATWYYYDSILPSFRELALLFEHVQRARNGGLRPLEVRRAPVPDGAPSRYPPVATCHRPGSTSFCPGESSRMGIKDRIRMGSSQRRTGPSKQHLAPPTMPTSPSSEQVGEATSPTKVQKSWSFNDRTRFRASLRLKPRTSAEDAPSEEVAEEKSYQCELTVDDIMPAVKTVIRSIRILKFLVAKRKFKETLRPYDVKDVIEQYSAGHLDMLGRIKSLQTRVDQIVGRGPGDRKAREKGDKGPSDAEVVDEISMMGRVVKVEKQVQSIEHKLDLLLGFYSRCLRSGTSASLGAVQVPLFDPDITSDYHSPVDHEDISVSAQTLSISRSVSTNMD,protein,KCNQ4,GCF_000001405.26,,Protein coding +75,urn:mavedb:00000061-g-1,TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC,dna,RAF,GCF_000001405.26,P04049,Protein coding +76,urn:mavedb:00000001-d-1,ATGGAGCATGCCTTTACCCCGTTGGAGCCCCTGCTTTCCACTGGGAATTTGAAGTACTGCCTTGTAATTCTTAATCAGCCTTTGGACAACTATTTTCGTCATCTTTGGAACAAAGCTCTTTTAAGAGCCTGTGCCGATGGAGGTGCCAACCGCTTATATGATATCACCGAAGGAGAGAGAGAAAGCTTTTTGCCTGAATTCATCAATGGAGACTTTGATTCTATTAGGCCTGAAGTCAGAGAATACTATGCTACTAAGGGATGTGAGCTCATTTCAACTCCTGATCAAGACCACACTGACTTTACTAAGTGCCTTAAAATGCTCCAAAAGAAGATAGAAGAAAAAGACTTAAAGGGAAAGCACAGGTTGCATGTAGACACTGGAATGGAGGGTGATTGGTGTGGCCTTATTCCTGTTGGACAGCCTTGTATGCAGGTTACAACCACAGGCCTCAAGTGGAACCTCACAAATGATGTGCTTGCTTTTGGAACATTGGTCAGTACTTCCAATACCTACGACGGGTCTGGTGTTGTGACTGTGGAAACTGACCACCCACTCCTCTGGACCATGGCCATCAAAAGCTAA,dna,TPK1,GCF_000001405.26,Q9H3S4,Protein coding +77,urn:mavedb:00000097-p-1,TGTAACCTGTCTTTTCTATGATCTCTTTAGGGGTGACCCAGTCTATTAAAGAAAGAAAAATGCTGAATGAGGTAAGTACTTGATGTTACAAACTAACCAGA,dna,BRCA1 Exon 18,GCF_000001405.13,,Protein coding +78,urn:mavedb:00000097-x-1,TCCTGGGGATCCAGGGTGTCCACCCAATTGTGGTTGTGCAGCCAGATGCCTGGACAGAGGACAATGGCTTCCATGGTAAGGTGCCTGCATGTACCTGTGC,dna,BRCA1 Exon 22,GCF_000001405.13,,Protein coding 
+79,urn:mavedb:00000055-a-1,ATGACGGCCAGCGCACAGCCGCGCGGGCGGCGGCCAGGAGTCGGAGTCGGAGTCGTGGTGACCAGCTGCAAGCATCCGCGTTGCGTCCTCCTGGGGAAGAGGAAAGGCTCGGTTGGAGCTGGCAGTTTCCAACTCCCTGGAGGTCATCTGGAGTTCGGTGAAACCTGGGAAGAATGTGCTCAAAGGGAAACCTGGGAAGAAGCAGCTCTTCACCTGAAAAATGTTCACTTTGCCTCAGTTGTGAATTCTTTCATTGAGAAGGAGAATTACCATTATGTTACTATATTAATGAAAGGAGAAGTGGATGTGACTCATGATTCAGAACCAAAGAATGTAGAGCCTGAAAAAAATGAAAGTTGGGAGTGGGTTCCTTGGGAAGAACTACCTCCCCTGGACCAGCTTTTCTGGGGACTGCGTTGTTTAAAAGAACAAGGCTATGATCCATTTAAAGAAGATCTGAACCATCTGGTGGGATACAAAGGAAATCATCTCTAG,dna,NUDT15,GCF_000001405.26,Q9NV35,Protein coding +80,urn:mavedb:00000094-a-10,MAEAPPRRLGLGPPPGDAPRAELVALTAVQSEQGEAGGGGSPRRLGLLGSPLPPGAPLPGPGSGSGSACGQRSSAAHKRYRRLQNWVYNVLERPRGWAFVYHVFIFLLVFSCLVLSVLSTIQEHQELANECLLILEFVMIVVFGLEYIVRVWSAGCCCRYRGWQGRFRFARKPFCVIDFIVFVASVAVIAAGTQGNIFATSALRSMRFLQILRMVRMDRRGGTWKLLGSVVYAHSKELITAWYIGFLVLIFASFLVYLAEKDANSDFSSYADSLWWGTITLTTIGYGDKTPHTWLGRVLAAGFALLGISFFALPAGILGSGFALKVQEQHRQKHFEKRRMPAANLIQAAWRLYSTDMSRAYLTATWYYYDSILPSFRELALLFEHVQRARNGGLRPLEVRRAPVPDGAPSRYPPVATCHRPGSTSFCPGESSRMGIKDRIRMGSSQRRTGPSKQHLAPPTMPTSPSSEQVGEATSPTKVQKSWSFNDRTRFRASLRLKPRTSAEDAPSEEVAEEKSYQCELTVDDIMPAVKTVIRSIRILKFLVAKRKFKETLRPYDVKDVIEQYSAGHLDMLGRIKSLQTRVDQIVGRGPGDRKAREKGDKGPSDAEVVDEISMMGRVVKVEKQVQSIEHKLDLLLGFYSRCLRSGTSASLGAVQVPLFDPDITSDYHSPVDHEDISVSAQTLSISRSVSTNMD,protein,KCNQ4,GCF_000001405.26,,Protein coding +81,urn:mavedb:00000045-i-1,ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA,dna,alpha-synuclein,GCF_000001405.10,P37840,Protein coding 
+82,urn:mavedb:00000032-a-1,ATACAGATAAATGCACACATGTATACGCGAAAGGTTAACTCGGCGGAGGACTCGCCCAAATAAGCACCGGGATTGCATTTAAAATAATAATAATAAATAAATAAATAAACTAGGAAGGAAAGCGGGGGGAGGGAAGCAGAAGTCGGGAAGAAAAGAGAAAAGCAGCAGGCTGATTACGAGGTGTCAAAACTGCCAGGAGCAAGAAGGTGATAGCAATCAGGGGTGAGAAGAGTGCGGCATTCGTGCGGGGCAACTAATTATCCGTCTCATTTGAGAAGAGCAGCATTTGAGGCAGCAGCGTTCGCCTGCTGAACGGTGACAGATTGGCGCGGAGGAGAGGGGAGGTGTTAAAACAATGGAGCCGGGCGCGCGAGCGCTGCTGCATGCTAATCAGCCCTCCCTCCGCCTGCCTGCCGCGCTCCCTCCTTCCTCCCGGCCTCCCTCCTCCGCGCTCCCTCCTCCCGCCTGCGGCGCTCCCTCCTTTCCAGCGGGCCCCGCGCCGCCGCCGCCACCCGCTTCCTGCTCCCTCGCTTTCCCGCGCGTCCTTCCCGCCGCTGGCGAGTGGAACCCAGCCACCGCCACCGAGTCCCA,dna,UC88 enhancer,GCF_000001405.26,,Regulatory +83,urn:mavedb:00000101-a-1,ATGACAGCCATCATCAAAGAGATCGTTAGCAGAAACAAAAGGAGATATCAAGAGGATGGATTCGACTTAGACTTGACCTATATTTATCCAAACATTATTGCTATGGGATTTCCTGCAGAAAGACTTGAAGGCGTATACAGGAACAATATTGATGATGTAGTAAGGTTTTTGGATTCAAAGCATAAAAACCATTACAAGATATACAATCTTTGTGCTGAAAGACATTATGACACCGCCAAATTTAATTGCAGAGTTGCACAATATCCTTTTGAAGACCATAACCCACCACAGCTAGAACTTATCAAACCCTTTTGTGAAGATCTTGACCAATGGCTAAGTGAAGATGACAATCATGTTGCAGCAATTCACTGTAAAGCTGGAAAGGGACGAACTGGTGTAATGATATGTGCATATTTATTACATCGGGGCAAATTTTTAAAGGCACAAGAGGCCCTAGATTTCTATGGGGAAGTAAGGACCAGAGACAAAAAGGGAGTAACTATTCCCAGTCAGAGGCGCTATGTGTATTATTATAGCTACCTGTTAAAGAATCATCTGGATTATAGACCAGTGGCACTGTTGTTTCACAAGATGATGTTTGAAACTATTCCAATGTTCAGTGGCGGAACTTGCAATCCTCAGTTTGTGGTCTGCCAGCTAAAGGTGAAGATATATTCCTCCAATTCAGGACCCACACGACGGGAAGACAAGTTCATGTACTTTGAGTTCCCTCAGCCGTTACCTGTGTGTGGTGATATCAAAGTAGAGTTCTTCCACAAACAGAACAAGATGCTAAAAAAGGACAAAATGTTTCACTTTTGGGTAAATACATTCTTCATACCAGGACCAGAGGAAACCTCAGAAAAAGTAGAAAATGGAAGTCTATGTGATCAAGAAATCGATAGCATTTGCAGTATAGAGCGTGCAGATAATGACAAGGAATATCTAGTACTTACTTTAACAAAAAATGATCTTGACAAAGCAAATAAAGACAAAGCCAACCGATACTTTTCTCCAAATTTTAAGGTGAAGCTGTACTTCACAAAAACAGTAGAGGAGCCGTCAAATCCAGAGGCTAGCAGTTCAACTTCTGTAACACCAGATGTTAGTGACAATGAACCTGATCATTATAGATATTCTGACACCACTGACTCTGATCCAGAGAATGAACCTTTTGATGAAGATCAGCATACACAAATTACAAAAGTCTGA,dna,PTEN,GCF_000001405.26,P60484,Protein coding 
+84,urn:mavedb:00000053-a-1,CCACGCCGCATCGTCATCCACCGTGGGTCAACGGGGTTAGGCTTCAATATCGTCGGTGGAGAGGATGGTGAGGGAATCTTCATCTCATTCATTCTGGCGGGAGGACCGGCCGATTTAAGCGGAGAACTTCGCAAAGGTGACCAGATCCTTTCGGTGAATGGCGTAGATTTGCGCAACGCATCACACGAACAGGCGGCCATCGCATTAAAGAACGCCGGCCAGACCGTTACGATTATCGCGCAGTATAAA,dna,PSD95 PDZ3,GCF_000001405.26,P78352,Protein coding +85,urn:mavedb:00000097-v-1,ATGTCCATTTTAGATCAACTGGAATGGATGGTACAGCTGTGTGGTGCTTCTGTGGTGAAGGAGCTTTCATCATTCACCCTTGGCACAGTAAGTATTGGGTGCCCT,dna,BRCA1 Exon 21,GCF_000001405.13,,Protein coding +86,urn:mavedb:00000091-a-1,HGDVAVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHHLHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDF,protein,BRAF,GCF_000001405.13,P15056,Protein coding +87,urn:mavedb:00000048-c-1,GAGGGGATCAGTATATACACTTCAGATAACTACACCGAGGAAATGGGCTCAGGGGACTATGACTCCATGAAGGAACCCTGTTTCCGTGAAGAAAATGCTAATTTCAATAAAATCTTCCTGCCCACCATCTACTCCATCATCTTCTTAACTGGCATTGTGGGCAATGGATTGGTCATCCTGGTCATGGGTTACCAGAAGAAACTGAGAAGCATGACGGACAAGTACAGGCTGCACCTGTCAGTGGCCGACCTCCTCTTTGTCATCACGCTTCCCTTCTGGGCAGTTGATGCCGTGGCAAACTGGTACTTTGGGAACTTCCTATGCAAGGCAGTCCATGTCATCTACACAGTCAACCTCTACAGCAGTGTCCTCATCCTGGCCTTCATCAGTCTGGACCGCTACCTGGCCATCGTCCACGCCACCAACAGTCAGAGGCCAAGGAAGCTGTTGGCTGAAAAGGTGGTCTATGTTGGCGTCTGGATCCCTGCCCTCCTGCTGACTATTCCCGACTTCATCTTTGCCAACGTCAGTGAGGCAGATGACAGATATATCTGTGACCGCTTCTACCCCAATGACTTGTGGGTGGTTGTGTTCCAGTTTCAGCACATCATGGTTGGCCTTATCCTGCCTGGTATTGTCATCCTGTCCTGCTATTGCATTATCATCTCCAAGCTGTCACACTCCAAGGGCCACCAGAAGCGCAAGGCCCTCAAGACCACAGTCATCCTCATCCTGGCTTTCTTCGCCTGTTGGCTGCCTTACTACATTGGGATCAGCATCGACTCCTTCATCCTCCTGGAAATCATCAAGCAAGGGTGTGAGTTTGAGAACACTGTGCACAAGTGGATTTCCATCACCGAGGCCCTAGCTTTCTTCCACTGTTGTCTGAACCCCATCCTCTATGCTTTCCTTGGAGCCAAATTTAAAACCTCTGCCCAGCACGCACTCACCTCTGTGAGCAGAGGGTCCAGCCTCAAGATCCTCTCCAAAGGAAAGCGAGGTGGACATTCATCTGTTTCCACTGAGTCTGAGTCTTCAAGTTTTCACTCCAGC,dna,CXCR4,GCF_000001405.26,P61073,Protein coding 
+88,urn:mavedb:00000031-c-1,TCGCGGGGGTGGCCGGGGCCAGGGCTTCCCACGTGCGCAGCAGGACGCAGCGCTGCCTGAAACTCGCGCCGCGAGGAGAGGGCGGGGCCGCGGAAAGGAAGGGGAGGGGCTGGGAGGGCCCGGAGGGGGCTGGGCCGGGGACCCGGGAGGGGTCGGGACGGGGCGGGGTCCGCGCGGAGGAGGCGGAGCTGGAAGGTGAAGGGGCAGGACGGGTGCCCGGGTCCCCAGTCCCTCCGCCACGTGGGAAGCGCGGTCCTGG,dna,TERT promoter,GCF_000001405.26,,Regulatory +89,urn:mavedb:00000051-b-1,CTGACGTCTATCATCTCTGCGGTGGTTGGCATTCTGCTGGTCGTGGTCTTGGGCGTGGTCTTTGGCATCCTG,dna,ErbB2,GCF_000001405.26,P04626,Protein coding +90,urn:mavedb:00000097-m-1,CATTCTGCAGATGCTGAGTTTGTGTGTGAACGGACACTGAAATATTTTCTAGGAATTGCGGGAGGAAAATGGGTAGTTAGCTATTTCTGTAAGTATAA,dna,BRCA1 Exon 17,GCF_000001405.13,,Protein coding +91,urn:mavedb:00000001-b-1,ATGTCTGACCAGGAGGCAAAACCTTCAACTGAGGACTTGGGGGATAAGAAGGAAGGTGAATATATTAAACTCAAAGTCATTGGACAGGATAGCAGTGAGATTCACTTCAAAGTGAAAATGACAACACATCTCAAGAAACTCAAAGAATCATACTGTCAAAGACAGGGTGTTCCAATGAATTCACTCAGGTTTCTCTTTGAGGGTCAGAGAATTGCTGATAATCATACTCCAAAAGAACTGGGAATGGAGGAAGAAGATGTGATTGAAGTTTATCAGGAACAAACGGGGGGTCATTCAACAGTTTAG,dna,SUMO1,GCF_000001405.26,P63165,Protein coding +92,urn:mavedb:00000103-a-1,MAAAAAAGAGPEMVRGQVFDVGPRYTNLSYIGEGAYGMVCSAYDNVNKVRVAIKKISPFEHQTYCQRTLREIKILLRFRHENIIGINDIIRAPTIEQMKDVYIVQDLMETDLYKLLKTQHLSNDHICYFLYQILRGLKYIHSANVLHRDLKPSNLLLNTTCDLKICDFGLARVADPDHDHTGFLTEYVATRWYRAPEIMLNSKGYTKSIDIWSVGCILAEMLSNRPIFPGKHYLDQLNHILGILGSPSQEDLNCIINLKARNYLLSLPHKNKVPWNRLFPNADSKALDLLDKMLTFNPHKRIEVEQALAHPYLEQYYDPSDEPIAEAPFKFDMELDDLPKEKLKELIFEETARFQPGYRS,protein,MAPK1,GCF_000001405.26,,Protein coding +93,urn:mavedb:00000097-f-1,TATAATTTATAGATTTTGCATGCTGAAACTTCTCAACCAGAAGAAAGGGCCTTCACAGTGTCCTTTATGTAAGAATGATATAACCAAAAGGTATATAATT,dna,BRCA1 Exon 4,GCF_000001405.13,,Protein coding 
+94,urn:mavedb:00000057-b-1,ACGGAATATAAGCTGGTGGTGGTGGGCGCCGGCGGTGTGGGCAAGAGTGCGCTGACCATCCAGCTGATCCAGAACCATTTTGTGGACGAATACGACCCCACTATAGAGGATTCCTACCGGAAGCAGGTGGTCATTGATGGGGAGACGTGCCTGTTGGACATCCTGGATACCGCCGGCCAGGAGGAGTACAGCGCCATGCGGGACCAGTACATGCGCACCGGGGAGGGCTTCCTGTGTGTGTTTGCCATCAACAACACCAAGTCTTTTGAGGACATCCACCAGTACAGGGAGCAGATCAAACGGGTGAAGGACTCGGATGACGTGCCCATGGTGCTGGTGGGGAACAAGTGTGACCTGGCTGCACGCACTGTGGAATCTCGGCAGGCTCAGGACCTCGCCCGAAGCTACGGCATCCCCTACATCGAGACCTCGGCCAAGACCCGGCAGGGAGTGGAGGATGCCTTCTACACGTTGGTGCGTGAGATCCGGCAGCAC,dna,Ras,GCF_000001405.26,P01112,Protein coding +95,urn:mavedb:00000083-a-1,AGTTGCTGCTGGGAGCTCCAGCACAGTGAAATGGACAGAAGGGCAGAGCAA,dna,Minigene exon - Wilms’ tumor gene,GCF_000001405.13,,Other noncoding +96,urn:mavedb:00000097-s-1,CTCTCTTCCTCTCTTCTTCCAGATCTTCAGGGGGCTAGAAATCTGTTGCTATGGGCCCTTCACCAACATGCCCACAGGTAAGAGCCTGGGAGAACCCCAG,dna,BRCA1 Exon 20,GCF_000001405.13,,Protein coding +97,urn:mavedb:00000024-a-1,AAGCAGGACTCCTTATAGACAGGTACATCCAGGCAAAGCTGCATCAAACTTTTATACTGGAGAGGGCAACCACGCATTCGACATCATTGGGAATGATAACAAGGTGAGAACACATTGAGTGTTTACAGAGGGTTGACTATCAAAAGGAAAGTATGGTTTCAGATGATTTATGAGTTATTAATCAATCACATCTCCACCCAAGTTATGTGTTTACCTTGTCCCCACCCCGATCTGCATTTGCTGCTGACCTAAAAGAGCAGAGGAGGAGAGAACTTCTCTTTGCTGAGCCTTCGCCCTATGCTGGTGGATTGCTAGCACGTGATGTGTCCTTATTCGATTTTATACACAAAGAAACAGCCTCAGCGATGCCAGTCACAACAGCCAGGTGGTAAAGGTGGGTTTCCTGAAGGCATAAGAATAGCCCACAGGTCAGCCCACCAGGCCCACCGGGCCTGGGTCTGTGGCCCAGATGACCCTGCCCTTAGGGTCTGACATGAGAACAGAAATGGGTCAAGCCATCCTGCAGGGCACAGGAGAGCAAGAACTACATACAGCTCGGGGTTCCCTGACAAGCTGAATTGCTGGACCTTT,dna,MSMB promoter,GCF_000001405.26,,Regulatory 
+98,urn:mavedb:00000045-f-1,ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA,dna,alpha-synuclein,GCF_000001405.10,P37840,Protein coding +99,urn:mavedb:00000072-a-1,GQPREPQVYTLPPSRDELTKNQVSLTCLVKGFYPSDIAVEWESNGQPENNYKTTPPVLDSDGSFFLYSKLTVDKSRWQQGNVFSCSVMHEALHNHYTQKSLSLSPG,protein,IGHG1,GCF_000001405.26,P01857,Protein coding +100,urn:mavedb:00000095-b-1,ATGGATTCTCTTGTGGTCCTTGTGCTCTGTCTCTCATGTTTGCTTCTCCTTTCACTCTGGAGACAGAGCTCTGGGAGAGGAAAACTCCCTCCTGGCCCCACTCCTCTCCCAGTGATTGGAAATATCCTACAGATAGGTATTAAGGACATCAGCAAATCCTTAACCAATCTCTCAAAGGTCTATGGCCCTGTGTTCACTCTGTATTTTGGCCTGAAACCCATAGTGGTGCTGCATGGATATGAAGCAGTGAAGGAAGCCCTGATTGATCTTGGAGAGGAGTTTTCTGGAAGAGGCATTTTCCCACTGGCTGAAAGAGCTAACAGAGGATTTGGAATTGTTTTCAGCAATGGAAAGAAATGGAAGGAGATCCGGCGTTTCTCCCTCATGACGCTGCGGAATTTTGGGATGGGGAAGAGGAGCATTGAGGACCGTGTTCAAGAGGAAGCCCGCTGCCTTGTGGAGGAGTTGAGAAAAACCAAGGCCTCACCCTGTGATCCCACTTTCATCCTGGGCTGTGCTCCCTGCAATGTGATCTGTAGTATCATATTTCACAAGCGGTTCGACTATAAGGACCAGCAATTCTTGAACCTTATGGAGAAACTGAATGAGAATATCAAAATTCTCTCAAGTCCGTGGATTCAGATTTGCAACAACTTTAGTCCCATTATTGATTATTTTCCGGGAACACACAACAAATTGCTGAAAAATGTAGCTTTTATGAAAAGTTACATTCTCGAGAAAGTCAAAGAGCATCAAGAGTCAATGGATATGAACAATCCTCAAGACTTCATTGATTGCTTCTTGATGAAAATGGAAAAGGAGAAACACAACCAACCATCTGAGTTCACAATCGAGTCACTGGAAAACACGGCCGTAGATCTCTTTGGTGCGGGGACGGAGACAACGAGTACGACTCTTAGATACGCGCTCCTTTTGTTGCTGAAGCACCCAGAAGTCACCGCGAAGGTGCAAGAAGAAATCGAGCGGGTAATCGGACGAAACAGATCCCCTTGTATGCAGGATAGAAGTCACATGCCCTACACAGATGCTGTGGTGCACGAGGTCCAGAGATACATTGACCTTCTCCCCACCAGCCTGCCCCATGCAGTGACCTGTGACATTAAATTCAGAAACTATCTCATTCCCAAGGGCACAACCATATTAATTTCCCTGACTTCTGTGCTACATGACAACAAAGAATTTCCCAACCCAGAGATGTTTGACCCTCATCACTTTCTGGATGAAGGTGGCAATTTCAAGAAGAGCAAGTAT
TTCATGCCTTTCTCAGCAGGAAAACGGATTTGTGTGGGAGAAGCCCTGGCCGGCATGGAGCTGTTTTTATTCCTGACCTCCATTTTACAGAACTTTAACCTGAAATCTCTGGTTGACCCAAAGAACCTTGACACCACTCCAGTTGTCAATGGATTTGCCTCTGTGCCGCCCTTCTACCAGCTGTGCTTCATTCCTGTC,dna,CYP2C9,GCF_000001405.26,P11712,Protein coding +101,urn:mavedb:00000001-a-1,ATGTCGGGGATCGCCCTCAGCAGACTCGCCCAGGAGAGGAAAGCATGGAGGAAAGACCACCCATTTGGTTTCGTGGCTGTCCCAACAAAAAATCCCGATGGCACGATGAACCTCATGAACTGGGAGTGCGCCATTCCAGGAAAGAAAGGGACTCCGTGGGAAGGAGGCTTGTTTAAACTACGGATGCTTTTCAAAGATGATTATCCATCTTCGCCACCAAAATGTAAATTCGAACCACCATTATTTCACCCGAATGTGTACCCTTCGGGGACAGTGTGCCTGTCCATCTTAGAGGAGGACAAGGACTGGAGGCCAGCCATCACAATCAAACAGATCCTATTAGGAATACAGGAACTTCTAAATGAACCAAATATCCAAGACCCAGCTCAAGCAGAGGCCTACACGATTTACTGCCAAAACAGAGTGGAGTACGAGAAAAGGGTCCGAGCACAAGCCAAGAAGTTTGCGCCCTCATAA,dna,UBE2I,GCF_000001405.26,P63279,Protein coding +102,urn:mavedb:00000023-a-1,AGCTCTTCACCGGAGACCCAAATACAACAAATCAAGTCGCCTGCCCTGGCGACACTTTCGAAGGACTGGAGTGGGAATCAGAGCTTCACGGGTTAAAAAGCCGATGTCACATCGGCCGTTCGAAACTCCTCCTCTTGCAGTGAGGTGAAGACATTTGAAAATCACCCCACTGCAAACTCCTCCCCCTGCTAGAAACCTCACATTGAAATGCTGTAAATGACGTGGGCCCCGAGTGCAATCGCGGGAAGCCAGGGTTTCCAGCTAGGACACAGCAGGTCGTGATCCGGGTCGGGACACTGCCTGGCAGAGGCTGCGAGC,dna,LDLR promoter,GCF_000001405.26,,Regulatory 
+103,urn:mavedb:00000049-a-2,ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA
,dna,MTHFR,GCF_000001405.26,P42898,Protein coding +104,urn:mavedb:00000062-b-1,CTCTGTCTCTCATGTTTGCTTCTCCTTTCAATCTGGAGACAGAGCTCTGGGAGAGGAAAACTCCCTCCTGGCCCCACTCCTCTCCCAGTGATTGGAAATATCCTACAGATAGATATTAAGGATGTCAGCAAATCCTTAACCAATCTCTCAAAAATCTATGGCCCTGTGTTCACTCTGTATTTTGGCCTGGAACGCATGGTGGTGCTGCATGGATATGAAGTGGTGAAGGAAGCCCTGATTGATCTTGGAGAGGAGTTTTCTGGAAGAGGCCATTTCCCACTGGCTGAAAGAGCTAACAGAGGATTTGGAATCGTTTTCAGCAATGGAAAGAGATGGAAGGAGATCCGGCGTTTCTCCCTCATGACGCTGCGGAATTTTGGGATGGGGAAGAGGAGCATTGAGGACCGTGTTCAAGAGGAAGCCCGCTGCCTTGTGGAGGAGTTGAGAAAAACCAAGGCTTCACCCTGTGATCCCACTTTCATCCTGGGCTGTGCTCCCTGCAATGTGATCTGCTCCATTATTTTCCAGAAACGTTTCGATTATAAAGATCAGCAATTTCTTAACTTGATGGAAAAATTGAATGAAAACATCAGGATTGTAAGCACCCCCTGGATCCAGATATGCAATAATTTTCCCACTATCATTGATTATTTCCCGGGAACCCATAACAAATTACTTAAAAACCTTGCTTTTATGGAAAGTGATATTTTGGAGAAAGTAAAAGAACACCAAGAATCGATGGACATCAACAACCCTCGGGACTTTATTGATTGCTTCCTGATCAAAATGGAGAAGGAAAAGCAAAACCAACAGTCTGAATTCACTATTGAAAACTTGGTAATCACTGCAGCTGACTTACTTGGAGCTGGGACAGAGACAACAAGCACAACCCTGAGATATGCTCTCCTTCTCCTGCTGAAGCACCCAGAGGTCACAGCTAAAGTCCAGGAAGAGATTGAACGTGTCATTGGCAGAAACCGGAGCCCCTGCATGCAGGACAGGGGCCACATGCCCTACACAGATGCTGTGGTGCACGAGGTCCAGAGATACATCGACCTCATCCCCACCAGCCTGCCCCATGCAGTGACCTGTGACGTTAAATTCAGAAACTACCTCATTCCCAAGGGCACAACCATATTAACTTCCCTCACTTCTGTGCTACATGACAACAAAGAATTTCCCAACCCAGAGATGTTTGACCCTCGTCACTTTCTGGATGAAGGTGGAAATTTTAAGAAAAGTAACTACTTCATGCCTTTCTCAGCAGGAAAACGGATTTGTGTGGGAGAGGGCCTGGCCCGCATGGAGCTGTTTTTATTCCTGACCTTCATTTTACAGAACTTTAACCTGAAATCTCTGATTGACCCAAAGGACCTTGACACAACTCCTGTTGTCAATGGATTTGCTTCTGTCCCGCCCTTCTATCAGCTGTGCTTCATTCCT,dna,CYP2C19,GCF_000001405.26,P33261,Protein coding 
+105,urn:mavedb:00000094-a-12,MAEAPPRRLGLGPPPGDAPRAELVALTAVQSEQGEAGGGGSPRRLGLLGSPLPPGAPLPGPGSGSGSACGQRSSAAHKRYRRLQNWVYNVLERPRGWAFVYHVFIFLLVFSCLVLSVLSTIQEHQELANECLLILEFVMIVVFGLEYIVRVWSAGCCCRYRGWQGRFRFARKPFCVIDFIVFVASVAVIAAGTQGNIFATSALRSMRFLQILRMVRMDRRGGTWKLLGSVVYAHSKELITAWYIGFLVLIFASFLVYLAEKDANSDFSSYADSLWWGTITLTTIGYGDKTPHTWLGRVLAAGFALLGISFFALPAGILGSGFALKVQEQHRQKHFEKRRMPAANLIQAAWRLYSTDMSRAYLTATWYYYDSILPSFRELALLFEHVQRARNGGLRPLEVRRAPVPDGAPSRYPPVATCHRPGSTSFCPGESSRMGIKDRIRMGSSQRRTGPSKQHLAPPTMPTSPSSEQVGEATSPTKVQKSWSFNDRTRFRASLRLKPRTSAEDAPSEEVAEEKSYQCELTVDDIMPAVKTVIRSIRILKFLVAKRKFKETLRPYDVKDVIEQYSAGHLDMLGRIKSLQTRVDQIVGRGPGDRKAREKGDKGPSDAEVVDEISMMGRVVKVEKQVQSIEHKLDLLLGFYSRCLRSGTSASLGAVQVPLFDPDITSDYHSPVDHEDISVSAQTLSISRSVSTNMD,protein,KCNQ4,GCF_000001405.26,,Protein coding +106,urn:mavedb:00000057-c-1,ACGGAATATAAGCTGGTGGTGGTGGGCGCCGGCGGTGTGGGCAAGAGTGCGCTGACCATCCAGCTGATCCAGAACCATTTTGTGGACGAATACGACCCCACTATAGAGGATTCCTACCGGAAGCAGGTGGTCATTGATGGGGAGACGTGCCTGTTGGACATCCTGGATACCGCCGGCCAGGAGGAGTACAGCGCCATGCGGGACCAGTACATGCGCACCGGGGAGGGCTTCCTGTGTGTGTTTGCCATCAACAACACCAAGTCTTTTGAGGACATCCACCAGTACAGGGAGCAGATCAAACGGGTGAAGGACTCGGATGACGTGCCCATGGTGCTGGTGGGGAACAAGTGTGACCTGGCTGCACGCACTGTGGAATCTCGGCAGGCTCAGGACCTCGCCCGAAGCTACGGCATCCCCTACATCGAGACCTCGGCCAAGACCCGGCAGGGAGTGGAGGATGCCTTCTACACGTTGGTGCGTGAGATCCGGCAGCAC,dna,Ras,GCF_000001405.26,P01112,Protein coding 
+107,urn:mavedb:00000094-a-7,MAEAPPRRLGLGPPPGDAPRAELVALTAVQSEQGEAGGGGSPRRLGLLGSPLPPGAPLPGPGSGSGSACGQRSSAAHKRYRRLQNWVYNVLERPRGWAFVYHVFIFLLVFSCLVLSVLSTIQEHQELANECLLILEFVMIVVFGLEYIVRVWSAGCCCRYRGWQGRFRFARKPFCVIDFIVFVASVAVIAAGTQGNIFATSALRSMRFLQILRMVRMDRRGGTWKLLGSVVYAHSKELITAWYIGFLVLIFASFLVYLAEKDANSDFSSYADSLWWGTITLTTIGYGDKTPHTWLGRVLAAGFALLGISFFALPAGILGSGFALKVQEQHRQKHFEKRRMPAANLIQAAWRLYSTDMSRAYLTATWYYYDSILPSFRELALLFEHVQRARNGGLRPLEVRRAPVPDGAPSRYPPVATCHRPGSTSFCPGESSRMGIKDRIRMGSSQRRTGPSKQHLAPPTMPTSPSSEQVGEATSPTKVQKSWSFNDRTRFRASLRLKPRTSAEDAPSEEVAEEKSYQCELTVDDIMPAVKTVIRSIRILKFLVAKRKFKETLRPYDVKDVIEQYSAGHLDMLGRIKSLQTRVDQIVGRGPGDRKAREKGDKGPSDAEVVDEISMMGRVVKVEKQVQSIEHKLDLLLGFYSRCLRSGTSASLGAVQVPLFDPDITSDYHSPVDHEDISVSAQTLSISRSVSTNMD,protein,KCNQ4,GCF_000001405.26,,Protein coding +108,urn:mavedb:00000061-f-1,TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC,dna,RAF,GCF_000001405.26,P04049,Protein coding +109,urn:mavedb:00000001-a-2,ATGTCGGGGATCGCCCTCAGCAGACTCGCCCAGGAGAGGAAAGCATGGAGGAAAGACCACCCATTTGGTTTCGTGGCTGTCCCAACAAAAAATCCCGATGGCACGATGAACCTCATGAACTGGGAGTGCGCCATTCCAGGAAAGAAAGGGACTCCGTGGGAAGGAGGCTTGTTTAAACTACGGATGCTTTTCAAAGATGATTATCCATCTTCGCCACCAAAATGTAAATTCGAACCACCATTATTTCACCCGAATGTGTACCCTTCGGGGACAGTGTGCCTGTCCATCTTAGAGGAGGACAAGGACTGGAGGCCAGCCATCACAATCAAACAGATCCTATTAGGAATACAGGAACTTCTAAATGAACCAAATATCCAAGACCCAGCTCAAGCAGAGGCCTACACGATTTACTGCCAAAACAGAGTGGAGTACGAGAAAAGGGTCCGAGCACAAGCCAAGAAGTTTGCGCCCTCATAA,dna,UBE2I,GCF_000001405.26,P63279,Protein coding +110,urn:mavedb:00000083-d-1,AGTTAGGGATGGGAGCTCCAGCACAGTGAAATGGACAGAAGGGCAGAGCAA,dna,Minigene exon - hnRNPA1 binding site,GCF_000001405.13,,Other noncoding 
+111,urn:mavedb:00000003-b-2,GATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAATCTTAGAGTGTCCCATCTGCCTGGAGTTGATCAAGGAACCTGTCTCCACAAAGTGTGACCACATATTTTGCAAATTTTGCATGCTGAAACTTCTCAACCAGAAGAAAGGGCCTTCACAGTGTCCTTTATGTAAGAATGATATAACCAAAAGGAGCCTACAAGAAAGTACGAGATTTAGTCAACTTGTTGAAGAGCTATTGAAAATCATTTGTGCTTTTCAGCTTGACACAGGTTTGGAGTATGCAAACAGCTATAATTTTGCAAAAAAGGAAAATAACTCTCCTGAACATCTAAAAGATGAAGTTTCTATCATCCAAAGTATGGGCTACAGAAACCGTGCCAAAAGACTTCTACAGAGTGAACCCGAAAATCCTTCCTTGCAGGAAACCAGTCTCAGTGTCCAACTCTCTAACCTTGGAACTGTGAGAACTCTGAGGACAAAGCAGCGGATACAACCTCAAAGGACGTCTGTCTACATTGAATTGGGATCTGATTCTTCTGAAGATACCGTTAATAAGGCAACTTATTGCAGTGTGGGAGATCAAGAATTGTTACAAATCACCCCTCAAGGAACCAGGGATGAAATCAGTTTGGATTCTGCAAAAAAGGCTGCTTGTGAATTTTCTGAGACGGATGTAACAAATACTGAACATCATCAACCCAGTAATAATGATTTGAACACCACTGAGAAGCGTGCAGCTGAGAGGCATCCAGAAAAGTATCAGGGTAGTTCTGTTTCAAACTTGCATGTGGAGCCATGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGAAAAGGCTGAGTTC,dna,BRCA1 RING domain,GCF_000001405.26,,Protein coding +112,urn:mavedb:00000027-a-1,TGCTTTCAGTGTGGGCCTGGGGCTGCGGGACCATGGAATGAGAGGGAGAGGATGACAAAACTGCTGGTCTTATCTAAGGGAGACAGAGAAGAGAAAAGGGGCACACCCAGTAGGCCACCCTGTCCCCACAGAATCCCTCCCCCAGAACGGCCTGCTCTCTGCCCTCATCTCCTGGCATTTCCTCTCATCCTTTTTTCCTGATAAATTTTCAATCCATTCATACTATCTGGTCATCCACGTGAATAGATATTTTTTTTTTGGCCAGTCATATGGCCCCATTTTCTTTGTACTTTACTGAAGTTAGCTCTAGTGAATCCAGGGAGCAGGGGCTGTAGGGTGGGGCTGGAGCCTGAAGAAAGACAAAAGGGATCACTGTGATAATATGGTGGGGGGAGGGTTACCCAGTTCTGACCACTTTTTTTCTCTGTCTCAACCAAGAAATGCAGAGTGCCTTCACCACTCTGTAACCT,dna,PKLR promoter,GCF_000001405.26,,Regulatory 
+113,urn:mavedb:00000062-a-1,TCTCTTGTGGTCCTTGTGCTCTGTCTCTCATGTTTGCTTCTCCTTTCACTCTGGAGACAGAGCTCTGGGAGAGGAAAACTCCCTCCTGGCCCCACTCCTCTCCCAGTGATTGGAAATATCCTACAGATAGGTATTAAGGACATCAGCAAATCCTTAACCAATCTCTCAAAGGTCTATGGCCCTGTGTTCACTCTGTATTTTGGCCTGAAACCCATAGTGGTGCTGCATGGATATGAAGCAGTGAAGGAAGCCCTGATTGATCTTGGAGAGGAGTTTTCTGGAAGAGGCATTTTCCCACTGGCTGAAAGAGCTAACAGAGGATTTGGAATTGTTTTCAGCAATGGAAAGAAATGGAAGGAGATCCGGCGTTTCTCCCTCATGACGCTGCGGAATTTTGGGATGGGGAAGAGGAGCATTGAGGACCGTGTTCAAGAGGAAGCCCGCTGCCTTGTGGAGGAGTTGAGAAAAACCAAGGCCTCACCCTGTGATCCCACTTTCATCCTGGGCTGTGCTCCCTGCAATGTGATCTGCTCCATTATTTTCCATAAACGTTTTGATTATAAAGATCAGCAATTTCTTAACTTAATGGAAAAGTTGAATGAAAACATCAAGATTTTGAGCAGCCCCTGGATCCAGATCTGCAATAATTTTTCTCCTATCATTGATTACTTCCCGGGAACTCACAACAAATTACTTAAAAACGTTGCTTTTATGAAAAGTTATATTTTGGAAAAAGTAAAAGAACACCAAGAATCAATGGACATGAACAACCCTCAGGACTTTATTGATTGCTTCCTGATGAAAATGGAGAAGGAAAAGCACAACCAACCATCTGAATTTACTATTGAAAGCTTGGAAAACACTGCAGTTGACTTGTTTGGAGCTGGGACAGAGACGACAAGCACAACCCTGAGATATGCTCTCCTTCTCCTGCTGAAGCACCCAGAGGTCACAGCTAAAGTCCAGGAAGAGATTGAACGTGTGATTGGCAGAAACCGGAGCCCCTGCATGCAAGACAGGAGCCACATGCCCTACACAGATGCTGTGGTGCACGAGGTCCAGAGATACATTGACCTTCTCCCCACCAGCCTGCCCCATGCAGTGACCTGTGACATTAAATTCAGAAACTATCTCATTCCCAAGGGCACAACCATATTAATTTCCCTGACTTCTGTGCTACATGACAACAAAGAATTTCCCAACCCAGAGATGTTTGACCCTCATCACTTTCTGGATGAAGGTGGCAATTTTAAGAAAAGTAAATACTTCATGCCTTTCTCAGCAGGAAAACGGATTTGTGTGGGAGAAGCCCTGGCCGGCATGGAGCTGTTTTTATTCCTGACCTCCATTTTACAGAACTTTAACCTGAAATCTCTGGTTGACCCAAAGAACCTTGACACCACTCCAGTTGTCAATGGATTTGCCTCTGTGCCGCCCTTCTACCAGCTGTGCTTCATTCCT,dna,CYP2C9,GCF_000001405.26,P11712,Protein coding +114,urn:mavedb:00000097-e-1,TATAATTTATAGATTTTGCATGCTGAAACTTCTCAACCAGAAGAAAGGGCCTTCACAGTGTCCTTTATGTAAGAATGATATAACCAAAAGGTATATAATT,dna,BRCA1 Exon 4,GCF_000001405.13,,Protein coding 
+115,urn:mavedb:00000005-a-5,ATGCCTTCTGAGACCCCCCAGGCAGAAGTGGGGCCCACAGGCTGCCCCCACCGCTCAGGGCCACACTCGGCGAAGGGGAGCCTGGAGAAGGGGTCCCCAGAGGATAAGGAAGCCAAGGAGCCCCTGTGGATCCGGCCCGATGCTCCGAGCAGGTGCACCTGGCAGCTGGGCCGGCCTGCCTCCGAGTCCCCACATCACCACACTGCCCCGGCAAAATCTCCAAAAATCTTGCCAGATATTCTGAAGAAAATCGGGGACACCCCTATGGTCAGAATCAACAAGATTGGGAAGAAGTTCGGCCTGAAGTGTGAGCTCTTGGCCAAGTGTGAGTTCTTCAACGCGGGCGGGAGCGTGAAGGACCGCATCAGCCTGCGGATGATTGAGGATGCTGAGCGCGACGGGACGCTGAAGCCCGGGGACACGATTATCGAGCCGACATCCGGGAACACCGGGATCGGGCTGGCCCTGGCTGCGGCAGTGAGGGGCTATCGCTGCATCATCGTGATGCCAGAGAAGATGAGCTCCGAGAAGGTGGACGTGCTGCGGGCACTGGGGGCTGAGATTGTGAGGACGCCCACCAATGCCAGGTTCGACTCCCCGGAGTCACACGTGGGGGTGGCCTGGCGGCTGAAGAACGAAATCCCCAATTCTCACATCCTAGACCAGTACCGCAACGCCAGCAACCCCCTGGCTCACTACGACACCACCGCTGATGAGATCCTGCAGCAGTGTGATGGGAAGCTGGACATGCTGGTGGCTTCAGTGGGCACGGGCGGCACCATCACGGGCATTGCCAGGAAGCTGAAGGAGAAGTGTCCTGGATGCAGGATCATTGGGGTGGATCCCGAAGGGTCCATCCTCGCAGAGCCGGAGGAGCTGAACCAGACGGAGCAGACAACCTACGAGGTGGAAGGGATCGGCTACGACTTCATCCCCACGGTGCTGGACAGGACGGTGGTGGACAAGTGGTTCAAGAGCAACGATGAGGAGGCGTTCACCTTTGCCCGCATGCTGATCGCGCAAGAGGGGCTGCTGTGCGGTGGCAGTGCTGGCAGCACGGTGGCGGTGGCCGTGAAGGCCGCGCAGGAGCTGCAGGAGGGCCAGCGCTGCGTGGTCATTCTGCCCGACTCAGTGCGGAACTACATGACCAAGTTCCTGAGCGACAGGTGGATGCTGCAGAAGGGCTTTCTGAAGGAGGAGGACCTCACGGAGAAGAAGCCCTGGTGGTGGCACCTCCGTGTTCAGGAGCTGGGCCTGTCAGCCCCGCTGACCGTGCTCCCGACCATCACCTGTGGGCACACCATCGAGATCCTCCGGGAGAAGGGCTTCGACCAGGCGCCCGTGGTGGATGAGGCGGGGGTAATCCTGGGAATGGTGACGCTTGGGAACATGCTCTCGTCCCTGCTTGCCGGGAAGGTGCAGCCGTCAGACCAAGTTGGCAAAGTCATCTACAAGCAGTTCAAACAGATCCGCCTCACGGACACGCTGGGCAGGCTCTCGCACATCCTGGAGATGGACCACTTCGCCCTGGTGGTGCACGAGCAGATCCAGTACCACAGCACCGGGAAGTCCAGTCAGCGGCAGATGGTGTTCGGGGTGGTCACCGCCATTGACTTGCTGAACTTCGTGGCCGCCCAGGAGCGGGACCAGAAGTGA,dna,CBS,GCF_000001405.26,P35520,Protein coding 
+116,urn:mavedb:00000033-a-1,GTTCATGTTTCCCCCGTATGTGCGTGCGCATGCACATATATAAACACATCTAGCTTCACACCTGTGTACTCACACCTGTGTGTACACACTCCTCCTGTACATGTGTATACACGCACACCCTCCCACCAGCATGAGCAGTATGTGGATTTTTTCTTCCAGCAGCCCTTTCAAGAACATACACCAGATGGGATTGGGTCTGCTACATGACTGAGCAGCCCCCACATCAGAAAGTCATCATCATTGAATCCATCATGTGGTGCAGCCTTGGCAAGGGCACAGCTCCTCGTCCTGCCCAATGGGCCGTGGAGGAATGCCTCCTCCTCCAGAGCAGAGAGCAGATATGGGCAGGCAGTTGCCAGGCAGTTGAGCCGGGAGAACAGATGGCAGCTGCACAGCCTCCTTCTAGCCCAGTTTCCTGGCTGCCGGGGAAGGAGTTGCAGGCGGCAAGGATCCTGCAGAAACCGCCCTCCCTATAACAGGCCATCTTGCCCTCTATCCTCTTACTGGGCTTTGGAGGTCCCAATCCTGCCCTGGTTCACCACCTGTGTGGCTTACCAAGCTGTGCAACTTGGGGCAGGA,dna,ZFAND3 enhancer,GCF_000001405.26,,Regulatory +117,urn:mavedb:00000057-d-1,ACGGAATATAAGCTGGTGGTGGTGGGCGCCGGCGGTGTGGGCAAGAGTGCGCTGACCATCCAGCTGATCCAGAACCATTTTGTGGACGAATACGACCCCACTATAGAGGATTCCTACCGGAAGCAGGTGGTCATTGATGGGGAGACGTGCCTGTTGGACATCCTGGATACCGCCGGCCAGGAGGAGTACAGCGCCATGCGGGACCAGTACATGCGCACCGGGGAGGGCTTCCTGTGTGTGTTTGCCATCAACAACACCAAGTCTTTTGAGGACATCCACCAGTACAGGGAGCAGATCAAACGGGTGAAGGACTCGGATGACGTGCCCATGGTGCTGGTGGGGAACAAGTGTGACCTGGCTGCACGCACTGTGGAATCTCGGCAGGCTCAGGACCTCGCCCGAAGCTACGGCATCCCCTACATCGAGACCTCGGCCAAGACCCGGCAGGGAGTGGAGGATGCCTTCTACACGTTGGTGCGTGAGATCCGGCAGCAC,dna,Ras,GCF_000001405.26,P01112,Protein coding 
+118,urn:mavedb:00000068-a-1,ATGGAGGAGCCGCAGTCAGATCCTAGCGTCGAGCCCCCTCTGAGTCAGGAAACATTTTCAGACCTATGGAAACTACTTCCTGAAAACAACGTTCTGTCCCCCTTGCCGTCCCAAGCAATGGATGATTTGATGCTGTCCCCGGACGATATTGAACAATGGTTCACTGAAGACCCAGGTCCAGATGAAGCTCCCAGAATGCCAGAGGCTGCTCCCCGCGTGGCCCCTGCACCAGCAGCTCCTACACCGGCGGCCCCTGCACCAGCCCCCTCCTGGCCCCTGTCATCTTCTGTCCCTTCCCAGAAAACCTACCAGGGCAGCTACGGTTTCCGTCTGGGCTTCTTGCATTCTGGGACAGCCAAGTCTGTGACTTGCACGTACTCCCCTGCCCTCAACAAGATGTTTTGCCAACTGGCCAAGACCTGCCCTGTGCAGCTGTGGGTTGATTCCACACCCCCGCCCGGCACCCGCGTCCGCGCCATGGCCATCTACAAGCAGTCACAGCACATGACGGAGGTTGTGAGGCGCTGCCCCCACCATGAGCGCTGCTCAGATAGCGATGGTCTGGCCCCTCCTCAGCATCTTATCCGAGTGGAAGGAAATTTGCGTGTGGAGTATTTGGATGACAGAAACACTTTTCGACATAGTGTGGTGGTGCCCTATGAGCCGCCTGAGGTTGGCTCTGACTGTACCACCATCCACTACAACTACATGTGTAACAGTTCCTGCATGGGCGGCATGAACCGGAGGCCCATCCTCACCATCATCACACTGGAAGACTCCAGTGGTAATCTACTGGGACGGAACAGCTTTGAGGTGCGTGTTTGTGCCTGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAAGGGGAGCCTCACCACGAGCTGCCCCCAGGGAGCACTAAGCGAGCACTGCCCAACAACACCAGCTCCTCTCCCCAGCCAAAGAAGAAACCACTGGATGGAGAATATTTCACCCTTCAGATCCGTGGGCGTGAGCGCTTCGAGATGTTCCGAGAGCTGAATGAGGCCTTGGAACTCAAGGATGCCCAGGCTGGGAAGGAGCCAGGGGGGAGCAGGGCTCACTCCAGCCACCTGAAGTCCAAAAAGGGTCAGTCTACCTCCCGCCATAAAAAACTCATGTTCAAGACAGAAGGGCCTGACTCAGACTAG,dna,TP53 (P72R),GCF_000001405.26,,Protein coding 
+119,urn:mavedb:00000049-a-6,ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA
,dna,MTHFR,GCF_000001405.26,P42898,Protein coding +120,urn:mavedb:00000104-a-1,CGGTGGCCGGTGCGGCGTGTTCGGTGGCGGCTCTGGCCGCTCAGGCGCCTGCGGCTGGGTGAGCGCACGCGAGGCGGCGAGGCGGCAGCGTGTTTCTAGGTCGTGGCGTCGGGCTTCCGGAGCTTTGGCGGCAGCTAGGGGAGGATGGCGGAGTCTTCGGATAAGCTCTATCGAGTCGAGTACGCCAAGAGCGGGCGCGCCTCTTGCAAGAAATGCAGCGAGAGCATCCCCAAGGACTCGCTCCGGATGGCCATCATGGTGCAGTCGCCCATGTTTGATGGAAAAGTCCCACACTGGTACCACTTCTCCTGCTTCTGGAAGGTGGGCCACTCCATCCGGCACCCTGACGTTGAGGTGGATGGGTTCTCTGAGCTTCGGTGGGATGACCAGCAGAAAGTCAAGAAGACAGCGGAAGCTGGAGGAGTGACAGGCAAAGGCCAGGATGGAATTGGTAGCAAGGCAGAGAAGACTCTGGGTGACTTTGCAGCAGAGTATGCCAAGTCCAACAGAAGTACGTGCAAGGGGTGTATGGAGAAGATAGAAAAGGGCCAGGTGCGCCTGTCCAAGAAGATGGTGGACCCGGAGAAGCCACAGCTAGGCATGATTGACCGCTGGTACCATCCAGGCTGCTTTGTCAAGAACAGGGAGGAGCTGGGTTTCCGGCCCGAGTACAGTGCGAGTCAGCTCAAGGGCTTCAGCCTCCTTGCTACAGAGGATAAAGAAGCCCTGAAGAAGCAGCTCCCAGGAGTCAAGAGTGAAGGAAAGAGAAAAGGCGATGAGGTGGATGGAGTGGATGAAGTGGCGAAGAAGAAATCTAAAAAAGAAAAAGACAAGGATAGTAAGCTTGAAAAAGCCCTAAAGGCTCAGAACGACCTGATCTGGAACATCAAGGACGAGCTAAAGAAAGTGTGTTCAACTAATGACCTGAAGGAGCTACTCATCTTCAACAAGCAGCAAGTGCCTTCTGGGGAGTCGGCGATCTTGGACCGAGTAGCTGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGATGCCTATTACTGCACTGGGGACGTCACTGCCTGGACCAAGTGTATGGTCAAGACACAGACACCCAACCGGAAGGAGTGGGTAACCCCAAAGGAATTCCGAGAAATCTCTTACCTCAAGAAATTGAAGGTTAAAAAACAGGACCGTATATTCCCCCCAGAAACCAGCGCCTCCGTGGCGGCCACGCCTCCGCCCTCCACAGCCTCGGCTCCTGCTGCTGTGAACTCCTCTGCTTCAGCAGATAAGCCATTATCCAACATGAAGATCCTGACTCTCGGGAAGCTGTCCCGGAACAAGGATGAAGTGAAGGCCATGATTGAGAAACTCGGGGGGAAGTTGACGGGGACGGCCAACAAGGCTTCCCTGTGCATCAGCACCAAAAAGGAGGTGGAAAAGATGAATAAGAAGATGGAGGAAGTAAAGGAAGCCAACATCCGAGTTGTGTCTGAGGACTTCCTCCAGGACGTCTCCGCCTCCACCAAGAGCCTTCAGGAGTTGTTCTTAGCGCACATCTTGTCCCCTTGGGGGGCAGAGGTGAAGGCAGAGCCTGTTGAAGTTGTGGCCCCAAGAGGGAAGTCAGGGGCTGCGCTCTCCAAAAAAAGCAAGGGCCAGGTCAAGGAGGAAGGTATCAACAAATCTGAAAAGAGAATGAAATTAACTCTTAAAGGAGGAGCAGCTGTGGATCCTGATTCTGGACTGGAACACTCTGCGCATGTCCTGGAGAAAGGTGGGAAGGTCTTCAGTGCCACCCTTGGCCTGGTGGACATCGTTAAAGGAACCAACTCCTACTACAAGCTGCAGCTTCTGGAGGACGACAAGGAAAACAGGTATTGGATATTCAGGTCCTGGGGCCGTGTGG
GTACGGTGATCGGTAGCAACAAACTGGAACAGATGCCGTCCAAGGAGGATGCCATTGAGCACTTCATGAAATTATATGAAGAAAAAACCGGGAACGCTTGGCACTCCAAAAATTTCACGAAGTATCCCAAAAAGTTCTACCCCCTGGAGATTGACTATGGCCAGGATGAAGAGGCAGTGAAGAAGCTGACAGTAAATCCTGGCACCAAGTCCAAGCTCCCCAAGCCAGTTCAGGACCTCATCAAGATGATCTTTGATGTGGAAAGTATGAAGAAAGCCATGGTGGAGTATGAGATCGACCTTCAGAAGATGCCCTTGGGGAAGCTGAGCAAAAGGCAGATCCAGGCCGCATACTCCATCCTCAGTGAGGTCCAGCAGGCGGTGTCTCAGGGCAGCAGCGACTCTCAGATCCTGGATCTCTCAAATCGCTTTTACACCCTGATCCCCCACGACTTTGGGATGAAGAAGCCTCCGCTCCTGAACAATGCAGACAGTGTGCAGGCCAAGGTGGAAATGCTTGACAACCTGCTGGACATCGAGGTGGCCTACAGTCTGCTCAGGGGAGGGTCTGATGATAGCAGCAAGGATCCCATCGATGTCAACTATGAGAAGCTCAAAACTGACATTAAGGTGGTTGACAGAGATTCTGAAGAAGCCGAGATCATCAGGAAGTATGTTAAGAACACTCATGCAACCACACACAATGCGTATGACTTGGAAGTCATCGATATCTTTAAGATAGAGCGTGAAGGCGAATGCCAGCGTTACAAGCCCTTTAAGCAGCTTCATAACCGAAGATTGCTGTGGCACGGGTCCAGGACCACCAACTTTGCTGGGATCCTGTCCCAGGGTCTTCGGATAGCCCCGCCTGAAGCGCCCGTGACAGGCTACATGTTTGGTAAAGGGATCTATTTCGCTGACATGGTCTCCAAGAGTGCCAACTACTGCCATACGTCTCAGGGAGACCCAATAGGCTTAATCCTGTTGGGAGAAGTTGCCCTTGGAAACATGTATGAACTGAAGCACGCTTCACATATCAGCAAGTTACCCAAGGGCAAGCACAGTGTCAAAGGTTTGGGCAAAACTACCCCTGATCCTTCAGCTAACATTAGTCTGGATGGTGTAGACGTTCCTCTTGGGACCGGGATTTCATCTGGTGTGAATGACACCTCTCTACTATATAACGAGTACATTGTCTATGATATTGCTCAGGTAAATCTGAAGTATCTGCTGAAACTGAAATTCAATTTTAAGACCTCCCTGTGGTAATTGGGAGAGGTAGCCGAGTCACACCCGGTGGCTCTGGTATGAATTCACCCGAAGCGCTTCTGCACCAACTCACCTGGCCGCTAAGTTGCTGATGGGTAGTACCTGTACTAAACCACCTCAGAAAGGATTTTACAGAAACGTGTTAAAGGTTTTCTCTAACTTCTCAAGTCCCTTGTTTTGTGTTGTGTCTGTGGGGAGGGGTTGTTTTGGGGTTGTTTTTGTTTTTTCTTGCCAGGTAGATAAAACTGACATAGAGAAAAGGCTGGAGAGAGATTCTGTTGCATAGACTAGTCCTATGGAAAAAACCAAGCTTCGTTAGAATGTCTGCCTTACTGGTTTCCCCAGGGAAGGAAAAATACACTTCCACCCTTTTTTCTAAGTGTTCGTCTTTAGTTTTGATTTTGGAAAGATGTTAAGCATTTATTTTTAGTTAAAAATAAAAACTAATTTCATACTATTTAGATTTTCTTTTTTATCTTGCACTTATTGTCCCCTTTTTAGTTTTTTTTGTTTGCCTCTTGTGGTGAGGGGTGTGGGAAGACCAAAGGAAGGAACGCTAACAATTTCTCATACTTAGAAACAAAAAGAGCTTTCCTTCTCCAGGAATACTGAACATGGGAGCTCTTGAAATATGTAGTATTAAAAGTTGCATTTGAAATTCTTGACTTTCTTATGGGCACTTTTGTCTTCCAAATTAAAACTCTACCACAAATATACTTACCCAAGGGCTAA
TAGTAATACTCGATTAAAAATGCAGATGCCTTCTCTA,dna,PARP1,GCF_000001405.26,,Protein coding +121,urn:mavedb:00000083-c-1,AGTTGACGACGGGAGCTCCAGCACAGTGAAATGGACAGAAGGGCAGAGCAA,dna,Minigene exon - SRSF7 (9G8) binding site,GCF_000001405.13,,Other noncoding +122,urn:mavedb:00000049-a-1,ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCC
CGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA,dna,MTHFR,GCF_000001405.26,P42898,Protein coding +123,urn:mavedb:00000028-a-1,CAGAGGCACCAGGGTCAAAGCCAGTGGTGATGCCCTGGCCCCGTTGCCCCAGGCCAGGGCCAGTGAACAATGTAATCAGCTGGGGCAGACTCTACAGCCCTGCAGCCAAGGGGGCCAGTGACCCTTACATGGTCATCCACAGGCCACTTGGGTGGCCAGTCCTGTTCAGCCAGGCCTTGCCCTAGGAAAGAAATTAATTATAACCTAATTGGCAGTTTCCTTTGCATAGAAGCCGGAAGCAACTGCCAGTGAGGCTGGTGATTAACTCTGCAGCAGCTGGGAAATTGCAGTTGGGCAGGAGCGCCCATCATCCTGGCCAGGCCGCTGCAGCTGGTCTGGGTATGGAAGTGTGGGTGGTGGCCATCGTGCAGCTTAGGGCCTGGGCCCCTCAGAGCAGAAGGCTGGGTCTGTGTGCAGAAGGTAGCCTTGGGCTGCCAGGTCCCCCAGTGCCCAGTTGCGGACCTCCTTCTCCCAAACCTCAGCCATGCCTGGACCCCTTCCCCACTCACGGCAGAAGGCTGCATGGCTTTTAGGGGTGTCAGGAGGCTCATGTCCTGGTTGCAGTTCTACCATGGGTGTGCAGCGGGAATTCTGGGCTTC,dna,RET enhancer,GCF_000001405.26,,Regulatory +124,urn:mavedb:00000029-a-1,GAACTGGAAAAGCCCTGTCCGGTGAGGGGGCAGAAGGACTCAGCGCCCCTGGACCCCCAAATGCTGCATGAACACATTTTCAGGGGAGCCTGTGCCCCCAGGCGGGGGTCGGGCAGCCCCAGCCCCTCTCCTTTTCCTGGACTCTGGCCGTGCGCGGCAGCCCAGGTGTTTGCTCAGTTGCTGACCCAAAAGTGCTTCATTTTTCGTGCCCGCCCCGCGCCCCGGGCAGGCCAGTCATGTGTTAAGTTGCGCTTCTTTGCTGTGATGTGGGTGGGGGAGGAAGAGTAAACACAGTGCTGGCTCGGCTGCCCTGAGGGTGCTCAATCAAGCACAGGTTTCAAGTCTGGGTTCTGGTGTCCACTCACCCACCCCACCCCCCAAAATCAGACAAATGCTACTTTGTCTAACCTGCTGTGGCCTCTGAGACATGTTCTATTTTTAACCCCTTCTTGGAATTGGCTCTCTTCTTCAAAGGACCAGGTCCTGTTCCTCTTTCTCCCCGACTCCACCCCAGCTCCCTGTGAAGAGAGAGTTAATATATTTGTTTTATTTATTTGCTTTTTGTGTTGGGATGGGTTCGTGTCCAGTCCCGGGGGTCTG,dna,SORT1 enhancer,GCF_000001405.26,,Regulatory 
+125,urn:mavedb:00000013-b-1,ATGGATGGTACAAGAACTTCACTTGACATTGAAGAGTACTCGGATACTGAGGTACAGAAAAACCAAGTACTAACTCTGGAAGAATGGCAAGACAAGTGGGTGAACGGCAAGACTGCTTTTCATCAGGAACAAGGACATCAGCTATTAAAGAAGCATTTAGATACTTTCCTTAAAGGCAAGAGTGGACTGAGGGTATTTTTTCCTCTTTGCGGAAAAGCGGTTGAGATGAAATGGTTTGCAGACCGGGGACACAGTGTAGTTGGTGTGGAAATCAGTGAACTTGGGATACAAGAATTTTTTACAGAGCAGAATCTTTCTTACTCAGAAGAACCAATCACCGAAATTCCTGGAACCAAAGTATTTAAGAGTTCTTCGGGGAACATTTCATTGTACTGTTGCAGTATTTTTGATCTTCCCAGGACAAATATTGGCAAATTTGACATGATTTGGGATAGAGGAGCATTAGTTGCCATTAATCCAGGTGATCGCAAATGCTATGCAGATACAATGTTTTCCCTCCTGGGAAAGAAGTTTCAGTATCTCCTGTGTGTTCTTTCTTATGATCCAACTAAACATCCAGGTCCACCATTTTATGTTCCACATGCTGAAATTGAAAGGTTGTTTGGTAAAATATGCAATATACGTTGTCTTGAGAAGGTTGATGCTTTTGAAGAACGACATAAAAGTTGGGGAATTGACTGTCTTTTTGAAAAGTTATATCTACTTACAGAAAAGTAA,dna,TPMT,GCF_000001405.26,P51580,Protein coding +126,urn:mavedb:00000093-a-1,ATGGATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAATCTTAGAGTGTCCCATCTGTCTGGAGTTGATCAAGGAACCTGTCTCCACAAAGTGTGACCACATATTTTGCAAATTTTGCATGCTGAAACTTCTCAACCAGAAGAAAGGGCCTTCACAGTGTCCTTTATGTAAGAATGATATAACCAAAAGGAGCCTACAAGAAAGTACGAGATTTAGTCAACTTGTTGAAGAGCTATTGAAAATCATTTGTGCTTTTCAGCTTGACACAGGTTTGGAG,dna,BRCA1 translation start through RING domain,GCF_000001405.13,P38398,Protein coding +127,urn:mavedb:00000001-a-3,ATGTCGGGGATCGCCCTCAGCAGACTCGCCCAGGAGAGGAAAGCATGGAGGAAAGACCACCCATTTGGTTTCGTGGCTGTCCCAACAAAAAATCCCGATGGCACGATGAACCTCATGAACTGGGAGTGCGCCATTCCAGGAAAGAAAGGGACTCCGTGGGAAGGAGGCTTGTTTAAACTACGGATGCTTTTCAAAGATGATTATCCATCTTCGCCACCAAAATGTAAATTCGAACCACCATTATTTCACCCGAATGTGTACCCTTCGGGGACAGTGTGCCTGTCCATCTTAGAGGAGGACAAGGACTGGAGGCCAGCCATCACAATCAAACAGATCCTATTAGGAATACAGGAACTTCTAAATGAACCAAATATCCAAGACCCAGCTCAAGCAGAGGCCTACACGATTTACTGCCAAAACAGAGTGGAGTACGAGAAAAGGGTCCGAGCACAAGCCAAGAAGTTTGCGCCCTCATAA,dna,UBE2I,GCF_000001405.26,P63279,Protein coding 
+128,urn:mavedb:00000045-d-1,ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA,dna,alpha-synuclein,GCF_000001405.10,P37840,Protein coding +129,urn:mavedb:00000108-a-1,ATGTCTGGTAACGGCAATGCGGCTGCAACGGCGGAAGAAAACAGCCCAAAGATGAGAGTGATTCGCGTGGGTACCCGCAAGAGCCAGCTTGCTCGCATACAGACGGACAGTGTGGTGGCAACATTGAAAGCCTCGTACCCTGGCCTGCAGTTTGAAATCATTGCTATGTCCACCACAGGGGACAAGATTCTTGATACTGCACTCTCTAAGATTGGAGAGAAAAGCCTGTTTACCAAGGAGCTTGAACATGCCCTGGAGAAGAATGAAGTGGACCTGGTTGTTCACTCCTTGAAGGACCTGCCCACTGTGCTTCCTCCTGGCTTCACCATCGGAGCCATCTGCAAGCGGGAAAACCCTCATGATGCTGTTGTCTTTCACCCAAAATTTGTTGGGAAGACCCTAGAAACCCTGCCAGAGAAGAGTGTGGTGGGAACCAGCTCCCTGCGAAGAGCAGCCCAGCTGCAGAGAAAGTTCCCGCATCTGGAGTTCAGGAGTATTCGGGGAAACCTCAACACCCGGCTTCGGAAGCTGGACGAGCAGCAGGAGTTCAGTGCCATCATCCTGGCAACAGCTGGCCTGCAGCGCATGGGCTGGCACAACCGGGTGGGGCAGATCCTGCACCCTGAGGAATGCATGTATGCTGTGGGCCAGGGGGCCTTGGGCGTGGAAGTGCGAGCCAAGGACCAGGACATCTTGGATCTGGTGGGTGTGCTGCACGATCCCGAGACTCTGCTTCGCTGCATCGCTGAAAGGGCCTTCCTGAGGCACCTGGAAGGAGGCTGCAGTGTGCCAGTAGCCGTGCATACAGCTATGAAGGATGGGCAACTGTACCTGACTGGAGGAGTCTGGAGTCTAGACGGCTCAGATAGCATACAAGAGACCATGCAGGCTACCATCCATGTCCCTGCCCAGCATGAAGATGGCCCTGAGGATGACCCACAGTTGGTAGGCATCACTGCTCGTAACATTCCACGAGGGCCCCAGTTGGCTGCCCAGAACTTGGGCATCAGCCTGGCCAACTTGTTGCTGAGCAAAGGAGCCAAAAACATCCTGGATGTTGCACGGCAGCTTAACGATGCCCATTAA,dna,HMBS,GCF_000001405.26,P08397,Protein coding 
+130,urn:mavedb:00000096-a-1,ATGTTAGATGACAGAGCAAGAATGGAAGCAGCCAAGAAGGAAAAGGTAGAACAAATATTAGCAGAATTTCAATTACAAGAAGAAGATTTGAAGAAAGTTATGAGAAGAATGCAAAAGGAAATGGATAGAGGTTTGAGATTAGAAACTCATGAAGAAGCTTCTGTTAAGATGTTGCCAACTTACGTTAGATCTACACCAGAAGGTTCAGAAGTTGGTGACTTTTTGTCATTAGATTTGGGTGGTACTAACTTCAGAGTTATGTTGGTTAAGGTTGGTGAAGGTGAAGAAGGTCAATGGTCTGTTAAGACAAAGCATCAAATGTACTCAATCCCAGAAGATGCTATGACTGGTACAGCAGAAATGTTGTTCGATTACATCTCTGAATGTATCTCAGATTTCTTGGATAAGCATCAAATGAAGCATAAGAAATTGCCATTGGGTTTTACTTTTTCTTTCCCAGTTAGACATGAAGATATCGATAAGGGTATCTTGTTGAACTGGACAAAGGGTTTTAAAGCTTCAGGTGCAGAAGGTAATAATGTTGTTGGTTTGTTGAGAGATGCTATTAAAAGAAGAGGTGACTTTGAAATGGATGTTGTTGCTATGGTTAACGATACTGTTGCAACAATGATCTCTTGTTACTACGAAGATCATCAATGTGAAGTTGGTATGATTGTTGGTACTGGTTGTAACGCATGTTACATGGAAGAAATGCAAAACGTTGAATTGGTTGAAGGTGACGAAGGTAGAATGTGTGTTAATACAGAATGGGGTGCTTTTGGTGACTCTGGTGAATTGGATGAATTCTTGTTGGAATACGATAGATTGGTTGATGAATCTTCAGCAAATCCAGGTCAACAATTGTACGAAAAGTTGATCGGTGGTAAATACATGGGTGAATTAGTTAGATTGGTTTTGTTGAGATTGGTTGATGAAAATTTGTTGTTCCATGGTGAAGCTTCTGAACAATTGAGAACTAGAGGTGCATTCGAAACAAGATTCGTTTCTCAAGTTGAATCAGATACTGGTGACAGAAAGCAAATCTATAACATCTTGTCTACATTAGGTTTAAGACCATCAACTACAGATTGTGATATTGTTAGAAGAGCTTGTGAATCTGTTTCAACTAGAGCTGCACATATGTGTTCTGCTGGTTTGGCAGGTGTTATTAATAGAATGAGAGAATCTAGATCAGAAGATGTTATGAGAATCACAGTTGGTGTTGATGGTTCTGTTTACAAGTTGCATCCATCTTTTAAAGAAAGATTCCATGCATCTGTTAGAAGATTGACTCCATCATGTGAAATCACTTTTATTGAATCTGAAGAAGGTTCAGGTAGAGGTGCCGCTTTAGTCAGTGCTGTTGCTTGTAAGAAGGCTTGTATGTTGGGTCAATAG,dna,GCK,GCF_000001405.26,,Protein coding 
+131,urn:mavedb:00000003-a-2,GATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAATCTTAGAGTGTCCCATCTGCCTGGAGTTGATCAAGGAACCTGTCTCCACAAAGTGTGACCACATATTTTGCAAATTTTGCATGCTGAAACTTCTCAACCAGAAGAAAGGGCCTTCACAGTGTCCTTTATGTAAGAATGATATAACCAAAAGGAGCCTACAAGAAAGTACGAGATTTAGTCAACTTGTTGAAGAGCTATTGAAAATCATTTGTGCTTTTCAGCTTGACACAGGTTTGGAGTATGCAAACAGCTATAATTTTGCAAAAAAGGAAAATAACTCTCCTGAACATCTAAAAGATGAAGTTTCTATCATCCAAAGTATGGGCTACAGAAACCGTGCCAAAAGACTTCTACAGAGTGAACCCGAAAATCCTTCCTTGCAGGAAACCAGTCTCAGTGTCCAACTCTCTAACCTTGGAACTGTGAGAACTCTGAGGACAAAGCAGCGGATACAACCTCAAAGGACGTCTGTCTACATTGAATTGGGATCTGATTCTTCTGAAGATACCGTTAATAAGGCAACTTATTGCAGTGTGGGAGATCAAGAATTGTTACAAATCACCCCTCAAGGAACCAGGGATGAAATCAGTTTGGATTCTGCAAAAAAGGCTGCTTGTGAATTTTCTGAGACGGATGTAACAAATACTGAACATCATCAACCCAGTAATAATGATTTGAACACCACTGAGAAGCGTGCAGCTGAGAGGCATCCAGAAAAGTATCAGGGTAGTTCTGTTTCAAACTTGCATGTGGAGCCATGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGAAAAGGCTGAGTTC,dna,BRCA1 RING domain,GCF_000001405.26,,Protein coding +132,urn:mavedb:00000097-c-1,TTTCTCCCCCCCTACCCTGCTAGTCTGGAGTTGATCAAGGAACCTGTCTCCACAAAGTGTGACCACATATTTTGCAAGTAAGTTTGAATGTGTTATGTGG,dna,BRCA1 Exon 3,GCF_000001405.13,,Protein coding +133,urn:mavedb:00000106-a-1,MDSLLMNRRKFLYQFKNVRWAKGRRETYLCYVVKRRDSATSFSLDFGYLRNKNGCHVELLFLRYISDWDLDPGRCYRVTWFTSWSPCYDCARHVADFLRGNPNLSLRIFTARLYFCEDRKAEPEGLRRLHRAGVQIAIMTFKDYFYCWNTFVENHERTFKAWEGLHENSVRLSRQLRRILLPLYEVDDLRDAFRTLGL,protein,AID,GCF_000001405.26,,Protein coding +134,urn:mavedb:00000083-g-1,AGTTCACACAGGGAGCTCCAGCACAGTGAAATGGACAGAAGGGCAGAGCAA,dna,Minigene exon - hnRNP L binding site,GCF_000001405.13,,Other noncoding 
+135,urn:mavedb:00000036-a-2,ATGGACGCGCTCAAGTCGGCGGGGCGGGCGCTGATCCGGAGCCCCAGCTTGGCCAAGCAGAGCTGGGGGGGCGGTGGCCGGCACCGCAAGCTGCCTGAGAACTGGACAGACACGCGGGAGACGCTGCTGGAGGGGATGCTGTTCAGCCTCAAGTACCTGGGCATGACGCTAGTGGAGCAGCCCAAGGGTGAGGAGCTGTCGGCCGCCGCCATCAAGAGGATCGTGGCTACAGCTAAGGCCAGTGGGAAGAAGCTGCAGAAGGTGACTCTGAAGGTGTCGCCACGGGGAATTATCCTGACAGACAACCTCACCAACCAGCTCATTGAGAACGTGTCCATATACAGGATCTCCTATTGCACAGCAGACAAGATGCACGACAAGGTGTTTGCATACATCGCCCAGAGCCAGCACAACCAGAGCCTCGAGTGCCACGCCTTCCTCTGCACCAAGCGGAAGATGGCACAGGCTGTTACCCTCACCGTAGCCCAGGCCTTCAAAGTCGCCTTTGAGTTTTGGCAGGTGTCCAAGGAAGAGAAAGAGAAGAGGGACAAAGCCAGCCAAGAGGGAGGGGACGTCCTGGGGGCCCGCCAAGACTGCACCCCCCCCTTGAAGAGCTTGGTCGCCACTGGGAACCTGCTGGACTTAGAGGAGACGGCTAAGGCCCCGCTGTCCACGGTCAGCGCCAACACCACCAACATGGACGAGGTGCCGCGGCCACAAGCCTTGAGTGGCAGCAGTGTTGTCTGGGAGCTGGATGATGGCCTGGATGAAGCGTTTTCGAGGCTTGCCCAGTCTCGGACAAACCCTCAGGTCCTGGACACTGGCCTGACAGCCCAGGACATGCATTACGCCCAGTGCCTCTCGCCTGTCGACTGGGACAAGCCTGACAGCAGCGGCACAGAGCAGGATGACCTCTTCAGCTTCTGA,dna,LDLRAP1,GCF_000001405.26,,Protein coding +136,urn:mavedb:00000097-t-1,CTCTCTTCCTCTCTTCTTCCAGATCTTCAGGGGGCTAGAAATCTGTTGCTATGGGCCCTTCACCAACATGCCCACAGGTAAGAGCCTGGGAGAACCCCAG,dna,BRCA1 Exon 20,GCF_000001405.13,,Protein coding +137,urn:mavedb:00000097-l-1,TTAATTTCAGATGCTCGTGTACAAGTTTGCCAGAAAACACCACATCACTTTAACTAATCTAATTACTGAAGAGACTACTCATGTTGTTATGAAAACAGGTATACCAAG,dna,BRCA1 Exon 16,GCF_000001405.13,,Protein coding 
+138,urn:mavedb:00000094-a-11,MAEAPPRRLGLGPPPGDAPRAELVALTAVQSEQGEAGGGGSPRRLGLLGSPLPPGAPLPGPGSGSGSACGQRSSAAHKRYRRLQNWVYNVLERPRGWAFVYHVFIFLLVFSCLVLSVLSTIQEHQELANECLLILEFVMIVVFGLEYIVRVWSAGCCCRYRGWQGRFRFARKPFCVIDFIVFVASVAVIAAGTQGNIFATSALRSMRFLQILRMVRMDRRGGTWKLLGSVVYAHSKELITAWYIGFLVLIFASFLVYLAEKDANSDFSSYADSLWWGTITLTTIGYGDKTPHTWLGRVLAAGFALLGISFFALPAGILGSGFALKVQEQHRQKHFEKRRMPAANLIQAAWRLYSTDMSRAYLTATWYYYDSILPSFRELALLFEHVQRARNGGLRPLEVRRAPVPDGAPSRYPPVATCHRPGSTSFCPGESSRMGIKDRIRMGSSQRRTGPSKQHLAPPTMPTSPSSEQVGEATSPTKVQKSWSFNDRTRFRASLRLKPRTSAEDAPSEEVAEEKSYQCELTVDDIMPAVKTVIRSIRILKFLVAKRKFKETLRPYDVKDVIEQYSAGHLDMLGRIKSLQTRVDQIVGRGPGDRKAREKGDKGPSDAEVVDEISMMGRVVKVEKQVQSIEHKLDLLLGFYSRCLRSGTSASLGAVQVPLFDPDITSDYHSPVDHEDISVSAQTLSISRSVSTNMD,protein,KCNQ4,GCF_000001405.26,,Protein coding +139,urn:mavedb:00000027-b-1,TGCTTTCAGTGTGGGCCTGGGGCTGCGGGACCATGGAATGAGAGGGAGAGGATGACAAAACTGCTGGTCTTATCTAAGGGAGACAGAGAAGAGAAAAGGGGCACACCCAGTAGGCCACCCTGTCCCCACAGAATCCCTCCCCCAGAACGGCCTGCTCTCTGCCCTCATCTCCTGGCATTTCCTCTCATCCTTTTTTCCTGATAAATTTTCAATCCATTCATACTATCTGGTCATCCACGTGAATAGATATTTTTTTTTTGGCCAGTCATATGGCCCCATTTTCTTTGTACTTTACTGAAGTTAGCTCTAGTGAATCCAGGGAGCAGGGGCTGTAGGGTGGGGCTGGAGCCTGAAGAAAGACAAAAGGGATCACTGTGATAATATGGTGGGGGGAGGGTTACCCAGTTCTGACCACTTTTTTTCTCTGTCTCAACCAAGAAATGCAGAGTGCCTTCACCACTCTGTAACCT,dna,PKLR promoter,GCF_000001405.26,,Regulatory +140,urn:mavedb:00000002-a-2,GACGTTCCACTGCCGGCTGGTTGGGAAATGGCTAAAACTAGTTCTGGTCAGCGTTACTTCCTGAACCACATCGACCAGACCACCACGTGGCAGGACCCGCGT,dna,hYAP65 WW domain,GCF_000001405.26,P46937,Protein coding +141,urn:mavedb:00000061-c-1,TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC,dna,RAF,GCF_000001405.26,P04049,Protein coding 
+142,urn:mavedb:00000035-a-2,ATGTTGTCAAGACTTTTTCGAATGCATGGCCTCTTTGTGGCCTCCCATCCCTGGGAAGTCATAGTGGGGACAGTGACACTGACCATCTGCATGATGTCCATGAACATGTTTACTGGTAACAATAAGATCTGTGGTTGGAATTATGAATGTCCAAAGTTTGAAGAGGATGTTTTGAGCAGTGACATTATAATTCTGACAATAACACGATGCATAGCCATCCTGTATATTTACTTCCAGTTCCAGAATTTACGTCAACTTGGATCAAAATATATTTTGGGTATTGCTGGCCTTTTCACAATTTTCTCAAGTTTTGTATTCAGTACAGTTGTCATTCACTTCTTAGACAAAGAATTGACAGGCTTGAATGAAGCTTTGCCCTTTTTCCTACTTTTGATTGACCTTTCCAGAGCAAGCACATTAGCAAAGTTTGCCCTCAGTTCCAACTCACAGGATGAAGTAAGGGAAAATATTGCTCGTGGAATGGCAATTTTAGGTCCTACGTTTACCCTCGATGCTCTTGTTGAATGTCTTGTGATTGGAGTTGGTACCATGTCAGGGGTACGTCAGCTTGAAATTATGTGCTGCTTTGGCTGCATGTCAGTTCTTGCCAACTACTTCGTGTTCATGACTTTCTTCCCAGCTTGTGTGTCCTTGGTATTAGAGCTTTCTCGGGAAAGCCGCGAGGGTCGTCCAATTTGGCAGCTCAGCCATTTTGCCCGAGTTTTAGAAGAAGAAGAAAATAAGCCGAATCCTGTAACTCAGAGGGTCAAGATGATTATGTCTCTAGGCTTGGTTCTTGTTCATGCTCACAGTCGCTGGATAGCTGATCCTTCTCCTCAAAACAGTACAGCAGATACTTCTAAGGTTTCATTAGGACTGGATGAAAATGTGTCCAAGAGAATTGAACCAAGTGTTTCCCTCTGGCAGTTTTATCTCTCTAAAATGATCAGCATGGATATTGAACAAGTTATTACCCTAAGTTTAGCTCTCCTTCTGGCTGTCAAGTACATCTTCTTTGAACAAACAGAGACAGAATCTACACTCTCATTAAAAAACCCTATCACATCTCCTGTAGTGACACAAAAGAAAGTCCCAGACAATTGTTGTAGACGTGAACCTATGCTGGTCAGAAATAACCAGAAATGTGATTCAGTAGAGGAAGAGACAGGGATAAACCGAGAAAGAAAAGTTGAGGTTATAAAACCCTTAGTGGCTGAAACAGATACCCCAAACAGAGCTACATTTGTGGTTGGTAACTCCTCCTTACTCGATACTTCATCAGTACTGGTGACACAGGAACCTGAAATTGAACTTCCCAGGGAACCTCGGCCTAATGAAGAATGTCTACAGATACTTGGGAATGCAGAGAAAGGTGCAAAATTCCTTAGTGATGCTGAGATCATCCAGTTAGTCAATGCTAAGCATATCCCAGCCTACAAGTTGGAAACTCTGATGGAAACTCATGAGCGTGGTGTATCTATTCGCCGACAGTTACTTTCCAAGAAGCTTTCAGAACCTTCTTCTCTCCAGTACCTACCTTACAGGGATTATAATTACTCCTTGGTGATGGGAGCTTGTTGTGAGAATGTTATTGGATATATGCCCATCCCTGTTGGAGTGGCAGGACCCCTTTGCTTAGATGAAAAAGAATTTCAGGTTCCAATGGCAACAACAGAAGGTTGTCTTGTGGCCAGCACCAATAGAGGCTGCAGAGCAATAGGTCTTGGTGGAGGTGCCAGCAGCCGAGTCCTTGCAGATGGGATGACTCGTGGCCCAGTTGTGCGTCTTCCACGTGCTTGTGACTCTGCAGAAGTGAAAGCCTGGCTCGAAACATCTGAAGGGTTCGCAGTGATAAAGGAGGCATTTGACAGCACTAGCAGATTTGCACGTCTACAGAAACTTCATACAAGTATAGCTGGACGCAACCTTTATATCCGTTTCCAGTCCAGGTCAGGGGATGCCATGGGGATG
AACATGATTTCAAAGGGTACAGAGAAAGCACTTTCAAAACTTCACGAGTATTTCCCTGAAATGCAGATTCTAGCCGTTAGTGGTAACTATTGTACTGACAAGAAACCTGCTGCTATAAATTGGATAGAGGGAAGAGGAAAATCTGTTGTTTGTGAAGCTGTCATTCCAGCCAAGGTTGTCAGAGAAGTATTAAAGACTACCACAGAGGCTATGATTGAGGTCAACATTAACAAGAATTTAGTGGGCTCTGCCATGGCTGGGAGCATAGGAGGCTACAACGCCCATGCAGCAAACATTGTCACCGCCATCTACATTGCCTGTGGACAGGATGCAGCACAGAATGTTGGTAGTTCAAACTGTATTACTTTAATGGAAGCAAGTGGTCCCACAAATGAAGATTTATATATCAGCTGCACCATGCCATCTATAGAGATAGGAACGGTGGGTGGTGGGACCAACCTACTACCTCAGCAAGCCTGTTTGCAGATGCTAGGTGTTCAAGGAGCATGCAAAGATAATCCTGGGGAAAATGCCCGGCAGCTTGCCCGAATTGTGTGTGGGACCGTAATGGCTGGGGAATTGTCACTTATGGCAGCATTGGCAGCAGGACATCTTGTCAAAAGTCACATGATTCACAACAGGTCGAAGATCAATTTACAAGACCTCCAAGGAGCTTGCACCAAGAAGACAGCCTGA,dna,HMGCR,GCF_000001405.26,,Protein coding +143,urn:mavedb:00000031-b-1,TCGCGGGGGTGGCCGGGGCCAGGGCTTCCCACGTGCGCAGCAGGACGCAGCGCTGCCTGAAACTCGCGCCGCGAGGAGAGGGCGGGGCCGCGGAAAGGAAGGGGAGGGGCTGGGAGGGCCCGGAGGGGGCTGGGCCGGGGACCCGGGAGGGGTCGGGACGGGGCGGGGTCCGCGCGGAGGAGGCGGAGCTGGAAGGTGAAGGGGCAGGACGGGTGCCCGGGTCCCCAGTCCCTCCGCCACGTGGGAAGCGCGGTCCTGG,dna,TERT promoter,GCF_000001405.26,,Regulatory 
+144,urn:mavedb:00000035-a-3,ATGTTGTCAAGACTTTTTCGAATGCATGGCCTCTTTGTGGCCTCCCATCCCTGGGAAGTCATAGTGGGGACAGTGACACTGACCATCTGCATGATGTCCATGAACATGTTTACTGGTAACAATAAGATCTGTGGTTGGAATTATGAATGTCCAAAGTTTGAAGAGGATGTTTTGAGCAGTGACATTATAATTCTGACAATAACACGATGCATAGCCATCCTGTATATTTACTTCCAGTTCCAGAATTTACGTCAACTTGGATCAAAATATATTTTGGGTATTGCTGGCCTTTTCACAATTTTCTCAAGTTTTGTATTCAGTACAGTTGTCATTCACTTCTTAGACAAAGAATTGACAGGCTTGAATGAAGCTTTGCCCTTTTTCCTACTTTTGATTGACCTTTCCAGAGCAAGCACATTAGCAAAGTTTGCCCTCAGTTCCAACTCACAGGATGAAGTAAGGGAAAATATTGCTCGTGGAATGGCAATTTTAGGTCCTACGTTTACCCTCGATGCTCTTGTTGAATGTCTTGTGATTGGAGTTGGTACCATGTCAGGGGTACGTCAGCTTGAAATTATGTGCTGCTTTGGCTGCATGTCAGTTCTTGCCAACTACTTCGTGTTCATGACTTTCTTCCCAGCTTGTGTGTCCTTGGTATTAGAGCTTTCTCGGGAAAGCCGCGAGGGTCGTCCAATTTGGCAGCTCAGCCATTTTGCCCGAGTTTTAGAAGAAGAAGAAAATAAGCCGAATCCTGTAACTCAGAGGGTCAAGATGATTATGTCTCTAGGCTTGGTTCTTGTTCATGCTCACAGTCGCTGGATAGCTGATCCTTCTCCTCAAAACAGTACAGCAGATACTTCTAAGGTTTCATTAGGACTGGATGAAAATGTGTCCAAGAGAATTGAACCAAGTGTTTCCCTCTGGCAGTTTTATCTCTCTAAAATGATCAGCATGGATATTGAACAAGTTATTACCCTAAGTTTAGCTCTCCTTCTGGCTGTCAAGTACATCTTCTTTGAACAAACAGAGACAGAATCTACACTCTCATTAAAAAACCCTATCACATCTCCTGTAGTGACACAAAAGAAAGTCCCAGACAATTGTTGTAGACGTGAACCTATGCTGGTCAGAAATAACCAGAAATGTGATTCAGTAGAGGAAGAGACAGGGATAAACCGAGAAAGAAAAGTTGAGGTTATAAAACCCTTAGTGGCTGAAACAGATACCCCAAACAGAGCTACATTTGTGGTTGGTAACTCCTCCTTACTCGATACTTCATCAGTACTGGTGACACAGGAACCTGAAATTGAACTTCCCAGGGAACCTCGGCCTAATGAAGAATGTCTACAGATACTTGGGAATGCAGAGAAAGGTGCAAAATTCCTTAGTGATGCTGAGATCATCCAGTTAGTCAATGCTAAGCATATCCCAGCCTACAAGTTGGAAACTCTGATGGAAACTCATGAGCGTGGTGTATCTATTCGCCGACAGTTACTTTCCAAGAAGCTTTCAGAACCTTCTTCTCTCCAGTACCTACCTTACAGGGATTATAATTACTCCTTGGTGATGGGAGCTTGTTGTGAGAATGTTATTGGATATATGCCCATCCCTGTTGGAGTGGCAGGACCCCTTTGCTTAGATGAAAAAGAATTTCAGGTTCCAATGGCAACAACAGAAGGTTGTCTTGTGGCCAGCACCAATAGAGGCTGCAGAGCAATAGGTCTTGGTGGAGGTGCCAGCAGCCGAGTCCTTGCAGATGGGATGACTCGTGGCCCAGTTGTGCGTCTTCCACGTGCTTGTGACTCTGCAGAAGTGAAAGCCTGGCTCGAAACATCTGAAGGGTTCGCAGTGATAAAGGAGGCATTTGACAGCACTAGCAGATTTGCACGTCTACAGAAACTTCATACAAGTATAGCTGGACGCAACCTTTATATCCGTTTCCAGTCCAGGTCAGGGGATGCCATGGGGATG
AACATGATTTCAAAGGGTACAGAGAAAGCACTTTCAAAACTTCACGAGTATTTCCCTGAAATGCAGATTCTAGCCGTTAGTGGTAACTATTGTACTGACAAGAAACCTGCTGCTATAAATTGGATAGAGGGAAGAGGAAAATCTGTTGTTTGTGAAGCTGTCATTCCAGCCAAGGTTGTCAGAGAAGTATTAAAGACTACCACAGAGGCTATGATTGAGGTCAACATTAACAAGAATTTAGTGGGCTCTGCCATGGCTGGGAGCATAGGAGGCTACAACGCCCATGCAGCAAACATTGTCACCGCCATCTACATTGCCTGTGGACAGGATGCAGCACAGAATGTTGGTAGTTCAAACTGTATTACTTTAATGGAAGCAAGTGGTCCCACAAATGAAGATTTATATATCAGCTGCACCATGCCATCTATAGAGATAGGAACGGTGGGTGGTGGGACCAACCTACTACCTCAGCAAGCCTGTTTGCAGATGCTAGGTGTTCAAGGAGCATGCAAAGATAATCCTGGGGAAAATGCCCGGCAGCTTGCCCGAATTGTGTGTGGGACCGTAATGGCTGGGGAATTGTCACTTATGGCAGCATTGGCAGCAGGACATCTTGTCAAAAGTCACATGATTCACAACAGGTCGAAGATCAATTTACAAGACCTCCAAGGAGCTTGCACCAAGAAGACAGCCTGA,dna,HMGCR,GCF_000001405.26,,Protein coding +145,urn:mavedb:00000006-a-1,TCTTCAATCTGGGTATGCTGACTCAACCAGAATAACAGTGAAAATGATAATTCAAACTAATACTGTTTACAGGGAGTTAAACTTCTACAGTGGGATTAAAGGTCTGTACCACGTTAGCACAAATGTCACCTCTCTGTTAATCATAAAACAGGGTCACAGGCCAATGTTCACCACAAGGAGACAGGAGGACAACCTGGGATGGGTAATGACAAAGAACGATTTCCGTACTCCTAAGCCTCTGCTCTCTCAGATCTCAAGC,dna,ALDOB enhancer,GCF_000001405.13,,Regulatory +146,urn:mavedb:00000094-a-13,MAEAPPRRLGLGPPPGDAPRAELVALTAVQSEQGEAGGGGSPRRLGLLGSPLPPGAPLPGPGSGSGSACGQRSSAAHKRYRRLQNWVYNVLERPRGWAFVYHVFIFLLVFSCLVLSVLSTIQEHQELANECLLILEFVMIVVFGLEYIVRVWSAGCCCRYRGWQGRFRFARKPFCVIDFIVFVASVAVIAAGTQGNIFATSALRSMRFLQILRMVRMDRRGGTWKLLGSVVYAHSKELITAWYIGFLVLIFASFLVYLAEKDANSDFSSYADSLWWGTITLTTIGYGDKTPHTWLGRVLAAGFALLGISFFALPAGILGSGFALKVQEQHRQKHFEKRRMPAANLIQAAWRLYSTDMSRAYLTATWYYYDSILPSFRELALLFEHVQRARNGGLRPLEVRRAPVPDGAPSRYPPVATCHRPGSTSFCPGESSRMGIKDRIRMGSSQRRTGPSKQHLAPPTMPTSPSSEQVGEATSPTKVQKSWSFNDRTRFRASLRLKPRTSAEDAPSEEVAEEKSYQCELTVDDIMPAVKTVIRSIRILKFLVAKRKFKETLRPYDVKDVIEQYSAGHLDMLGRIKSLQTRVDQIVGRGPGDRKAREKGDKGPSDAEVVDEISMMGRVVKVEKQVQSIEHKLDLLLGFYSRCLRSGTSASLGAVQVPLFDPDITSDYHSPVDHEDISVSAQTLSISRSVSTNMD,protein,KCNQ4,GCF_000001405.26,,Protein coding 
+147,urn:mavedb:00000045-h-1,ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA,dna,alpha-synuclein,GCF_000001405.10,P37840,Protein coding +148,urn:mavedb:00000051-c-1,CTCATTATTTTTGGGGTGATGGCTGGTGTTATTGGAACGATCCTG,dna,Glycophorin A,GCF_000001405.26,P02724,Protein coding +149,urn:mavedb:00000097-u-1,ATGTCCATTTTAGATCAACTGGAATGGATGGTACAGCTGTGTGGTGCTTCTGTGGTGAAGGAGCTTTCATCATTCACCCTTGGCACAGTAAGTATTGGGTGCCCT,dna,BRCA1 Exon 21,GCF_000001405.13,,Protein coding +150,urn:mavedb:00000097-d-1,TTTCTCCCCCCCTACCCTGCTAGTCTGGAGTTGATCAAGGAACCTGTCTCCACAAAGTGTGACCACATATTTTGCAAGTAAGTTTGAATGTGTTATGTGG,dna,BRCA1 Exon 3,GCF_000001405.13,,Protein coding 
+151,urn:mavedb:00000036-a-1,ATGGACGCGCTCAAGTCGGCGGGGCGGGCGCTGATCCGGAGCCCCAGCTTGGCCAAGCAGAGCTGGGGGGGCGGTGGCCGGCACCGCAAGCTGCCTGAGAACTGGACAGACACGCGGGAGACGCTGCTGGAGGGGATGCTGTTCAGCCTCAAGTACCTGGGCATGACGCTAGTGGAGCAGCCCAAGGGTGAGGAGCTGTCGGCCGCCGCCATCAAGAGGATCGTGGCTACAGCTAAGGCCAGTGGGAAGAAGCTGCAGAAGGTGACTCTGAAGGTGTCGCCACGGGGAATTATCCTGACAGACAACCTCACCAACCAGCTCATTGAGAACGTGTCCATATACAGGATCTCCTATTGCACAGCAGACAAGATGCACGACAAGGTGTTTGCATACATCGCCCAGAGCCAGCACAACCAGAGCCTCGAGTGCCACGCCTTCCTCTGCACCAAGCGGAAGATGGCACAGGCTGTTACCCTCACCGTAGCCCAGGCCTTCAAAGTCGCCTTTGAGTTTTGGCAGGTGTCCAAGGAAGAGAAAGAGAAGAGGGACAAAGCCAGCCAAGAGGGAGGGGACGTCCTGGGGGCCCGCCAAGACTGCACCCCCCCCTTGAAGAGCTTGGTCGCCACTGGGAACCTGCTGGACTTAGAGGAGACGGCTAAGGCCCCGCTGTCCACGGTCAGCGCCAACACCACCAACATGGACGAGGTGCCGCGGCCACAAGCCTTGAGTGGCAGCAGTGTTGTCTGGGAGCTGGATGATGGCCTGGATGAAGCGTTTTCGAGGCTTGCCCAGTCTCGGACAAACCCTCAGGTCCTGGACACTGGCCTGACAGCCCAGGACATGCATTACGCCCAGTGCCTCTCGCCTGTCGACTGGGACAAGCCTGACAGCAGCGGCACAGAGCAGGATGACCTCTTCAGCTTCTGA,dna,LDLRAP1,GCF_000001405.26,,Protein coding +152,urn:mavedb:00000105-a-1,QYKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTEKPEVIDASELTPAVTTYKLVINGKTLKGETTTKAVDAETAEKAFKQYANDNGVDGVWTYDDATKTFTVTEMVTEVPGDAPTEPEKPEASIPLVPLTPATPIAKDDAKKDDTKKEDAKKPEAKKDDAKKAETLPTTGEGSNPFFTAAALAVMAGAGALAVASKRKED,protein,GB1,GCF_000001405.26,,Protein coding 
+153,urn:mavedb:00000081-a-2,MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRSLQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPENPSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEISLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENSSLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPCSENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLLASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKRKRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEKESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSEEIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKEFVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLGKAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVSKRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKDKPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSMSPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKLNAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDDLLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELPCFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLFSSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEAASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALEDLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHSCSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRAPESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSGLTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKMLNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTLGTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY,protein,BRCA1,GCF_000001405.13,,Protein coding 
+154,urn:mavedb:00000053-a-2,CCACGCCGCATCGTCATCCACCGTGGGTCAACGGGGTTAGGCTTCAATATCGTCGGTGGAGAGGATGGTGAGGGAATCTTCATCTCATTCATTCTGGCGGGAGGACCGGCCGATTTAAGCGGAGAACTTCGCAAAGGTGACCAGATCCTTTCGGTGAATGGCGTAGATTTGCGCAACGCATCACACGAACAGGCGGCCATCGCATTAAAGAACGCCGGCCAGACCGTTACGATTATCGCGCAGTATAAA,dna,PSD95 PDZ3,GCF_000001405.26,P78352,Protein coding +155,urn:mavedb:00000060-a-1,ATGTCTGAATATATTCGGGTAACCGAAGATGAGAACGATGAGCCCATTGAAATACCATCGGAAGACGATGGGACGGTGCTGCTCTCCACGGTTACAGCCCAGTTTCCAGGGGCGTGTGGGCTTCGCTACAGGAATCCAGTGTCTCAGTGTATGAGAGGTGTCCGGCTGGTAGAAGGAATTCTGCATGCCCCAGATGCTGGCTGGGGAAATCTGGTGTATGTTGTCAACTATCCAAAAGATAACAAAAGAAAAATGGATGAGACAGATGCTTCATCAGCAGTGAAAGTGAAAAGAGCAGTCCAGAAAACATCCGATTTAATAGTGTTGGGTCTCCCATGGAAAACAACCGAACAGGACCTGAAAGAGTATTTTAGTACCTTTGGAGAAGTTCTTATGGTGCAGGTCAAGAAAGATCTTAAGACTGGTCATTCAAAGGGGTTTGGCTTTGTTCGTTTTACGGAATATGAAACACAAGTGAAAGTAATGTCACAGCGACATATGATAGATGGACGATGGTGTGACTGCAAACTTCCTAATTCTAAGCAAAGCCAAGATGAGCCTTTGAGAAGCAGAAAAGTGTTTGTGGGGCGCTGTACAGAGGACATGACTGAGGATGAGCTGCGGGAGTTCTTCTCTCAGTACGGGGATGTGATGGATGTCTTCATCCCCAAGCCATTCAGGGCCTTTGCCTTTGTTACATTTGCAGATGATCAGATTGCGCAGTCTCTTTGTGGAGAGGACTTGATCATTAAAGGAATCAGCGTTCATATATCCAATGCCGAACCTAAGCACAATAGCAATAGACAGTTAGAAAGAAGTGGAAGATTTGGTGGTAATCCAGGTGGCTTTGGGAATCAGGGTGGATTTGGTAATAGCAGAGGGGGTGGAGCTGGTTTGGGAAACAATCAAGGTAGTAATATGGGTGGTGGGATGAACTTTGGTGCGTTCAGCATTAATCCAGCCATGATGGCTGCCGCCCAGGCAGCACTACAGAGCAGTTGGGGTATGATGGGCATGTTAGCCAGCCAGCAGAACCAGTCAGGCCCATCGGGTAATAACCAAAACCAAGGCAACATGCAGAGGGAGCCAAACCAGGCCTTCGGTTCTGGAAATAACTCTTATAGTGGCTCTAATTCTGGTGCAGCAATTGGTTGGGGATCAGCATCCAATGCAGGGTCGGGCAGTGGTTTTAATGGAGGCTTTGGCTCAAGCATGGATTCTAAGTCTTCTGGCTGGGGAATG,dna,TARDBP,GCF_000001405.13,Q13148,Protein coding 
+156,urn:mavedb:00000013-a-1,ATGACAGCCATCATCAAAGAGATCGTTAGCAGAAACAAAAGGAGATATCAAGAGGATGGATTCGACTTAGACTTGACCTATATTTATCCAAACATTATTGCTATGGGATTTCCTGCAGAAAGACTTGAAGGCGTATACAGGAACAATATTGATGATGTAGTAAGGTTTTTGGATTCAAAGCATAAAAACCATTACAAGATATACAATCTTTGTGCTGAAAGACATTATGACACCGCCAAATTTAATTGCAGAGTTGCACAATATCCTTTTGAAGACCATAACCCACCACAGCTAGAACTTATCAAACCCTTTTGTGAAGATCTTGACCAATGGCTAAGTGAAGATGACAATCATGTTGCAGCAATTCACTGTAAAGCTGGAAAGGGACGAACTGGTGTAATGATATGTGCATATTTATTACATCGGGGCAAATTTTTAAAGGCACAAGAGGCCCTAGATTTCTATGGGGAAGTAAGGACCAGAGACAAAAAGGGAGTAACTATTCCCAGTCAGAGGCGCTATGTGTATTATTATAGCTACCTGTTAAAGAATCATCTGGATTATAGACCAGTGGCACTGTTGTTTCACAAGATGATGTTTGAAACTATTCCAATGTTCAGTGGCGGAACTTGCAATCCTCAGTTTGTGGTCTGCCAGCTAAAGGTGAAGATATATTCCTCCAATTCAGGACCCACACGACGGGAAGACAAGTTCATGTACTTTGAGTTCCCTCAGCCGTTACCTGTGTGTGGTGATATCAAAGTAGAGTTCTTCCACAAACAGAACAAGATGCTAAAAAAGGACAAAATGTTTCACTTTTGGGTAAATACATTCTTCATACCAGGACCAGAGGAAACCTCAGAAAAAGTAGAAAATGGAAGTCTATGTGATCAAGAAATCGATAGCATTTGCAGTATAGAGCGTGCAGATAATGACAAGGAATATCTAGTACTTACTTTAACAAAAAATGATCTTGACAAAGCAAATAAAGACAAAGCCAACCGATACTTTTCTCCAAATTTTAAGGTGAAGCTGTACTTCACAAAAACAGTAGAGGAGCCGTCAAATCCAGAGGCTAGCAGTTCAACTTCTGTAACACCAGATGTTAGTGACAATGAACCTGATCATTATAGATATTCTGACACCACTGACTCTGATCCAGAGAATGAACCTTTTGATGAAGATCAGCATACACAAATTACAAAAGTCTGA,dna,PTEN,GCF_000001405.26,P60484,Protein coding +157,urn:mavedb:00000022-a-1,GTAAATGGTGAGTAGGAAGTTGATTCTGCCCGATCTGTCTGACTCCATGCCCTTTCTATTAGGTCATGAAGGGGAACCTGAGGATTGGAGCTTTGGAATGTTAATCTTACCCAAAGGCCTGAAGTAATACCCCAGAATGTGAACATGTGTGACCATCTGCCTGTCCTGGGGGTGGGAAGAAGGCAGCATGCTCTATCCTTGACCCTGATTGAGCCCAGGGGCTGAATCTGGAGCTTTGGGGCCTGGGAACCTCTCTACCTGCGTCAATGTCTGGAGGCCCTGAGAGTTTCGCTCAGGCTCAGAGCAGGCATCGCAACCTCCCAGTTACTATTCTGTGCTGTGGCAAGTGCCAGCTTGTCCTCTCTTCCCCACCCAGCCCGGGAAACCGGCAGCATTTCTAGTTCAGGCCCAGACCCGTCCTGGCAGCCTGGATTCCACTGCCTAGGCAGGAAGCTCATCTCAGCCCAGTGACCTTTTCTCTCTGTTTTTTGTCACAGAGGAATTTCCATGCCAGCAGTATGGGGCAATGGGGGTGGGTGGCCAAAGGTTTCCCCCTTAAGCCACAAGAGCCATGGAGTGGAGGTAAGCTAAGCAAACAGAG,dna,IRF6 enhancer,GCF_000001405.26,,Regulatory 
+158,urn:mavedb:00000097-z-1,CTGTCTCCAGCAATTGGGCAGATGTGTGAGGCACCTGTGGTGACCCGAGAGTGGGTGTTGGACAGTGTAGCACTCTACCAGTGCCAGGAGCTGGACACCTACCTGATA,dna,BRCA1 Exon 23,GCF_000001405.13,,Protein coding +159,urn:mavedb:00000094-a-9,MAEAPPRRLGLGPPPGDAPRAELVALTAVQSEQGEAGGGGSPRRLGLLGSPLPPGAPLPGPGSGSGSACGQRSSAAHKRYRRLQNWVYNVLERPRGWAFVYHVFIFLLVFSCLVLSVLSTIQEHQELANECLLILEFVMIVVFGLEYIVRVWSAGCCCRYRGWQGRFRFARKPFCVIDFIVFVASVAVIAAGTQGNIFATSALRSMRFLQILRMVRMDRRGGTWKLLGSVVYAHSKELITAWYIGFLVLIFASFLVYLAEKDANSDFSSYADSLWWGTITLTTIGYGDKTPHTWLGRVLAAGFALLGISFFALPAGILGSGFALKVQEQHRQKHFEKRRMPAANLIQAAWRLYSTDMSRAYLTATWYYYDSILPSFRELALLFEHVQRARNGGLRPLEVRRAPVPDGAPSRYPPVATCHRPGSTSFCPGESSRMGIKDRIRMGSSQRRTGPSKQHLAPPTMPTSPSSEQVGEATSPTKVQKSWSFNDRTRFRASLRLKPRTSAEDAPSEEVAEEKSYQCELTVDDIMPAVKTVIRSIRILKFLVAKRKFKETLRPYDVKDVIEQYSAGHLDMLGRIKSLQTRVDQIVGRGPGDRKAREKGDKGPSDAEVVDEISMMGRVVKVEKQVQSIEHKLDLLLGFYSRCLRSGTSASLGAVQVPLFDPDITSDYHSPVDHEDISVSAQTLSISRSVSTNMD,protein,KCNQ4,GCF_000001405.26,,Protein coding +160,urn:mavedb:00000045-e-1,ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA,dna,alpha-synuclein,GCF_000001405.10,P37840,Protein coding +161,urn:mavedb:00000097-j-1,AGTGTGAGCAGGGAGAAGCCAGAATTGACAGCTTCAACAGAAAGGGTCAACAAAAGAATGTCCATGGTGGTGTCTGGCCTGACCCCAGAAGAATTTGTGAGTGTAT,dna,BRCA1 Exon 15,GCF_000001405.13,,Protein coding 
+162,urn:mavedb:00000047-b-1,GATTATCAAGTGTCAAGTCCAATCTATGACATCAATTATTATACATCGGAGCCCTGCCAAAAAATCAATGTGAAGCAAATCGCAGCCCGCCTCCTGCCTCCGCTCTACTCACTGGTGTTCATCTTTGGTTTTGTGGGCAACATGCTGGTCATCCTCATCCTGATAAACTGCAAAAGGCTGAAGAGCATGACTGACATCTACCTGCTCAACCTGGCCATCTCTGACCTGTTTTTCCTTCTTACTGTCCCCTTCTGGGCTCACTATGCTGCCGCCCAGTGGGACTTTGGAAATACAATGTGTCAACTCTTGACAGGGCTCTATTTTATAGGCTTCTTCTCTGGAATCTTCTTCATCATCCTCCTGACAATCGATAGGTACCTGGCTGTCGTCCATGCTGTGTTTGCTTTAAAAGCCAGGACGGTCACCTTTGGGGTGGTGACAAGTGTGATCACTTGGGTGGTGGCTGTGTTTGCGTCTCTCCCAGGAATCATCTTTACCAGATCTCAAAAAGAAGGTCTTCATTACACCTGCAGCTCTCATTTTCCATACAGTCAGTATCAATTCTGGAAGAATTTCCAGACATTAAAGATAGTCATCTTGGGGCTGGTCCTGCCGCTGCTTGTCATGGTCATCTGCTACTCGGGAATCCTAAAAACTCTGCTTCGGTGTCGAAATGAGAAGAAGAGGCACAGGGCTGTGAGGCTTATCTTCACCATCATGATTGTTTATTTTCTCTTCTGGGCTCCCTACAACATTGTCCTTCTCCTGAACACCTTCCAGGAATTCTTTGGCCTGAATAATTGCAGTAGCTCTAACAGGTTGGACCAAGCTATGCAGGTGACAGAGACTCTTGGGATGACGCACTGCTGCATCAACCCCATCATCTATGCCTTTGTCGGGGAGAAGTTCAGAAACTACCTCTTAGTCTTCTTCCAAAAGCACATTGCCAAACGCTTCTGCAAATGCTGTTCTATTTTCCAGCAAGAGGCTCCCGAGCGAGCAAGCTCAGTTTACACCCGATCCACTGGGGAGCAGGAAATATCTGTGGGCTTG,dna,CCR5,GCF_000001405.26,P51681,Protein coding 
+163,urn:mavedb:00000003-b-1,GATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAATCTTAGAGTGTCCCATCTGCCTGGAGTTGATCAAGGAACCTGTCTCCACAAAGTGTGACCACATATTTTGCAAATTTTGCATGCTGAAACTTCTCAACCAGAAGAAAGGGCCTTCACAGTGTCCTTTATGTAAGAATGATATAACCAAAAGGAGCCTACAAGAAAGTACGAGATTTAGTCAACTTGTTGAAGAGCTATTGAAAATCATTTGTGCTTTTCAGCTTGACACAGGTTTGGAGTATGCAAACAGCTATAATTTTGCAAAAAAGGAAAATAACTCTCCTGAACATCTAAAAGATGAAGTTTCTATCATCCAAAGTATGGGCTACAGAAACCGTGCCAAAAGACTTCTACAGAGTGAACCCGAAAATCCTTCCTTGCAGGAAACCAGTCTCAGTGTCCAACTCTCTAACCTTGGAACTGTGAGAACTCTGAGGACAAAGCAGCGGATACAACCTCAAAGGACGTCTGTCTACATTGAATTGGGATCTGATTCTTCTGAAGATACCGTTAATAAGGCAACTTATTGCAGTGTGGGAGATCAAGAATTGTTACAAATCACCCCTCAAGGAACCAGGGATGAAATCAGTTTGGATTCTGCAAAAAAGGCTGCTTGTGAATTTTCTGAGACGGATGTAACAAATACTGAACATCATCAACCCAGTAATAATGATTTGAACACCACTGAGAAGCGTGCAGCTGAGAGGCATCCAGAAAAGTATCAGGGTAGTTCTGTTTCAAACTTGCATGTGGAGCCATGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGAAAAGGCTGAGTTC,dna,BRCA1 RING domain,GCF_000001405.26,,Protein coding +164,urn:mavedb:00000054-a-1,ATGACAGCCATCATCAAAGAGATCGTTAGCAGAAACAAAAGGAGATATCAAGAGGATGGATTCGACTTAGACTTGACCTATATTTATCCAAACATTATTGCTATGGGATTTCCTGCAGAAAGACTTGAAGGCGTATACAGGAACAATATTGATGATGTAGTAAGGTTTTTGGATTCAAAGCATAAAAACCATTACAAGATATACAATCTTTGTGCTGAAAGACATTATGACACCGCCAAATTTAATTGCAGAGTTGCACAATATCCTTTTGAAGACCATAACCCACCACAGCTAGAACTTATCAAACCCTTTTGTGAAGATCTTGACCAATGGCTAAGTGAAGATGACAATCATGTTGCAGCAATTCACTGTAAAGCTGGAAAGGGACGAACTGGTGTAATGATATGTGCATATTTATTACATCGGGGCAAATTTTTAAAGGCACAAGAGGCCCTAGATTTCTATGGGGAAGTAAGGACCAGAGACAAAAAGGGAGTAACTATTCCCAGTCAGAGGCGCTATGTGTATTATTATAGCTACCTGTTAAAGAATCATCTGGATTATAGACCAGTGGCACTGTTGTTTCACAAGATGATGTTTGAAACTATTCCAATGTTCAGTGGCGGAACTTGCAATCCTCAGTTTGTGGTCTGCCAGCTAAAGGTGAAGATATATTCCTCCAATTCAGGACCCACACGACGGGAAGACAAGTTCATGTACTTTGAGTTCCCTCAGCCGTTACCTGTGTGTGGTGATATCAAAGTAGAGTTCTTCCACAAACAGAACAAGATGCTAAAAAAGGACAAAATGTTTCACTTTTGGGTAAATACATTCTTCATACCAGGACCAGAGGAAACCTCAGAAAAAGTAGAAAATGGAAGTCTATGTGATCAAGAAATCGATAGCATTTGCAGTATAGAGCGTGCAGATAATGACAAGGAATATCTAGTACTTACTTTAACAAAAAATGATCTTGA
CAAAGCAAATAAAGACAAAGCCAACCGATACTTTTCTCCAAATTTTAAGGTGAAGCTGTACTTCACAAAAACAGTAGAGGAGCCGTCAAATCCAGAGGCTAGCAGTTCAACTTCTGTAACACCAGATGTTAGTGACAATGAACCTGATCATTATAGATATTCTGACACCACTGACTCTGATCCAGAGAATGAACCTTTTGATGAAGATCAGCATACACAAATTACAAAAGTCTGA,dna,PTEN,GCF_000001405.26,P60484,Protein coding +165,urn:mavedb:00000061-a-1,TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC,dna,RAF,GCF_000001405.26,P04049,Protein coding +166,urn:mavedb:00000047-a-1,GATTATCAAGTGTCAAGTCCAATCTATGACATCAATTATTATACATCGGAGCCCTGCCAAAAAATCAATGTGAAGCAAATCGCAGCCCGCCTCCTGCCTCCGCTCTACTCACTGGTGTTCATCTTTGGTTTTGTGGGCAACATGCTGGTCATCCTCATCCTGATAAACTGCAAAAGGCTGAAGAGCATGACTGACATCTACCTGCTCAACCTGGCCATCTCTGACCTGTTTTTCCTTCTTACTGTCCCCTTCTGGGCTCACTATGCTGCCGCCCAGTGGGACTTTGGAAATACAATGTGTCAACTCTTGACAGGGCTCTATTTTATAGGCTTCTTCTCTGGAATCTTCTTCATCATCCTCCTGACAATCGATAGGTACCTGGCTGTCGTCCATGCTGTGTTTGCTTTAAAAGCCAGGACGGTCACCTTTGGGGTGGTGACAAGTGTGATCACTTGGGTGGTGGCTGTGTTTGCGTCTCTCCCAGGAATCATCTTTACCAGATCTCAAAAAGAAGGTCTTCATTACACCTGCAGCTCTCATTTTCCATACAGTCAGTATCAATTCTGGAAGAATTTCCAGACATTAAAGATAGTCATCTTGGGGCTGGTCCTGCCGCTGCTTGTCATGGTCATCTGCTACTCGGGAATCCTAAAAACTCTGCTTCGGTGTCGAAATGAGAAGAAGAGGCACAGGGCTGTGAGGCTTATCTTCACCATCATGATTGTTTATTTTCTCTTCTGGGCTCCCTACAACATTGTCCTTCTCCTGAACACCTTCCAGGAATTCTTTGGCCTGAATAATTGCAGTAGCTCTAACAGGTTGGACCAAGCTATGCAGGTGACAGAGACTCTTGGGATGACGCACTGCTGCATCAACCCCATCATCTATGCCTTTGTCGGGGAGAAGTTCAGAAACTACCTCTTAGTCTTCTTCCAAAAGCACATTGCCAAACGCTTCTGCAAATGCTGTTCTATTTTCCAGCAAGAGGCTCCCGAGCGAGCAAGCTCAGTTTACACCCGATCCACTGGGGAGCAGGAAATATCTGTGGGCTTG,dna,CCR5,GCF_000001405.26,P51681,Protein coding 
+167,urn:mavedb:00000035-a-1,ATGTTGTCAAGACTTTTTCGAATGCATGGCCTCTTTGTGGCCTCCCATCCCTGGGAAGTCATAGTGGGGACAGTGACACTGACCATCTGCATGATGTCCATGAACATGTTTACTGGTAACAATAAGATCTGTGGTTGGAATTATGAATGTCCAAAGTTTGAAGAGGATGTTTTGAGCAGTGACATTATAATTCTGACAATAACACGATGCATAGCCATCCTGTATATTTACTTCCAGTTCCAGAATTTACGTCAACTTGGATCAAAATATATTTTGGGTATTGCTGGCCTTTTCACAATTTTCTCAAGTTTTGTATTCAGTACAGTTGTCATTCACTTCTTAGACAAAGAATTGACAGGCTTGAATGAAGCTTTGCCCTTTTTCCTACTTTTGATTGACCTTTCCAGAGCAAGCACATTAGCAAAGTTTGCCCTCAGTTCCAACTCACAGGATGAAGTAAGGGAAAATATTGCTCGTGGAATGGCAATTTTAGGTCCTACGTTTACCCTCGATGCTCTTGTTGAATGTCTTGTGATTGGAGTTGGTACCATGTCAGGGGTACGTCAGCTTGAAATTATGTGCTGCTTTGGCTGCATGTCAGTTCTTGCCAACTACTTCGTGTTCATGACTTTCTTCCCAGCTTGTGTGTCCTTGGTATTAGAGCTTTCTCGGGAAAGCCGCGAGGGTCGTCCAATTTGGCAGCTCAGCCATTTTGCCCGAGTTTTAGAAGAAGAAGAAAATAAGCCGAATCCTGTAACTCAGAGGGTCAAGATGATTATGTCTCTAGGCTTGGTTCTTGTTCATGCTCACAGTCGCTGGATAGCTGATCCTTCTCCTCAAAACAGTACAGCAGATACTTCTAAGGTTTCATTAGGACTGGATGAAAATGTGTCCAAGAGAATTGAACCAAGTGTTTCCCTCTGGCAGTTTTATCTCTCTAAAATGATCAGCATGGATATTGAACAAGTTATTACCCTAAGTTTAGCTCTCCTTCTGGCTGTCAAGTACATCTTCTTTGAACAAACAGAGACAGAATCTACACTCTCATTAAAAAACCCTATCACATCTCCTGTAGTGACACAAAAGAAAGTCCCAGACAATTGTTGTAGACGTGAACCTATGCTGGTCAGAAATAACCAGAAATGTGATTCAGTAGAGGAAGAGACAGGGATAAACCGAGAAAGAAAAGTTGAGGTTATAAAACCCTTAGTGGCTGAAACAGATACCCCAAACAGAGCTACATTTGTGGTTGGTAACTCCTCCTTACTCGATACTTCATCAGTACTGGTGACACAGGAACCTGAAATTGAACTTCCCAGGGAACCTCGGCCTAATGAAGAATGTCTACAGATACTTGGGAATGCAGAGAAAGGTGCAAAATTCCTTAGTGATGCTGAGATCATCCAGTTAGTCAATGCTAAGCATATCCCAGCCTACAAGTTGGAAACTCTGATGGAAACTCATGAGCGTGGTGTATCTATTCGCCGACAGTTACTTTCCAAGAAGCTTTCAGAACCTTCTTCTCTCCAGTACCTACCTTACAGGGATTATAATTACTCCTTGGTGATGGGAGCTTGTTGTGAGAATGTTATTGGATATATGCCCATCCCTGTTGGAGTGGCAGGACCCCTTTGCTTAGATGAAAAAGAATTTCAGGTTCCAATGGCAACAACAGAAGGTTGTCTTGTGGCCAGCACCAATAGAGGCTGCAGAGCAATAGGTCTTGGTGGAGGTGCCAGCAGCCGAGTCCTTGCAGATGGGATGACTCGTGGCCCAGTTGTGCGTCTTCCACGTGCTTGTGACTCTGCAGAAGTGAAAGCCTGGCTCGAAACATCTGAAGGGTTCGCAGTGATAAAGGAGGCATTTGACAGCACTAGCAGATTTGCACGTCTACAGAAACTTCATACAAGTATAGCTGGACGCAACCTTTATATCCGTTTCCAGTCCAGGTCAGGGGATGCCATGGGGATG
AACATGATTTCAAAGGGTACAGAGAAAGCACTTTCAAAACTTCACGAGTATTTCCCTGAAATGCAGATTCTAGCCGTTAGTGGTAACTATTGTACTGACAAGAAACCTGCTGCTATAAATTGGATAGAGGGAAGAGGAAAATCTGTTGTTTGTGAAGCTGTCATTCCAGCCAAGGTTGTCAGAGAAGTATTAAAGACTACCACAGAGGCTATGATTGAGGTCAACATTAACAAGAATTTAGTGGGCTCTGCCATGGCTGGGAGCATAGGAGGCTACAACGCCCATGCAGCAAACATTGTCACCGCCATCTACATTGCCTGTGGACAGGATGCAGCACAGAATGTTGGTAGTTCAAACTGTATTACTTTAATGGAAGCAAGTGGTCCCACAAATGAAGATTTATATATCAGCTGCACCATGCCATCTATAGAGATAGGAACGGTGGGTGGTGGGACCAACCTACTACCTCAGCAAGCCTGTTTGCAGATGCTAGGTGTTCAAGGAGCATGCAAAGATAATCCTGGGGAAAATGCCCGGCAGCTTGCCCGAATTGTGTGTGGGACCGTAATGGCTGGGGAATTGTCACTTATGGCAGCATTGGCAGCAGGACATCTTGTCAAAAGTCACATGATTCACAACAGGTCGAAGATCAATTTACAAGACCTCCAAGGAGCTTGCACCAAGAAGACAGCCTGA,dna,HMGCR,GCF_000001405.26,,Protein coding +168,urn:mavedb:00000015-a-1,GTCCCACTGATGAACTGTGCTGCCACAGTAAATGTAGCCACTATGCCTATCTCCATTCTGAAGATGTGTCACTTCCTGTTTCAGACTCAAATCAGCCACAGTGGCAGAAGCCCACGAAATCAGAGGTGAAATTTAATAATGACCACTGCCCATTCTCTTCACTTGTCCCAAGAGGCCATTGGAAATAGTCCAAAGACCCATTGAGGGAGATGGACATTATTTCCCAGAAGTAAATACAGCTCAGCTTGTACTTTGGTACAACTAATCGACCTTACCACTTTCACAATCTGCTAGCAAAGGTTA,dna,F9 promoter,GCF_000001405.26,,Regulatory +169,urn:mavedb:00000078-b-1,MGSTWGSPGWVRLALCLTGLVLSLYALHVKAARARDRDYRALCDVGTAISCSRVFSSRWGRGFGLVEHVLGQDSILNQSNSIFGCIFYTLQLLLGCLRTRWASVLMLLSSLVSLAGSVYLAWILFFVLYDFCIVCITTYAINVSLMWLSFRKVQEPQGKAKRH,protein,VKOR,GCF_000001405.26,,Protein coding +170,urn:mavedb:00000025-a-1,CTGCATCGCTCCATAGAGCCTGCAGAGGGCACTAGACTGGGAATTAGAAAACCTGATTTCCCTTCCAGCTCCACCTCTGACCAATTGCCTGACCCTGGTCAAATTGCTTAACCTCTTCCTATCTCAGCTCCCTATCCATAAAACAGAGGGACGAATAAACTCTCCTCCTACCACTAAGAGGTGTAGCCAGAGTTAATACCCTCATCGTCCTTTGAGCTCAGCAGATGAAAGGCACTGAGAAAAGTACAAAGAATTTTTATGTGCTATTGACTTTATTTTATTTTATGTGGGGGAGGGAGCCGGCCCCAGCTGGAAAGCTGCTTTCTCTGAATCAAAGGGCAGGAACCCAGCAAGTTTCTCAGGATTGGGGCCTTAGACTGGGCTGTGTATACAGACAGTGCCAGCCAACCCCACAGTTCAGTTTCCTTTAACCTGGTGCTCCAGGCAATAACTGTGCAACTCTGCAATTTAACAATGTGTTCTTTGTCCCACAACTGTTCTCGTTTCTCAACTGCCCAGGTAATATGTTTGGGCCTGTAGGAAGAGTCAAATAGTTAATAAGGGAAGGGTTTGGCATGCCCTACGTAAGTTCTACCAGCA,dna,MYC 
enhancer (rs6983267),GCF_000001405.26,,Regulatory +171,urn:mavedb:00000045-l-1,ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA,dna,alpha-synuclein,GCF_000001405.10,P37840,Protein coding +172,urn:mavedb:00000065-a-1,ATGGGGAAATCCAACAGCAAGTTGAAGCCCGAAGTTGTGGAGGAGCTGACCAGGAAGACCTACTTTACCGAGAAGGAGGTCCAGCAGTGGTACAAAGGCTTCATCAAGGACTGCCCCAGTGGGCAGCTGGATGCGGCAGGCTTCCAGAAGATCTACAAGCAATTCTTCCCGTTCGGAGACCCCACCAAGTTTGCCACATTTGTTTTCAACGTCTTTGATGAAAACAAGGACGGGCGAATTGAGTTCTCCGAGTTCATCCAGGCGCTGTCGGTGACCTCACGGGGAACCCTGGATGAGAAGCTACGGTGGGCCTTCAAGCTCTACGACTTGGACAATGATGGCTACATCACCAGGAATGAGATGCTGGACATTGTGGATGCCATTTACCAGATGGTGGGGAATACCGTGGAGCTCCCAGAGGAGGAGAACACTCCTGAGAAGAGGGTGGACCGGATCTTTGCCATGATGGATAAGAATGCCGACGGGAAGCTGACCCTGCAGGAGTTCCAGGAGGGTTCCAAGGCAGACCCGTCCATTGTGCAGGCGCTGTCCCTCTACGACGGGCTGGTATAG,dna,NCS1,GCF_000001405.26,,Protein coding +173,urn:mavedb:00000046-a-3,GACCACATTCCTTGGATTACAGCTGTACTTCCAACAGTTATTATATGTGTGATGGTTTTCTGTCTAATTCTATGGAAATGG,dna,CD86,GCF_000001405.10,P42081,Protein coding 
+174,urn:mavedb:00000068-c-1,ATGGAGGAGCCGCAGTCAGATCCTAGCGTCGAGCCCCCTCTGAGTCAGGAAACATTTTCAGACCTATGGAAACTACTTCCTGAAAACAACGTTCTGTCCCCCTTGCCGTCCCAAGCAATGGATGATTTGATGCTGTCCCCGGACGATATTGAACAATGGTTCACTGAAGACCCAGGTCCAGATGAAGCTCCCAGAATGCCAGAGGCTGCTCCCCGCGTGGCCCCTGCACCAGCAGCTCCTACACCGGCGGCCCCTGCACCAGCCCCCTCCTGGCCCCTGTCATCTTCTGTCCCTTCCCAGAAAACCTACCAGGGCAGCTACGGTTTCCGTCTGGGCTTCTTGCATTCTGGGACAGCCAAGTCTGTGACTTGCACGTACTCCCCTGCCCTCAACAAGATGTTTTGCCAACTGGCCAAGACCTGCCCTGTGCAGCTGTGGGTTGATTCCACACCCCCGCCCGGCACCCGCGTCCGCGCCATGGCCATCTACAAGCAGTCACAGCACATGACGGAGGTTGTGAGGCGCTGCCCCCACCATGAGCGCTGCTCAGATAGCGATGGTCTGGCCCCTCCTCAGCATCTTATCCGAGTGGAAGGAAATTTGCGTGTGGAGTATTTGGATGACAGAAACACTTTTCGACATAGTGTGGTGGTGCCCTATGAGCCGCCTGAGGTTGGCTCTGACTGTACCACCATCCACTACAACTACATGTGTAACAGTTCCTGCATGGGCGGCATGAACCGGAGGCCCATCCTCACCATCATCACACTGGAAGACTCCAGTGGTAATCTACTGGGACGGAACAGCTTTGAGGTGCGTGTTTGTGCCTGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAAGGGGAGCCTCACCACGAGCTGCCCCCAGGGAGCACTAAGCGAGCACTGCCCAACAACACCAGCTCCTCTCCCCAGCCAAAGAAGAAACCACTGGATGGAGAATATTTCACCCTTCAGATCCGTGGGCGTGAGCGCTTCGAGATGTTCCGAGAGCTGAATGAGGCCTTGGAACTCAAGGATGCCCAGGCTGGGAAGGAGCCAGGGGGGAGCAGGGCTCACTCCAGCCACCTGAAGTCCAAAAAGGGTCAGTCTACCTCCCGCCATAAAAAACTCATGTTCAAGACAGAAGGGCCTGACTCAGACTAG,dna,TP53 (P72R),GCF_000001405.26,,Protein coding 
+175,urn:mavedb:00000005-a-2,ATGCCTTCTGAGACCCCCCAGGCAGAAGTGGGGCCCACAGGCTGCCCCCACCGCTCAGGGCCACACTCGGCGAAGGGGAGCCTGGAGAAGGGGTCCCCAGAGGATAAGGAAGCCAAGGAGCCCCTGTGGATCCGGCCCGATGCTCCGAGCAGGTGCACCTGGCAGCTGGGCCGGCCTGCCTCCGAGTCCCCACATCACCACACTGCCCCGGCAAAATCTCCAAAAATCTTGCCAGATATTCTGAAGAAAATCGGGGACACCCCTATGGTCAGAATCAACAAGATTGGGAAGAAGTTCGGCCTGAAGTGTGAGCTCTTGGCCAAGTGTGAGTTCTTCAACGCGGGCGGGAGCGTGAAGGACCGCATCAGCCTGCGGATGATTGAGGATGCTGAGCGCGACGGGACGCTGAAGCCCGGGGACACGATTATCGAGCCGACATCCGGGAACACCGGGATCGGGCTGGCCCTGGCTGCGGCAGTGAGGGGCTATCGCTGCATCATCGTGATGCCAGAGAAGATGAGCTCCGAGAAGGTGGACGTGCTGCGGGCACTGGGGGCTGAGATTGTGAGGACGCCCACCAATGCCAGGTTCGACTCCCCGGAGTCACACGTGGGGGTGGCCTGGCGGCTGAAGAACGAAATCCCCAATTCTCACATCCTAGACCAGTACCGCAACGCCAGCAACCCCCTGGCTCACTACGACACCACCGCTGATGAGATCCTGCAGCAGTGTGATGGGAAGCTGGACATGCTGGTGGCTTCAGTGGGCACGGGCGGCACCATCACGGGCATTGCCAGGAAGCTGAAGGAGAAGTGTCCTGGATGCAGGATCATTGGGGTGGATCCCGAAGGGTCCATCCTCGCAGAGCCGGAGGAGCTGAACCAGACGGAGCAGACAACCTACGAGGTGGAAGGGATCGGCTACGACTTCATCCCCACGGTGCTGGACAGGACGGTGGTGGACAAGTGGTTCAAGAGCAACGATGAGGAGGCGTTCACCTTTGCCCGCATGCTGATCGCGCAAGAGGGGCTGCTGTGCGGTGGCAGTGCTGGCAGCACGGTGGCGGTGGCCGTGAAGGCCGCGCAGGAGCTGCAGGAGGGCCAGCGCTGCGTGGTCATTCTGCCCGACTCAGTGCGGAACTACATGACCAAGTTCCTGAGCGACAGGTGGATGCTGCAGAAGGGCTTTCTGAAGGAGGAGGACCTCACGGAGAAGAAGCCCTGGTGGTGGCACCTCCGTGTTCAGGAGCTGGGCCTGTCAGCCCCGCTGACCGTGCTCCCGACCATCACCTGTGGGCACACCATCGAGATCCTCCGGGAGAAGGGCTTCGACCAGGCGCCCGTGGTGGATGAGGCGGGGGTAATCCTGGGAATGGTGACGCTTGGGAACATGCTCTCGTCCCTGCTTGCCGGGAAGGTGCAGCCGTCAGACCAAGTTGGCAAAGTCATCTACAAGCAGTTCAAACAGATCCGCCTCACGGACACGCTGGGCAGGCTCTCGCACATCCTGGAGATGGACCACTTCGCCCTGGTGGTGCACGAGCAGATCCAGTACCACAGCACCGGGAAGTCCAGTCAGCGGCAGATGGTGTTCGGGGTGGTCACCGCCATTGACTTGCTGAACTTCGTGGCCGCCCAGGAGCGGGACCAGAAGTGA,dna,CBS,GCF_000001405.26,P35520,Protein coding +176,urn:mavedb:00000061-e-1,TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC,dna,RAF,GCF_000001405.26,P04049,Protein coding 
+177,urn:mavedb:00000019-a-1,GGCGTCTGGACTAGGAGCTTATTGATAACCTCAGACGTTCCAGAAGCGAGTGTGTGGAACTGCTGAAGGGTGCTTCCTTTTATTCTTCATCCCTAGCCAGCCGCCGGCCCCTGGCCTCACTGGATACTCTAAGACTATTGGTCAAGTTTGCCTTGTCAAGGCTATTGGTCAAGGCAAGGCTGGCCAACCCATGGGTGGAGTTTAGCCAGGGACCGTTTCAGACAGATATTTGCATTGAGATAGTGTGGGGAAGGGGCCCCCAAGAGGATACTGC,dna,HBG1 promoter,GCF_000001405.26,,Regulatory +178,urn:mavedb:00000055-b-1,ATGACGGCCAGCGCACAGCCGCGCGGGCGGCGGCCAGGAGTCGGAGTCGGAGTCGTGGTGACCAGCTGCAAGCATCCGCGTTGCGTCCTCCTGGGGAAGAGGAAAGGCTCGGTTGGAGCTGGCAGTTTCCAACTCCCTGGAGGTCATCTGGAGTTCGGTGAAACCTGGGAAGAATGTGCTCAAAGGGAAACCTGGGAAGAAGCAGCTCTTCACCTGAAAAATGTTCACTTTGCCTCAGTTGTGAATTCTTTCATTGAGAAGGAGAATTACCATTATGTTACTATATTAATGAAAGGAGAAGTGGATGTGACTCATGATTCAGAACCAAAGAATGTAGAGCCTGAAAAAAATGAAAGTTGGGAGTGGGTTCCTTGGGAAGAACTACCTCCCCTGGACCAGCTTTTCTGGGGACTGCGTTGTTTAAAAGAACAAGGCTATGATCCATTTAAAGAAGATCTGAACCATCTGGTGGGATACAAAGGAAATCATCTCTAG,dna,NUDT15,GCF_000001405.26,Q9NV35,Protein coding +179,urn:mavedb:00000057-a-1,ACGGAATATAAGCTGGTGGTGGTGGGCGCCGGCGGTGTGGGCAAGAGTGCGCTGACCATCCAGCTGATCCAGAACCATTTTGTGGACGAATACGACCCCACTATAGAGGATTCCTACCGGAAGCAGGTGGTCATTGATGGGGAGACGTGCCTGTTGGACATCCTGGATACCGCCGGCCAGGAGGAGTACAGCGCCATGCGGGACCAGTACATGCGCACCGGGGAGGGCTTCCTGTGTGTGTTTGCCATCAACAACACCAAGTCTTTTGAGGACATCCACCAGTACAGGGAGCAGATCAAACGGGTGAAGGACTCGGATGACGTGCCCATGGTGCTGGTGGGGAACAAGTGTGACCTGGCTGCACGCACTGTGGAATCTCGGCAGGCTCAGGACCTCGCCCGAAGCTACGGCATCCCCTACATCGAGACCTCGGCCAAGACCCGGCAGGGAGTGGAGGATGCCTTCTACACGTTGGTGCGTGAGATCCGGCAGCAC,dna,Ras,GCF_000001405.26,P01112,Protein coding +180,urn:mavedb:00000097-n-1,CATTCTGCAGATGCTGAGTTTGTGTGTGAACGGACACTGAAATATTTTCTAGGAATTGCGGGAGGAAAATGGGTAGTTAGCTATTTCTGTAAGTATAA,dna,BRCA1 Exon 17,GCF_000001405.13,,Protein coding 
+181,urn:mavedb:00000001-d-2,ATGGAGCATGCCTTTACCCCGTTGGAGCCCCTGCTTTCCACTGGGAATTTGAAGTACTGCCTTGTAATTCTTAATCAGCCTTTGGACAACTATTTTCGTCATCTTTGGAACAAAGCTCTTTTAAGAGCCTGTGCCGATGGAGGTGCCAACCGCTTATATGATATCACCGAAGGAGAGAGAGAAAGCTTTTTGCCTGAATTCATCAATGGAGACTTTGATTCTATTAGGCCTGAAGTCAGAGAATACTATGCTACTAAGGGATGTGAGCTCATTTCAACTCCTGATCAAGACCACACTGACTTTACTAAGTGCCTTAAAATGCTCCAAAAGAAGATAGAAGAAAAAGACTTAAAGGGAAAGCACAGGTTGCATGTAGACACTGGAATGGAGGGTGATTGGTGTGGCCTTATTCCTGTTGGACAGCCTTGTATGCAGGTTACAACCACAGGCCTCAAGTGGAACCTCACAAATGATGTGCTTGCTTTTGGAACATTGGTCAGTACTTCCAATACCTACGACGGGTCTGGTGTTGTGACTGTGGAAACTGACCACCCACTCCTCTGGACCATGGCCATCAAAAGCTAA,dna,TPK1,GCF_000001405.26,Q9H3S4,Protein coding +182,urn:mavedb:00000017-a-1,GTTGTGAATGCCGCGTCCTGTCCTGGTGACAGGAGAACAATGTTGGTGAACGTCGCAGCGGGTGTCCGAGTGCTCCGTGTGCCCCTGAGAGCGGGTGGGAGCGGAAGCCTGAGCGGCCTGCGGCCTCCGGCGATAGTGTGCTATCTGCCGCTGCAGCGCGCGTCCGCGCGGCCTCTGGGCTATTTCTGGCCAGGCCGCAGCACTGTGGTCGGTGCGGGCGTGGCAGGGGCGGGGCGGCCTTATCGCTCGGCTCTCCCGCCTACGCCTCCCGCTGCAGAGTAAGCCGGGCTGCCGTCTTCTCGCCATGGGCTCCGGTGAGTCTGGAGTCCGGTCGGGCCCCCGGCTGCTCCCTAGGCCGACCCGGGTTGAGAGGAGCTCTGGTCGT,dna,GP1BB promoter,GCF_000001405.26,,Regulatory 
+183,urn:mavedb:00000005-a-3,ATGCCTTCTGAGACCCCCCAGGCAGAAGTGGGGCCCACAGGCTGCCCCCACCGCTCAGGGCCACACTCGGCGAAGGGGAGCCTGGAGAAGGGGTCCCCAGAGGATAAGGAAGCCAAGGAGCCCCTGTGGATCCGGCCCGATGCTCCGAGCAGGTGCACCTGGCAGCTGGGCCGGCCTGCCTCCGAGTCCCCACATCACCACACTGCCCCGGCAAAATCTCCAAAAATCTTGCCAGATATTCTGAAGAAAATCGGGGACACCCCTATGGTCAGAATCAACAAGATTGGGAAGAAGTTCGGCCTGAAGTGTGAGCTCTTGGCCAAGTGTGAGTTCTTCAACGCGGGCGGGAGCGTGAAGGACCGCATCAGCCTGCGGATGATTGAGGATGCTGAGCGCGACGGGACGCTGAAGCCCGGGGACACGATTATCGAGCCGACATCCGGGAACACCGGGATCGGGCTGGCCCTGGCTGCGGCAGTGAGGGGCTATCGCTGCATCATCGTGATGCCAGAGAAGATGAGCTCCGAGAAGGTGGACGTGCTGCGGGCACTGGGGGCTGAGATTGTGAGGACGCCCACCAATGCCAGGTTCGACTCCCCGGAGTCACACGTGGGGGTGGCCTGGCGGCTGAAGAACGAAATCCCCAATTCTCACATCCTAGACCAGTACCGCAACGCCAGCAACCCCCTGGCTCACTACGACACCACCGCTGATGAGATCCTGCAGCAGTGTGATGGGAAGCTGGACATGCTGGTGGCTTCAGTGGGCACGGGCGGCACCATCACGGGCATTGCCAGGAAGCTGAAGGAGAAGTGTCCTGGATGCAGGATCATTGGGGTGGATCCCGAAGGGTCCATCCTCGCAGAGCCGGAGGAGCTGAACCAGACGGAGCAGACAACCTACGAGGTGGAAGGGATCGGCTACGACTTCATCCCCACGGTGCTGGACAGGACGGTGGTGGACAAGTGGTTCAAGAGCAACGATGAGGAGGCGTTCACCTTTGCCCGCATGCTGATCGCGCAAGAGGGGCTGCTGTGCGGTGGCAGTGCTGGCAGCACGGTGGCGGTGGCCGTGAAGGCCGCGCAGGAGCTGCAGGAGGGCCAGCGCTGCGTGGTCATTCTGCCCGACTCAGTGCGGAACTACATGACCAAGTTCCTGAGCGACAGGTGGATGCTGCAGAAGGGCTTTCTGAAGGAGGAGGACCTCACGGAGAAGAAGCCCTGGTGGTGGCACCTCCGTGTTCAGGAGCTGGGCCTGTCAGCCCCGCTGACCGTGCTCCCGACCATCACCTGTGGGCACACCATCGAGATCCTCCGGGAGAAGGGCTTCGACCAGGCGCCCGTGGTGGATGAGGCGGGGGTAATCCTGGGAATGGTGACGCTTGGGAACATGCTCTCGTCCCTGCTTGCCGGGAAGGTGCAGCCGTCAGACCAAGTTGGCAAAGTCATCTACAAGCAGTTCAAACAGATCCGCCTCACGGACACGCTGGGCAGGCTCTCGCACATCCTGGAGATGGACCACTTCGCCCTGGTGGTGCACGAGCAGATCCAGTACCACAGCACCGGGAAGTCCAGTCAGCGGCAGATGGTGTTCGGGGTGGTCACCGCCATTGACTTGCTGAACTTCGTGGCCGCCCAGGAGCGGGACCAGAAGTGA,dna,CBS,GCF_000001405.26,P35520,Protein coding +184,urn:mavedb:00000083-f-1,AGTTCTTCTCGGGAGCTCCAGCACAGTGAAATGGACAGAAGGGCAGAGCAA,dna,Minigene exon - hnRNP I (PTB) binding site,GCF_000001405.13,,Other noncoding 
+185,urn:mavedb:00000031-d-1,TCGCGGGGGTGGCCGGGGCCAGGGCTTCCCACGTGCGCAGCAGGACGCAGCGCTGCCTGAAACTCGCGCCGCGAGGAGAGGGCGGGGCCGCGGAAAGGAAGGGGAGGGGCTGGGAGGGCCCGGAGGGGGCTGGGCCGGGGACCCGGGAGGGGTCGGGACGGGGCGGGGTCCGCGCGGAGGAGGCGGAGCTGGAAGGTGAAGGGGCAGGACGGGTGCCCGGGTCCCCAGTCCCTCCGCCACGTGGGAAGCGCGGTCCTGG,dna,TERT promoter,GCF_000001405.26,,Regulatory +186,urn:mavedb:00000106-c-1,MDSLLMNRRKFLYQFKNVRWAKGRRETYLCYVVKRRDSATSFSLDFGYLRNKNGCHVELLFLRYISDWDLDPGRCYRVTWFTSWSPCYDCARHVADFLRGNPNLSLRIFTARLYFCEDRKAEPEGLRRLHRAGVQIAIMTFKDYFYCWNTFVENHERTFKAWEGLHENSVRLSRQLRRILLPLYEVDDLRDAFRTLGL,protein,AID,GCF_000001405.26,,Protein coding +187,urn:mavedb:00000020-a-1,CCCCAGAGTGCAGGACTAGGACCCGAGTGGACCTCAGGTCTGGCCAGGTCGCCATTGCCATGGAGACAGCAACAGTCCCCAGCCGCGGGTTCCCTAAGTGACTGGTTACTCTTTAACGTATCCACCCACCTTGGGTGATTAGAAGAATCAATAAGATAACCGGGCGGTGGCAGCTGGCCGCACTCACCGCCTTCCTGGTGGACGGGCTCCTGGTGGCTGTGCTGCTGCTGTGAGCGGGCCCCTGCTCCTCCATGCCCCCAGCTCTCCGGCTGGGTGGGCTTGGCC,dna,HNF4A promoter,GCF_000001405.26,,Regulatory +188,urn:mavedb:00000007-a-1,CTCTGAAGCTCAAAAGCAATGATTTGATAAGGCTTCGATTTTTAACACTTGAATTCCAACACCTTTAAAAATACTAAATGTTTCCCATTTTAAACAAGCCAAGTGAATGACTGAATTCTTAACCAAAAATAAATGTGAAGTAGATTGATATCACTCTTTGTCCATACAGAACATTATATAAATATTCTCTGGCCTTACTATCTAGCAAGGCAGGAAAAATAGATCAATTTGTTCTCACTCATAGGTGGGAATTGAACAATGAGAACACATGGACACAGGAAGGGGAACATCACACATCGGGGCCTGTTGTGGGGTGGGGGGAGGGGGGAGGGATAGCATTAGGAGATATATCTAACGTTAAATGACGTGTTAATGGGAGCAGCACACCAACATGGCACATGTATACATATGTAACAAACTGCATGTTGTGCACATGTACCCTAAAACTTAAAGTATAATAAGAAAAATAGATCAATTTACTCTACATCTGAGATTAAAAAGCAGAAAGACTCACTCACAGAGTTTCAGTATTTGACATTCAGAACCAGAAATAGAGTAACAGCGAGAACTTGAACTATTTCAGTTTAGCCTCCCACCCTCTCTGCTATCACTTCCCAAAA,dna,ECR11 enhancer,GCF_000001405.13,,Regulatory 
+189,urn:mavedb:00000045-b-1,ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA,dna,alpha-synuclein,GCF_000001405.10,P37840,Protein coding +190,urn:mavedb:00000048-b-1,GAGGGGATCAGTATATACACTTCAGATAACTACACCGAGGAAATGGGCTCAGGGGACTATGACTCCATGAAGGAACCCTGTTTCCGTGAAGAAAATGCTAATTTCAATAAAATCTTCCTGCCCACCATCTACTCCATCATCTTCTTAACTGGCATTGTGGGCAATGGATTGGTCATCCTGGTCATGGGTTACCAGAAGAAACTGAGAAGCATGACGGACAAGTACAGGCTGCACCTGTCAGTGGCCGACCTCCTCTTTGTCATCACGCTTCCCTTCTGGGCAGTTGATGCCGTGGCAAACTGGTACTTTGGGAACTTCCTATGCAAGGCAGTCCATGTCATCTACACAGTCAACCTCTACAGCAGTGTCCTCATCCTGGCCTTCATCAGTCTGGACCGCTACCTGGCCATCGTCCACGCCACCAACAGTCAGAGGCCAAGGAAGCTGTTGGCTGAAAAGGTGGTCTATGTTGGCGTCTGGATCCCTGCCCTCCTGCTGACTATTCCCGACTTCATCTTTGCCAACGTCAGTGAGGCAGATGACAGATATATCTGTGACCGCTTCTACCCCAATGACTTGTGGGTGGTTGTGTTCCAGTTTCAGCACATCATGGTTGGCCTTATCCTGCCTGGTATTGTCATCCTGTCCTGCTATTGCATTATCATCTCCAAGCTGTCACACTCCAAGGGCCACCAGAAGCGCAAGGCCCTCAAGACCACAGTCATCCTCATCCTGGCTTTCTTCGCCTGTTGGCTGCCTTACTACATTGGGATCAGCATCGACTCCTTCATCCTCCTGGAAATCATCAAGCAAGGGTGTGAGTTTGAGAACACTGTGCACAAGTGGATTTCCATCACCGAGGCCCTAGCTTTCTTCCACTGTTGTCTGAACCCCATCCTCTATGCTTTCCTTGGAGCCAAATTTAAAACCTCTGCCCAGCACGCACTCACCTCTGTGAGCAGAGGGTCCAGCCTCAAGATCCTCTCCAAAGGAAAGCGAGGTGGACATTCATCTGTTTCCACTGAGTCTGAGTCTTCAAGTTTTCACTCCAGC,dna,CXCR4,GCF_000001405.26,P61073,Protein coding 
+191,urn:mavedb:00000095-a-1,ATGGACTCCTTGGTTGTGTTGGTTCTGTGTTTATCTTGCTTACTATTGCTGAGCCTTTGGAGACAGTCTTCAGGAAGAGGTAAGTTGCCCCCGGGTCCTACACCCCTGCCTGTGATAGGTAATATATTACAGATCGGGATCAAAGATATCAGTAAGAGTCTGACTAATTTGTCCAAAGTGTATGGTCCAGTGTTTACTCTGTACTTTGGCTTGAAACCCATTGTGGTTTTGCACGGATACGAAGCTGTTAAGGAGGCACTAATCGATTTAGGCGAAGAATTCTCTGGAAGAGGCATTTTTCCATTGGCTGAAAGGGCAAACAGAGGTTTCGGTATCGTTTTTAGTAACGGAAAGAAGTGGAAGGAAATTAGAAGATTTAGTTTAATGACTCTGAGAAATTTCGGTATGGGTAAAAGATCAATTGAGGATAGAGTGCAAGAAGAAGCTCGTTGTCTTGTGGAAGAATTAAGGAAAACCAAAGCATCTCCTTGTGACCCCACTTTCATTCTAGGCTGCGCACCGTGTAACGTTATTTGCTCTATTATTTTTCATAAGAGATTTGACTATAAGGATCAACAATTCTTGAATTTAATGGAAAAGTTGAACGAAAATATTAAAATATTGTCATCACCTTGGATTCAGATTTGCAACAACTTCTCACCAATTATCGACTATTTCCCAGGTACCCACAACAAGCTACTGAAGAATGTGGCTTTCATGAAATCTTACATTCTTGAAAAAGTCAAAGAACACCAAGAATCCATGGACATGAACAATCCACAAGATTTCATTGACTGTTTCCTTATGAAAATGGAGAAGGAGAAGCACAACCAACCCTCAGAATTTACCATAGAATCCCTTGAAAATACCGCTGTCGATTTATTTGGCGCTGGAACAGAGACCACTTCAACTACCTTGCGTTACGCATTACTACTATTACTAAAACACCCAGAAGTGACCGCGAAGGTGCAAGAAGAGATTGAGAGAGTGATCGGCAGAAATAGATCTCCTTGTATGCAAGATAGATCCCATATGCCTTATACTGATGCAGTCGTACATGAAGTTCAACGTTACATCGACTTGTTGCCTACCAGCTTGCCGCATGCAGTTACTTGTGATATTAAGTTTAGGAATTACCTTATTCCGAAAGGTACCACTATTTTGATATCTCTTACCTCCGTCTTACACGATAATAAGGAGTTTCCTAATCCAGAAATGTTCGATCCTCATCACTTCCTTGACGAGGGAGGGAATTTCAAAAAAAGTAAATACTTTATGCCATTCTCAGCGGGAAAAAGAATATGCGTAGGCGAAGCCCTGGCGGGAATGGAATTGTTCTTATTCCTGACGTCCATTCTTCAAAACTTTAACTTGAAATCTCTTGTAGATCCTAAAAATTTAGATACAACTCCGGTTGTTAATGGATTTGCATCAGTGCCTCCTTTTTATCAGCTTTGTTTTATACCCGTT,dna,CYP2C9,GCF_000001405.26,,Protein coding 
+192,urn:mavedb:00000016-a-1,CTCGCCAGCGGTCCGCAGGGCTGGAGACCCACGCCGTGGAGAGGACCAGCCTCAGGTCGCCCCGCCTGGGCCCGCGCCCCGACCTCGCTGCCCCCGCCTCGCCTCTCTGCCCGTGGCGCTTACGGCCACCTTGGCCTCGGGGGCAGGGCATGGGCGGCCCCCGCCAGATCGCCCAGCGCCAGTACTAACTGCCCTCGCTCTGGCCTTCGAGCCCGAAGCCTCTTCTGCGCGCACAACCTAGGCAGTAATCCTAAACTAGCGGGCACCACAGACCAGCTGCAGCCACCCCAACCCAGGGATCACTTCCGGACCCCTCGACCGCCCGGCACCAGCGCGCAAGGGACCCTTCAGCCGGAGACCAGAGTCCAGTCCCGGTCACGAGGCCACCGCCGCTGCCCGCCTCGAGAAGCACCACGCGGGCTGAGCCGTCGGCTAGCGGGTCACTCCCGAGCCTCTGTCTGCACCGCGCCAGCCCCAGACCACGGACGCTGAGCCTCCAGCGCGTGCCAGCCTGGGCCGCTGGGCTCTCGGGGCCAGCCCGCGACGATCCCCTGAGCTCTCCGCAGAAGGGCCGAGCGTCCGTTCCGGGGACGCCAGGCC,dna,FOXE1 promoter,GCF_000001405.26,,Regulatory +193,urn:mavedb:00000029-b-1,TGAACTGGAAAAGCCCTGTCCGGTGAGGGGGCAGAAGGACTCAGCGCCCCTGGACCCCCAAATGCTGCATGAACACATTTTCAGGGGAGCCTGTGCCCCCAGGCGGGGGTCGGGCAGCCCCAGCCCCTCTCCTTTTCCTGGACTCTGGCCGTGCGCGGCAGCCCAGGTGTTTGCTCAGTTGCTGACCCAAAAGTGCTTCATTTTTCGTGCCCGCCCCGCGCCCCGGGCAGGCCAGTCATGTGTTAAGTTGCGCTTCTTTGCTGTGATGTGGGTGGGGGAGGAAGAGTAAACACAGTGCTGGCTCGGCTGCCCTGAGGGTGCTCAATCAAGCACAGGTTTCAAGTCTGGGTTCTGGTGTCCACTCACCCACCCCACCCCCCAAAATCAGACAAATGCTACTTTGTCTAACCTGCTGTGGCCTCTGAGACATGTTCTATTTTTAACCCCTTCTTGGAATTGGCTCTCTTCTTCAAAGGACCAGGTCCTGTTCCTCTTTCTCCCCGACTCCACCCCAGCTCCCTGTGAAGAGAGAGTTAATATATTTGTTTTATTTATTTGCTTTTTGTGTTGGGATGGGTTCGTGTCCAGTCCCGGGGGTCTG,dna,SORT1 enhancer (flipped),GCF_000001405.26,,Regulatory 
+194,urn:mavedb:00000049-a-5,ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA
,dna,MTHFR,GCF_000001405.26,P42898,Protein coding +195,urn:mavedb:00000097-y-1,CTGTCTCCAGCAATTGGGCAGATGTGTGAGGCACCTGTGGTGACCCGAGAGTGGGTGTTGGACAGTGTAGCACTCTACCAGTGCCAGGAGCTGGACACCTACCTGATA,dna,BRCA1 Exon 23,GCF_000001405.13,,Protein coding +196,urn:mavedb:00000005-a-1,ATGCCTTCTGAGACCCCCCAGGCAGAAGTGGGGCCCACAGGCTGCCCCCACCGCTCAGGGCCACACTCGGCGAAGGGGAGCCTGGAGAAGGGGTCCCCAGAGGATAAGGAAGCCAAGGAGCCCCTGTGGATCCGGCCCGATGCTCCGAGCAGGTGCACCTGGCAGCTGGGCCGGCCTGCCTCCGAGTCCCCACATCACCACACTGCCCCGGCAAAATCTCCAAAAATCTTGCCAGATATTCTGAAGAAAATCGGGGACACCCCTATGGTCAGAATCAACAAGATTGGGAAGAAGTTCGGCCTGAAGTGTGAGCTCTTGGCCAAGTGTGAGTTCTTCAACGCGGGCGGGAGCGTGAAGGACCGCATCAGCCTGCGGATGATTGAGGATGCTGAGCGCGACGGGACGCTGAAGCCCGGGGACACGATTATCGAGCCGACATCCGGGAACACCGGGATCGGGCTGGCCCTGGCTGCGGCAGTGAGGGGCTATCGCTGCATCATCGTGATGCCAGAGAAGATGAGCTCCGAGAAGGTGGACGTGCTGCGGGCACTGGGGGCTGAGATTGTGAGGACGCCCACCAATGCCAGGTTCGACTCCCCGGAGTCACACGTGGGGGTGGCCTGGCGGCTGAAGAACGAAATCCCCAATTCTCACATCCTAGACCAGTACCGCAACGCCAGCAACCCCCTGGCTCACTACGACACCACCGCTGATGAGATCCTGCAGCAGTGTGATGGGAAGCTGGACATGCTGGTGGCTTCAGTGGGCACGGGCGGCACCATCACGGGCATTGCCAGGAAGCTGAAGGAGAAGTGTCCTGGATGCAGGATCATTGGGGTGGATCCCGAAGGGTCCATCCTCGCAGAGCCGGAGGAGCTGAACCAGACGGAGCAGACAACCTACGAGGTGGAAGGGATCGGCTACGACTTCATCCCCACGGTGCTGGACAGGACGGTGGTGGACAAGTGGTTCAAGAGCAACGATGAGGAGGCGTTCACCTTTGCCCGCATGCTGATCGCGCAAGAGGGGCTGCTGTGCGGTGGCAGTGCTGGCAGCACGGTGGCGGTGGCCGTGAAGGCCGCGCAGGAGCTGCAGGAGGGCCAGCGCTGCGTGGTCATTCTGCCCGACTCAGTGCGGAACTACATGACCAAGTTCCTGAGCGACAGGTGGATGCTGCAGAAGGGCTTTCTGAAGGAGGAGGACCTCACGGAGAAGAAGCCCTGGTGGTGGCACCTCCGTGTTCAGGAGCTGGGCCTGTCAGCCCCGCTGACCGTGCTCCCGACCATCACCTGTGGGCACACCATCGAGATCCTCCGGGAGAAGGGCTTCGACCAGGCGCCCGTGGTGGATGAGGCGGGGGTAATCCTGGGAATGGTGACGCTTGGGAACATGCTCTCGTCCCTGCTTGCCGGGAAGGTGCAGCCGTCAGACCAAGTTGGCAAAGTCATCTACAAGCAGTTCAAACAGATCCGCCTCACGGACACGCTGGGCAGGCTCTCGCACATCCTGGAGATGGACCACTTCGCCCTGGTGGTGCACGAGCAGATCCAGTACCACAGCACCGGGAAGTCCAGTCAGCGGCAGATGGTGTTCGGGGTGGTCACCGCCATTGACTTGCTGAACTTCGTGGCCGCCCAGGAGCGGGACCAGAAGTGA,dna,CBS,GCF_000001405.26,P35520,Protein coding 
+197,urn:mavedb:00000097-a-1,AAGTTCATTGGAACAGAAAGAAATGGATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAATCTTAGAGTGTCCCATCTGGTAA,dna,BRCA1 Exon 2,GCF_000001405.13,,Protein coding +198,urn:mavedb:00000003-a-1,GATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAATCTTAGAGTGTCCCATCTGCCTGGAGTTGATCAAGGAACCTGTCTCCACAAAGTGTGACCACATATTTTGCAAATTTTGCATGCTGAAACTTCTCAACCAGAAGAAAGGGCCTTCACAGTGTCCTTTATGTAAGAATGATATAACCAAAAGGAGCCTACAAGAAAGTACGAGATTTAGTCAACTTGTTGAAGAGCTATTGAAAATCATTTGTGCTTTTCAGCTTGACACAGGTTTGGAGTATGCAAACAGCTATAATTTTGCAAAAAAGGAAAATAACTCTCCTGAACATCTAAAAGATGAAGTTTCTATCATCCAAAGTATGGGCTACAGAAACCGTGCCAAAAGACTTCTACAGAGTGAACCCGAAAATCCTTCCTTGCAGGAAACCAGTCTCAGTGTCCAACTCTCTAACCTTGGAACTGTGAGAACTCTGAGGACAAAGCAGCGGATACAACCTCAAAGGACGTCTGTCTACATTGAATTGGGATCTGATTCTTCTGAAGATACCGTTAATAAGGCAACTTATTGCAGTGTGGGAGATCAAGAATTGTTACAAATCACCCCTCAAGGAACCAGGGATGAAATCAGTTTGGATTCTGCAAAAAAGGCTGCTTGTGAATTTTCTGAGACGGATGTAACAAATACTGAACATCATCAACCCAGTAATAATGATTTGAACACCACTGAGAAGCGTGCAGCTGAGAGGCATCCAGAAAAGTATCAGGGTAGTTCTGTTTCAAACTTGCATGTGGAGCCATGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGAAAAGGCTGAGTTC,dna,BRCA1 RING domain,GCF_000001405.26,,Protein coding +199,urn:mavedb:00000030-a-1,AGGTTCTGTTTCTTGCTTAGTCACTTTCTGTTTGAACAAAATTGGAATTTCCTTTTTGGATCTGTTTCTTTAATTGTAAATTGAATCGGACTAAAACCTTTCCAATTTTTTCACATGTGAAGACATACACAAAAGTTTTATTGGAGGGTTGCACATGTGAAAGAAAAAGGGAGAAAGCAGGATTGAGCAGGGGGAGCCGTCAGATGGTAATGCAGATGTGATGAGATCTCTGCCGGACCAAAGAGAAGATTCCTTTTTAAATGGTGACAAATTCATGGGCTTTCTCTGCCTCAAAACCTAGCACAGCTGTTATTTACTGAACAATTAGAGAGCTAAGCACTTTTTAGATACTATATAATTTAATTGCCGTATGAGGCACCCTTAGTTTTCAGACGAGAAACCACAGTTACAGGGAAGGCAAGTAACTTAGTCAATGTCAGATAACTAGGAAAAGGTTAGAGGGGCCCTGGACACAGGCCTGTGTGACTGAGAAGCTTGGGCACTTCACTGCTACATTTCATCTCTTCGCTATAAACATTTTAGCTTTTTGTGTTTGCTGACTGGCAACAATACATAGTGAAAGTTCTAATAATTTGTAAT,dna,TCF7L2 enhancer,GCF_000001405.26,,Regulatory 
+200,urn:mavedb:00000005-a-4,ATGCCTTCTGAGACCCCCCAGGCAGAAGTGGGGCCCACAGGCTGCCCCCACCGCTCAGGGCCACACTCGGCGAAGGGGAGCCTGGAGAAGGGGTCCCCAGAGGATAAGGAAGCCAAGGAGCCCCTGTGGATCCGGCCCGATGCTCCGAGCAGGTGCACCTGGCAGCTGGGCCGGCCTGCCTCCGAGTCCCCACATCACCACACTGCCCCGGCAAAATCTCCAAAAATCTTGCCAGATATTCTGAAGAAAATCGGGGACACCCCTATGGTCAGAATCAACAAGATTGGGAAGAAGTTCGGCCTGAAGTGTGAGCTCTTGGCCAAGTGTGAGTTCTTCAACGCGGGCGGGAGCGTGAAGGACCGCATCAGCCTGCGGATGATTGAGGATGCTGAGCGCGACGGGACGCTGAAGCCCGGGGACACGATTATCGAGCCGACATCCGGGAACACCGGGATCGGGCTGGCCCTGGCTGCGGCAGTGAGGGGCTATCGCTGCATCATCGTGATGCCAGAGAAGATGAGCTCCGAGAAGGTGGACGTGCTGCGGGCACTGGGGGCTGAGATTGTGAGGACGCCCACCAATGCCAGGTTCGACTCCCCGGAGTCACACGTGGGGGTGGCCTGGCGGCTGAAGAACGAAATCCCCAATTCTCACATCCTAGACCAGTACCGCAACGCCAGCAACCCCCTGGCTCACTACGACACCACCGCTGATGAGATCCTGCAGCAGTGTGATGGGAAGCTGGACATGCTGGTGGCTTCAGTGGGCACGGGCGGCACCATCACGGGCATTGCCAGGAAGCTGAAGGAGAAGTGTCCTGGATGCAGGATCATTGGGGTGGATCCCGAAGGGTCCATCCTCGCAGAGCCGGAGGAGCTGAACCAGACGGAGCAGACAACCTACGAGGTGGAAGGGATCGGCTACGACTTCATCCCCACGGTGCTGGACAGGACGGTGGTGGACAAGTGGTTCAAGAGCAACGATGAGGAGGCGTTCACCTTTGCCCGCATGCTGATCGCGCAAGAGGGGCTGCTGTGCGGTGGCAGTGCTGGCAGCACGGTGGCGGTGGCCGTGAAGGCCGCGCAGGAGCTGCAGGAGGGCCAGCGCTGCGTGGTCATTCTGCCCGACTCAGTGCGGAACTACATGACCAAGTTCCTGAGCGACAGGTGGATGCTGCAGAAGGGCTTTCTGAAGGAGGAGGACCTCACGGAGAAGAAGCCCTGGTGGTGGCACCTCCGTGTTCAGGAGCTGGGCCTGTCAGCCCCGCTGACCGTGCTCCCGACCATCACCTGTGGGCACACCATCGAGATCCTCCGGGAGAAGGGCTTCGACCAGGCGCCCGTGGTGGATGAGGCGGGGGTAATCCTGGGAATGGTGACGCTTGGGAACATGCTCTCGTCCCTGCTTGCCGGGAAGGTGCAGCCGTCAGACCAAGTTGGCAAAGTCATCTACAAGCAGTTCAAACAGATCCGCCTCACGGACACGCTGGGCAGGCTCTCGCACATCCTGGAGATGGACCACTTCGCCCTGGTGGTGCACGAGCAGATCCAGTACCACAGCACCGGGAAGTCCAGTCAGCGGCAGATGGTGTTCGGGGTGGTCACCGCCATTGACTTGCTGAACTTCGTGGCCGCCCAGGAGCGGGACCAGAAGTGA,dna,CBS,GCF_000001405.26,P35520,Protein coding 
+201,urn:mavedb:00000034-a-1,TGAGATATGGCTTCATTTTCTGTAATAAACACTAAGATCAAAACATGACCCAAGTTAAATTTCCTTGCAGGGTTCCCAGCAGGGGCTTCCCTTTTGTCTGTGATTTCCTCTCACCCACCAGAACCAGGCCAAATATGCGCATGTGCCACTAACACTAAGCAGCACTTCCTTAATCACTCATTTCCAACAATTTATGGATCATCAGTGGCAAAAAACGAGCAAAAATAATGAAAGAATGCAATGAAAGCTCGTGGAGACAGAGGCTGGACTTCCTACTCACTCTGTGTCTCTTTAAGATGGAGGCCTGATACAAATTAGCCACTGGGGGGAAAAAGTCATCTGGTCATAAAATACAGTACAAGGTCACTTTTATGTAAGTTTGCCAAAAGGGACATAAACCAGGACAATTTCAAACTGTGACACAGGATAGAAACATATTAAAAAAATCTTTGTTCCTCCTCTATTGTGCTGTCATGTTGCTCAGCA,dna,ZRS enhancer,GCF_000001405.26,,Regulatory +202,urn:mavedb:00000023-a-2,AGCTCTTCACCGGAGACCCAAATACAACAAATCAAGTCGCCTGCCCTGGCGACACTTTCGAAGGACTGGAGTGGGAATCAGAGCTTCACGGGTTAAAAAGCCGATGTCACATCGGCCGTTCGAAACTCCTCCTCTTGCAGTGAGGTGAAGACATTTGAAAATCACCCCACTGCAAACTCCTCCCCCTGCTAGAAACCTCACATTGAAATGCTGTAAATGACGTGGGCCCCGAGTGCAATCGCGGGAAGCCAGGGTTTCCAGCTAGGACACAGCAGGTCGTGATCCGGGTCGGGACACTGCCTGGCAGAGGCTGCGAGC,dna,LDLR promoter,GCF_000001405.26,,Regulatory +203,urn:mavedb:00000002-a-1,GACGTTCCACTGCCGGCTGGTTGGGAAATGGCTAAAACTAGTTCTGGTCAGCGTTACTTCCTGAACCACATCGACCAGACCACCACGTGGCAGGACCCGCGT,dna,hYAP65 WW domain,GCF_000001405.26,P46937,Protein coding +204,urn:mavedb:00000107-a-1,MDAPRQVVNFGPGPAKLPHSVLLEIQKELLDYKGVGISVLEMSHRSSDFAKIINNTENLVRELLAVPDNYKVIFLQGGGCGQFSAVPLNLIGLKAGRCADYVVTGAWSAKAAEEAKKFGTINIVHPKLGSYTKIPDPSTWNLNPDASYVYYCANETVHGVEFDFIPDVKGAVLVCDMSSNFLSKPVDVSKFGVIFAGAQKNVGSAGVTVVIVRDDLLGFALRECPSVLEYKVQAGNSSLYNTPPCFSIYVMGLVLEWIKNNGGAAAMEKLSSIKSQTIYEIIDNSQGFYVCPVEPQNRSKMNIPFRIGNAKGDDALEKRFLDKALELNMLSLKGHRSVGGIRASLYNAVTIEDVQKLAAFMKKFLEMHQL,protein,PSAT1,GCF_000001405.26,,Protein coding 
+205,urn:mavedb:00000103-d-1,MAAAAAAGAGPEMVRGQVFDVGPRYTNLSYIGEGAYGMVCSAYDNVNKVRVAIKKISPFEHQTYCQRTLREIKILLRFRHENIIGINDIIRAPTIEQMKDVYIVQDLMETDLYKLLKTQHLSNDHICYFLYQILRGLKYIHSANVLHRDLKPSNLLLNTTCDLKICDFGLARVADPDHDHTGFLTEYVATRWYRAPEIMLNSKGYTKSIDIWSVGCILAEMLSNRPIFPGKHYLDQLNHILGILGSPSQEDLNCIINLKARNYLLSLPHKNKVPWNRLFPNADSKALDLLDKMLTFNPHKRIEVEQALAHPYLEQYYDPSDEPIAEAPFKFDMELDDLPKEKLKELIFEETARFQPGYRS,protein,MAPK1,GCF_000001405.26,,Protein coding +206,urn:mavedb:00000029-a-2,GAACTGGAAAAGCCCTGTCCGGTGAGGGGGCAGAAGGACTCAGCGCCCCTGGACCCCCAAATGCTGCATGAACACATTTTCAGGGGAGCCTGTGCCCCCAGGCGGGGGTCGGGCAGCCCCAGCCCCTCTCCTTTTCCTGGACTCTGGCCGTGCGCGGCAGCCCAGGTGTTTGCTCAGTTGCTGACCCAAAAGTGCTTCATTTTTCGTGCCCGCCCCGCGCCCCGGGCAGGCCAGTCATGTGTTAAGTTGCGCTTCTTTGCTGTGATGTGGGTGGGGGAGGAAGAGTAAACACAGTGCTGGCTCGGCTGCCCTGAGGGTGCTCAATCAAGCACAGGTTTCAAGTCTGGGTTCTGGTGTCCACTCACCCACCCCACCCCCCAAAATCAGACAAATGCTACTTTGTCTAACCTGCTGTGGCCTCTGAGACATGTTCTATTTTTAACCCCTTCTTGGAATTGGCTCTCTTCTTCAAAGGACCAGGTCCTGTTCCTCTTTCTCCCCGACTCCACCCCAGCTCCCTGTGAAGAGAGAGTTAATATATTTGTTTTATTTATTTGCTTTTTGTGTTGGGATGGGTTCGTGTCCAGTCCCGGGGGTCTG,dna,SORT1 enhancer,GCF_000001405.26,,Regulatory +207,urn:mavedb:00000061-b-1,TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC,dna,RAF,GCF_000001405.26,P04049,Protein coding +208,urn:mavedb:00000097-q-1,TTTCTTTCAGCATGATTTTGAAGTCAGAGGAGATGTGGTCAATGGAAGAAACCACCAAGGTCCAAAGCGAGCAAGAGAATCCCAGGACAGAAAGGTAAAGCTCC,dna,BRCA1 Exon 19,GCF_000001405.13,,Protein coding 
+209,urn:mavedb:00000109-a-1,ATGCAGTATCTAAATATAAAAGAGGACTGCAATGCCATGGCTTTCTGTGCTAAAATGAGGAGCTCCAAGAAGACTGAGGTGAACCTGGAGGCCCCTGAGCCAGGGGTGGAAGTGATCTTCTATCTGTCGGACAGGGAGCCCCTCCGGCTGGGCAGTGGAGAGTACACAGCAGAGGAACTGTGCATCAGGGCTGCACAGGCATGCCGTATCTCTCCTCTTTGTCACAACCTCTTTGCCCTGTATGACGAGAACACCAAGCTCTGGTATGCTCCAAATCGCACCATCACCGTTGATGACAAGATGTCCCTCCGGCTCCACTACCGGATGAGGTTCTATTTCACCAATTGGCATGGAACCAACGACAATGAGCAGTCAGTGTGGCGTCATTCTCCAAAGAAGCAGAAAAATGGCTACGAGAAAAAAAAGATTCCAGATGCAACCCCTCTCCTTGATGCCAGCTCACTGGAGTATCTGTTTGCTCAGGGACAGTATGATTTGGTGAAATGCCTGGCTCCTATTCGAGACCCCAAGACCGAGCAGGATGGACATGATATTGAGAACGAGTGTCTAGGGATGGCTGTCCTGGCCATCTCACACTATGCCATGATGAAGAAGATGCAGTTGCCAGAACTGCCCAAGGACATCAGCTACAAGCGATATATTCCAGAAACATTGAATAAGTCCATCAGACAGAGGAACCTTCTCACCAGGATGCGGATAAATAATGTTTTCAAGGATTTCCTAAAGGAATTTAACAACAAGACCATTTGTGACAGCAGCGTGTCCACGCATGACCTGAAGGTGAAATACTTGGCTACCTTGGAAACTTTGACAAAACATTACGGTGCTGAAATATTTGAGACTTCCATGTTACTGATTTCATCAGAAAATGAGATGAATTGGTTTCATTCGAATGACGGTGGAAACGTTCTCTACTACGAAGTGATGGTGACTGGGAATCTTGGAATCCAGTGGAGGCATAAACCAAATGTTGTTTCTGTTGAAAAGGAAAAAAATAAACTGAAGCGGAAAAAACTGGAAAATAAACACAAGAAGGATGAGGAGAAAAACAAGATCCGGGAAGAGTGGAACAATTTTTCTTACTTCCCTGAAATCACTCACATTGTAATAAAGGAGTCTGTGGTCAGCATTAACAAGCAGGACAACAAGAAAATGGAACTGAAGCTCTCTTCCCACGAGGAGGCCTTGTCCTTTGTGTCCCTGGTAGATGGCTACTTCCGGCTCACAGCAGATGCCCATCATTACCTCTGCACCGACGTGGCCCCCCCGTTGATCGTCCACAACATACAGAATGGCTGTCATGGTCCAATCTGTACAGAATACGCCATCAATAAATTGCGGCAAGAAGGAAGCGAGGAGGGGATGTACGTGCTGAGGTGGAGCTGCACCGACTTTGACAACATCCTCATGACCGTCACCTGCTTTGAGAAGTCTGAGCAGGTGCAGGGTGCCCAGAAGCAGTTCAAGAACTTTCAGATCGAGGTGCAGAAGGGCCGCTACAGTCTGCACGGTTCGGACCGCAGCTTCCCCAGCTTGGGAGACCTCATGAGCCACCTCAAGAAGCAGATCCTGCGCACGGATAACATCAGCTTCATGCTAAAACGCTGCTGCCAGCCCAAGCCCCGAGAAATCTCCAACCTGCTGGTGGCTACTAAGAAAGCCCAGGAGTGGCAGCCCGTCTACCCCATGAGCCAGCTGAGTTTCGATCGGATCCTCAAGAAGGATCTGGTGCAGGGCGAGCACCTTGGGAGAGGCACGAGAACACACATCTATTCTGGGACCCTGATGGATTACAAGGATGACGAAGGAACTTCTGAAGAGAAGAAGATAAAAGTGATCCTCAAAGTCTTAGACCCCAGCCACAGGGATATTTCCCTGGCCTTCTTCGAGGCAGCCAGCATGATGAGACAGGTCTCCCACAAACACATCGTGTACCTCTATGGCGTCTGT
GTCCGCGACGTGGAGAATATCATGGTGGAAGAGTTTGTGGAAGGGGGTCCTCTGGATCTCTTCATGCACCGGAAAAGCGATGTCCTTACCACACCATGGAAATTCAAAGTTGCCAAACAGCTGGCCAGTGCCCTGAGCTACTTGGAGGATAAAGACCTGGTCCATGGAAATGTGTGTACTAAAAACCTCCTCCTGGCCCGTGAGGGCATCGACAGTGAGTGTGGCCCATTCATCAAGCTCAGTGACCCCGGCATCCCCATTACGGTGCTGTCTAGGCAAGAATGCATTGAACGAATCCCATGGATTGCTCCTGAGTGTGTTGAGGACTCCAAGAACCTGAGTGTGGCTGCTGACAAGTGGAGCTTTGGAACCACGCTCTGGGAAATCTGCTACAATGGCGAGATCCCCTTGAAAGACAAGACGCTGATTGAGAAAGAGAGATTCTATGAAAGCCGGTGCAGGCCAGTGACACCATCATGTAAGGAGCTGGCTGACCTCATGACCCGCTGCATGAACTATGACCCCAATCAGAGGCCTTTCTTCCGAGCCATCATGAGAGACATTAATAAGCTTGAAGAGCAGAATCCAGATATTGTTTCAGAAAAAAAACCAGCAACTGAAGTGGACCCCACACATTTTGAAAAGCGCTTCCTAAAGAGGATCCGTGACTTGGGAGAGGGCCACTTTGGGAAGGTTGAGCTCTGCAGGTATGACCCCGAAGGGGACAATACAGGGGAGCAGGTGGCTGTTAAATCTCTGAAGCCTGAGAGTGGAGGTAACCACATAGCTGATCTGAAAAAGGAAATCGAGATCTTAAGGAACCTCTATCATGAGAACATTGTGAAGTACAAAGGAATCTGCACAGAAGACGGAGGAAATGGTATTAAGCTCATCATGGAATTTCTGCCTTCGGGAAGCCTTAAGGAATATCTTCCAAAGAATAAGAACAAAATAAACCTCAAACAGCAGCTAAAATATGCCGTTCAGATTTGTAAGGGGATGGACTATTTGGGTTCTCGGCAATACGTTCACCGGGACTTGGCAGCAAGAAATGTCCTTGTTGAGAGTGAACACCAAGTGAAAATTGGAGACTTCGGTTTAACCAAAGCAATTGAAACCGATAAGGAGTATTACACCGTCAAGGATGACCGGGACAGCCCTGTGTTTTGGTATGCTCCAGAATGTTTAATGCAATCTAAATTTTATATTGCCTCTGACGTCTGGTCTTTTGGAGTCACTCTGCATGAGCTGCTGACTTACTGTGATTCAGATTCTAGTCCCATGGCTTTGTTCCTGAAAATGATAGGCCCAACCCATGGCCAGATGACAGTCACAAGACTTGTGAATACGTTAAAAGAAGGAAAACGCCTGCCGTGCCCACCTAACTGTCCAGATGAGGTTTATCAACTTATGAGGAAATGCTGGGAATTCCAACCATCCAATCGGACAAGCTTTCAGAACCTTATTGAAGGATTTGAAGCACTTTTAAAATAA,dna,JAK,GCF_000001405.26,,protein_coding +210,urn:mavedb:00000112-a-1,MLFNLRILLNNAAFRNGHNFMVRNFRCGQPLQNKVQLKGRDLLTLKNFTGEEIKYMLWLSADLKFRIKQKGEYLPLLQGKSLGMIFEKRSTRTRLSTETGFALLGGHPCFLTTQDIHLGVNESLTDTARVLSSMADAVLARVYKQSDLDTLAKEASIPIINGLSDLYHPIQILADYLTLQEHYSSLKGLTLSWIGDGNNILHSIMMSAAKFGMHLQAATPKGYEPDASVTKLAEQYAKENGTKLLLTNDPLEAAHGGNVLITDTWISMGQEEEKKKRLQAFQGYQVTMKTAKVAASDWTFLHCLPRKPEEVDDEVFYSPRSLVFPEAENRKWTIMAVMVSLLTDYSPQLQKPKF,protein,OTC,GCF_000001405.26,,protein_coding 
+211,urn:mavedb:00000113-a-1,DAEFRHDSGYEVHHQKLVFFAEDVGSNKGAIIGLMVGGVVIA,protein,APP,GCF_000001405.26,P05067,protein_coding +212,urn:mavedb:00000113-a-2,DAEFRHDSGYEVHHQKLVFFAEDVGSNKGAIIGLMVGGVVIA,protein,APP,GCF_000001405.26,P05067,protein_coding +213,urn:mavedb:00000113-a-3,DAEFRHDSGYEVHHQKLVFFAEDVGSNKGAIIGLMVGGVVIA,protein,APP,GCF_000001405.26,P05067,protein_coding diff --git a/notebooks/analysis/mave_mapping_fig_3b.R b/notebooks/analysis/mave_mapping_fig_3b.R index cd5caa0..8ff92c2 100644 --- a/notebooks/analysis/mave_mapping_fig_3b.R +++ b/notebooks/analysis/mave_mapping_fig_3b.R @@ -37,7 +37,7 @@ df <- data.frame('Experiment Cellular Context' = names, 'value' = context_counts ggplot(df, aes(x = factor(Experiment.Cellular.Context, levels = c('Human', 'Yeast', 'Bacteria', 'Mouse', 'Bacteriophage', 'N/A')), y = value, fill = rownames(df))) + geom_bar(stat = 'identity', fill = c("#F8766D","#B79F00","#90ee90","#00BFC4","#619CFF","#F564E3")) + - geom_text(aes(label = value), vjust = ifelse(df$value != 92, -1, 3), size = 10, colour = ifelse(df$value == 97, 'white', 'black')) + + geom_text(aes(label = value), vjust = ifelse(df$value != 86, -1, 3), size = 10, colour = ifelse(df$value == 97, 'white', 'black')) + xlab('MAVE Experiment Cellular Context') + ylab('Number of Experiments') + scale_y_continuous(expand = c(0, 0)) + diff --git a/notebooks/sally/mavedb_mapping.ipynb b/notebooks/sally/mavedb_mapping.ipynb new file mode 100644 index 0000000..5f38e1c --- /dev/null +++ b/notebooks/sally/mavedb_mapping.ipynb @@ -0,0 +1,3322 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3e66de94", + "metadata": {}, + "source": [ + "# MaveDB Mapping" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f9dce63c", + "metadata": {}, + "outputs": [], + "source": [ + "# Load Required Packages\n", + "import io\n", + "import re\n", + "import requests\n", + "import hgvs\n", + "import base64, hashlib\n", + "from ga4gh.vrs import models, vrs_deref, vrs_enref\n", + 
"from ga4gh.core import ga4gh_identify, ga4gh_serialize, ga4gh_digest, ga4gh_deref, sha512t24u\n", + "from ga4gh.vrs.extras.translator import AlleleTranslator\n", + "from ga4gh.vrs.dataproxy import SeqRepoDataProxy\n", + "from ga4gh.vrs.normalize import normalize\n", + "import pandas as pd\n", + "from gene.query import QueryHandler\n", + "from gene.database import create_db\n", + "import nest_asyncio\n", + "import asyncio\n", + "#from cool_seq_tool.cool_seq_tool import CoolSeqTool\n", + "#from cool_seq_tool.data_sources.uta_database import UTADatabase\n", + "from cool_seq_tool.sources.uta_database import UtaDatabase as UTADatabase # use alias for back-compatibility with the rest of the notebook\n", + "#from cool_seq_tool.data_sources.mane_transcript_mappings import MANETranscriptMappings\n", + "from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings as MANETranscriptMappings # use alias for back-compatibility with the rest of the notebook\n", + "import pickle\n", + "from os import environ\n", + "import Bio\n", + "from Bio.SeqUtils import seq1\n", + "from Bio.Seq import Seq\n", + "from biocommons.seqrepo import SeqRepo\n", + "from bs4 import BeautifulSoup\n", + "sr = SeqRepo(\"/usr/local/share/seqrepo/2021-01-29\", writeable = True)\n", + "environ[\"UTA_DB_URL\"] = 'postgresql://anonymous:anonymous@uta.biocommons.org:5432/uta'\n", + "environ[\"GENE_NORM_DB_URL\"] = 'postgres://postgres:postgres@localhost:5432/gene_normalizer'\n", + "#from pyliftover import LiftOver\n", + "import subprocess\n", + "import mavehgvs" + ] + }, + { + "cell_type": "markdown", + "id": "ba98fdd2", + "metadata": {}, + "source": [ + "# The blocks below can be run to reproduce the output in the results directory" + ] + }, + { + "cell_type": "markdown", + "id": "a6d05229", + "metadata": {}, + "source": [ + "## Process Metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "caa00b72", + "metadata": {}, + "outputs": [], + "source": [ + "# copied 
with slight mod from src/dcd_mapping/resources.py\n", + "def _get_uniprot_ref(scoreset_json):\n", + " \"\"\"Extract UniProt reference from scoreset metadata if available.\n", + "\n", + " :param scoreset_json: parsed JSON from scoresets API\n", + " :return: UniProt ID if available\n", + " \"\"\"\n", + " ext_ids = scoreset_json[\"targetGenes\"][0].get(\"externalIdentifiers\")\n", + " if not ext_ids:\n", + " return None\n", + " for ext_id in ext_ids:\n", + " if ext_id.get(\"identifier\", {}).get(\"dbName\") == \"UniProt\":\n", + " return f\"uniprot:{ext_id['identifier']['identifier']}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "25a899bc", + "metadata": {}, + "outputs": [], + "source": [ + "# this replaces the several blocks above\n", + "# only get metadata for a subset of scoresets\n", + "# and use new API documentation to parse the json responses\n", + "\n", + "urns = list()\n", + "target_sequences = list()\n", + "target_sequence_type = list()\n", + "targets = list()\n", + "assembly = list()\n", + "uniprot = list()\n", + "target_type = list()\n", + "\n", + "scoreset_urns = ['urn:mavedb:00000041-a-1',\n", + " 'urn:mavedb:00000048-a-1',\n", + " 'urn:mavedb:00000068-b-1,'\n", + " 'urn:mavedb:00000045-c-1',\n", + " 'urn:mavedb:00000018-a-1',\n", + " 'urn:mavedb:00000107-a-1',\n", + " 'urn:mavedb:00000103-d-1',\n", + " 'urn:mavedb:00000029-a-2',\n", + " 'urn:mavedb:00000061-b-1',\n", + " 'urn:mavedb:00000097-q-1',\n", + " 'urn:mavedb:00000003-a-1',\n", + " 'urn:mavedb:00000097-i-1',\n", + " 'urn:mavedb:00000099-a-1'\n", + " ]\n", + "for scoreset_urn in scoreset_urns:\n", + " url = f\"https://api.mavedb.org/api/v1/score-sets/{scoreset_urn}\"\n", + " response = requests.get(url)\n", + " json_parse = response.json()\n", + " if 'targetGenes' in json_parse:\n", + " if len(json_parse['targetGenes']) == 1:\n", + " if json_parse['targetGenes'][0]['targetSequence']['reference']['organismName'] == 'Homo sapiens':\n", + " 
urns.append(json_parse['urn'])\n", + " target_sequences.append(json_parse['targetGenes'][0]['targetSequence']['sequence'])\n", + " target_sequence_type.append(json_parse['targetGenes'][0]['targetSequence']['sequenceType'])\n", + " targets.append(json_parse['targetGenes'][0]['name'])\n", + " assembly.append(json_parse['targetGenes'][0]['targetSequence']['reference']['shortName'])\n", + " uniprot.append(_get_uniprot_ref(json_parse))\n", + " target_type.append(json_parse['targetGenes'][0]['category'])\n", + "\n", + "# Create, save dataframe\n", + "dat = {'urn': urns, 'target_sequence': target_sequences, 'target_sequence_type': target_sequence_type, 'target':targets, \n", + " 'assembly_id':assembly, 'uniprot_id':uniprot, 'target_type':target_type}\n", + "dat = pd.DataFrame(data=dat)\n", + "dat.to_csv('mave_dat.csv')" + ] + }, + { + "cell_type": "markdown", + "id": "de00b98f", + "metadata": {}, + "source": [ + "## Part 1: MaveDB Metadata to BLAT Alignment Data" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "id": "436f661c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
urntarget_sequencetarget_sequence_typetargetassembly_iduniprot_idtarget_type
0urn:mavedb:00000041-a-1CTGCGGCTGGAGGTCAAGCTGGGCCAGGGCTGCTTTGGCGAGGTGT...dnaSrc catalytic domainhg38uniprot:P12931Protein coding
1urn:mavedb:00000048-a-1GAGGGGATCAGTATATACACTTCAGATAACTACACCGAGGAAATGG...dnaCXCR4hg38uniprot:P61073Protein coding
2urn:mavedb:00000018-a-1GGTGTCTGTTTGAGGTTGCTAGTGAACACAGTTGTGTCAGAAGCAA...dnaHBB promoterhg38NaNRegulatory
3urn:mavedb:00000107-a-1MDAPRQVVNFGPGPAKLPHSVLLEIQKELLDYKGVGISVLEMSHRS...proteinPSAT1hg38NaNProtein coding
4urn:mavedb:00000103-d-1MAAAAAAGAGPEMVRGQVFDVGPRYTNLSYIGEGAYGMVCSAYDNV...proteinMAPK1hg38NaNProtein coding
5urn:mavedb:00000029-a-2GAACTGGAAAAGCCCTGTCCGGTGAGGGGGCAGAAGGACTCAGCGC...dnaSORT1 enhancerhg38NaNRegulatory
6urn:mavedb:00000061-b-1TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAA...dnaRAFhg38uniprot:P04049Protein coding
7urn:mavedb:00000097-q-1TTTCTTTCAGCATGATTTTGAAGTCAGAGGAGATGTGGTCAATGGA...dnaBRCA1 Exon 19hg19NaNProtein coding
8urn:mavedb:00000003-a-1GATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATG...dnaBRCA1 RING domainhg38NaNProtein coding
9urn:mavedb:00000097-i-1AGTGTGAGCAGGGAGAAGCCAGAATTGACAGCTTCAACAGAAAGGG...dnaBRCA1 Exon 15hg19NaNProtein coding
10urn:mavedb:00000099-a-1ATGAATGGCACAGAAGGCCCTAACTTCTACGTGCCCTTCTCCAATG...dnaRHOhg38NaNProtein coding
\n", + "
" + ], + "text/plain": [ + " urn \\\n", + "0 urn:mavedb:00000041-a-1 \n", + "1 urn:mavedb:00000048-a-1 \n", + "2 urn:mavedb:00000018-a-1 \n", + "3 urn:mavedb:00000107-a-1 \n", + "4 urn:mavedb:00000103-d-1 \n", + "5 urn:mavedb:00000029-a-2 \n", + "6 urn:mavedb:00000061-b-1 \n", + "7 urn:mavedb:00000097-q-1 \n", + "8 urn:mavedb:00000003-a-1 \n", + "9 urn:mavedb:00000097-i-1 \n", + "10 urn:mavedb:00000099-a-1 \n", + "\n", + " target_sequence target_sequence_type \\\n", + "0 CTGCGGCTGGAGGTCAAGCTGGGCCAGGGCTGCTTTGGCGAGGTGT... dna \n", + "1 GAGGGGATCAGTATATACACTTCAGATAACTACACCGAGGAAATGG... dna \n", + "2 GGTGTCTGTTTGAGGTTGCTAGTGAACACAGTTGTGTCAGAAGCAA... dna \n", + "3 MDAPRQVVNFGPGPAKLPHSVLLEIQKELLDYKGVGISVLEMSHRS... protein \n", + "4 MAAAAAAGAGPEMVRGQVFDVGPRYTNLSYIGEGAYGMVCSAYDNV... protein \n", + "5 GAACTGGAAAAGCCCTGTCCGGTGAGGGGGCAGAAGGACTCAGCGC... dna \n", + "6 TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAA... dna \n", + "7 TTTCTTTCAGCATGATTTTGAAGTCAGAGGAGATGTGGTCAATGGA... dna \n", + "8 GATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATG... dna \n", + "9 AGTGTGAGCAGGGAGAAGCCAGAATTGACAGCTTCAACAGAAAGGG... dna \n", + "10 ATGAATGGCACAGAAGGCCCTAACTTCTACGTGCCCTTCTCCAATG... 
dna \n", + "\n", + " target assembly_id uniprot_id target_type \n", + "0 Src catalytic domain hg38 uniprot:P12931 Protein coding \n", + "1 CXCR4 hg38 uniprot:P61073 Protein coding \n", + "2 HBB promoter hg38 NaN Regulatory \n", + "3 PSAT1 hg38 NaN Protein coding \n", + "4 MAPK1 hg38 NaN Protein coding \n", + "5 SORT1 enhancer hg38 NaN Regulatory \n", + "6 RAF hg38 uniprot:P04049 Protein coding \n", + "7 BRCA1 Exon 19 hg19 NaN Protein coding \n", + "8 BRCA1 RING domain hg38 NaN Protein coding \n", + "9 BRCA1 Exon 15 hg19 NaN Protein coding \n", + "10 RHO hg38 NaN Protein coding " + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dat = pd.read_csv('mave_dat.csv', index_col=0)\n", + "dat" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b2de5a8a", + "metadata": {}, + "outputs": [], + "source": [ + "# Alignment Helper Function\n", + "def get_gene_data(i, blat_chr, return_chr):\n", + " qh = QueryHandler(create_db())\n", + " try:\n", + " uniprot = dat.at[i,'uniprot_id']\n", + " gsymb = qh.normalize(str(f'uniprot:{uniprot}')).gene_descriptor.label\n", + " except:\n", + " try:\n", + " target = dat.at[i, 'target'].split(' ')[0]\n", + " gsymb = qh.normalize(target).gene_descriptor.label\n", + " except:\n", + " return 'NA' # if gsymb cannot be extracted\n", + " \n", + " temp = qh.search(gsymb).source_matches\n", + " source_dict = {}\n", + " for i in range(len(temp)):\n", + " source_dict[temp[i].source] = i\n", + " \n", + " if 'HGNC' in source_dict and return_chr == True:\n", + " chrom = temp[source_dict['HGNC']].records[0].locations[0].chr\n", + " return chrom\n", + " \n", + " if 'Ensembl' in source_dict and return_chr == False and len(temp[source_dict['Ensembl']].records) != 0:\n", + " for j in range(len(temp[source_dict['Ensembl']].records)):\n", + " for k in range(len(temp[source_dict['Ensembl']].records[j].locations)):\n", + " if 
temp[source_dict['Ensembl']].records[j].locations[k].interval.type == 'SequenceInterval': # Multiple records per source\n", + " start = temp[source_dict['Ensembl']].records[j].locations[k].interval.start.value\n", + " end = temp[source_dict['Ensembl']].records[j].locations[k].interval.end.value\n", + " loc_list = {}\n", + " loc_list['start'] = start\n", + " loc_list['end'] = end\n", + " return loc_list\n", + " if 'NCBI' in source_dict and return_chr == False and len(temp[source_dict['NCBI']].records) != 0:\n", + " for j in range(len(temp[source_dict['NCBI']].records)):\n", + " for k in range(len(temp[source_dict['NCBI']].records[j].locations)):\n", + " if temp[source_dict['NCBI']].records[j].locations[k].interval.type == 'SequenceInterval':\n", + " start = temp[source_dict['NCBI']].records[j].locations[k].interval.start.value\n", + " end = temp[source_dict['NCBI']].records[j].locations[k].interval.end.value\n", + " loc_list = {}\n", + " loc_list['start'] = start\n", + " loc_list['end'] = end\n", + " return loc_list\n", + " return 'NA' " + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "b52e25b8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Query: RHO\n", + " \n", + " Hit: chr3 (198295559)\n", + " \n", + " HSPs: ---- -------- --------- ------ --------------- ---------------------\n", + " # E-value Bit score Span Query range Hit range\n", + " ---- -------- --------- ------ --------------- ---------------------\n", + " 0 ? ? ? 
[0:1047] [129528733:129533718]\n", + "129528733\n" + ] + } + ], + "source": [ + "# playing around with blat results\n", + "from Bio import SearchIO\n", + "result = SearchIO.read('blat_out.psl', 'blat-psl')\n", + "print(result[0])\n", + "print(result[0][0].hit_start)" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "id": "ae862b4f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "urn:mavedb:00000041-a-1\n", + "Loaded 3209286105 letters in 455 sequences\n", + "Searched 750 bases in 1 sequences\n", + "added urn:mavedb:00000041-a-1 to dict\n", + "urn:mavedb:00000048-a-1\n", + "Loaded 3209286105 letters in 455 sequences\n", + "Searched 1053 bases in 1 sequences\n", + "added urn:mavedb:00000048-a-1 to dict\n", + "urn:mavedb:00000018-a-1\n", + "Loaded 3209286105 letters in 455 sequences\n", + "Searched 187 bases in 1 sequences\n", + "added urn:mavedb:00000018-a-1 to dict\n", + "urn:mavedb:00000107-a-1\n", + "Loaded 3209286105 letters in 455 sequences\n", + "Blatx 455 sequences in database, 1 files in query\n", + "added urn:mavedb:00000107-a-1 to dict\n", + "urn:mavedb:00000103-d-1\n", + "Loaded 3209286105 letters in 455 sequences\n", + "Blatx 455 sequences in database, 1 files in query\n", + "added urn:mavedb:00000103-d-1 to dict\n", + "urn:mavedb:00000029-a-2\n", + "Loaded 3209286105 letters in 455 sequences\n", + "Searched 600 bases in 1 sequences\n", + "added urn:mavedb:00000029-a-2 to dict\n", + "urn:mavedb:00000061-b-1\n", + "Loaded 3209286105 letters in 455 sequences\n", + "Searched 117 bases in 1 sequences\n", + "added urn:mavedb:00000061-b-1 to dict\n", + "urn:mavedb:00000097-q-1\n", + "Loaded 3209286105 letters in 455 sequences\n", + "Searched 104 bases in 1 sequences\n", + "added urn:mavedb:00000097-q-1 to dict\n", + "urn:mavedb:00000003-a-1\n", + "Loaded 3209286105 letters in 455 sequences\n", + "Searched 909 bases in 1 sequences\n", + "added urn:mavedb:00000003-a-1 to dict\n", + 
"urn:mavedb:00000097-i-1\n", + "Loaded 3209286105 letters in 455 sequences\n", + "Searched 106 bases in 1 sequences\n", + "added urn:mavedb:00000097-i-1 to dict\n", + "urn:mavedb:00000099-a-1\n", + "Loaded 3209286105 letters in 455 sequences\n", + "Searched 1047 bases in 1 sequences\n", + "added urn:mavedb:00000099-a-1 to dict\n" + ] + } + ], + "source": [ + "# Get Query and Hit Ranges for Each Human Target Sequence\n", + "from Bio import SearchIO\n", + "mave_blat_dict = {}\n", + "blat_exec_path = '/Users/sallybg/workspace/blat/bin/blat'\n", + "blat_ref_path = '/Users/sallybg/workspace/blat/hg38.2bit'\n", + "\n", + "for i in range(len(dat.index)):\n", + " print(dat.at[i, 'urn'])\n", + " blat_file = open('blat_query.fa', 'w')\n", + " blat_file.write('>' + dat.at[i, 'target'] + '\\n')\n", + " blat_file.write(dat.at[i, 'target_sequence'] + '\\n')\n", + " blat_file.close()\n", + "\n", + " if dat.at[i, 'target_sequence_type'] == 'protein':\n", + " subprocess.run([blat_exec_path, blat_ref_path, '-q=prot', '-t=dnax', '-minScore=20', 'blat_query.fa', 'blat_out.psl'])\n", + " else:\n", + " subprocess.run([blat_exec_path, blat_ref_path, '-minScore=20', 'blat_query.fa', 'blat_out.psl'])\n", + "\n", + " # Extract ranges\n", + " chrom = ''\n", + " strand = ''\n", + " target = ''\n", + " target_type = ''\n", + " coverage = None\n", + " identity = None\n", + " query_ranges = list()\n", + " hit_ranges = list()\n", + " \n", + " try:\n", + " output = SearchIO.read('blat_out.psl', 'blat-psl')\n", + " except:\n", + " try:\n", + " subprocess.run([blat_exec_path, blat_ref_path, '-q=dnax', '-t=dnax', '-minScore=20', 'blat_query.fa', 'blat_out.psl'])\n", + " output = SearchIO.read('blat_out.psl', 'blat-psl')\n", + " except:\n", + " qh_dat = {'query_ranges': list('NA'), 'hit_ranges': list('NA')}\n", + " qh_dat = pd.DataFrame(data = qh_dat)\n", + " mave_blat_dict[dat.at[i, 'urn']] = {'chrom': 'NA', 'strand': 'NA', 'target': 'NA', 'target_type': 'NA',\n", + " 'uniprot': 'NA','coverage': 
'NA','identity':'NA', 'hits': qh_dat}\n", + " continue\n", + "\n", + " # Find chromosome to select hit from\n", + " hit_scores = list()\n", + " hit_dict = {}\n", + " use_chr = False\n", + " \n", + " for c in range(len(output)):\n", + " correct_chr = get_gene_data(i,output[c].id.strip('chr'), return_chr = True)\n", + " if correct_chr == output[c].id.strip('chr'):\n", + " use_chr = True\n", + " break\n", + " if correct_chr == 'NA': # Take top scoring hit if target not found using gene normalizer\n", + " hit_scores = list()\n", + " for e in range(len(output[c])):\n", + " hit_scores.append(output[c][e].score)\n", + " hit_dict[c] = hit_scores\n", + "\n", + " if use_chr == False:\n", + " for key in hit_dict:\n", + " hit_dict[key] = max(hit_dict[key])\n", + " hit = max(hit_dict, key = hit_dict.get)\n", + " else:\n", + " hit = c\n", + " \n", + " \n", + " # Use location provided by gene normalizer to find hsp\n", + " loc_dict = get_gene_data(i, output[hit].id.strip('chr'), return_chr = False)\n", + " \n", + " hit_starts = list()\n", + " for n in range(len(output[hit])):\n", + " hit_starts.append(output[hit][n].hit_start)\n", + " \n", + " sub_scores = list()\n", + " for n in range(len(output[hit])):\n", + " sub_scores.append(output[hit][n].score)\n", + " \n", + " if loc_dict == 'NA':\n", + " hsp = output[hit][sub_scores.index(max(sub_scores))] # Take top score if no match found \n", + " else:\n", + " hsp = output[hit][hit_starts.index(min(hit_starts, key=lambda x:abs(x - loc_dict['start'])))]\n", + "\n", + " \n", + " for j in range(len(hsp)):\n", + " test_file = open('blat_output_test.txt', 'w')\n", + " test_file.write(str(hsp[j]))\n", + " test_file.close()\n", + "\n", + " query_string = ''\n", + " hit_string = ''\n", + " strand = hsp[0].query_strand\n", + " coverage = 100 * (hsp.query_end - hsp.query_start) / output.seq_len\n", + " coverage = f\"{hsp.query_end - hsp.query_start} / {output.seq_len}, {coverage}\" \n", + " identity = hsp.ident_pct\n", + "\n", + " test_file = 
open('blat_output_test.txt', 'r')\n", + " for k,line in enumerate(test_file):\n", + " if k == 1:\n", + " chrom = line.strip('\\n')\n", + " if k == 2:\n", + " query_string = line.strip('\\n')\n", + " if k == 3:\n", + " hit_string = line.strip('\\n')\n", + " test_file.close()\n", + "\n", + " chrom = chrom.split(' ')[9].strip('chr')\n", + " query_string = query_string.split(' ')\n", + " hit_string = hit_string.split(' ')\n", + " query_ranges.append(query_string[2])\n", + " hit_ranges.append(hit_string[4])\n", + " \n", + " # Add to dict\n", + " qh_dat = {'query_ranges': query_ranges, 'hit_ranges': hit_ranges}\n", + " qh_dat = pd.DataFrame(data = qh_dat)\n", + " mave_blat_dict[dat.at[i, 'urn']] = {'chrom': chrom,'strand': strand,'target': dat.at[i,'target'], 'target_type': dat.at[i, 'target_type'],\n", + " 'uniprot': dat.at[i,'uniprot_id'],'coverage': coverage,'identity': identity, 'hits': qh_dat} \n", + " print('added ' + dat.at[i, 'urn'] + ' to dict')" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "id": "d66f6830", + "metadata": {}, + "outputs": [], + "source": [ + "with open('mave_blat.pickle', 'wb') as fn:\n", + " pickle.dump(mave_blat_dict, fn, protocol=pickle.HIGHEST_PROTOCOL)" + ] + }, + { + "cell_type": "markdown", + "id": "8ad39211", + "metadata": {}, + "source": [ + "## Part 2: BLAT Output to Transcript Selection" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1059d30e", + "metadata": {}, + "outputs": [], + "source": [ + "## Helper functions\n", + "\n", + "def get_start(string):\n", + " return int(string.split(':')[0].strip('['))\n", + "\n", + "def get_end(string):\n", + " return int(string.split(':')[1].strip(']'))\n", + "\n", + "def get_locs_list(hitsdat):\n", + " locs_list = []\n", + " for i in range(len(hitsdat.index)):\n", + " start = get_start(hitsdat.at[i, 'hit_ranges'])\n", + " end = get_end(hitsdat.at[i, 'hit_ranges'])\n", + " locs_list.append([start,end])\n", + " return locs_list\n", + "\n", + "def 
get_hits_list(hitsdat):\n", + " hits_list = []\n", + " for i in range(len(hitsdat.index)):\n", + " start = get_start(hitsdat.at[i, 'query_ranges'])\n", + " end = get_end(hitsdat.at[i, 'query_ranges'])\n", + " hits_list.append([start,end])\n", + " return hits_list\n", + "\n", + "def get_query_hits(dat):\n", + " query_list = []\n", + " hits_list = []\n", + " for i in range(len(dat.index)):\n", + " query_start = get_start(dat.at[i, 'query_ranges'])\n", + " query_end = get_end(dat.at[i, 'query_ranges'])\n", + " query_list.append([query_start, query_end])\n", + " hit_start = get_start(dat.at[i, 'hit_ranges'])\n", + " hit_end = get_end(dat.at[i, 'hit_ranges'])\n", + " hits_list.append([hit_start, hit_end])\n", + " return query_list, hits_list\n", + "\n", + "def get_ga4gh(dp, ref):\n", + " aliases = dp.get_metadata(ref)['aliases']\n", + " f = filter(lambda x: 'ga4gh' in x, aliases)\n", + " return 'ga4gh:' + list(f)[0].split(':')[1]\n", + "\n", + "def get_chr(dp, chrom):\n", + " aliases = dp.get_metadata('GRCh38:' + chrom)['aliases']\n", + " f = filter(lambda x: 'refseq' in x, aliases)\n", + " return list(f)[0].split(':')[1]\n", + "\n", + "def modify_hgvs(var, ref, off, hp):\n", + " if len(var) == 3 or var == '_wt' or var == '_sy' or '[' in var:\n", + " return var\n", + " var = ref + ':' + var\n", + " var = hp.parse_hgvs_variant(var)\n", + " var.posedit.pos.start.base = var.posedit.pos.start.base + off\n", + " var.posedit.pos.end.base = var.posedit.pos.end.base + off\n", + " return(str(var))\n", + "\n", + "def blat_check(i):\n", + " item = mave_blat_dict[dat.at[i, 'urn']]\n", + " if item['uniprot'] == None:\n", + " test = dat.at[i, 'target'].split(' ')\n", + " for j in range(len(test)):\n", + " try:\n", + " out = qh.normalize(test[j]).gene\n", + " gene_dat = [out.label, out.extensions[2].value['chr']]\n", + " if item['chrom'] != gene_dat[1]:\n", + " return False\n", + " else:\n", + " return True\n", + " except:\n", + " continue\n", + "\n", + "def get_haplotype_allele(var, 
ref, offset, l, tr, dp, ts, mapped, ranges, hits, strand):\n", + " var = var.lstrip(f'{l}.')\n", + "\n", + " if '[' in var:\n", + " var = var[1:][:-1]\n", + " varlist = var.split(';')\n", + " varlist = list(set(varlist))\n", + " else:\n", + " varlist = list()\n", + " varlist.append(var)\n", + "\n", + " locs = {}\n", + " alleles = []\n", + "\n", + " for i in range(len(varlist)):\n", + " try:\n", + " hgvs_string = ref + ':'+ l +'.' + varlist[i]\n", + " allele = tr.translate_from(hgvs_string, 'hgvs')\n", + " \n", + " if mapped == 'pre':\n", + " print(allele)\n", + " allele.location.sequence_id = 'ga4gh:SQ.' + sha512t24u(ts.encode('ascii'))\n", + " if 'dup' in hgvs_string:\n", + " allele.state.sequence = 2*str(sr[str(allele.location.sequence_id)][allele.location.start.value:allele.location.end.value])\n", + " \n", + " else:\n", + " if l != 'g':\n", + " allele.location.start.value = allele.location.start.value + offset\n", + " allele.location.end.value = allele.location.end.value + offset\n", + " if 'dup' in hgvs_string:\n", + " allele.state.sequence = 2*str(sr[str(allele.location.sequence_id)][allele.location.start.value:allele.location.end.value])\n", + " \n", + " else:\n", + " start = allele.location.start.value\n", + " if len(hits) == 1 and strand == 1:\n", + " i = 0\n", + " diff = start - hits[i][0]\n", + " diff2 = allele.location.end.value - start\n", + " allele.location.start.value = ranges[i][0] + diff\n", + " allele.location.end.value = allele.location.start.value + diff2\n", + " else:\n", + " for i in range(len(hits)):\n", + " if start >= hits[i][0] and start < hits[i][1]:\n", + " break\n", + " diff = start - hits[i][0]\n", + " diff2 = allele.location.end.value - start\n", + " if strand == 1: # positive orientation\n", + " allele.location.start.value = ranges[i][0] + diff\n", + " allele.location.end.value = allele.location.start.value + diff2\n", + " if 'dup' in hgvs_string:\n", + " allele.state.sequence = 
2*str(sr[str(allele.location.sequence_id)][allele.location.start.value:allele.location.end.value])\n", + " else: \n", + " allele.location.start.value = ranges[i][1] - diff - diff2\n", + " allele.location.end.value = allele.location.start.value + diff2\n", + " if 'dup' in hgvs_string:\n", + " allele.state.sequence = 2*str(sr[str(allele.location.sequence_id)][allele.location.start.value:allele.location.end.value])\n", + " allele.state.sequence = str(Seq(str(allele.state.sequence)).reverse_complement())\n", + " \n", + " if allele.state.sequence == 'N' and l != 'p':\n", + " allele.state.sequence = str(sr[str(allele.location.sequence_id)][allele.location.start.value:allele.location.end.value])\n", + " allele = normalize(allele, data_proxy = dp) \n", + " allele.id = ga4gh_identify(allele)\n", + " alleles.append(allele)\n", + " except:\n", + " vrstext = {'definition':ref + ':'+ l +'.' + varlist[i], 'type': 'Text'}\n", + " return vrstext\n", + " \n", + " if len(alleles) == 1: # Not haplotype\n", + " return alleles[0]\n", + " else:\n", + " return models.Haplotype(members = alleles)\n", + " \n", + "def get_clingen_id(hgvs):\n", + " url = 'https://reg.genome.network/allele?hgvs=' + hgvs\n", + " page = requests.get(url).json()\n", + " page = page['@id']\n", + " try:\n", + " return page.split('/')[4]\n", + " except:\n", + " return 'NA'" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "id": "24d27355", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['urn:mavedb:00000097-q-1', 'no transcripts found']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sallybg/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/Bio/Seq.py:2880: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. 
This may become an error in future.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "{'urn:mavedb:00000041-a-1': ['NP_938033.1',\n", + " 269,\n", + " 'urn:mavedb:00000041-a-1',\n", + " True,\n", + " 'NM_198291.3',\n", + " 'MANE Select'],\n", + " 'urn:mavedb:00000048-a-1': ['NP_003458.1',\n", + " 1,\n", + " 'urn:mavedb:00000048-a-1',\n", + " True,\n", + " 'NM_003467.3',\n", + " 'MANE Select'],\n", + " 'urn:mavedb:00000107-a-1': ['NP_478059.1',\n", + " 0,\n", + " 'urn:mavedb:00000107-a-1',\n", + " True,\n", + " 'NM_058179.4',\n", + " 'MANE Select'],\n", + " 'urn:mavedb:00000103-d-1': ['NP_002736.3',\n", + " 0,\n", + " 'urn:mavedb:00000103-d-1',\n", + " True,\n", + " 'NM_002745.5',\n", + " 'MANE Select'],\n", + " 'urn:mavedb:00000061-b-1': ['NP_002871.1',\n", + " 51,\n", + " 'urn:mavedb:00000061-b-1',\n", + " True,\n", + " 'NM_002880.4',\n", + " 'MANE Select'],\n", + " 'urn:mavedb:00000097-q-1': [],\n", + " 'urn:mavedb:00000003-a-1': ['NP_009225.1',\n", + " 1,\n", + " 'urn:mavedb:00000003-a-1',\n", + " False,\n", + " 'NM_007294.4',\n", + " 'MANE Select'],\n", + " 'urn:mavedb:00000097-i-1': ['NP_009225.1',\n", + " 1630,\n", + " 'urn:mavedb:00000097-i-1',\n", + " False,\n", + " 'NM_007294.4',\n", + " 'MANE Select'],\n", + " 'urn:mavedb:00000099-a-1': ['NP_000530.1',\n", + " 0,\n", + " 'urn:mavedb:00000099-a-1',\n", + " True,\n", + " 'NM_000539.3',\n", + " 'MANE Select']}" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## UTA Transcript Selection\n", + "nest_asyncio.apply()\n", + "mane = MANETranscriptMappings()\n", + "utadb = UTADatabase('postgresql://anonymous:anonymous@uta.biocommons.org:5432/uta')\n", + "qh = QueryHandler(create_db())\n", + "dp = SeqRepoDataProxy(sr = sr)\n", + "\n", + "mappings_dict = {}\n", + "mave_dat = pd.read_csv('mave_dat.csv')\n", + "dat = mave_dat\n", + "with open('mave_blat.pickle', 'rb') as fn:\n", + " mave_blat_dict = pickle.load(fn)\n", + "\n", + "for 
j in range(len(dat.index)):\n", + " if dat.at[j, 'target_type'] == 'Protein coding' or dat.at[j, 'target_type'] == 'protein_coding':\n", + " item = mave_blat_dict[dat.at[j,'urn']]\n", + " #if blat_check(j) == False:\n", + " # mappings_dict[dat.at[j, 'urn']] = 'BLAT hit not found on correct chromosome'\n", + " # continue\n", + " if item['chrom'] == 'NA':\n", + " continue\n", + " locs = get_locs_list(item['hits'])\n", + " chrom = get_chr(dp, item['chrom'])\n", + "\n", + " uniprot = dat.at[j, 'uniprot_id']\n", + " uniprot_gene = qh.normalize(str(f'uniprot:{uniprot}')).gene\n", + " # try to normalize based on the uniprot_id, but if there is no match, normalize based on the first part of the target name\n", + " if uniprot_gene == None:\n", + " temp = dat.at[j, 'target'].split(' ')\n", + " if temp[0] == 'JAK':\n", + " temp[0] = 'JAK1'\n", + " gsymb = qh.normalize(temp[0]).gene.label\n", + " else:\n", + " gsymb = uniprot_gene.label\n", + "\n", + "\n", + " async def mapq():\n", + " transcript_lists = []\n", + " for i in range(len(locs)):\n", + " testquery = (f\"\"\"select *\n", + " from uta_20210129.tx_exon_aln_v\n", + " where hgnc = '{gsymb}'\n", + " and {locs[i][0]} between alt_start_i and alt_end_i\n", + " or {locs[i][1]} between alt_start_i and alt_end_i\n", + " and alt_ac = '{chrom}'\"\"\") \n", + " \n", + " out = await utadb.execute_query(testquery)\n", + " tl = []\n", + " for j in range(len(out)):\n", + " if out[j]['tx_ac'].startswith('NR_') == False:\n", + " tl.append(out[j]['tx_ac'])\n", + " if tl != []:\n", + " transcript_lists.append(tl)\n", + " return(transcript_lists)\n", + "\n", + " ts = asyncio.run(mapq())\n", + " try:\n", + " isect = list(set.intersection(*map(set,ts)))\n", + " except:\n", + " try: # Look for transcripts using uniprot id\n", + " url = 'https://www.uniprot.org/uniprot/' + str(dat.at[j, 'uniprot_id']) + '.xml'\n", + " page = requests.get(url)\n", + " page = BeautifulSoup(page.text)\n", + " page = page.find_all('sequence')\n", + " up = 
page[1].get_text()\n", + "\n", + " stri = str(dat.at[j,'target_sequence'])\n", + " if up.find(stri) != -1:\n", + " full_match = True\n", + " else:\n", + " full_match = False\n", + " start = up.find(stri[:10])\n", + " mappings_dict[dat.at[j,'urn']] = [dat.at[j, 'uniprot_id'], start, dat.at[j, 'urn'], full_match]\n", + " continue\n", + " except:\n", + " print([dat.at[j, 'urn'], 'no transcripts found'])\n", + " mappings_dict[dat.at[j,'urn']] = []\n", + " continue\n", + "\n", + " mane_trans = mane.get_mane_from_transcripts(isect)\n", + " if mane_trans != []:\n", + " if len(mane_trans) == 1:\n", + " np = mane_trans[0]['RefSeq_prot']\n", + " nm = mane_trans[0]['RefSeq_nuc']\n", + " status = 'MANE Select'\n", + " else:\n", + " if mane_trans[0]['MANE_status'] == 'MANE Select':\n", + " np = mane_trans[0]['RefSeq_prot']\n", + " nm = mane_trans[0]['RefSeq_nuc']\n", + " status = 'MANE Select'\n", + " else:\n", + " np = mane_trans[1]['RefSeq_prot']\n", + " nm = mane_trans[1]['RefSeq_nuc']\n", + " status = 'MANE Plus Clinical'\n", + " \n", + " oseq = dat.at[j, 'target_sequence']\n", + " \n", + " if len(set(str(oseq))) > 4:\n", + " stri = str(oseq)\n", + " else:\n", + " oseq = Seq(oseq)\n", + " stri = str(oseq.translate(table=1)).replace('*', '')\n", + " \n", + " if str(sr[np]).find(stri) != -1:\n", + " full_match = True\n", + " else:\n", + " full_match = False\n", + " start = str(sr[np]).find(stri[:10])\n", + " mappings_dict[dat.at[j,'urn']] = [np, start, dat.at[j, 'urn'], full_match, nm, status]\n", + " \n", + " else:\n", + " trans_lens = []\n", + " for i in range(len(isect)):\n", + " trans_lens.append(len(str(sr[isect[i]])))\n", + " loc = trans_lens.index(max(trans_lens))\n", + " nm = isect[loc]\n", + " \n", + " testquery = f\"SELECT pro_ac FROM uta_20210129.associated_accessions WHERE tx_ac = '{nm}'\"\n", + " async def np():\n", + " out = await utadb.execute_query(testquery)\n", + " try:\n", + " return out[0]['pro_ac']\n", + " except:\n", + " return out\n", + " np = 
asyncio.run(np())\n", + " \n", + " if np != []:\n", + " oseq = dat.at[j, 'target_sequence']\n", + " \n", + " if len(set(str(oseq))) > 4:\n", + " stri = str(oseq)\n", + " else:\n", + " oseq = Seq(oseq)\n", + " stri = str(oseq.translate(table=1)).replace('*', '')\n", + " \n", + " if str(sr[np]).find(stri) != -1:\n", + " full_match = True\n", + " else:\n", + " full_match = False\n", + " start = str(sr[np]).find(stri[:10])\n", + " mappings_dict[dat.at[j,'urn']] = [np, start, dat.at[j, 'urn'], full_match, nm, 'Longest Compatible'] \n", + "mappings_dict" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "id": "9a3cb7f1", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sallybg/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/Bio/Seq.py:2880: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future.\n", + " warnings.warn(\n", + "/Users/sallybg/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/Bio/Seq.py:2880: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. 
This may become an error in future.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "# Find start location in provided target sequence when start position is not first position of sequence \n", + "import operator\n", + "offset_within_ts = {}\n", + "\n", + "def validation_helper(protstring):\n", + " protstring = protstring[1:][:-1]\n", + " vs = protstring.split(';')\n", + " return vs\n", + "\n", + "for i in range(len(mave_dat.index)):\n", + " if mave_dat.at[i, 'target_type'] == 'Protein coding' and mave_dat.at[i, 'target_sequence_type'] == 'dna':\n", + " urn = mave_dat.at[i, 'urn']\n", + " if urn == 'urn:mavedb:00000053-a-1' or urn == 'urn:mavedb:00000053-a-2': # target sequence missing codon\n", + " continue\n", + " oseq = Seq(mave_dat.at[i, 'target_sequence'])\n", + " ts = str(oseq.translate(table = 1))\n", + "\n", + " string = 'https://api.mavedb.org/api/v1/score-sets/' + mave_dat.at[i, 'urn']+ '/scores'\n", + " origdat = requests.get(string).content\n", + " dat = pd.read_csv(io.StringIO(origdat.decode('utf-8')))\n", + "\n", + " protlist = dat['hgvs_pro'].to_list()\n", + " if type(dat.at[0, 'hgvs_pro']) != str or dat.at[0, 'hgvs_pro'].startswith('NP'):\n", + " continue\n", + " protlist = [x.lstrip('p.') for x in protlist]\n", + " \n", + " aa_dict = {}\n", + " for k in range(len(protlist)):\n", + " if protlist[k] == '_sy' or protlist[k] == '_wt':\n", + " continue\n", + " else:\n", + " if ';' in protlist[k]: \n", + " vs = validation_helper(protlist[k])\n", + " for l in range(len(vs)):\n", + " aa = vs[l][:3]\n", + " if aa == '=' or vs[l][-3:] not in Bio.SeqUtils.IUPACData.protein_letters_3to1.keys():\n", + " continue\n", + " if '=' in vs[l]:\n", + " loc = vs[l][3:][:-1]\n", + " else:\n", + " loc = vs[l][3:][:-3]\n", + " if loc not in aa_dict:\n", + " loc = re.sub('[^0-9]', '', loc)\n", + " aa_dict[loc] = seq1(aa)\n", + " \n", + " else:\n", + " if '_' in protlist[k]:\n", + " continue\n", + " aa = protlist[k][:3]\n", + " if aa == '=' or protlist[k][-3:] not in 
Bio.SeqUtils.IUPACData.protein_letters_3to1.keys():\n", + " continue\n", + " if '=' in protlist[k]:\n", + " loc = protlist[k][3:][:-1]\n", + " else:\n", + " loc = protlist[k][3:][:-3]\n", + " if loc not in aa_dict:\n", + " loc = re.sub('[^0-9]', '', loc)\n", + " aa_dict[loc] = seq1(aa)\n", + " \n", + "\n", + " aa_dict.pop('', None)\n", + " \n", + " err_locs = []\n", + " for m in range(len(ts)):\n", + " if str(m) in list(aa_dict.keys()):\n", + " if aa_dict[str(m)] != ts[int(m) - 1]: # Str vs dict offset\n", + " err_locs.append(m)\n", + " \n", + " if len(err_locs) > 1:\n", + " aa_dict = {int(k):v for k,v in aa_dict.items()}\n", + " aa_dict = sorted(aa_dict.items())\n", + " aa_dict = dict(aa_dict)\n", + " locs = list(aa_dict.keys())[0:5]\n", + " p0, p1, p2, p3, p4 = locs[0], locs[1], locs[2], locs[3], locs[4]\n", + " offset = locs[0]\n", + "\n", + " seq = ''\n", + " for key in aa_dict:\n", + " seq = seq + aa_dict[key]\n", + " \n", + " for i in range(len(ts)):\n", + " if ts[i] == aa_dict[p0] and ts[i + p1 - p0] == aa_dict[p1] and ts[i + p2 - p0] == aa_dict[p2] and ts[i + p3 - p0] == aa_dict[p3] and ts[i + p4 - p0] == aa_dict[p4]:\n", + " if i + 1 == min(aa_dict.keys()) or i + 2 == min(aa_dict.keys()):\n", + " offset_within_ts[urn] = 0\n", + " else:\n", + " offset_within_ts[urn] = i\n", + " break\n", + "\n", + "for key in offset_within_ts:\n", + " mappings_dict[key][1] = offset_within_ts[key]" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "id": "cfdad34c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'urn:mavedb:00000041-a-1': ['NP_938033.1',\n", + " 269,\n", + " 'urn:mavedb:00000041-a-1',\n", + " True,\n", + " 'NM_198291.3',\n", + " 'MANE Select'],\n", + " 'urn:mavedb:00000048-a-1': ['NP_003458.1',\n", + " 0,\n", + " 'urn:mavedb:00000048-a-1',\n", + " True,\n", + " 'NM_003467.3',\n", + " 'MANE Select'],\n", + " 'urn:mavedb:00000107-a-1': ['NP_478059.1',\n", + " 0,\n", + " 'urn:mavedb:00000107-a-1',\n", + " True,\n", + " 
'NM_058179.4',\n", + " 'MANE Select'],\n", + " 'urn:mavedb:00000103-d-1': ['NP_002736.3',\n", + " 0,\n", + " 'urn:mavedb:00000103-d-1',\n", + " True,\n", + " 'NM_002745.5',\n", + " 'MANE Select'],\n", + " 'urn:mavedb:00000061-b-1': ['NP_002871.1',\n", + " 51,\n", + " 'urn:mavedb:00000061-b-1',\n", + " True,\n", + " 'NM_002880.4',\n", + " 'MANE Select'],\n", + " 'urn:mavedb:00000097-q-1': [],\n", + " 'urn:mavedb:00000003-a-1': ['NP_009225.1',\n", + " 1,\n", + " 'urn:mavedb:00000003-a-1',\n", + " False,\n", + " 'NM_007294.4',\n", + " 'MANE Select'],\n", + " 'urn:mavedb:00000097-i-1': ['NP_009225.1',\n", + " 1630,\n", + " 'urn:mavedb:00000097-i-1',\n", + " False,\n", + " 'NM_007294.4',\n", + " 'MANE Select'],\n", + " 'urn:mavedb:00000099-a-1': ['NP_000530.1',\n", + " 0,\n", + " 'urn:mavedb:00000099-a-1',\n", + " True,\n", + " 'NM_000539.3',\n", + " 'MANE Select']}" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mappings_dict" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "id": "37a7890c", + "metadata": {}, + "outputs": [], + "source": [ + "with open('mappings.pickle', 'wb') as fn:\n", + " pickle.dump(mappings_dict, fn, protocol=pickle.HIGHEST_PROTOCOL)" + ] + }, + { + "cell_type": "markdown", + "id": "17c59aa0", + "metadata": {}, + "source": [ + "## Part 3: Transcript to VRS Variant" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f86c4681", + "metadata": {}, + "outputs": [], + "source": [ + "with open('mave_blat.pickle', 'rb') as fn:\n", + " mave_blat_dict = pickle.load(fn)\n", + " \n", + "with open('mappings.pickle', 'rb') as fn:\n", + " mappings_dict = pickle.load(fn)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "510b9973", + "metadata": {}, + "outputs": [], + "source": [ + "def get_haplotype_allele_mavehgvs(var, ref, offset, l, tr, dp, ts, mapped, ranges, hits, strand):\n", + " var = var.lstrip(f'{l}.')\n", + " if '[' in var:\n", + " 
var = var[1:][:-1]\n", + " varlist = var.split(';')\n", + " varlist = list(set(varlist))\n", + " else:\n", + " varlist = list()\n", + " varlist.append(var)\n", + "\n", + " locs = {}\n", + " alleles = []\n", + "\n", + " for i in range(len(varlist)):\n", + " # hgvs_string = ref + ':'+ l +'.' + varlist[i]\n", + " # allele = tr.translate_from(hgvs_string, 'hgvs')\n", + " \n", + " if mapped == 'pre':\n", + " hgvs_string = ref + ':'+ l +'.' + varlist[i]\n", + "\n", + " # TODO protein fs hgvs strings are not supported because they can't be handled by vrs allele translator\n", + " if re.search(mavehgvs.patterns.protein.pro_fs, hgvs_string):\n", + " raise NotImplementedError(\"Pre-map VRS translation not supported for fs variants denoted with protein hgvs strings\")\n", + " \n", + " # TODO multi position variants\n", + " # this actually works for pre-map, but don't support it until post-map works\n", + " if re.search(mavehgvs.patterns.protein.pro_multi_variant, hgvs_string):\n", + " raise NotImplementedError(\"Pre-map VRS translation not supported for multi-position variants\")\n", + "\n", + " allele = tr.translate_from(hgvs_string, 'hgvs')\n", + " # it's necessary to update the sequence identifier after translation, rather than including it in the hgvs string,\n", + " # because the hgvs parser expects a digit after the 'SQ.'\n", + " # note: not updating sequence reference until after normalization,\n", + " # because computed sequence identifier should include 'ga4gh:SQ', (see example here https://vrs.ga4gh.org/en/1.1/impl-guide/example.html)\n", + " # and the 'ga4gh:' breaks the normalizer\n", + " #allele.location.sequenceReference.refgetAccession = 'SQ.' + sha512t24u(ts.encode('ascii'))\n", + "\n", + " if 'dup' in hgvs_string:\n", + " print('dup in hgvs string. 
this has not been tested yet, review output.')\n", + " allele.state.sequence.root = 2*str(sr[str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " \n", + " else:\n", + " if l != 'g':\n", + " # TODO do we need to do anything for negative strand if using p. hgvs?\n", + " # expecting protein-based ref, so hgvs string is already mostly correct - just need to calculate offset\n", + " # could parse whole list outside of for loop since this function takes a list\n", + " parsed_hgvs = mavehgvs.util.parse_variant_strings(['p.' + varlist[i]])[0][0]\n", + " # looks like offset is calculated based on amino acids, so this should be correct, but should validate\n", + " # may want to only do this if offset != 0? i guess that depends on how often offset == 0\n", + "\n", + " # TODO positions can be a tuple if there are multiple positions associated with the variant.\n", + " # if positions is a tuple, accessing position like this won't work.\n", + " # so need to check length of parsed_hgvs.positions\n", + " # should we expect multi-position protein variants?\n", + " # looks like yes - example from mavehgvs spec: p.His7_Gln8insSer\n", + " \n", + " if not isinstance(parsed_hgvs.positions, mavehgvs.position.VariantPosition):\n", + " raise NotImplementedError(\"Post-map VRS translation for protein-coding variants spanning multiple positions has not been implemented.\")\n", + " \n", + " # TODO protein fs hgvs strings are not supported because they can't be handled by vrs allele translator\n", + " if re.search(mavehgvs.patterns.protein.pro_fs, str(parsed_hgvs)):\n", + " raise NotImplementedError(\"Post-map VRS translation not supported for fs variants denoted with protein hgvs strings\")\n", + "\n", + " parsed_hgvs.positions.position = parsed_hgvs.positions.position + offset\n", + " hgvs_string = ref + ':' + str(parsed_hgvs)\n", + " allele = tr.translate_from(hgvs_string, 'hgvs')\n", + "\n", + " # allele.location.start = 
allele.location.start + offset\n", + " # allele.location.end = allele.location.end + offset\n", + " # dups haven't been fixed yet, need to find a test case\n", + " if 'dup' in hgvs_string:\n", + " # not sure if this needs to be allele.state.sequence.root\n", + " print('dup in hgvs string. this has not been tested yet, review output.')\n", + " allele.state.sequence.root = 2*str(sr[str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " \n", + " else:\n", + " # can we assume that the noncoding hgvs strings coming in from mavedb in the hgvs_nt column are c.?\n", + " parsed_hgvs = mavehgvs.util.parse_variant_strings(['c.' + varlist[i]])[0][0]\n", + " # start = allele.location.start\n", + " if not isinstance(parsed_hgvs.positions, mavehgvs.position.VariantPosition):\n", + " raise NotImplementedError(\"Post-map VRS translation for non-protein-coding variants spanning multiple positions has not been implemented.\")\n", + " \n", + " start = parsed_hgvs.positions.position - 1 #hgvs uses 1-based numbering for c. 
sequences, while blat hits are 0-based\n", + "\n", + " # get hit\n", + " if len(hits) == 1:\n", + " i = 0\n", + " else:\n", + " for i in range(len(hits)):\n", + " if start >= hits[i][0] and start < hits[i][1]:\n", + " break\n", + "\n", + " # if hit is on positive strand\n", + " if strand == 1:\n", + " # get variant start relative to the reference (the \"hit\")\n", + " # distance from beginning of query to variant start position:\n", + " query_to_start = start - hits[i][0]\n", + " # distance from beginning of ref to the variant start position:\n", + " ref_to_start = ranges[i][0] + query_to_start\n", + " # hgvs is 1-based, so convert back to 1-based\n", + " parsed_hgvs.positions.position = ref_to_start + 1\n", + " # if hit is on negative strand \n", + " else:\n", + " # in this case, picture the rev comp of the query/variant as mapping to the positive strand of the ref\n", + " # the start of the reverse complement of the variant is the end of the \"original\" variant\n", + " # so we need to know where the end of the original variant is, relative to the query molecule\n", + " # for single-position variants, we'll assume the end (rev comp view) is equal to: start - 1 \n", + " # TODO this works for single-position variants only!\n", + " # this error is redundant (should be caught above),\n", + " # but since it's not necessarily obvious that this works for\n", + " # single-position variants only,\n", + " # I'm putting it here as well because development\n", + " # will need to happen here as well in order to support multi-position\n", + " # variants, since diff2 = 1 is ONLY a good assumption for single-position variants\n", + " if not isinstance(parsed_hgvs.positions, mavehgvs.position.VariantPosition):\n", + " raise NotImplementedError(\"Post-map VRS translation for protein-coding variants spanning multiple positions has not been implemented.\")\n", + " \n", + " # the distance between the start and end of the variant is dependent on the number of positions covered by the 
variant!\n", + " # this is hardcoded for single-position variants, for now\n", + " end = start\n", + " # subtract 1 from end of hit range, because blat ranges are 0-based [start, end)\n", + " ref_to_start = (ranges[i][1] -1 ) - (end - hits[i][0])\n", + " # or could do ranges[i][0] + (end - hits[i][1]), is one better than the other? any cases where one might be inaccurate?\n", + " # hgvs is 1-based, so convert back to 1-based\n", + " parsed_hgvs.positions.position = ref_to_start + 1\n", + "\n", + " # rev comp each sequence, assuming [0] is original and [1] is variant\n", + " # this is only tested for single position variants\n", + "\n", + " revcomp_sequences_list = []\n", + " for sequence in parsed_hgvs._sequences:\n", + " revcomp_sequences_list.append(str(Seq(sequence).reverse_complement()))\n", + " parsed_hgvs._sequences = tuple(revcomp_sequences_list)\n", + "\n", + " # get hgvs and allele\n", + " hgvs_string = ref + ':' + str(parsed_hgvs)\n", + " allele = tr.translate_from(hgvs_string, 'hgvs')\n", + " \n", + " # TODO dups will need to be corrected after the allele object is created, because the mavehgvs string\n", + " # does not contain information about the identity of the base that is duplicated\n", + " # not immediately sure how to handle rev comp dups\n", + "\n", + " # haven't fixed this if block yet, need test case\n", + " # not sure if this needs to be allele.state.sequence.root\n", + " if allele.state.sequence.root == 'N' and l != 'p':\n", + " print('sequence is N. 
this has not been tested yet, review output.')\n", + " allele.state.sequence.root = str(sr[str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " allele = normalize(allele, data_proxy = dp)\n", + " \n", + " # update sequence reference id after normalization, see commented notes in pre mapping section above\n", + " if mapped == 'pre':\n", + " # not sure if refgetAccession is the appropriate field to update here, since this is a ga4gh computed seq id.\n", + " # do ga4gh computed seq ids count as refget accession ids?\n", + " allele.location.sequenceReference.refgetAccession = 'ga4gh:SQ.' + sha512t24u(ts.encode('ascii'))\n", + " allele.id = ga4gh_identify(allele)\n", + " alleles.append(allele)\n", + " \n", + " if len(alleles) == 1: # Not haplotype\n", + " return alleles[0]\n", + " else:\n", + " return models.Haplotype(members = alleles)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9efe716", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "urn:mavedb:00000099-a-1\n", + "skipping, pre-map fs\n", + "p.Glu341fs\n", + "skipping, pre-map fs\n", + "p.Phe13fs\n", + "skipping, post-map multi position variant\n", + "p.Val137_Pro142del\n", + "skipping, pre-map fs\n", + "p.Leu328fs\n", + "skipping, pre-map fs\n", + "p.Asn315fs\n", + "skipping, pre-map fs\n", + "p.Ser334fs\n", + "skipping, pre-map fs\n", + "p.Ala335fs\n", + "skipping, post-map multi position variant\n", + "p.Tyr206_Phe208del\n", + "skipping, pre-map fs\n", + "p.Thr340fs\n", + "skipping, pre-map fs\n", + "p.Glu341fs\n", + "skipping, pre-map fs\n", + "p.Glu332fs\n", + "skipping, pre-map fs\n", + "p.Pro327fs\n", + "skipping, post-map multi position variant\n", + "p.Arg69_Leu72del\n", + "skipping, pre-map fs\n", + "p.Pro327fs\n", + "skipping, post-map multi position variant\n", + "p.Leu318_Thr319delinsPro\n", + "skipping, pre-map fs\n", + "p.Ter349fs\n", + "skipping, pre-map fs\n", + 
"p.Ter349fs\n" + ] + }, + { + "ename": "ValueError", + "evalue": "All arrays must be of the same length", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)\n", + "\u001b[1;32m/Users/sallybg/workspace/varianteffect/dcd_mapping/notebooks/sally/mavedb_mapping.ipynb Cell 43\u001b[0m line \u001b[0;36m8\n", + "\u001b[1;32m 81\u001b[0m \u001b[39mprint\u001b[39m(varm[j])\n", + "\u001b[1;32m 82\u001b[0m \u001b[39mcontinue\u001b[39;00m\n", + "\u001b[0;32m---> 84\u001b[0m tempdat \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mDataFrame({\u001b[39m'\u001b[39;49m\u001b[39mpre_mapping\u001b[39;49m\u001b[39m'\u001b[39;49m: var_ids_pre_map, \u001b[39m'\u001b[39;49m\u001b[39mmapped\u001b[39;49m\u001b[39m'\u001b[39;49m: var_ids_post_map})\n", + "\u001b[1;32m 85\u001b[0m mappings_list\u001b[39m.\u001b[39mappend(tempdat)\n", + "\u001b[1;32m 86\u001b[0m scores_list\u001b[39m.\u001b[39mappend(spro)\n", + "\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/frame.py:767\u001b[0m, in \u001b[0;36mDataFrame.__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n", + "\u001b[1;32m 761\u001b[0m mgr \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_init_mgr(\n", + "\u001b[1;32m 762\u001b[0m data, axes\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mindex\u001b[39m\u001b[39m\"\u001b[39m: index, \u001b[39m\"\u001b[39m\u001b[39mcolumns\u001b[39m\u001b[39m\"\u001b[39m: columns}, dtype\u001b[39m=\u001b[39mdtype, copy\u001b[39m=\u001b[39mcopy\n", + "\u001b[1;32m 763\u001b[0m )\n", + "\u001b[1;32m 765\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(data, \u001b[39mdict\u001b[39m):\n", + "\u001b[1;32m 766\u001b[0m \u001b[39m# GH#38939 de facto copy defaults to False only in non-dict cases\u001b[39;00m\n", + 
"\u001b[0;32m--> 767\u001b[0m mgr \u001b[39m=\u001b[39m dict_to_mgr(data, index, columns, dtype\u001b[39m=\u001b[39;49mdtype, copy\u001b[39m=\u001b[39;49mcopy, typ\u001b[39m=\u001b[39;49mmanager)\n", + "\u001b[1;32m 768\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(data, ma\u001b[39m.\u001b[39mMaskedArray):\n", + "\u001b[1;32m 769\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mnumpy\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mma\u001b[39;00m \u001b[39mimport\u001b[39;00m mrecords\n", + "\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/internals/construction.py:503\u001b[0m, in \u001b[0;36mdict_to_mgr\u001b[0;34m(data, index, columns, dtype, typ, copy)\u001b[0m\n", + "\u001b[1;32m 499\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "\u001b[1;32m 500\u001b[0m \u001b[39m# dtype check to exclude e.g. range objects, scalars\u001b[39;00m\n", + "\u001b[1;32m 501\u001b[0m arrays \u001b[39m=\u001b[39m [x\u001b[39m.\u001b[39mcopy() \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(x, \u001b[39m\"\u001b[39m\u001b[39mdtype\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39melse\u001b[39;00m x \u001b[39mfor\u001b[39;00m x \u001b[39min\u001b[39;00m arrays]\n", + "\u001b[0;32m--> 503\u001b[0m \u001b[39mreturn\u001b[39;00m arrays_to_mgr(arrays, columns, index, dtype\u001b[39m=\u001b[39;49mdtype, typ\u001b[39m=\u001b[39;49mtyp, consolidate\u001b[39m=\u001b[39;49mcopy)\n", + "\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/internals/construction.py:114\u001b[0m, in \u001b[0;36marrays_to_mgr\u001b[0;34m(arrays, columns, index, dtype, verify_integrity, typ, consolidate)\u001b[0m\n", + "\u001b[1;32m 111\u001b[0m \u001b[39mif\u001b[39;00m verify_integrity:\n", + "\u001b[1;32m 112\u001b[0m \u001b[39m# figure out the index, if necessary\u001b[39;00m\n", + "\u001b[1;32m 113\u001b[0m \u001b[39mif\u001b[39;00m index \u001b[39mis\u001b[39;00m 
\u001b[39mNone\u001b[39;00m:\n", + "\u001b[0;32m--> 114\u001b[0m index \u001b[39m=\u001b[39m _extract_index(arrays)\n", + "\u001b[1;32m 115\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "\u001b[1;32m 116\u001b[0m index \u001b[39m=\u001b[39m ensure_index(index)\n", + "\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/internals/construction.py:677\u001b[0m, in \u001b[0;36m_extract_index\u001b[0;34m(data)\u001b[0m\n", + "\u001b[1;32m 675\u001b[0m lengths \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(\u001b[39mset\u001b[39m(raw_lengths))\n", + "\u001b[1;32m 676\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(lengths) \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n", + "\u001b[0;32m--> 677\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mAll arrays must be of the same length\u001b[39m\u001b[39m\"\u001b[39m)\n", + "\u001b[1;32m 679\u001b[0m \u001b[39mif\u001b[39;00m have_dicts:\n", + "\u001b[1;32m 680\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n", + "\u001b[1;32m 681\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mMixing dicts with non-Series may lead to ambiguous ordering.\u001b[39m\u001b[39m\"\u001b[39m\n", + "\u001b[1;32m 682\u001b[0m )\n", + "\n", + "\u001b[0;31mValueError\u001b[0m: All arrays must be of the same length" + ] + } + ], + "source": [ + "# VRS Variant Mapping - Coding Scoresets\n", + "dp = SeqRepoDataProxy(sr = sr)\n", + "tr = AlleleTranslator(data_proxy = dp, normalize = False)\n", + "qh = QueryHandler(create_db())\n", + "vrs_mappings_dict = {}\n", + "scores_dict_coding = {}\n", + "mavedb_ids_coding = {}\n", + "\n", + "mave_dat = pd.read_csv('mave_dat.csv')\n", + "dat = mave_dat\n", + "\n", + "# for each urn in the mave data requested from mavedb:\n", + "for i in range(len(dat.index)):\n", + " # this section only processes protein coding sequences\n", + " if dat.at[i, 'target_type'] == 'Protein coding' or 
dat.at[i, 'target_type'] == 'protein_coding':\n", + " # if there is a mapping entry for this urn:\n", + " if dat.at[i, 'urn'] in mappings_dict.keys():\n", + " print(dat.at[i, 'urn'])\n", + " # grab the urn's mapping entry\n", + " item = mappings_dict[dat.at[i, 'urn']]\n", + " # get scoreset for this urn from mavedb\n", + " string = 'https://api.mavedb.org/api/v1/score-sets/' + mave_dat.at[i, 'urn']+ '/scores'\n", + " origdat = requests.get(string).content\n", + " vardat = pd.read_csv(io.StringIO(origdat.decode('utf-8')))\n", + " scores = vardat['score'].to_list()\n", + " accessions = vardat['accession'].to_list()\n", + " \n", + " mappings_list = []\n", + " scores_list = []\n", + " accessions_list = []\n", + " \n", + " # Process protein column\n", + " var_ids_pre_map = []\n", + " var_ids_post_map = []\n", + " \n", + " if len(item) != 0:\n", + " np = item[0]\n", + " offset = item[1]\n", + " varm = vardat['hgvs_pro']\n", + " \n", + " ts = dat.at[i, 'target_sequence']\n", + " if len(set(str(ts))) > 4:\n", + " stri = str(ts)\n", + " \n", + " else:\n", + " ts = Seq(ts)\n", + " ts = str(ts.translate(table=1)).replace('*', '')\n", + " \n", + " digest = 'SQ.' 
+ sha512t24u(ts.encode('ascii'))\n", + " alias_dict_list = [{'namespace': 'ga4gh', 'alias': digest}]\n", + " sr.store(ts, nsaliases = alias_dict_list) # Add custom digest to SeqRepo\n", + " \n", + " spro = []\n", + " accpro = []\n", + " \n", + " for j in range(len(varm)):\n", + " if type(varm[j]) != str or len(varm[j]) == 3 or varm[j] == '_wt' or varm[j] == '_sy':\n", + " continue\n", + " if varm[j].startswith('NP') == True:\n", + " var_ids_pre_map.append(tr.translate_from(varm[j], 'hgvs'))\n", + " var_ids_post_map.append(tr.translate_from(varm[j], 'hgvs'))\n", + " spro.append(scores[j])\n", + " accpro.append(accessions[j])\n", + " else:\n", + " try:\n", + " if np.startswith('N') == True:\n", + " var_ids_pre_map.append(get_haplotype_allele_mavehgvs(varm[j], np, 0, 'p', tr, dp, ts, 'pre', '', '', ''))\n", + " var_ids_post_map.append(get_haplotype_allele_mavehgvs(varm[j], np, offset, 'p', tr, dp, ts, 'post', '', '', ''))\n", + " spro.append(scores[j])\n", + " accpro.append(accessions[j])\n", + " else:\n", + " var_ids_pre_map.append(get_haplotype_allele_mavehgvs(varm[j], np, 0, 'p', tr, dp, ts, 'pre', '', '', ''))\n", + " # TODO ranges and hits don't actually get used by get_haplotype_allele, are they intended to be used here?\n", + " # what is the 'np' that we're expecting here if it doesn't start with 'N'?\n", + " var_ids_post_map.append(get_haplotype_allele_mavehgvs(varm[j], np, offset, 'p', tr, dp, ts, 'post', ranges, hits, ''))\n", + " spro.append(scores[j])\n", + " accpro.append(accessions[j])\n", + " except:\n", + " continue\n", + " \n", + " tempdat = pd.DataFrame({'pre_mapping': var_ids_pre_map, 'mapped': var_ids_post_map})\n", + " mappings_list.append(tempdat)\n", + " scores_list.append(spro)\n", + " accessions_list.append(accpro)\n", + " \n", + " # Process nt column if data present\n", + " if vardat['hgvs_nt'].isnull().values.all() == False and '97' not in dat.at[i, 'urn']:\n", + " var_ids_pre_map = []\n", + " var_ids_post_map = []\n", + " \n", + " item = 
mave_blat_dict[dat.at[i, 'urn']]\n", + " ranges = get_locs_list(item['hits'])\n", + " hits = get_hits_list(item['hits'])\n", + " ref = get_chr(dp, item['chrom'])\n", + " ts = dat.at[i, 'target_sequence']\n", + " strand = mave_blat_dict[dat.at[i, 'urn']]['strand']\n", + " \n", + " digest = 'SQ.' + sha512t24u(ts.encode('ascii'))\n", + " alias_dict_list = [{'namespace': 'ga4gh', 'alias': digest}]\n", + " sr.store(ts, nsaliases = alias_dict_list) # Add custom digest to SeqRepo\n", + " \n", + " ntlist = vardat['hgvs_nt']\n", + " varm = vardat['hgvs_pro']\n", + " sn = []\n", + " accn = []\n", + " \n", + " for j in range(len(ntlist)):\n", + " if type(ntlist[j]) != str or ntlist[j] == '_wt' or ntlist[j] == '_sy':\n", + " continue\n", + " else:\n", + " try:\n", + " var_ids_pre_map.append(get_haplotype_allele_mavehgvs(ntlist[j][2:], ref, 0, 'g', tr, dp, ts,'pre', ranges, hits, strand).as_dict())\n", + " var_ids_post_map.append(get_haplotype_allele_mavehgvs(ntlist[j][2:], ref, 0, 'g', tr, dp, ts,'post', ranges, hits, strand).as_dict())\n", + " sn.append(scores[j])\n", + " accn.append(accessions[j])\n", + " except:\n", + " continue\n", + "\n", + " tempdat = pd.DataFrame({'pre_mapping': var_ids_pre_map, 'mapped': var_ids_post_map})\n", + " mappings_list.append(tempdat)\n", + " scores_list.append(sn)\n", + " accessions_list.append(accn)\n", + " \n", + " vrs_mappings_dict[dat.at[i, 'urn']] = mappings_list\n", + " scores_dict_coding[dat.at[i, 'urn']] = scores_list\n", + " mavedb_ids_coding[dat.at[i, 'urn']] = accessions_list\n", + "vrs_mappings_dict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70258f03", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "urn:mavedb:00000018-a-1\n", + "index: 2\n" + ] + }, + { + "ename": "ValueError", + "evalue": "All arrays must be of the same length", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)\n", + "\u001b[1;32m/Users/sallybg/workspace/varianteffect/dcd_mapping/notebooks/sally/mavedb_mapping.ipynb Cell 25\u001b[0m line \u001b[0;36m5\n", + "\u001b[1;32m 52\u001b[0m \u001b[39mexcept\u001b[39;00m:\n", + "\u001b[1;32m 53\u001b[0m \u001b[39mcontinue\u001b[39;00m\n", + "\u001b[0;32m---> 55\u001b[0m tempdat \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mDataFrame({\u001b[39m'\u001b[39;49m\u001b[39mpre_mapping\u001b[39;49m\u001b[39m'\u001b[39;49m: var_ids_pre_map, \u001b[39m'\u001b[39;49m\u001b[39mmapped\u001b[39;49m\u001b[39m'\u001b[39;49m: var_ids_post_map})\n", + "\u001b[1;32m 56\u001b[0m vrs_noncoding_mappings_dict[dat\u001b[39m.\u001b[39mat[i, \u001b[39m'\u001b[39m\u001b[39murn\u001b[39m\u001b[39m'\u001b[39m]] \u001b[39m=\u001b[39m tempdat\n", + "\u001b[1;32m 57\u001b[0m scores_dict_noncoding[dat\u001b[39m.\u001b[39mat[i, \u001b[39m'\u001b[39m\u001b[39murn\u001b[39m\u001b[39m'\u001b[39m]] \u001b[39m=\u001b[39m scores_list\n", + "\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/frame.py:767\u001b[0m, in \u001b[0;36mDataFrame.__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n", + "\u001b[1;32m 761\u001b[0m mgr \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_init_mgr(\n", + "\u001b[1;32m 762\u001b[0m data, axes\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mindex\u001b[39m\u001b[39m\"\u001b[39m: index, \u001b[39m\"\u001b[39m\u001b[39mcolumns\u001b[39m\u001b[39m\"\u001b[39m: columns}, dtype\u001b[39m=\u001b[39mdtype, copy\u001b[39m=\u001b[39mcopy\n", + "\u001b[1;32m 763\u001b[0m )\n", + "\u001b[1;32m 765\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(data, \u001b[39mdict\u001b[39m):\n", + "\u001b[1;32m 766\u001b[0m \u001b[39m# GH#38939 de facto copy defaults to 
False only in non-dict cases\u001b[39;00m\n", + "\u001b[0;32m--> 767\u001b[0m mgr \u001b[39m=\u001b[39m dict_to_mgr(data, index, columns, dtype\u001b[39m=\u001b[39;49mdtype, copy\u001b[39m=\u001b[39;49mcopy, typ\u001b[39m=\u001b[39;49mmanager)\n", + "\u001b[1;32m 768\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(data, ma\u001b[39m.\u001b[39mMaskedArray):\n", + "\u001b[1;32m 769\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mnumpy\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mma\u001b[39;00m \u001b[39mimport\u001b[39;00m mrecords\n", + "\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/internals/construction.py:503\u001b[0m, in \u001b[0;36mdict_to_mgr\u001b[0;34m(data, index, columns, dtype, typ, copy)\u001b[0m\n", + "\u001b[1;32m 499\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "\u001b[1;32m 500\u001b[0m \u001b[39m# dtype check to exclude e.g. range objects, scalars\u001b[39;00m\n", + "\u001b[1;32m 501\u001b[0m arrays \u001b[39m=\u001b[39m [x\u001b[39m.\u001b[39mcopy() \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(x, \u001b[39m\"\u001b[39m\u001b[39mdtype\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39melse\u001b[39;00m x \u001b[39mfor\u001b[39;00m x \u001b[39min\u001b[39;00m arrays]\n", + "\u001b[0;32m--> 503\u001b[0m \u001b[39mreturn\u001b[39;00m arrays_to_mgr(arrays, columns, index, dtype\u001b[39m=\u001b[39;49mdtype, typ\u001b[39m=\u001b[39;49mtyp, consolidate\u001b[39m=\u001b[39;49mcopy)\n", + "\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/internals/construction.py:114\u001b[0m, in \u001b[0;36marrays_to_mgr\u001b[0;34m(arrays, columns, index, dtype, verify_integrity, typ, consolidate)\u001b[0m\n", + "\u001b[1;32m 111\u001b[0m \u001b[39mif\u001b[39;00m verify_integrity:\n", + "\u001b[1;32m 112\u001b[0m \u001b[39m# figure out the index, if necessary\u001b[39;00m\n", + "\u001b[1;32m 113\u001b[0m 
\u001b[39mif\u001b[39;00m index \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", + "\u001b[0;32m--> 114\u001b[0m index \u001b[39m=\u001b[39m _extract_index(arrays)\n", + "\u001b[1;32m 115\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "\u001b[1;32m 116\u001b[0m index \u001b[39m=\u001b[39m ensure_index(index)\n", + "\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/internals/construction.py:677\u001b[0m, in \u001b[0;36m_extract_index\u001b[0;34m(data)\u001b[0m\n", + "\u001b[1;32m 675\u001b[0m lengths \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(\u001b[39mset\u001b[39m(raw_lengths))\n", + "\u001b[1;32m 676\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(lengths) \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n", + "\u001b[0;32m--> 677\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mAll arrays must be of the same length\u001b[39m\u001b[39m\"\u001b[39m)\n", + "\u001b[1;32m 679\u001b[0m \u001b[39mif\u001b[39;00m have_dicts:\n", + "\u001b[1;32m 680\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n", + "\u001b[1;32m 681\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mMixing dicts with non-Series may lead to ambiguous ordering.\u001b[39m\u001b[39m\"\u001b[39m\n", + "\u001b[1;32m 682\u001b[0m )\n", + "\n", + "\u001b[0;31mValueError\u001b[0m: All arrays must be of the same length" + ] + } + ], + "source": [ + "# VRS variant mapping non-protein coding scoresets\n", + "dp = SeqRepoDataProxy(sr = sr)\n", + "tr = AlleleTranslator(data_proxy = dp, normalize = False)\n", + "qh = QueryHandler(create_db())\n", + "vrs_noncoding_mappings_dict = {}\n", + "scores_dict_noncoding = {}\n", + "mavedb_ids_noncoding = {}\n", + "\n", + "mave_dat = pd.read_csv('mave_dat.csv')\n", + "dat = mave_dat\n", + "\n", + "for i in range(len(dat.index)):\n", + " if dat.at[i, 'target_type'] != 'Protein coding' and dat.at[i, 'target_type'] != 
'protein_coding':\n", + " print(dat.at[i, 'urn'])\n", + " item = mave_blat_dict[dat.at[i, 'urn']]\n", + " #if blat_check(i) == False:\n", + " # vrs_noncoding_mappings_dict[dat.at[i, 'urn']] = 'BLAT hit not found on correct chromosome'\n", + " # continue\n", + " #ranges = get_locs_list(item['hits'])[0]\n", + " string = string = 'https://api.mavedb.org/api/v1/score-sets/' + mave_dat.at[i, 'urn']+ '/scores'\n", + " origdat = requests.get(string).content\n", + " varsdat = pd.read_csv(io.StringIO(origdat.decode('utf-8')))\n", + " ntlist = varsdat['hgvs_nt'].to_list()\n", + " \n", + " var_ids_pre_map = []\n", + " var_ids_post_map = []\n", + " ranges = get_locs_list(item['hits'])\n", + " ref = get_chr(dp, item['chrom'])\n", + " hits = get_hits_list(item['hits'])\n", + " strand = mave_blat_dict[dat.at[i, 'urn']]['strand']\n", + " \n", + " ts = dat.at[i, 'target_sequence']\n", + " digest = 'SQ.' + sha512t24u(ts.encode('ascii'))\n", + " alias_dict_list = [{'namespace': 'ga4gh', 'alias': digest}]\n", + " sr.store(ts, nsaliases = alias_dict_list) # Add custom digest to SeqRepo\n", + " \n", + " scores = varsdat['score'].to_list()\n", + " scores_list = []\n", + " accessions = varsdat['accession'].to_list()\n", + " accessions_list = []\n", + "\n", + " for j in range(len(ntlist)):\n", + " if ntlist[j] == '_wt' or ntlist[j] == '_sy':\n", + " continue\n", + " else:\n", + " try:\n", + " var_ids_pre_map.append(get_haplotype_allele_temp(ntlist[j][2:], ref, 0, 'g', tr, dp, ts, 'pre', ranges, hits, strand))\n", + " var_ids_post_map.append(get_haplotype_allele_temp(ntlist[j][2:], ref, 0, 'g', tr, dp, ts, 'post', ranges, hits, strand))\n", + " scores_list.append(scores[j])\n", + " accessions_list.append(accessions[j])\n", + " except:\n", + " continue\n", + " \n", + " tempdat = pd.DataFrame({'pre_mapping': var_ids_pre_map, 'mapped': var_ids_post_map})\n", + " vrs_noncoding_mappings_dict[dat.at[i, 'urn']] = tempdat\n", + " scores_dict_noncoding[dat.at[i, 'urn']] = scores_list\n", + " 
mavedb_ids_noncoding[dat.at[i, 'urn']] = accessions_list\n", + "\n", + "vrs_noncoding_mappings_dict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "543a1e3a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16d05568", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f00b962", + "metadata": {}, + "outputs": [], + "source": [ + "# below this is stuff that I used for testing and dev" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "2c2ed68b", + "metadata": {}, + "outputs": [], + "source": [ + "# temp - get single score set for protein coding gene, for testing\n", + "\n", + "dp = SeqRepoDataProxy(sr = sr)\n", + "tr = AlleleTranslator(data_proxy = dp, normalize = False)\n", + "qh = QueryHandler(create_db())\n", + "vrs_mappings_dict = {}\n", + "scores_dict_coding = {}\n", + "mavedb_ids_coding = {}\n", + "\n", + "mave_dat = pd.read_csv('mave_dat.csv')\n", + "dat = mave_dat\n", + "\n", + "item = mappings_dict[dat.at[0, 'urn']]\n", + "\n", + "string = 'https://api.mavedb.org/api/v1/score-sets/' + mave_dat.at[0, 'urn']+ '/scores'\n", + "origdat = requests.get(string).content\n", + "vardat = pd.read_csv(io.StringIO(origdat.decode('utf-8')))\n", + "scores = vardat['score'].to_list()\n", + "accessions = vardat['accession'].to_list()\n", + "\n", + "mappings_list = []\n", + "scores_list = []\n", + "accessions_list = []\n", + "\n", + "# Process protein column\n", + "var_ids_pre_map = []\n", + "var_ids_post_map = []\n", + "\n", + "if len(item) != 0:\n", + " np = item[0]\n", + " offset = item[1]\n", + "varm = vardat['hgvs_pro']\n", + "\n", + "ts = dat.at[0, 'target_sequence']\n", + "if len(set(str(ts))) > 4:\n", + " stri = str(ts)\n", + "\n", + "else:\n", + " ts = Seq(ts)\n", + " ts = str(ts.translate(table=1)).replace('*', '')\n", + " \n", + "digest = 'SQ.' 
+ sha512t24u(ts.encode('ascii'))\n", + "alias_dict_list = [{'namespace': 'ga4gh', 'alias': digest}]\n", + "sr.store(ts, nsaliases = alias_dict_list) # Add custom digest to SeqRepo\n", + "\n", + "spro = []\n", + "accpro = []" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "id": "3ba5943e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "urn:mavedb:00000018-a-1\n" + ] + } + ], + "source": [ + "# temp - get single scoreset for non-protein coding gene, for testing\n", + "\n", + "dp = SeqRepoDataProxy(sr = sr)\n", + "tr = AlleleTranslator(data_proxy = dp, normalize = False)\n", + "qh = QueryHandler(create_db())\n", + "vrs_noncoding_mappings_dict = {}\n", + "scores_dict_noncoding = {}\n", + "mavedb_ids_noncoding = {}\n", + "\n", + "mave_dat = pd.read_csv('mave_dat.csv')\n", + "dat = mave_dat\n", + "\n", + "print(dat.at[2, 'urn'])\n", + "item = mave_blat_dict[dat.at[2, 'urn']]\n", + "#if blat_check(i) == False:\n", + " # vrs_noncoding_mappings_dict[dat.at[i, 'urn']] = 'BLAT hit not found on correct chromosome'\n", + " # continue\n", + "#ranges = get_locs_list(item['hits'])[0]\n", + "string = string = 'https://api.mavedb.org/api/v1/score-sets/' + mave_dat.at[2, 'urn']+ '/scores'\n", + "origdat = requests.get(string).content\n", + "varsdat = pd.read_csv(io.StringIO(origdat.decode('utf-8')))\n", + "ntlist = varsdat['hgvs_nt'].to_list()\n", + "\n", + "var_ids_pre_map = []\n", + "var_ids_post_map = []\n", + "ranges = get_locs_list(item['hits'])\n", + "ref = get_chr(dp, item['chrom'])\n", + "hits = get_hits_list(item['hits'])\n", + "strand = mave_blat_dict[dat.at[2, 'urn']]['strand']\n", + "\n", + "ts = dat.at[2, 'target_sequence']\n", + "digest = 'SQ.' 
+ sha512t24u(ts.encode('ascii'))\n", + "alias_dict_list = [{'namespace': 'ga4gh', 'alias': digest}]\n", + "sr.store(ts, nsaliases = alias_dict_list) # Add custom digest to SeqRepo\n", + "\n", + "scores = varsdat['score'].to_list()\n", + "scores_list = []\n", + "accessions = varsdat['accession'].to_list()\n", + "accessions_list = []" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "id": "e5ecb85d", + "metadata": {}, + "outputs": [], + "source": [ + "# get rev comp score set for testing\n", + "dp = SeqRepoDataProxy(sr = sr)\n", + "tr = AlleleTranslator(data_proxy = dp, normalize = False)\n", + "qh = QueryHandler(create_db())\n", + "vrs_mappings_dict = {}\n", + "scores_dict_coding = {}\n", + "mavedb_ids_coding = {}\n", + "\n", + "i = 9\n", + "\n", + "item = mappings_dict[dat.at[i, 'urn']]\n", + "#if blat_check(i) == False:\n", + " # vrs_mappings_dict[dat.at[i, 'urn']] = 'BLAT hit not found on correct chromosome'\n", + " # continue\n", + "# get scoreset for this urn from mavedb\n", + "string = 'https://api.mavedb.org/api/v1/score-sets/' + mave_dat.at[i, 'urn']+ '/scores'\n", + "origdat = requests.get(string).content\n", + "vardat = pd.read_csv(io.StringIO(origdat.decode('utf-8')))\n", + "scores = vardat['score'].to_list()\n", + "accessions = vardat['accession'].to_list()\n", + "\n", + "mappings_list = []\n", + "scores_list = []\n", + "accessions_list = []\n", + "\n", + "item = mave_blat_dict[dat.at[9, 'urn']]\n", + "ranges = get_locs_list(item['hits'])\n", + "hits = get_hits_list(item['hits'])\n", + "ref = get_chr(dp, item['chrom'])\n", + "ts = dat.at[9, 'target_sequence']\n", + "strand = mave_blat_dict[dat.at[9, 'urn']]['strand']\n", + "\n", + "digest = 'SQ.' 
+ sha512t24u(ts.encode('ascii'))\n", + "alias_dict_list = [{'namespace': 'ga4gh', 'alias': digest}]\n", + "sr.store(ts, nsaliases = alias_dict_list) # Add custom digest to SeqRepo\n", + "\n", + "ntlist = vardat['hgvs_nt']\n", + "varm = vardat['hgvs_pro']\n", + "sn = []\n", + "accn = []" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "id": "3f25103b", + "metadata": {}, + "outputs": [], + "source": [ + "# get scoreset with frameshifts for testing\n", + "dp = SeqRepoDataProxy(sr = sr)\n", + "tr = AlleleTranslator(data_proxy = dp, normalize = False)\n", + "qh = QueryHandler(create_db())\n", + "vrs_mappings_dict = {}\n", + "scores_dict_coding = {}\n", + "mavedb_ids_coding = {}\n", + "\n", + "i = 10\n", + "\n", + "item = mappings_dict[dat.at[i, 'urn']]\n", + "#if blat_check(i) == False:\n", + " # vrs_mappings_dict[dat.at[i, 'urn']] = 'BLAT hit not found on correct chromosome'\n", + " # continue\n", + "# get scoreset for this urn from mavedb\n", + "string = 'https://api.mavedb.org/api/v1/score-sets/' + mave_dat.at[i, 'urn']+ '/scores'\n", + "origdat = requests.get(string).content\n", + "vardat = pd.read_csv(io.StringIO(origdat.decode('utf-8')))\n", + "scores = vardat['score'].to_list()\n", + "accessions = vardat['accession'].to_list()\n", + "\n", + "mappings_list = []\n", + "scores_list = []\n", + "accessions_list = []\n", + "\n", + "item = mave_blat_dict[dat.at[i, 'urn']]\n", + "ranges = get_locs_list(item['hits'])\n", + "hits = get_hits_list(item['hits'])\n", + "ref = get_chr(dp, item['chrom'])\n", + "ts = dat.at[i, 'target_sequence']\n", + "strand = mave_blat_dict[dat.at[i, 'urn']]['strand']\n", + "\n", + "digest = 'SQ.' 
+ sha512t24u(ts.encode('ascii'))\n", + "alias_dict_list = [{'namespace': 'ga4gh', 'alias': digest}]\n", + "sr.store(ts, nsaliases = alias_dict_list) # Add custom digest to SeqRepo\n", + "\n", + "ntlist = vardat['hgvs_nt']\n", + "varm = vardat['hgvs_pro']\n", + "sn = []\n", + "accn = []" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "5f8c1378", + "metadata": {}, + "outputs": [], + "source": [ + "def get_haplotype_allele_temp(var, ref, offset, l, tr, dp, ts, mapped, ranges, hits, strand):\n", + " var = var.lstrip(f'{l}.')\n", + " if '[' in var:\n", + " var = var[1:][:-1]\n", + " varlist = var.split(';')\n", + " varlist = list(set(varlist))\n", + " else:\n", + " varlist = list()\n", + " varlist.append(var)\n", + "\n", + " locs = {}\n", + " alleles = []\n", + "\n", + " for i in range(len(varlist)):\n", + " hgvs_string = ref + ':'+ l +'.' + varlist[i]\n", + " allele = tr.translate_from(hgvs_string, 'hgvs')\n", + " \n", + " if mapped == 'pre':\n", + " allele.location.sequenceReference.refgetAccession = 'SQ.' + sha512t24u(ts.encode('ascii'))\n", + " # dups haven't been tested yet, need to find a test case\n", + " if 'dup' in hgvs_string:\n", + " print('dup in hgvs string. this has not been tested yet, review output.')\n", + " allele.state.sequence.root = 2*str(sr[str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " \n", + " else:\n", + " if l != 'g':\n", + " allele.location.start = allele.location.start + offset\n", + " allele.location.end = allele.location.end + offset\n", + " # dups haven't been fixed yet, need to find a test case\n", + " if 'dup' in hgvs_string:\n", + " # not sure if this needs to be allele.state.sequence.root\n", + " print('dup in hgvs string. 
this has not been tested yet, review output.')\n", + " allele.state.sequence.root = 2*str(sr[str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " \n", + " else:\n", + " start = allele.location.start\n", + " if len(hits) == 1 and strand == 1:\n", + " i = 0\n", + " diff = start - hits[i][0]\n", + " diff2 = allele.location.end - start\n", + " allele.location.start = ranges[i][0] + diff\n", + " allele.location.end = allele.location.start + diff2\n", + " else:\n", + " for i in range(len(hits)):\n", + " if start >= hits[i][0] and start < hits[i][1]:\n", + " break\n", + " diff = start - hits[i][0]\n", + " diff2 = allele.location.end - start\n", + " if strand == 1: # positive orientation\n", + " allele.location.start = ranges[i][0] + diff\n", + " allele.location.end = allele.location.start + diff2\n", + " # haven't fixed dups yet, need test case\n", + " if 'dup' in hgvs_string:\n", + " print('dup in hgvs string. this has not been tested yet, review output.')\n", + " allele.state.sequence.root = 2*str(sr[\"ga4gh:\" + str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " else: \n", + " allele.location.start = ranges[i][1] - diff - diff2\n", + " allele.location.end = allele.location.start + diff2\n", + " # haven't fixed dups yet, need test case\n", + " if 'dup' in hgvs_string:\n", + " print('dup in hgvs string. this has not been tested yet, review output.')\n", + " allele.state.sequence.root = 2*str(sr[str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " # haven't tested rev comp yet, need test case\n", + " print('this is a rev comp. 
this has not been tested yet, review output.')\n", + " allele.state.sequence.root = str(Seq(str(allele.state.sequence.root)).reverse_complement())\n", + " \n", + " # haven't fixed this if block yet, need test case\n", + " # not sure if this needs to be allele.state.sequence.root\n", + " if allele.state.sequence.root == 'N' and l != 'p':\n", + " print('sequence is N. this has not been tested yet, review output.')\n", + " allele.state.sequence.root = str(sr[str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " print('pre-normalized sequence: ' + allele.state.sequence.root)\n", + " print(allele)\n", + " allele = normalize(allele, data_proxy = dp) \n", + " print('post-normalized sequence: ' + allele.state.sequence.root)\n", + " allele.id = ga4gh_identify(allele)\n", + " alleles.append(allele)\n", + " \n", + " if len(alleles) == 1: # Not haplotype\n", + " return alleles[0]\n", + " else:\n", + " return models.Haplotype(members = alleles)\n", + "\n", + "# protein coding\n", + "#pre\n", + "#get_haplotype_allele_temp(varm[0], np, 0, 'p', tr, dp, ts, 'pre', '', '', '')\n", + "#post\n", + "#get_haplotype_allele_temp(varm[0], np, offset, 'p', tr, dp, ts, 'post', '', '', '')\n", + "# post, protein coding with nt hgvs column and target seq on rev strand\n", + "#get_haplotype_allele_temp(ntlist[0][2:], ref, 0, 'g', tr, dp, ts,'post', ranges, hits, strand)\n", + " \n", + "# non protein coding\n", + "# pre already works\n", + "# post\n", + "#get_haplotype_allele_temp(ntlist[0][2:], ref, 0, 'g', tr, dp, ts, 'post', ranges, hits, strand)\n", + "# variant with 'dup' in hgvs_nt\n", + "#get_haplotype_allele_temp(ntlist[17][2:], ref, 0, 'g', tr, dp, ts, 'post', ranges, hits, strand)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "9079ab1d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'chrom': '17',\n", + " 'strand': -1,\n", + " 'target': 'BRCA1 Exon 15',\n", + " 'target_type': 'Protein 
coding',\n", + " 'uniprot': nan,\n", + " 'coverage': '106 / 106, 100.0',\n", + " 'identity': 100.0,\n", + " 'hits': query_ranges hit_ranges\n", + " 0 [0:106] [43070917:43071023]}" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# view blat dict for target on rev strand\n", + "mave_blat_dict['urn:mavedb:00000097-i-1']" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "4b6b9fd8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "ename": "NotImplementedError", + "evalue": "Post-map VRS translation for protein-coding variants spanning multiple positions has not been implemented.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/Users/sallybg/workspace/varianteffect/dcd_mapping/notebooks/sally/mavedb_mapping.ipynb Cell 28\u001b[0m line \u001b[0;36m4\n\u001b[1;32m 2\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mtype\u001b[39m(parsed_hgvs\u001b[39m.\u001b[39mpositions))\n\u001b[1;32m 3\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(parsed_hgvs\u001b[39m.\u001b[39mpositions, mavehgvs\u001b[39m.\u001b[39mposition\u001b[39m.\u001b[39mVariantPosition):\n\u001b[0;32m----> 4\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mNotImplementedError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mPost-map VRS translation for protein-coding variants spanning multiple positions has not been implemented.\u001b[39m\u001b[39m\"\u001b[39m)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Post-map VRS translation for protein-coding variants spanning multiple positions has not been implemented." 
+ ] + } + ], + "source": [ + "parsed_hgvs = mavehgvs.util.parse_variant_strings(['c.78+5_78+10del'])[0][0]\n", + "print(type(parsed_hgvs.positions))\n", + "if not isinstance(parsed_hgvs.positions, mavehgvs.position.VariantPosition):\n", + " raise NotImplementedError(\"Post-map VRS translation for protein-coding variants spanning multiple positions has not been implemented.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "d8275d6b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'_target_id': None, 'variant_count': 1, '_prefix': 'c', '_variant_types': 'sub', '_positions': 106, '_sequences': ('T', 'G')}\n", + "G\n", + "106\n", + "\n", + "c.106A>C\n" + ] + } + ], + "source": [ + "import mavehgvs\n", + "parsed_hgvs = mavehgvs.util.parse_variant_strings(['c.' + '106T>G'])[0][0]\n", + "print(parsed_hgvs.__dict__)\n", + "print(parsed_hgvs._sequences[1])\n", + "print(parsed_hgvs.positions)\n", + "print(type(parsed_hgvs.positions))\n", + "#parsed_hgvs._sequences[1] = str(Seq(str(parsed_hgvs._sequences[1])).reverse_complement())\n", + "if not isinstance(parsed_hgvs.positions, mavehgvs.position.VariantPosition):\n", + " raise NotImplementedError(\"Post-map VRS translation for protein-coding variants spanning multiple positions has not been implemented.\")\n", + "\n", + "# rev comp each sequence, assuming [0] is original and [1] is variant\n", + "\n", + "revcomp_sequences_list = []\n", + "for sequence in parsed_hgvs._sequences:\n", + " revcomp_sequences_list.append(str(Seq(sequence).reverse_complement()))\n", + "parsed_hgvs._sequences = tuple(revcomp_sequences_list)\n", + "print(parsed_hgvs)\n", + "# parsed_hgvs._sequences = tuple(parsed_hgvs._sequences[0], str(Seq(str(parsed_hgvs._sequences[1])).reverse_complement()))\n", + "# print(parsed_hgvs)" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "85b4154f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "1021\n", + "None\n", + "Glu341\n", + "None\n" + ] + } + ], + "source": [ + "parsed_hgvs = mavehgvs.util.parse_variant_strings(['c.1021dup'])[0][0]\n", + "print(parsed_hgvs.positions)\n", + "print(parsed_hgvs._sequences)\n", + "\n", + "parsed_hgvs = mavehgvs.util.parse_variant_strings(['p.Glu341fs'])[0][0]\n", + "print(parsed_hgvs.positions)\n", + "print(parsed_hgvs._sequences)" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "id": "6ef99803", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['urn:mavedb:00000041-a-1', 'urn:mavedb:00000048-a-1', 'urn:mavedb:00000018-a-1', 'urn:mavedb:00000107-a-1', 'urn:mavedb:00000103-d-1', 'urn:mavedb:00000029-a-2', 'urn:mavedb:00000061-b-1', 'urn:mavedb:00000097-q-1', 'urn:mavedb:00000003-a-1', 'urn:mavedb:00000097-i-1', 'urn:mavedb:00000099-a-1'])" + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mave_blat_dict.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "id": "9efe8983", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "None\n", + "(?P(?P(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))fs)\n", + "yes\n" + ] + } + ], + "source": [ + "#hgvs_string = ref + ':'+ l +'.' 
+ varlist[i]\n", + "parsed_hgvs = mavehgvs.util.parse_variant_strings(['p.Glu341fs'])[0][0]\n", + "import re\n", + "print(re.search(mavehgvs.patterns.protein.pro_fs, 'p.Glu341fs'))\n", + "print(re.search(mavehgvs.patterns.protein.pro_fs, 'no'))\n", + "print(mavehgvs.patterns.protein.pro_fs)\n", + "\n", + "if re.search(mavehgvs.patterns.protein.pro_fs, 'p.Glu341fs'):\n", + " print('yes')\n", + "if re.search(mavehgvs.patterns.protein.pro_fs, 'no'):\n", + " print('no')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "id": "f573fa8c", + "metadata": {}, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Post-map VRS translation not supported for fs variants denoted with protein hgvs strings", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/Users/sallybg/workspace/varianteffect/dcd_mapping/notebooks/sally/mavedb_mapping.ipynb Cell 37\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 205\u001b[0m \u001b[39mreturn\u001b[39;00m models\u001b[39m.\u001b[39mHaplotype(members \u001b[39m=\u001b[39m alleles)\n\u001b[1;32m 207\u001b[0m \u001b[39m# protein coding\u001b[39;00m\n\u001b[1;32m 208\u001b[0m \u001b[39m#pre\u001b[39;00m\n\u001b[1;32m 209\u001b[0m \u001b[39m#get_haplotype_allele_temp(varm[0], np, 0, 'p', tr, dp, ts, 'pre', '', '', '')\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[39m# 99-a-1 has fs variants, test those\u001b[39;00m\n\u001b[1;32m 259\u001b[0m \u001b[39m#get_haplotype_allele_sally(varm[17], np, 0, 'p', tr, dp, ts, 'pre', '', '', '')\u001b[39;00m\n\u001b[0;32m--> 260\u001b[0m get_haplotype_allele_sally(varm[\u001b[39m17\u001b[39;49m], np, offset, \u001b[39m'\u001b[39;49m\u001b[39mp\u001b[39;49m\u001b[39m'\u001b[39;49m, tr, dp, ts, \u001b[39m'\u001b[39;49m\u001b[39mpost\u001b[39;49m\u001b[39m'\u001b[39;49m, 
\u001b[39m'\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39m'\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39m'\u001b[39;49m\u001b[39m'\u001b[39;49m)\n", + "\u001b[1;32m/Users/sallybg/workspace/varianteffect/dcd_mapping/notebooks/sally/mavedb_mapping.ipynb Cell 37\u001b[0m line \u001b[0;36m6\n\u001b[1;32m 60\u001b[0m \u001b[39m# TODO protein fs hgvs strings are not supported because they can't be handled by vrs allele translator\u001b[39;00m\n\u001b[1;32m 61\u001b[0m \u001b[39mif\u001b[39;00m re\u001b[39m.\u001b[39msearch(mavehgvs\u001b[39m.\u001b[39mpatterns\u001b[39m.\u001b[39mprotein\u001b[39m.\u001b[39mpro_fs, \u001b[39mstr\u001b[39m(parsed_hgvs)):\n\u001b[0;32m---> 62\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mNotImplementedError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mPost-map VRS translation not supported for fs variants denoted with protein hgvs strings\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 64\u001b[0m parsed_hgvs\u001b[39m.\u001b[39mpositions\u001b[39m.\u001b[39mposition \u001b[39m=\u001b[39m parsed_hgvs\u001b[39m.\u001b[39mpositions\u001b[39m.\u001b[39mposition \u001b[39m+\u001b[39m offset\n\u001b[1;32m 65\u001b[0m hgvs_string \u001b[39m=\u001b[39m ref \u001b[39m+\u001b[39m \u001b[39m'\u001b[39m\u001b[39m:\u001b[39m\u001b[39m'\u001b[39m \u001b[39m+\u001b[39m \u001b[39mstr\u001b[39m(parsed_hgvs)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Post-map VRS translation not supported for fs variants denoted with protein hgvs strings" + ] + } + ], + "source": [ + "import re\n", + "import sys\n", + "import mavehgvs\n", + "\n", + "def get_haplotype_allele_sally(var, ref, offset, l, tr, dp, ts, mapped, ranges, hits, strand):\n", + " var = var.lstrip(f'{l}.')\n", + " if '[' in var:\n", + " var = var[1:][:-1]\n", + " varlist = var.split(';')\n", + " varlist = list(set(varlist))\n", + " else:\n", + " varlist = list()\n", + " varlist.append(var)\n", + "\n", + " locs = {}\n", + " alleles = []\n", + "\n", + " for i in range(len(varlist)):\n", + " # 
hgvs_string = ref + ':'+ l +'.' + varlist[i]\n", + " # allele = tr.translate_from(hgvs_string, 'hgvs')\n", + " \n", + " if mapped == 'pre':\n", + " hgvs_string = ref + ':'+ l +'.' + varlist[i]\n", + "\n", + " # TODO protein fs hgvs strings are not supported because they can't be handled by vrs allele translator\n", + " if re.search(mavehgvs.patterns.protein.pro_fs, hgvs_string):\n", + " raise NotImplementedError(\"Pre-map VRS translation not supported for fs variants denoted with protein hgvs strings\")\n", + "\n", + " allele = tr.translate_from(hgvs_string, 'hgvs')\n", + " # it's necessary to update the sequence identifier after translation, rather than including it in the hgvs string,\n", + " # because the hgvs parser expects a digit after the 'SQ.'\n", + " # note: not updating sequence reference until after normalization,\n", + " # because computed sequence identifier should include 'ga4gh:SQ', (see example here https://vrs.ga4gh.org/en/1.1/impl-guide/example.html)\n", + " # and the 'ga4gh:' breaks the normalizer\n", + " #allele.location.sequenceReference.refgetAccession = 'SQ.' + sha512t24u(ts.encode('ascii'))\n", + "\n", + " # dups haven't been tested yet, need to find a test case\n", + " if 'dup' in hgvs_string:\n", + " print('dup in hgvs string. this has not been tested yet, review output.')\n", + " allele.state.sequence.root = 2*str(sr[str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " \n", + " else:\n", + " if l != 'g':\n", + " # TODO do we need to do anything for negative strand if using p. hgvs?\n", + " # expecting protein-based ref, so hgvs string is already mostly correct - just need to calculate offset\n", + " # could parse whole list outside of for loop since this function takes a list\n", + " parsed_hgvs = mavehgvs.util.parse_variant_strings(['p.' 
+ varlist[i]])[0][0]\n", + " # looks like offset is calculated based on amino acids, so this should be correct, but should validate\n", + " # may want to only do this if offset != 0? i guess that depends on how often offset == 0\n", + "\n", + " # TODO positions can be a tuple if there are multiple positions associated with the variant.\n", + " # if positions is a tuple, accessing position like this won't work.\n", + " # so need to check length of parsed_hgvs.positions\n", + " # should we expect multi-position protein variants?\n", + " # looks like yes - example from mavehgvs spec: p.His7_Gln8insSer\n", + " \n", + " if not isinstance(parsed_hgvs.positions, mavehgvs.position.VariantPosition):\n", + " raise NotImplementedError(\"Post-map VRS translation for protein-coding variants spanning multiple positions has not been implemented.\")\n", + " \n", + " # TODO protein fs hgvs strings are not supported because they can't be handled by vrs allele translator\n", + " if re.search(mavehgvs.patterns.protein.pro_fs, str(parsed_hgvs)):\n", + " raise NotImplementedError(\"Post-map VRS translation not supported for fs variants denoted with protein hgvs strings\")\n", + " \n", + " parsed_hgvs.positions.position = parsed_hgvs.positions.position + offset\n", + " hgvs_string = ref + ':' + str(parsed_hgvs)\n", + " allele = tr.translate_from(hgvs_string, 'hgvs')\n", + "\n", + " # allele.location.start = allele.location.start + offset\n", + " # allele.location.end = allele.location.end + offset\n", + " # dups haven't been fixed yet, need to find a test case\n", + " if 'dup' in hgvs_string:\n", + " # not sure if this needs to be allele.state.sequence.root\n", + " print('dup in hgvs string. 
this has not been tested yet, review output.')\n", + " allele.state.sequence.root = 2*str(sr[str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " \n", + " else:\n", + " # TODO not sure if this should be c, n, or g\n", + " # works for now but will need to be correct if we want to return the hgvs string (which we probably do)\n", + " parsed_hgvs = mavehgvs.util.parse_variant_strings(['c.' + varlist[i]])[0][0]\n", + " # start = allele.location.start\n", + " if not isinstance(parsed_hgvs.positions, mavehgvs.position.VariantPosition):\n", + " raise NotImplementedError(\"Post-map VRS translation for non-protein-coding variants spanning multiple positions has not been implemented.\")\n", + "\n", + " start = parsed_hgvs.positions.position - 1 #hgvs uses 1-based numbering for c. sequences, while blat hits are 0-based\n", + "\n", + " # get hit\n", + " if len(hits) == 1:\n", + " i = 0\n", + " else:\n", + " for i in range(len(hits)):\n", + " if start >= hits[i][0] and start < hits[i][1]:\n", + " break\n", + "\n", + " # if hit is on positive strand\n", + " if strand == 1:\n", + " # get variant start relative to the reference (the \"hit\")\n", + " # distance from beginning of query to variant start position:\n", + " query_to_start = start - hits[i][0]\n", + " # distance from beginning of ref to the variant start position:\n", + " ref_to_start = ranges[i][0] + query_to_start\n", + " # hgvs is 1-based, so convert back to 1-based\n", + " parsed_hgvs.positions.position = ref_to_start + 1\n", + " # if hit is on negative strand \n", + " else:\n", + " # in this case, picture the rev comp of the query/variant as mapping to the positive strand of the ref\n", + " # the start of the reverse complement of the variant is the end of the \"original\" variant\n", + " # so we need to know where the end of the original variant is, relative to the query molecule\n", + " # for single-position variants, we'll assume the end (rev comp view) is 
equal to: start - 1 \n", + " # TODO this works for single-position variants only!\n", + " # this error is redundant (should be caught above),\n", + " # but since it's not necessarily obvious that this works for\n", + " # single-position variants only,\n", + " # I'm putting it here as well because development\n", + " # will need to happen here as well in order to support multi-position\n", + " # variants, since diff2 = 1 is ONLY a good assumption for single-position variants\n", + " if not isinstance(parsed_hgvs.positions, mavehgvs.position.VariantPosition):\n", + " raise NotImplementedError(\"Post-map VRS translation for protein-coding variants spanning multiple positions has not been implemented.\")\n", + " \n", + " # the distance between the start and end of the variant is dependent on the number of positions covered by the variant!\n", + " # this is hardcoded for single-position variants, for now\n", + " end = start\n", + " # subtract 1 from end of hit range, because blat ranges are 0-based [start, end)\n", + " ref_to_start = (ranges[i][1] -1 ) - (end - hits[i][0])\n", + " # or could do ranges[i][0] + (end - hits[i][1]), is one better than the other? 
any cases where one might be inaccurate?\n", + " # hgvs is 1-based, so convert back to 1-based\n", + " parsed_hgvs.positions.position = ref_to_start + 1\n", + "\n", + " # rev comp each sequence, assuming [0] is original and [1] is variant\n", + " # this is only tested for single position variants\n", + "\n", + " revcomp_sequences_list = []\n", + " for sequence in parsed_hgvs._sequences:\n", + " revcomp_sequences_list.append(str(Seq(sequence).reverse_complement()))\n", + " parsed_hgvs._sequences = tuple(revcomp_sequences_list)\n", + "\n", + " # get hgvs and allele\n", + " hgvs_string = ref + ':' + str(parsed_hgvs)\n", + " allele = tr.translate_from(hgvs_string, 'hgvs')\n", + "\n", + "\n", + " # if len(hits) == 1 and strand == 1:\n", + " # i = 0\n", + " # diff = start - hits[i][0]\n", + " # # diff2 = allele.location.end - start\n", + " # parsed_hgvs.positions.position = ranges[i][0] + diff\n", + " # # allele.location.end = allele.location.start + diff2\n", + " # else:\n", + " # for i in range(len(hits)):\n", + " # if start >= hits[i][0] and start < hits[i][1]:\n", + " # break\n", + " # diff = start - hits[i][0]\n", + " # # diff2 = allele.location.end - start\n", + " # if strand == 1: # positive orientation\n", + " # # allele.location.start = ranges[i][0] + diff\n", + " # # allele.location.end = allele.location.start + diff2\n", + " # parsed_hgvs.positions.position = ranges[i][0] + diff\n", + " # # haven't fixed dups yet, need test case\n", + " # if 'dup' in hgvs_string:\n", + " # print('dup in hgvs string. 
this has not been tested yet, review output.')\n", + " # allele.state.sequence.root = 2*str(sr[\"ga4gh:\" + str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " # else: # negative strand\n", + " # # TODO this works for single-position variants only!\n", + " # # this error is redundant (should be caught above),\n", + " # # but since it's not necessarily obvious that this works for\n", + " # # singlle-position variants only,\n", + " # # I'm putting it here as well because development\n", + " # # will need to happen here as well in order to support multi-position\n", + " # # variants, since diff2 = 1 is ONLY a good assumption for single-position variants\n", + " # if len(parsed_hgvs.positions) > 1:\n", + " # raise NotImplementedError(\"Post-map VRS translation for non-protein-coding variants spanning multiple positions has not been implemented.\")\n", + " # # if position is only one variant,\n", + " # # assume that diff2 = 1?\n", + " \n", + " # allele.location.start = ranges[i][1] - diff - diff2\n", + " # allele.location.end = allele.location.start + diff2\n", + " # # haven't fixed dups yet, need test case\n", + " # if 'dup' in hgvs_string:\n", + " # print('dup in hgvs string. this has not been tested yet, review output.')\n", + " # allele.state.sequence.root = 2*str(sr[str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " # # haven't tested rev comp yet, need test case\n", + " # print('this is a rev comp. 
this has not been tested yet, review output.')\n", + " # allele.state.sequence.root = str(Seq(str(allele.state.sequence.root)).reverse_complement())\n", + " \n", + " # TODO dups and 'fs' need to either be corrected after the fact, or use the ref or target sequence to correct them\n", + " # this doesn't currently work for dups, definitely won't work for rev comp fs, may work for + strand fs but haven't tested\n", + "\n", + " # haven't fixed this if block yet, need test case\n", + " # not sure if this needs to be allele.state.sequence.root\n", + " if allele.state.sequence.root == 'N' and l != 'p':\n", + " print('sequence is N. this has not been tested yet, review output.')\n", + " allele.state.sequence.root = str(sr[str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " allele = normalize(allele, data_proxy = dp)\n", + " \n", + " # update sequence reference id after normalization, see commented notes in pre mapping section above\n", + " if mapped == 'pre':\n", + " # not sure if refgetAccession is the appropriate field to update here, since this is a ga4gh computed seq id.\n", + " # do ga4gh computed seq ids count as refget accession ids?\n", + " allele.location.sequenceReference.refgetAccession = 'ga4gh:SQ.' 
+ sha512t24u(ts.encode('ascii'))\n", + " allele.id = ga4gh_identify(allele)\n", + " alleles.append(allele)\n", + " \n", + " if len(alleles) == 1: # Not haplotype\n", + " return alleles[0]\n", + " else:\n", + " return models.Haplotype(members = alleles)\n", + "\n", + "# protein coding\n", + "#pre\n", + "#get_haplotype_allele_temp(varm[0], np, 0, 'p', tr, dp, ts, 'pre', '', '', '')\n", + "#post\n", + "#get_haplotype_allele_temp(varm[0], np, offset, 'p', tr, dp, ts, 'post', '', '', '')\n", + "# post, protein coding with nt hgvs column and target seq on rev strand\n", + "#get_haplotype_allele_temp(ntlist[0][2:], ref, 0, 'g', tr, dp, ts,'post', ranges, hits, strand)\n", + " \n", + "# non protein coding\n", + "# pre already works\n", + "# post\n", + "#get_haplotype_allele_temp(ntlist[0][2:], ref, 0, 'g', tr, dp, ts, 'post', ranges, hits, strand)\n", + "# variant with 'dup' in hgvs_nt\n", + "#get_haplotype_allele_temp(ntlist[17][2:], ref, 0, 'g', tr, dp, ts, 'post', ranges, hits, strand)\n", + " \n", + "\n", + "# sally's changes\n", + "# protein coding\n", + "# pre\n", + "#get_haplotype_allele_sally(varm[0], np, 0, 'p', tr, dp, ts, 'pre', '', '', '')\n", + "# post\n", + "# new = get_haplotype_allele_sally(varm[0], np, offset, 'p', tr, dp, ts, 'post', '', '', '')\n", + "# # compare to old version\n", + "# old = get_haplotype_allele_temp(varm[0], np, offset, 'p', tr, dp, ts, 'post', '', '', '')\n", + "# print(\"new\")\n", + "# print(new)\n", + "# print()\n", + "# print(\"old\")\n", + "# print(old)\n", + "# pass!\n", + "\n", + "# post, rev comp hgvs_nt\n", + "# old = get_haplotype_allele_temp(ntlist[0][2:], ref, 0, 'g', tr, dp, ts,'post', ranges, hits, strand)\n", + "# new = get_haplotype_allele_sally(ntlist[0][2:], ref, 0, 'g', tr, dp, ts,'post', ranges, hits, strand)\n", + "# print(\"new\")\n", + "# print(new)\n", + "# print()\n", + "# print(\"old\")\n", + "# print(old)\n", + "# pass!\n", + "\n", + "# post, non-rev-comp hgvs_nt\n", + "# old = 
get_haplotype_allele_temp(ntlist[0][2:], ref, 0, 'g', tr, dp, ts, 'post', ranges, hits, strand)\n", + "# new = get_haplotype_allele_sally(ntlist[0][2:], ref, 0, 'g', tr, dp, ts, 'post', ranges, hits, strand)\n", + "# print(\"new\")\n", + "# print(new)\n", + "# print()\n", + "# print(\"old\")\n", + "# print(old)\n", + "# pass!\n", + " \n", + "# 99-a-1 has fs variants, test those\n", + "#get_haplotype_allele_sally(varm[17], np, 0, 'p', tr, dp, ts, 'pre', '', '', '')\n", + "get_haplotype_allele_sally(varm[17], np, offset, 'p', tr, dp, ts, 'post', '', '', '')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "id": "e0274f35", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "106T>G\n", + " query_ranges hit_ranges\n", + "0 [0:106] [43070917:43071023]\n", + "[[43070917, 43071023]]\n", + "[[0, 106]]\n" + ] + } + ], + "source": [ + "print(ntlist[0][2:])\n", + "print(mave_blat_dict[dat.at[9, 'urn']]['hits'])\n", + "print(ranges)\n", + "print(hits)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "id": "08803c62", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "341\n", + "None\n" + ] + } + ], + "source": [ + "parsed = mavehgvs.util.parse_variant_strings(['p.Glu341fs'])[0][0]\n", + "print(parsed.positions.position)\n", + "print(parsed._sequences)" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "id": "4b992e55", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(His7, Gln8)\n", + "7\n", + "8\n", + "Ser\n" + ] + } + ], + "source": [ + "# mavehgvs position test\n", + "parsed = mavehgvs.util.parse_variant_strings(['p.His7_Gln8insSer'])[0][0]\n", + "print(parsed.positions)\n", + "print(parsed.positions[0].position)\n", + "# is it a safe assumption that mavehgvs variant positions are always ordered least to greatest by int?\n", + "print(parsed.positions[-1].position)\n", + "\n", + 
"print(parsed._sequences)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "69bfdf77", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " query_ranges hit_ranges\n", + "0 [0:52] [37397802:37397854]\n", + "1 [52:232] [37400114:37400294]\n", + "2 [232:309] [37401601:37401678]\n", + "3 [309:463] [37402434:37402588]\n", + "4 [463:595] [37402748:37402880]\n", + "5 [595:750] [37403170:37403325]\n" + ] + } + ], + "source": [ + "# how do blat output ranges work?\n", + "print(mave_blat_dict[dat.at[0, 'urn']]['hits'])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "f65a95b3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None\n", + "(?Pp\\.\\[(?:(?:(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))?(?:=))|(?:\\(=\\)))|(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)))|(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))fs)|(?:(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))_(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))del)|(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))del))|(?:(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))_(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))dup)|(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))dup))|(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|
Tyr|Val|Ter)[1-9][0-9]*))_(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))ins(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)+))|(?:(?:(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))_(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*)))|(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*)))delins(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)+)))(?:;(?:(?:(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))?(?:=))|(?:\\(=\\)))|(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)))|(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))fs)|(?:(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))_(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))del)|(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))del))|(?:(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))_(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))dup)|(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))dup))|(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))_(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9
][0-9]*))ins(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)+))|(?:(?:(?:(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*))_(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*)))|(?:(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)[1-9][0-9]*)))delins(?:(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)+)))){1,}\\])\n", + "\n" + ] + } + ], + "source": [ + "print(re.search(mavehgvs.patterns.protein.pro_multi_variant, 'p.Val137_Pro142del'))\n", + "print(mavehgvs.patterns.protein.pro_multi_variant)\n", + "print(re.search(mavehgvs.patterns.protein.pro_single_variant, 'p.Val137_Pro142del'))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "b3a13aca", + "metadata": {}, + "outputs": [], + "source": [ + "def get_haplotype_allele_mavehgvs(var, ref, offset, l, tr, dp, ts, mapped, ranges, hits, strand):\n", + " var = var.lstrip(f'{l}.')\n", + " if '[' in var:\n", + " var = var[1:][:-1]\n", + " varlist = var.split(';')\n", + " varlist = list(set(varlist))\n", + " else:\n", + " varlist = list()\n", + " varlist.append(var)\n", + "\n", + " locs = {}\n", + " alleles = []\n", + "\n", + " for i in range(len(varlist)):\n", + " # hgvs_string = ref + ':'+ l +'.' + varlist[i]\n", + " # allele = tr.translate_from(hgvs_string, 'hgvs')\n", + " \n", + " if mapped == 'pre':\n", + " hgvs_string = ref + ':'+ l +'.' 
+ varlist[i]\n", + "\n", + " # TODO protein fs hgvs strings are not supported because they can't be handled by vrs allele translator\n", + " if re.search(mavehgvs.patterns.protein.pro_fs, hgvs_string):\n", + " raise NotImplementedError(\"Pre-map VRS translation not supported for fs variants denoted with protein hgvs strings\")\n", + " \n", + " # TODO multi position variants\n", + " # this actually works for pre-map, but don't support it until post-map works\n", + " if re.search(mavehgvs.patterns.protein.pro_multi_variant, hgvs_string):\n", + " raise NotImplementedError(\"Pre-map VRS translation not supported for multi-position variants\")\n", + "\n", + " allele = tr.translate_from(hgvs_string, 'hgvs')\n", + " # it's necessary to update the sequence identifier after translation, rather than including it in the hgvs string,\n", + " # because the hgvs parser expects a digit after the 'SQ.'\n", + " # note: not updating sequence reference until after normalization,\n", + " # because computed sequence identifier should include 'ga4gh:SQ', (see example here https://vrs.ga4gh.org/en/1.1/impl-guide/example.html)\n", + " # and the 'ga4gh:' breaks the normalizer\n", + " #allele.location.sequenceReference.refgetAccession = 'SQ.' + sha512t24u(ts.encode('ascii'))\n", + "\n", + " if 'dup' in hgvs_string:\n", + " print('dup in hgvs string. this has not been tested yet, review output.')\n", + " allele.state.sequence.root = 2*str(sr[str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " \n", + " else:\n", + " if l != 'g':\n", + " # TODO do we need to do anything for negative strand if using p. hgvs?\n", + " # expecting protein-based ref, so hgvs string is already mostly correct - just need to calculate offset\n", + " # could parse whole list outside of for loop since this function takes a list\n", + " parsed_hgvs = mavehgvs.util.parse_variant_strings(['p.' 
+ varlist[i]])[0][0]\n", + " # looks like offset is calculated based on amino acids, so this should be correct, but should validate\n", + " # may want to only do this if offset != 0? i guess that depends on how often offset == 0\n", + "\n", + " # TODO positions can be a tuple if there are multiple positions associated with the variant.\n", + " # if positions is a tuple, accessing position like this won't work.\n", + " # so need to check length of parsed_hgvs.positions\n", + " # should we expect multi-position protein variants?\n", + " # looks like yes - example from mavehgvs spec: p.His7_Gln8insSer\n", + " \n", + " if not isinstance(parsed_hgvs.positions, mavehgvs.position.VariantPosition):\n", + " raise NotImplementedError(\"Post-map VRS translation for protein-coding variants spanning multiple positions has not been implemented.\")\n", + " \n", + " # TODO protein fs hgvs strings are not supported because they can't be handled by vrs allele translator\n", + " if re.search(mavehgvs.patterns.protein.pro_fs, str(parsed_hgvs)):\n", + " raise NotImplementedError(\"Post-map VRS translation not supported for fs variants denoted with protein hgvs strings\")\n", + "\n", + " parsed_hgvs.positions.position = parsed_hgvs.positions.position + offset\n", + " hgvs_string = ref + ':' + str(parsed_hgvs)\n", + " allele = tr.translate_from(hgvs_string, 'hgvs')\n", + "\n", + " # allele.location.start = allele.location.start + offset\n", + " # allele.location.end = allele.location.end + offset\n", + " # dups haven't been fixed yet, need to find a test case\n", + " if 'dup' in hgvs_string:\n", + " # not sure if this needs to be allele.state.sequence.root\n", + " print('dup in hgvs string. 
this has not been tested yet, review output.')\n", + " allele.state.sequence.root = 2*str(sr[str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " \n", + " else:\n", + " # can we assume that the noncoding hgvs strings coming in from mavedb in the hgvs_nt column are c.?\n", + " parsed_hgvs = mavehgvs.util.parse_variant_strings(['c.' + varlist[i]])[0][0]\n", + " # start = allele.location.start\n", + " if not isinstance(parsed_hgvs.positions, mavehgvs.position.VariantPosition):\n", + " raise NotImplementedError(\"Post-map VRS translation for non-protein-coding variants spanning multiple positions has not been implemented.\")\n", + " \n", + " start = parsed_hgvs.positions.position - 1 #hgvs uses 1-based numbering for c. sequences, while blat hits are 0-based\n", + "\n", + " # get hit\n", + " if len(hits) == 1:\n", + " i = 0\n", + " else:\n", + " for i in range(len(hits)):\n", + " if start >= hits[i][0] and start < hits[i][1]:\n", + " break\n", + "\n", + " # if hit is on positive strand\n", + " if strand == 1:\n", + " # get variant start relative to the reference (the \"hit\")\n", + " # distance from beginning of query to variant start position:\n", + " query_to_start = start - hits[i][0]\n", + " # distance from beginning of ref to the variant start position:\n", + " ref_to_start = ranges[i][0] + query_to_start\n", + " # hgvs is 1-based, so convert back to 1-based\n", + " parsed_hgvs.positions.position = ref_to_start + 1\n", + " # if hit is on negative strand \n", + " else:\n", + " # in this case, picture the rev comp of the query/variant as mapping to the positive strand of the ref\n", + " # the start of the reverse complement of the variant is the end of the \"original\" variant\n", + " # so we need to know where the end of the original variant is, relative to the query molecule\n", + " # for single-position variants, we'll assume the end (rev comp view) is equal to: start - 1 \n", + " # TODO this works for 
single-position variants only!\n", + " # this error is redundant (should be caught above),\n", + " # but since it's not necessarily obvious that this works for\n", + " # single-position variants only,\n", + " # I'm putting it here as well because development\n", + " # will need to happen here as well in order to support multi-position\n", + " # variants, since diff2 = 1 is ONLY a good assumption for single-position variants\n", + " if not isinstance(parsed_hgvs.positions, mavehgvs.position.VariantPosition):\n", + " raise NotImplementedError(\"Post-map VRS translation for protein-coding variants spanning multiple positions has not been implemented.\")\n", + " \n", + " # the distance between the start and end of the variant is dependent on the number of positions covered by the variant!\n", + " # this is hardcoded for single-position variants, for now\n", + " end = start\n", + " # subtract 1 from end of hit range, because blat ranges are 0-based [start, end)\n", + " ref_to_start = (ranges[i][1] -1 ) - (end - hits[i][0])\n", + " # or could do ranges[i][0] + (end - hits[i][1]), is one better than the other? 
any cases where one might be inaccurate?\n", + " # hgvs is 1-based, so convert back to 1-based\n", + " parsed_hgvs.positions.position = ref_to_start + 1\n", + "\n", + " # rev comp each sequence, assuming [0] is original and [1] is variant\n", + " # this is only tested for single position variants\n", + "\n", + " revcomp_sequences_list = []\n", + " for sequence in parsed_hgvs._sequences:\n", + " revcomp_sequences_list.append(str(Seq(sequence).reverse_complement()))\n", + " parsed_hgvs._sequences = tuple(revcomp_sequences_list)\n", + "\n", + " # get hgvs and allele\n", + " hgvs_string = ref + ':' + str(parsed_hgvs)\n", + " allele = tr.translate_from(hgvs_string, 'hgvs')\n", + " \n", + " # TODO dups will need to be corrected after the allele object is created, because the mavehgvs string\n", + " # does not contain information about the identity of the base that is duplicated\n", + " # not immediately sure how to handle rev comp dups\n", + "\n", + " # haven't fixed this if block yet, need test case\n", + " # not sure if this needs to be allele.state.sequence.root\n", + " if allele.state.sequence.root == 'N' and l != 'p':\n", + " print('sequence is N. this has not been tested yet, review output.')\n", + " allele.state.sequence.root = str(sr[str(allele.location.sequenceReference.refgetAccession)][allele.location.start:allele.location.end])\n", + " allele = normalize(allele, data_proxy = dp)\n", + " \n", + " # update sequence reference id after normalization, see commented notes in pre mapping section above\n", + " if mapped == 'pre':\n", + " # not sure if refgetAccession is the appropriate field to update here, since this is a ga4gh computed seq id.\n", + " # do ga4gh computed seq ids count as refget accession ids?\n", + " allele.location.sequenceReference.refgetAccession = 'ga4gh:SQ.' 
+ sha512t24u(ts.encode('ascii'))\n", + " allele.id = ga4gh_identify(allele)\n", + " alleles.append(allele)\n", + " \n", + " if len(alleles) == 1: # Not haplotype\n", + " return alleles[0]\n", + " else:\n", + " return models.Haplotype(members = alleles)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b3d361bd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "urn:mavedb:00000099-a-1\n", + "skipping, pre-map fs\n", + "p.Glu341fs\n", + "skipping, pre-map fs\n", + "p.Phe13fs\n", + "skipping, post-map multi position variant\n", + "p.Val137_Pro142del\n", + "skipping, pre-map fs\n", + "p.Leu328fs\n", + "skipping, pre-map fs\n", + "p.Asn315fs\n", + "skipping, pre-map fs\n", + "p.Ser334fs\n", + "skipping, pre-map fs\n", + "p.Ala335fs\n", + "skipping, post-map multi position variant\n", + "p.Tyr206_Phe208del\n", + "skipping, pre-map fs\n", + "p.Thr340fs\n", + "skipping, pre-map fs\n", + "p.Glu341fs\n", + "skipping, pre-map fs\n", + "p.Glu332fs\n", + "skipping, pre-map fs\n", + "p.Pro327fs\n", + "skipping, post-map multi position variant\n", + "p.Arg69_Leu72del\n", + "skipping, pre-map fs\n", + "p.Pro327fs\n", + "skipping, post-map multi position variant\n", + "p.Leu318_Thr319delinsPro\n", + "skipping, pre-map fs\n", + "p.Ter349fs\n", + "skipping, pre-map fs\n", + "p.Ter349fs\n" + ] + }, + { + "ename": "ValueError", + "evalue": "All arrays must be of the same length", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/Users/sallybg/workspace/varianteffect/dcd_mapping/notebooks/sally/mavedb_mapping.ipynb Cell 43\u001b[0m line \u001b[0;36m8\n\u001b[1;32m 81\u001b[0m \u001b[39mprint\u001b[39m(varm[j])\n\u001b[1;32m 82\u001b[0m \u001b[39mcontinue\u001b[39;00m\n\u001b[0;32m---> 84\u001b[0m tempdat \u001b[39m=\u001b[39m 
pd\u001b[39m.\u001b[39;49mDataFrame({\u001b[39m'\u001b[39;49m\u001b[39mpre_mapping\u001b[39;49m\u001b[39m'\u001b[39;49m: var_ids_pre_map, \u001b[39m'\u001b[39;49m\u001b[39mmapped\u001b[39;49m\u001b[39m'\u001b[39;49m: var_ids_post_map})\n\u001b[1;32m 85\u001b[0m mappings_list\u001b[39m.\u001b[39mappend(tempdat)\n\u001b[1;32m 86\u001b[0m scores_list\u001b[39m.\u001b[39mappend(spro)\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/frame.py:767\u001b[0m, in \u001b[0;36mDataFrame.__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[1;32m 761\u001b[0m mgr \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_init_mgr(\n\u001b[1;32m 762\u001b[0m data, axes\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mindex\u001b[39m\u001b[39m\"\u001b[39m: index, \u001b[39m\"\u001b[39m\u001b[39mcolumns\u001b[39m\u001b[39m\"\u001b[39m: columns}, dtype\u001b[39m=\u001b[39mdtype, copy\u001b[39m=\u001b[39mcopy\n\u001b[1;32m 763\u001b[0m )\n\u001b[1;32m 765\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(data, \u001b[39mdict\u001b[39m):\n\u001b[1;32m 766\u001b[0m \u001b[39m# GH#38939 de facto copy defaults to False only in non-dict cases\u001b[39;00m\n\u001b[0;32m--> 767\u001b[0m mgr \u001b[39m=\u001b[39m dict_to_mgr(data, index, columns, dtype\u001b[39m=\u001b[39;49mdtype, copy\u001b[39m=\u001b[39;49mcopy, typ\u001b[39m=\u001b[39;49mmanager)\n\u001b[1;32m 768\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(data, ma\u001b[39m.\u001b[39mMaskedArray):\n\u001b[1;32m 769\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mnumpy\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mma\u001b[39;00m \u001b[39mimport\u001b[39;00m mrecords\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/internals/construction.py:503\u001b[0m, in \u001b[0;36mdict_to_mgr\u001b[0;34m(data, index, columns, dtype, typ, 
copy)\u001b[0m\n\u001b[1;32m 499\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 500\u001b[0m \u001b[39m# dtype check to exclude e.g. range objects, scalars\u001b[39;00m\n\u001b[1;32m 501\u001b[0m arrays \u001b[39m=\u001b[39m [x\u001b[39m.\u001b[39mcopy() \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(x, \u001b[39m\"\u001b[39m\u001b[39mdtype\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39melse\u001b[39;00m x \u001b[39mfor\u001b[39;00m x \u001b[39min\u001b[39;00m arrays]\n\u001b[0;32m--> 503\u001b[0m \u001b[39mreturn\u001b[39;00m arrays_to_mgr(arrays, columns, index, dtype\u001b[39m=\u001b[39;49mdtype, typ\u001b[39m=\u001b[39;49mtyp, consolidate\u001b[39m=\u001b[39;49mcopy)\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/internals/construction.py:114\u001b[0m, in \u001b[0;36marrays_to_mgr\u001b[0;34m(arrays, columns, index, dtype, verify_integrity, typ, consolidate)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[39mif\u001b[39;00m verify_integrity:\n\u001b[1;32m 112\u001b[0m \u001b[39m# figure out the index, if necessary\u001b[39;00m\n\u001b[1;32m 113\u001b[0m \u001b[39mif\u001b[39;00m index \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 114\u001b[0m index \u001b[39m=\u001b[39m _extract_index(arrays)\n\u001b[1;32m 115\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 116\u001b[0m index \u001b[39m=\u001b[39m ensure_index(index)\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/internals/construction.py:677\u001b[0m, in \u001b[0;36m_extract_index\u001b[0;34m(data)\u001b[0m\n\u001b[1;32m 675\u001b[0m lengths \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(\u001b[39mset\u001b[39m(raw_lengths))\n\u001b[1;32m 676\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(lengths) \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[0;32m--> 677\u001b[0m \u001b[39mraise\u001b[39;00m 
\u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mAll arrays must be of the same length\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 679\u001b[0m \u001b[39mif\u001b[39;00m have_dicts:\n\u001b[1;32m 680\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 681\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mMixing dicts with non-Series may lead to ambiguous ordering.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 682\u001b[0m )\n", + "\u001b[0;31mValueError\u001b[0m: All arrays must be of the same length" + ] + } + ], + "source": [ + "# VRS Variant Mapping - Coding Scoresets\n", + "dp = SeqRepoDataProxy(sr = sr)\n", + "tr = AlleleTranslator(data_proxy = dp, normalize = False)\n", + "qh = QueryHandler(create_db())\n", + "vrs_mappings_dict = {}\n", + "scores_dict_coding = {}\n", + "mavedb_ids_coding = {}\n", + "\n", + "mave_dat = pd.read_csv('mave_dat.csv')\n", + "dat = mave_dat\n", + "\n", + "# for each urn in the mave data requested from mavedb:\n", + "#for i in range(len(dat.index)):\n", + "for i in range(0,1):\n", + " i = 10\n", + " # this section only processes protein coding sequences\n", + " if dat.at[i, 'target_type'] == 'Protein coding' or dat.at[i, 'target_type'] == 'protein_coding':\n", + " # if there is a mapping entry for this urn:\n", + " if dat.at[i, 'urn'] in mappings_dict.keys():\n", + " print(dat.at[i, 'urn'])\n", + " # grab the urn's mapping entry\n", + " item = mappings_dict[dat.at[i, 'urn']]\n", + " # get scoreset for this urn from mavedb\n", + " string = 'https://api.mavedb.org/api/v1/score-sets/' + mave_dat.at[i, 'urn']+ '/scores'\n", + " origdat = requests.get(string).content\n", + " vardat = pd.read_csv(io.StringIO(origdat.decode('utf-8')))\n", + " scores = vardat['score'].to_list()\n", + " accessions = vardat['accession'].to_list()\n", + " \n", + " mappings_list = []\n", + " scores_list = []\n", + " accessions_list = []\n", + " \n", + " # Process protein column\n", + " var_ids_pre_map = []\n", + " 
var_ids_post_map = []\n", + " \n", + " if len(item) != 0:\n", + " np = item[0]\n", + " offset = item[1]\n", + " varm = vardat['hgvs_pro']\n", + " \n", + " ts = dat.at[i, 'target_sequence']\n", + " if len(set(str(ts))) > 4:\n", + " stri = str(ts)\n", + " \n", + " else:\n", + " ts = Seq(ts)\n", + " ts = str(ts.translate(table=1)).replace('*', '')\n", + " \n", + " digest = 'SQ.' + sha512t24u(ts.encode('ascii'))\n", + " alias_dict_list = [{'namespace': 'ga4gh', 'alias': digest}]\n", + " sr.store(ts, nsaliases = alias_dict_list) # Add custom digest to SeqRepo\n", + " \n", + " spro = []\n", + " accpro = []\n", + " \n", + " for j in range(len(varm)):\n", + " if type(varm[j]) != str or len(varm[j]) == 3 or varm[j] == '_wt' or varm[j] == '_sy':\n", + " continue\n", + " if varm[j].startswith('NP') == True:\n", + " var_ids_pre_map.append(tr.translate_from(varm[j], 'hgvs'))\n", + " var_ids_post_map.append(tr.translate_from(varm[j], 'hgvs'))\n", + " spro.append(scores[j])\n", + " accpro.append(accessions[j])\n", + " else:\n", + " try:\n", + " if np.startswith('N') == True:\n", + " var_ids_pre_map.append(get_haplotype_allele_mavehgvs(varm[j], np, 0, 'p', tr, dp, ts, 'pre', '', '', ''))\n", + " var_ids_post_map.append(get_haplotype_allele_mavehgvs(varm[j], np, offset, 'p', tr, dp, ts, 'post', '', '', ''))\n", + " spro.append(scores[j])\n", + " accpro.append(accessions[j])\n", + " else:\n", + " var_ids_pre_map.append(get_haplotype_allele_mavehgvs(varm[j], np, 0, 'p', tr, dp, ts, 'pre', '', '', ''))\n", + " # TODO ranges and hits don't actually get used by get_haplotype_allele, are they intended to be used here?\n", + " # what is the 'np' that we're expecting here if it doesn't start with 'N'?\n", + " var_ids_post_map.append(get_haplotype_allele_mavehgvs(varm[j], np, offset, 'p', tr, dp, ts, 'post', ranges, hits, ''))\n", + " spro.append(scores[j])\n", + " accpro.append(accessions[j])\n", + " except:\n", + " continue\n", + " \n", + " tempdat = pd.DataFrame({'pre_mapping': 
var_ids_pre_map, 'mapped': var_ids_post_map})\n", + " mappings_list.append(tempdat)\n", + " scores_list.append(spro)\n", + " accessions_list.append(accpro)\n", + " \n", + " # Process nt column if data present\n", + " if vardat['hgvs_nt'].isnull().values.all() == False and '97' not in dat.at[i, 'urn']:\n", + " var_ids_pre_map = []\n", + " var_ids_post_map = []\n", + " \n", + " item = mave_blat_dict[dat.at[i, 'urn']]\n", + " ranges = get_locs_list(item['hits'])\n", + " hits = get_hits_list(item['hits'])\n", + " ref = get_chr(dp, item['chrom'])\n", + " ts = dat.at[i, 'target_sequence']\n", + " strand = mave_blat_dict[dat.at[i, 'urn']]['strand']\n", + " \n", + " digest = 'SQ.' + sha512t24u(ts.encode('ascii'))\n", + " alias_dict_list = [{'namespace': 'ga4gh', 'alias': digest}]\n", + " sr.store(ts, nsaliases = alias_dict_list) # Add custom digest to SeqRepo\n", + " \n", + " ntlist = vardat['hgvs_nt']\n", + " varm = vardat['hgvs_pro']\n", + " sn = []\n", + " accn = []\n", + " \n", + " for j in range(len(ntlist)):\n", + " if type(ntlist[j]) != str or ntlist[j] == '_wt' or ntlist[j] == '_sy':\n", + " continue\n", + " else:\n", + " try:\n", + " var_ids_pre_map.append(get_haplotype_allele_mavehgvs(ntlist[j][2:], ref, 0, 'g', tr, dp, ts,'pre', ranges, hits, strand).as_dict())\n", + " var_ids_post_map.append(get_haplotype_allele_mavehgvs(ntlist[j][2:], ref, 0, 'g', tr, dp, ts,'post', ranges, hits, strand).as_dict())\n", + " sn.append(scores[j])\n", + " accn.append(accessions[j])\n", + " except:\n", + " continue\n", + "\n", + " tempdat = pd.DataFrame({'pre_mapping': var_ids_pre_map, 'mapped': var_ids_post_map})\n", + " mappings_list.append(tempdat)\n", + " scores_list.append(sn)\n", + " accessions_list.append(accn)\n", + " \n", + " vrs_mappings_dict[dat.at[i, 'urn']] = mappings_list\n", + " scores_dict_coding[dat.at[i, 'urn']] = scores_list\n", + " mavedb_ids_coding[dat.at[i, 'urn']] = accessions_list\n", + "vrs_mappings_dict" + ] + }, + { + "cell_type": "code", + 
"execution_count": 63, + "id": "ad893d9b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "urn:mavedb:00000018-a-1\n", + "index: 2\n" + ] + }, + { + "ename": "ValueError", + "evalue": "All arrays must be of the same length", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/Users/sallybg/workspace/varianteffect/dcd_mapping/notebooks/sally/mavedb_mapping.ipynb Cell 25\u001b[0m line \u001b[0;36m5\n\u001b[1;32m 52\u001b[0m \u001b[39mexcept\u001b[39;00m:\n\u001b[1;32m 53\u001b[0m \u001b[39mcontinue\u001b[39;00m\n\u001b[0;32m---> 55\u001b[0m tempdat \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mDataFrame({\u001b[39m'\u001b[39;49m\u001b[39mpre_mapping\u001b[39;49m\u001b[39m'\u001b[39;49m: var_ids_pre_map, \u001b[39m'\u001b[39;49m\u001b[39mmapped\u001b[39;49m\u001b[39m'\u001b[39;49m: var_ids_post_map})\n\u001b[1;32m 56\u001b[0m vrs_noncoding_mappings_dict[dat\u001b[39m.\u001b[39mat[i, \u001b[39m'\u001b[39m\u001b[39murn\u001b[39m\u001b[39m'\u001b[39m]] \u001b[39m=\u001b[39m tempdat\n\u001b[1;32m 57\u001b[0m scores_dict_noncoding[dat\u001b[39m.\u001b[39mat[i, \u001b[39m'\u001b[39m\u001b[39murn\u001b[39m\u001b[39m'\u001b[39m]] \u001b[39m=\u001b[39m scores_list\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/frame.py:767\u001b[0m, in \u001b[0;36mDataFrame.__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[1;32m 761\u001b[0m mgr \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_init_mgr(\n\u001b[1;32m 762\u001b[0m data, axes\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mindex\u001b[39m\u001b[39m\"\u001b[39m: index, \u001b[39m\"\u001b[39m\u001b[39mcolumns\u001b[39m\u001b[39m\"\u001b[39m: columns}, dtype\u001b[39m=\u001b[39mdtype, 
copy\u001b[39m=\u001b[39mcopy\n\u001b[1;32m 763\u001b[0m )\n\u001b[1;32m 765\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(data, \u001b[39mdict\u001b[39m):\n\u001b[1;32m 766\u001b[0m \u001b[39m# GH#38939 de facto copy defaults to False only in non-dict cases\u001b[39;00m\n\u001b[0;32m--> 767\u001b[0m mgr \u001b[39m=\u001b[39m dict_to_mgr(data, index, columns, dtype\u001b[39m=\u001b[39;49mdtype, copy\u001b[39m=\u001b[39;49mcopy, typ\u001b[39m=\u001b[39;49mmanager)\n\u001b[1;32m 768\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(data, ma\u001b[39m.\u001b[39mMaskedArray):\n\u001b[1;32m 769\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mnumpy\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mma\u001b[39;00m \u001b[39mimport\u001b[39;00m mrecords\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/internals/construction.py:503\u001b[0m, in \u001b[0;36mdict_to_mgr\u001b[0;34m(data, index, columns, dtype, typ, copy)\u001b[0m\n\u001b[1;32m 499\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 500\u001b[0m \u001b[39m# dtype check to exclude e.g. 
range objects, scalars\u001b[39;00m\n\u001b[1;32m 501\u001b[0m arrays \u001b[39m=\u001b[39m [x\u001b[39m.\u001b[39mcopy() \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(x, \u001b[39m\"\u001b[39m\u001b[39mdtype\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39melse\u001b[39;00m x \u001b[39mfor\u001b[39;00m x \u001b[39min\u001b[39;00m arrays]\n\u001b[0;32m--> 503\u001b[0m \u001b[39mreturn\u001b[39;00m arrays_to_mgr(arrays, columns, index, dtype\u001b[39m=\u001b[39;49mdtype, typ\u001b[39m=\u001b[39;49mtyp, consolidate\u001b[39m=\u001b[39;49mcopy)\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/internals/construction.py:114\u001b[0m, in \u001b[0;36marrays_to_mgr\u001b[0;34m(arrays, columns, index, dtype, verify_integrity, typ, consolidate)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[39mif\u001b[39;00m verify_integrity:\n\u001b[1;32m 112\u001b[0m \u001b[39m# figure out the index, if necessary\u001b[39;00m\n\u001b[1;32m 113\u001b[0m \u001b[39mif\u001b[39;00m index \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 114\u001b[0m index \u001b[39m=\u001b[39m _extract_index(arrays)\n\u001b[1;32m 115\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 116\u001b[0m index \u001b[39m=\u001b[39m ensure_index(index)\n", + "File \u001b[0;32m~/workspace/varianteffect/dcd_mapping/.venv/lib/python3.11/site-packages/pandas/core/internals/construction.py:677\u001b[0m, in \u001b[0;36m_extract_index\u001b[0;34m(data)\u001b[0m\n\u001b[1;32m 675\u001b[0m lengths \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(\u001b[39mset\u001b[39m(raw_lengths))\n\u001b[1;32m 676\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(lengths) \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[0;32m--> 677\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mAll arrays must be of the same length\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 679\u001b[0m 
\u001b[39mif\u001b[39;00m have_dicts:\n\u001b[1;32m 680\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 681\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mMixing dicts with non-Series may lead to ambiguous ordering.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 682\u001b[0m )\n", + "\u001b[0;31mValueError\u001b[0m: All arrays must be of the same length" + ] + } + ], + "source": [ + "# VRS variant mapping non-protein coding scoresets\n", + "dp = SeqRepoDataProxy(sr = sr)\n", + "tr = AlleleTranslator(data_proxy = dp, normalize = False)\n", + "qh = QueryHandler(create_db())\n", + "vrs_noncoding_mappings_dict = {}\n", + "scores_dict_noncoding = {}\n", + "mavedb_ids_noncoding = {}\n", + "\n", + "mave_dat = pd.read_csv('mave_dat.csv')\n", + "dat = mave_dat\n", + "\n", + "for i in range(len(dat.index)):\n", + " if dat.at[i, 'target_type'] != 'Protein coding' and dat.at[i, 'target_type'] != 'protein_coding':\n", + " print(dat.at[i, 'urn'])\n", + " item = mave_blat_dict[dat.at[i, 'urn']]\n", + " #if blat_check(i) == False:\n", + " # vrs_noncoding_mappings_dict[dat.at[i, 'urn']] = 'BLAT hit not found on correct chromosome'\n", + " # continue\n", + " #ranges = get_locs_list(item['hits'])[0]\n", + " string = string = 'https://api.mavedb.org/api/v1/score-sets/' + mave_dat.at[i, 'urn']+ '/scores'\n", + " origdat = requests.get(string).content\n", + " varsdat = pd.read_csv(io.StringIO(origdat.decode('utf-8')))\n", + " ntlist = varsdat['hgvs_nt'].to_list()\n", + " \n", + " var_ids_pre_map = []\n", + " var_ids_post_map = []\n", + " ranges = get_locs_list(item['hits'])\n", + " ref = get_chr(dp, item['chrom'])\n", + " hits = get_hits_list(item['hits'])\n", + " strand = mave_blat_dict[dat.at[i, 'urn']]['strand']\n", + " \n", + " ts = dat.at[i, 'target_sequence']\n", + " digest = 'SQ.' 
+ sha512t24u(ts.encode('ascii'))\n", + " alias_dict_list = [{'namespace': 'ga4gh', 'alias': digest}]\n", + " sr.store(ts, nsaliases = alias_dict_list) # Add custom digest to SeqRepo\n", + " \n", + " scores = varsdat['score'].to_list()\n", + " scores_list = []\n", + " accessions = varsdat['accession'].to_list()\n", + " accessions_list = []\n", + "\n", + " for j in range(len(ntlist)):\n", + " if ntlist[j] == '_wt' or ntlist[j] == '_sy':\n", + " continue\n", + " else:\n", + " try:\n", + " var_ids_pre_map.append(get_haplotype_allele_temp(ntlist[j][2:], ref, 0, 'g', tr, dp, ts, 'pre', ranges, hits, strand))\n", + " var_ids_post_map.append(get_haplotype_allele_temp(ntlist[j][2:], ref, 0, 'g', tr, dp, ts, 'post', ranges, hits, strand))\n", + " scores_list.append(scores[j])\n", + " accessions_list.append(accessions[j])\n", + " except:\n", + " continue\n", + " \n", + " tempdat = pd.DataFrame({'pre_mapping': var_ids_pre_map, 'mapped': var_ids_post_map})\n", + " vrs_noncoding_mappings_dict[dat.at[i, 'urn']] = tempdat\n", + " scores_dict_noncoding[dat.at[i, 'urn']] = scores_list\n", + " mavedb_ids_noncoding[dat.at[i, 'urn']] = accessions_list\n", + "\n", + "vrs_noncoding_mappings_dict" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "9c0cbec6", + "metadata": {}, + "outputs": [], + "source": [ + "with open('vrs_mappings_coding_normalize_false.pickle', 'wb') as fn:\n", + " pickle.dump(vrs_mappings_dict, fn, protocol=pickle.HIGHEST_PROTOCOL)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "4432a1f5", + "metadata": {}, + "outputs": [], + "source": [ + "with open('scores_coding.pickle', 'wb') as fn:\n", + " pickle.dump(scores_dict_coding, fn, protocol=pickle.HIGHEST_PROTOCOL)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f2faf010", + "metadata": {}, + "outputs": [], + "source": [ + "with open('vrs_mappings_noncoding_normalize_false.pickle', 'wb') as fn:\n", + " pickle.dump(vrs_noncoding_mappings_dict, fn, 
protocol=pickle.HIGHEST_PROTOCOL)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "c2daea2d", + "metadata": {}, + "outputs": [], + "source": [ + "with open('scores_noncoding.pickle', 'wb') as fn:\n", + " pickle.dump(scores_dict_noncoding, fn, protocol=pickle.HIGHEST_PROTOCOL)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "23529dbf", + "metadata": {}, + "outputs": [], + "source": [ + "with open('mavedb_ids_coding.pickle', 'wb') as fn:\n", + " pickle.dump(mavedb_ids_coding, fn, protocol=pickle.HIGHEST_PROTOCOL)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "c1e5040e", + "metadata": {}, + "outputs": [], + "source": [ + "with open('mavedb_ids_noncoding.pickle', 'wb') as fn:\n", + " pickle.dump(mavedb_ids_noncoding, fn, protocol=pickle.HIGHEST_PROTOCOL)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "592af332", + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'list' object has no attribute 'at'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/Users/sallybg/workspace/varianteffect/dcd_mapping/notebooks/sally/mavedb_mapping.ipynb Cell 29\u001b[0m line \u001b[0;36m3\n\u001b[1;32m 1\u001b[0m \u001b[39m# Variant Mapping Example - Coding,Noncoding,Protein + Genomic\u001b[39;00m\n\u001b[1;32m 2\u001b[0m ex \u001b[39m=\u001b[39m vrs_mappings_dict[\u001b[39m'\u001b[39m\u001b[39murn:mavedb:00000041-a-1\u001b[39m\u001b[39m'\u001b[39m]\n\u001b[0;32m----> 3\u001b[0m \u001b[39mprint\u001b[39m(ex\u001b[39m.\u001b[39;49mat[\u001b[39m0\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mpre_mapping\u001b[39m\u001b[39m'\u001b[39m])\n\u001b[1;32m 4\u001b[0m \u001b[39mprint\u001b[39m(ex\u001b[39m.\u001b[39mat[\u001b[39m0\u001b[39m, 
\u001b[39m'\u001b[39m\u001b[39mmapped\u001b[39m\u001b[39m'\u001b[39m])\n\u001b[1;32m 6\u001b[0m ex \u001b[39m=\u001b[39m vrs_noncoding_mappings_dict[\u001b[39m'\u001b[39m\u001b[39murn:mavedb:00000018-a-1\u001b[39m\u001b[39m'\u001b[39m]\n", + "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'at'" + ] + } + ], + "source": [ + "# Variant Mapping Example - Coding,Noncoding,Protein + Genomic\n", + "ex = vrs_mappings_dict['urn:mavedb:00000041-a-1'][10]\n", + "print(ex.at[0, 'pre_mapping'])\n", + "print(ex.at[0, 'mapped'])\n", + "\n", + "ex = vrs_noncoding_mappings_dict['urn:mavedb:00000018-a-1']\n", + "print(ex.at[0, 'pre_mapping'])\n", + "print(ex.at[0, 'mapped'])" + ] + }, + { + "cell_type": "markdown", + "id": "f3ad683a", + "metadata": {}, + "source": [ + "# The blocks below can be run to access the output in the results directory" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4e5ae5b6", + "metadata": {}, + "outputs": [], + "source": [ + "# Load metadata\n", + "mave_dat = pd.read_csv('results/mave_dat.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "201f3aef", + "metadata": {}, + "outputs": [], + "source": [ + "# Load alignment data\n", + "with open('results/mave_blat.pickle', 'rb') as fn:\n", + " mave_blat_dict = pickle.load(fn)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "18bdefde", + "metadata": {}, + "outputs": [], + "source": [ + "# Load mappings data\n", + "with open('results/mappings.pickle', 'rb') as fn:\n", + " mappings_dict = pickle.load(fn)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "1ba2cf09", + "metadata": {}, + "outputs": [], + "source": [ + "# Load coding data\n", + "with open('results/vrs_mappings_coding_normalize_false.pickle', 'rb') as fn:\n", + " vrs_mappings_coding = pickle.load(fn)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0ea50429", + "metadata": {}, + "outputs": [], + "source": [ + "# Load noncoding 
data\n", + "with open('results/vrs_mappings_noncoding_normalize_false.pickle', 'rb') as fn:\n", + " vrs_mappings_noncoding = pickle.load(fn)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index 19e105c..10ac2db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,14 @@ classifiers = [ "Intended Audience :: Science/Research", "Topic :: Scientific/Engineering :: Bio-Informatics", ] -requires-python = ">=3.10" +requires-python = ">=3.11" + +# Requirement for gene_normalizer==0.3.0-dev2 is to address +# an issue with connecting to postgres: see +# https://github.com/cancervariants/gene-normalization/releases +# https://github.com/cancervariants/gene-normalization/pull/333 +# and clean up deps once a new release is made. 
+ dependencies = [ "requests", "biopython", @@ -31,11 +38,11 @@ dependencies = [ "click", "cool-seq-tool>=0.4.0.dev1", "ga4gh.vrs~=2.0.0-a6", - # probably easiest to just include pg dependency group even if it's not always necessary - "gene-normalizer[pg]>=0.3.0-dev1", + "gene_normalizer[etl,pg]==0.3.0-dev2", "pydantic>=2", "python-dotenv", "setuptools>=68.0", # tmp -- ensure 3.12 compatibility + "mavehgvs==0.6.1" ] dynamic = ["version"] diff --git a/settings/.env.dev b/settings/.env.dev new file mode 100644 index 0000000..f7980e6 --- /dev/null +++ b/settings/.env.dev @@ -0,0 +1,26 @@ +#################################################################################################### +# Environment variables for vrs-mapping +#################################################################################################### + +GENE_NORM_DB_URL=postgres://postgres:postgres@db:5432/gene_normalizer +MAVEDB_STORAGE_DIR=/root/.local/share/dcd-mapping + +#################################################################################################### +# Environment variables for postgres +#################################################################################################### + +POSTGRES_USER=postgres +POSTGRES_PASSWORD=postgres +POSTGRES_DB=gene_normalizer + +#################################################################################################### +# Environment variables for UTA connection via CoolSeqTool +#################################################################################################### + +UTA_DB_URL=postgresql://anonymous:anonymous@uta.biocommons.org:5432/uta/uta_20180821 + +#################################################################################################### +# Environment variables for seqrepo +#################################################################################################### + +SEQREPO_ROOT_DIR=/usr/local/share/seqrepo/2021-01-29 diff --git a/src/dcd_mapping/align.py 
b/src/dcd_mapping/align.py index 6a39fab..8d9f878 100644 --- a/src/dcd_mapping/align.py +++ b/src/dcd_mapping/align.py @@ -154,8 +154,8 @@ def _get_blat_output(metadata: ScoresetMetadata, silent: bool) -> QueryResult: :return: BLAT query result :raise AlignmentError: if BLAT subprocess returns error code """ - with tempfile.NamedTemporaryFile() as query_file: - query_file = _build_query_file(metadata, Path(query_file.name)) + with tempfile.NamedTemporaryFile() as tmp_file: + query_file = _build_query_file(metadata, Path(tmp_file.name)) if metadata.target_sequence_type == TargetSequenceType.PROTEIN: target_args = "-q=prot -t=dnax" else: diff --git a/src/dcd_mapping/annotate.py b/src/dcd_mapping/annotate.py index cab9050..e698031 100644 --- a/src/dcd_mapping/annotate.py +++ b/src/dcd_mapping/annotate.py @@ -1,4 +1,5 @@ """Annotate MaveDB score set metadata with mapped scores.""" + import datetime import json import logging @@ -464,6 +465,7 @@ def save_mapped_output_json( align_result: AlignmentResult, tx_output: TxSelectResult | None, include_vrs_2: bool = False, + preferred_layer_only: bool = False, output_path: Path | None = None, ) -> Path: """Save mapping output for a score set in a JSON file @@ -477,24 +479,47 @@ def save_mapped_output_json( /urn:mavedb:00000XXX-X-X_mapping_.json :return: output location """ - preferred_layer = _set_scoreset_layer(urn, mappings) metadata = get_raw_scoreset_metadata(urn) - computed_reference_sequence = _get_computed_reference_sequence( - urn, preferred_layer, tx_output - ) - mapped_reference_sequence = _get_mapped_reference_sequence( - preferred_layer, tx_output, align_result - ) + if preferred_layer_only: + preferred_layers = { + _set_scoreset_layer(urn, mappings), + } + else: + preferred_layers = {mapping.annotation_layer for mapping in mappings} + + reference_sequences = { + layer: {"computed_reference_sequence": None, "mapped_reference_sequence": None} + for layer in AnnotationLayer + } + + for layer in preferred_layers: + 
reference_sequences[layer][ + "computed_reference_sequence" + ] = _get_computed_reference_sequence(urn, layer, tx_output) + reference_sequences[layer][ + "mapped_reference_sequence" + ] = _get_mapped_reference_sequence(layer, tx_output, align_result) + mapped_scores: list[ScoreAnnotation] = [] for m in mappings: - if m.annotation_layer == preferred_layer: + if m.annotation_layer in preferred_layers: # drop annotation layer from mapping object mapped_scores.append(ScoreAnnotation(**m.model_dump())) output = ScoresetMapping( metadata=metadata, - computed_reference_sequence=computed_reference_sequence, - mapped_reference_sequence=mapped_reference_sequence, + computed_protein_reference_sequence=reference_sequences[ + AnnotationLayer.PROTEIN + ]["computed_reference_sequence"], + mapped_protein_reference_sequence=reference_sequences[AnnotationLayer.PROTEIN][ + "mapped_reference_sequence" + ], + computed_genomic_reference_sequence=reference_sequences[ + AnnotationLayer.GENOMIC + ]["computed_reference_sequence"], + mapped_genomic_reference_sequence=reference_sequences[AnnotationLayer.GENOMIC][ + "mapped_reference_sequence" + ], mapped_scores=mapped_scores, ) @@ -504,7 +529,7 @@ def save_mapped_output_json( m.post_mapped_2_0 = None if not output_path: - now = datetime.datetime.now(tz=datetime.timezone.utc).isoformat() + now = datetime.datetime.now(tz=datetime.UTC).isoformat() output_path = LOCAL_STORE_PATH / f"{urn}_mapping_{now}.json" _logger.info("Saving mapping output to %s", output_path) diff --git a/src/dcd_mapping/cli.py b/src/dcd_mapping/cli.py index 2d8c9eb..e9dd0a9 100644 --- a/src/dcd_mapping/cli.py +++ b/src/dcd_mapping/cli.py @@ -1,4 +1,5 @@ """Provide command-line interface for accessing mapping functions.""" + import asyncio import logging from pathlib import Path @@ -38,11 +39,18 @@ default=False, help="Include VRS 2.0 mappings", ) +@click.option( + "--prefer_genomic", + is_flag=True, + default=False, + help="If mapped variants are available relative to a 
genomic sequence, only output the genomic mappings", +) def cli( urn: str, debug: bool, output: Path | None, include_vrs_2: bool, + prefer_genomic: bool, ) -> None: """Get VRS mapping on preferred transcript for URN. @@ -63,7 +71,9 @@ def cli( ) _logger.debug("debug logging enabled") try: - asyncio.run(map_scoreset_urn(urn, output, include_vrs_2, silent=False)) + asyncio.run( + map_scoreset_urn(urn, output, include_vrs_2, prefer_genomic, silent=False) + ) except ( LookupError, AlignmentError, diff --git a/src/dcd_mapping/lookup.py b/src/dcd_mapping/lookup.py index 56c9116..42f80fb 100644 --- a/src/dcd_mapping/lookup.py +++ b/src/dcd_mapping/lookup.py @@ -7,6 +7,7 @@ * the `VRS-Python Translator tool `_ * the UniProt web API """ + import logging import os from pathlib import Path @@ -392,8 +393,9 @@ def check_seqrepo() -> None: sr = get_seqrepo() if not sr.sr["NC_000001.11"][780000:780020]: raise DataLookupError + conn = sr.sr.aliases._db try: - conn = sr.sr.aliases._db + # conn = sr.sr.aliases._db cursor = conn.cursor() cursor.execute("CREATE TABLE IF NOT EXISTS test_table (id INTEGER PRIMARY KEY)") cursor.execute("INSERT INTO test_table (id) VALUES (1)") @@ -401,8 +403,9 @@ def check_seqrepo() -> None: cursor.execute("DELETE FROM test_table WHERE id = 1") cursor.execute("DROP TABLE test_table") conn.commit() - conn.close() + # conn.close() except sqlite3.Error as e: + conn.close() _logger.error("SeqRepo sequences DB isn't writeable.") raise DataLookupError from e diff --git a/src/dcd_mapping/main.py b/src/dcd_mapping/main.py index afed6ee..7a81d54 100644 --- a/src/dcd_mapping/main.py +++ b/src/dcd_mapping/main.py @@ -1,4 +1,5 @@ """Provide core MaveDB mapping methods.""" + import logging import os import subprocess @@ -123,6 +124,7 @@ async def map_scoreset( records: list[ScoreRow], output_path: Path | None = None, include_vrs_2: bool = False, + prefer_genomic: bool = False, silent: bool = True, ) -> None: """Given information about a MAVE experiment, map to VRS 
and save output as JSON. @@ -182,6 +184,7 @@ async def map_scoreset( alignment_result, transcript, include_vrs_2, + prefer_genomic, output_path, ) _emit_info(f"Annotated scores saved to: {final_output}.", silent) @@ -191,6 +194,7 @@ async def map_scoreset_urn( urn: str, output_path: Path | None = None, include_vrs_2: bool = False, + prefer_genomic: bool = False, silent: bool = True, ) -> None: """Perform end-to-end mapping for a scoreset. @@ -208,4 +212,6 @@ async def map_scoreset_urn( _logger.critical(msg) click.echo(f"Error: {msg}") raise e - await map_scoreset(metadata, records, output_path, include_vrs_2, silent) + await map_scoreset( + metadata, records, output_path, include_vrs_2, prefer_genomic, silent + ) diff --git a/src/dcd_mapping/schemas.py b/src/dcd_mapping/schemas.py index 64788bf..e77d385 100644 --- a/src/dcd_mapping/schemas.py +++ b/src/dcd_mapping/schemas.py @@ -4,7 +4,7 @@ from cool_seq_tool.schemas import AnnotationLayer, Strand, TranscriptPriority from ga4gh.vrs._internal.models import Allele, Haplotype -from pydantic import BaseModel, StrictBool, StrictInt, StrictStr +from pydantic import BaseModel, ConfigDict, StrictBool, StrictInt, StrictStr from dcd_mapping import vrs_v1_schemas @@ -142,6 +142,8 @@ class MappedScore(BaseModel): This model defines the output of the VRS mapping phase of the pipeline. """ + model_config = ConfigDict(use_enum_values=True) + accession_id: StrictStr annotation_layer: AnnotationLayer score: str | None @@ -178,6 +180,8 @@ class ScoresetMapping(BaseModel): """Provide all mapped scores for a scoreset.""" metadata: Any # TODO get exact MaveDB metadata structure? 
- computed_reference_sequence: ComputedReferenceSequence - mapped_reference_sequence: MappedReferenceSequence + computed_protein_reference_sequence: ComputedReferenceSequence | None + mapped_protein_reference_sequence: MappedReferenceSequence | None + computed_genomic_reference_sequence: ComputedReferenceSequence | None + mapped_genomic_reference_sequence: MappedReferenceSequence | None mapped_scores: list[ScoreAnnotation] diff --git a/src/dcd_mapping/vrs_map.py b/src/dcd_mapping/vrs_map.py index 005edf4..4756323 100644 --- a/src/dcd_mapping/vrs_map.py +++ b/src/dcd_mapping/vrs_map.py @@ -1,5 +1,7 @@ """Map transcripts to VRS objects.""" + import logging +from collections.abc import Iterable from itertools import cycle import click @@ -15,6 +17,8 @@ SequenceString, ) from ga4gh.vrs.normalize import normalize +from mavehgvs.util import parse_variant_strings +from mavehgvs.variant import Variant from dcd_mapping.lookup import ( get_chromosome_identifier, @@ -64,47 +68,228 @@ def _process_any_aa_code(hgvs_pro_string: str) -> str: return hgvs_pro_string -def _create_hgvs_strings( - alignment: AlignmentResult, +def _create_pre_mapped_hgvs_strings( raw_description: str, layer: AnnotationLayer, tx: TxSelectResult | None = None, + alignment: AlignmentResult | None = None, ) -> list[str]: - """Properly format MAVE variant strings - - * Add accession - * Split up plural/'haplotype' variant expressions - * Drop empty/nonexistent/non-computable variant expressions - * Convert "?" 
-> "Xaa" (should only be for amino acid variation expressions) - - :param align_results: Alignment results for a score set - :param raw_description: The variant list as expressed in MaveDB - :param layer: The Annotation Layer (protein or genomic) - :param tx: The transcript selection information for a score set - :return A list of processed variants + """Generate a list of (pre-mapped) HGVS strings from one long string containing many valid HGVS substrings + + Currently, the provided transcript is used as the reference for the hgvs string, but this is inaccurate + because pre-mapped variants should be relative to the user-provided target sequence, not an external accession. + Any offset between the transcript and target sequence is not taken into account here (the variant position + is relative to the target sequence). + + :param raw_description: A string containing valid HGVS sub-strings + :param layer: An enum denoting the targeted annotation layer of these HGVS strings + :param tx: A TxSelectResult object defining the transcript we are mapping to (or None). + :param alignment: An AlignmentResult object defining the alignment we are mapping to (or None). + :return: A list of HGVS strings prior to being mapped to the `tx` or `alignment` + """ + if layer is AnnotationLayer.PROTEIN and tx is None: + msg = f"Transcript result must be provided for {layer} annotations (Transcript was `{tx}`)." + raise ValueError(msg) + if layer is AnnotationLayer.GENOMIC and alignment is None: + msg = f"Alignment result must be provided for {layer} annotations (Alignment was `{alignment}`)." 
+ raise ValueError(msg) + + raw_variant_strings = _parse_raw_variant_str(raw_description) + variants, errors = parse_variant_strings(raw_variant_strings) + + hgvs_strings = [] + for variant, error in zip(variants, errors, strict=True): + if error is not None: + msg = f"Variant could not be parsed by mavehgvs: {error}" + raise ValueError(msg) + + # Ideally we would create an HGVS string namespaced to GA4GH. The line below + # creates such a string, but it is not able to be parsed by the GA4GH VRS translator. + # hgvs_strings.append('ga4gh:' + sequence_id + ':' + str(variant)) + if layer is AnnotationLayer.PROTEIN: + assert tx # noqa: S101. mypy help + hgvs_strings.append(tx.np + ":" + str(variant)) + elif layer is AnnotationLayer.GENOMIC: + assert alignment # noqa: S101. mypy help + hgvs_strings.append( + get_chromosome_identifier(alignment.chrom) + ":" + str(variant) + ) + else: + msg = ( + f"Could not generate HGVS strings for invalid AnnotationLayer: {layer}" + ) + raise ValueError(msg) + + return hgvs_strings + + +def _create_post_mapped_hgvs_strings( + raw_description: str, + layer: AnnotationLayer, + tx: TxSelectResult | None = None, + alignment: AlignmentResult | None = None, +) -> list[str]: + """Generate a list of (post-mapped) HGVS strings from one long string containing many valid HGVS substrings. + + For protein annotations, these strings must be adjusted to match the offset defined by the start of the + transcript sequence. For genomic annotations, these strings must be adjusted to match the coordinates of + the reference alignment. 
+ + :param raw_description: A string containing valid HGVS sub-strings + :param layer: An enum denoting the targeted annotation layer of these HGVS strings + :param tx: A TxSelectResult object defining the transcript we are mapping to (or None) + :param alignment: An AlignmentResult object defining the alignment we are mapping to (or None) + :return: A list of HGVS strings relative to the `tx` or `alignment` + """ + if layer is AnnotationLayer.PROTEIN and tx is None: + msg = f"Transcript result must be provided for {layer} annotations (Transcript was `{tx}`)." + raise ValueError(msg) + if layer is AnnotationLayer.GENOMIC and alignment is None: + msg = f"Alignment result must be provided for {layer} annotations (Alignment was `{alignment}`)." + raise ValueError(msg) + + raw_variants = _parse_raw_variant_str(raw_description) + variants, errors = parse_variant_strings(raw_variants) + + hgvs_strings = [] + for variant, error in zip(variants, errors, strict=True): + if error is not None: + msg = f"Variant could not be parsed by mavehgvs: {error}" + raise ValueError(msg) + + if layer is AnnotationLayer.PROTEIN: + assert tx # noqa: S101. mypy help + + variant = _adjust_protein_variant_to_ref(variant, tx) + hgvs_strings.append(tx.np + ":" + str(variant)) + elif layer is AnnotationLayer.GENOMIC: + assert alignment # noqa: S101. 
mypy help + + variant = _adjust_genomic_variant_to_ref(variant, alignment) + hgvs_strings.append( + get_chromosome_identifier(alignment.chrom) + ":" + str(variant) + ) + else: + msg = ( + f"Could not generate HGVS strings for invalid AnnotationLayer: {layer}" + ) + raise ValueError(msg) + + return hgvs_strings + + +def _adjust_protein_variant_to_ref( + variant: Variant, + tx: TxSelectResult, +) -> Variant: + if isinstance(variant.positions, Iterable): + for position in variant.positions: + position.position = position.position + tx.start + return variant + + variant.positions.position = variant.positions.position + tx.start + return variant + + +def _adjust_genomic_variant_to_ref( + variant: Variant, + alignment: AlignmentResult, +) -> Variant: + """Adjust a variant relative to a provided alignment. + + :param variant: A variant object relative to a scoreset's target sequence + :param alignment: An AlignmentResult object denoting the alignment we are mapping to + :return: A variant object that describes the variant relative to the provided alignment result """ - if layer == AnnotationLayer.PROTEIN: - if not tx: - msg = "Can't get protein layer if no transcript selection results given" - raise VrsMapError(msg) - acc = tx.np + # adjust starts - hgvs uses 1-based numbering for c. 
sequences, while blat hits are 0-based + starts = [] + if isinstance(variant.positions, Iterable): + is_multi_position = True + for position in variant.positions: + starts.append(position.position - 1) else: - acc = get_chromosome_identifier(alignment.chrom) + is_multi_position = False + starts.append(variant.positions.position - 1) + + # get hit + query_subrange_containing_hit = None + target_subrange_containing_hit = None + for query_subrange, target_subrange in zip( + alignment.query_subranges, alignment.hit_subranges, strict=True + ): + if all( + start >= query_subrange.start and start < query_subrange.end + for start in starts + ): + query_subrange_containing_hit = query_subrange + target_subrange_containing_hit = target_subrange + break + + if query_subrange_containing_hit is None or target_subrange_containing_hit is None: + msg = "Hit was not contained, or multi-position hit was not fully contained, within the query and/or target subranges." + raise ValueError(msg) + + for idx, start in enumerate(starts): + if alignment.strand is Strand.POSITIVE: + # get variant start relative to the reference (the "hit") + # distance from beginning of query to variant start position: + query_to_start = start - query_subrange_containing_hit.start + + # distance from beginning of ref to the variant start position: + ref_to_start = target_subrange_containing_hit.start + query_to_start + else: + # picture the rev comp of the query/variant as mapping to the positive strand of the ref + # the start of the reverse complement of the variant is the end of the "original" variant + # so we need to know where the end of the original variant is, relative to the query molecule + end = start + + # subtract 1 from end of hit range, because blat ranges are 0-based [start, end) + ref_to_start = (target_subrange_containing_hit.end - 1) - ( + end - query_subrange_containing_hit.start + ) + + # add distance from ref to variant start; hgvs is 1-based, so convert back to 1-based + if 
is_multi_position: + variant.positions[idx].position = ref_to_start + 1 + else: + variant.positions.position = ref_to_start + 1 + + # get reverse complement of sequence if the target maps to the negative strand of the reference + if alignment.strand is Strand.NEGATIVE: + # variant._sequences can be a string or an iterable + if isinstance(variant._sequences, str): + variant._sequences = str(Seq(variant._sequences).reverse_complement()) + elif variant._sequences is not None: + revcomp_sequences_list = [] + for sequence in variant._sequences: + revcomp_sequences_list.append(str(Seq(sequence).reverse_complement())) + variant._sequences = revcomp_sequences_list + + # reverse order of positions tuple + if is_multi_position: + variant._positions = tuple(reversed(list(variant.positions))) + + # change prefix from c. to g. since variant is now relative to chr reference + variant._prefix = "g" + + return variant + + +def _parse_raw_variant_str(raw_description: str) -> list[str]: + """Parse a string which may contain many HGVS strings into a list of each one. 
+ + :param raw_description: A string that may contain a list of variant descriptions or a single variant description + :return: A list of HGVS strings + """ if "[" in raw_description: - descr_list = list(set(raw_description[3:-1].split(";"))) - hgvs_strings = [f"{acc}:{layer.value}.{d}" for d in descr_list] - else: - descr_list = [raw_description] - hgvs_strings = [f"{acc}:{d}" for d in descr_list] - hgvs_strings = list(filter(_hgvs_variant_is_valid, hgvs_strings)) - if layer == AnnotationLayer.PROTEIN: - hgvs_strings = [_process_any_aa_code(s) for s in hgvs_strings] - return hgvs_strings + prefix = raw_description[0:2] + return [prefix + var for var in set(raw_description[3:-1].split(";"))] + + return [raw_description] def _map_protein_coding_pro( row: ScoreRow, - align_result: AlignmentResult, sequence_id: str, transcript: TxSelectResult, ) -> MappedScore | None: @@ -113,8 +298,6 @@ def _map_protein_coding_pro( These arguments are a little lazy and could be pruned down later :param row: A row of output from a MaveDB score set - :param align_result: The alignment data for a score set - :param sequence: The target sequence for a score set :param sequence_id: The GA4GH accession for the provided sequence :param transcript: The transcript selection information for a score set :return: VRS mapping object if mapping succeeds @@ -128,6 +311,8 @@ def _map_protein_coding_pro( "Can't process variant syntax %s for %s", row.hgvs_pro, row.accession ) return None + + # TODO: Handle edge cases without hardcoding URNs. 
# Special case for experiment set urn:mavedb:0000097 if row.hgvs_pro.startswith("NP_009225.1:p."): vrs_variation = translate_hgvs_to_vrs(row.hgvs_pro) @@ -138,24 +323,31 @@ def _map_protein_coding_pro( pre_mapped=vrs_variation, post_mapped=vrs_variation, ) - hgvs_strings = _create_hgvs_strings( - align_result, row.hgvs_pro, AnnotationLayer.PROTEIN, transcript + + pre_mapped_hgvs_strings = _create_pre_mapped_hgvs_strings( + row.hgvs_pro, + AnnotationLayer.PROTEIN, + tx=transcript, ) - pre_mapped_protein = _get_variation( - hgvs_strings, + post_mapped_hgvs_strings = _create_post_mapped_hgvs_strings( + row.hgvs_pro, + AnnotationLayer.PROTEIN, + tx=transcript, + ) + + pre_mapped_protein = _construct_vrs_allele( + pre_mapped_hgvs_strings, AnnotationLayer.PROTEIN, sequence_id, - align_result, True, ) - post_mapped_protein = _get_variation( - hgvs_strings, + post_mapped_protein = _construct_vrs_allele( + post_mapped_hgvs_strings, AnnotationLayer.PROTEIN, - sequence_id, - align_result, + None, False, - transcript.start, ) + if pre_mapped_protein and post_mapped_protein: return MappedScore( accession_id=row.accession, @@ -164,6 +356,57 @@ def _map_protein_coding_pro( pre_mapped=pre_mapped_protein, post_mapped=post_mapped_protein, ) + + return None + + +def _map_genomic( + row: ScoreRow, + sequence_id: str, + align_result: AlignmentResult, +) -> MappedScore | None: + """Construct VRS object mapping for ``hgvs_nt`` variant column entry + + These arguments are a little lazy and could be pruned down later + + :param row: A row of output from a MaveDB score set + :param sequence_id: The GA4GH accession for the provided sequence + :param align_result: The transcript selection information for a score set + :return: VRS mapping object if mapping succeeds + """ + pre_mapped_hgvs_strings = _create_pre_mapped_hgvs_strings( + row.hgvs_nt, + AnnotationLayer.GENOMIC, + alignment=align_result, + ) + post_mapped_hgvs_strings = _create_post_mapped_hgvs_strings( + row.hgvs_nt, + 
AnnotationLayer.GENOMIC, + alignment=align_result, + ) + + pre_mapped_genomic = _construct_vrs_allele( + pre_mapped_hgvs_strings, + AnnotationLayer.GENOMIC, + sequence_id, + True, + ) + post_mapped_genomic = _construct_vrs_allele( + post_mapped_hgvs_strings, + AnnotationLayer.GENOMIC, + None, + False, + ) + + if pre_mapped_genomic and post_mapped_genomic: + return MappedScore( + accession_id=row.accession, + score=row.score, + annotation_layer=AnnotationLayer.GENOMIC, + pre_mapped=pre_mapped_genomic, + post_mapped=post_mapped_genomic, + ) + return None @@ -237,46 +480,28 @@ def _map_protein_coding( variations: list[MappedScore] = [] for row in records: - hgvs_pro_mappings = _map_protein_coding_pro( - row, align_result, psequence_id, transcript - ) + hgvs_pro_mappings = _map_protein_coding_pro(row, psequence_id, transcript) if hgvs_pro_mappings: variations.append(hgvs_pro_mappings) - if not _hgvs_nt_is_valid(row.hgvs_nt): - continue - hgvs_strings = _create_hgvs_strings( - align_result, row.hgvs_nt, AnnotationLayer.GENOMIC - ) - pre_mapped_genomic = _get_variation( - hgvs_strings, - AnnotationLayer.GENOMIC, - gsequence_id, - align_result, - True, - ) - post_mapped_genomic = _get_variation( - hgvs_strings, - AnnotationLayer.GENOMIC, - gsequence_id, - align_result, - False, - ) - if pre_mapped_genomic is None or post_mapped_genomic is None: + else: _logger.warning( - "Encountered apparently invalid genomic variants in %s: %s", + "Encountered apparently invalid protein variants in %s: %s", row.accession, - row.hgvs_nt, - ) - continue - variations.append( - MappedScore( - accession_id=row.accession, - score=row.score, - annotation_layer=AnnotationLayer.GENOMIC, - pre_mapped=pre_mapped_genomic, - post_mapped=post_mapped_genomic, + row.hgvs_pro, ) - ) + + if _hgvs_nt_is_valid(row.hgvs_nt): + hgvs_nt_mappings = _map_genomic(row, gsequence_id, align_result) + + if hgvs_nt_mappings: + variations.append(hgvs_nt_mappings) + else: + _logger.warning( + "Encountered apparently 
invalid genomic variants in %s: %s", + row.accession, + row.hgvs_nt, + ) + return variations @@ -305,37 +530,18 @@ def _map_regulatory_noncoding( "Can't process variant syntax %s for %s", row.hgvs_nt, metadata.urn ) continue - hgvs_strings = _create_hgvs_strings( - align_result, row.hgvs_nt, AnnotationLayer.GENOMIC - ) - pre_map_allele = _get_variation( - hgvs_strings, - AnnotationLayer.GENOMIC, - sequence_id, - align_result, - True, - offset=0, - ) - post_map_allele = _get_variation( - hgvs_strings, - AnnotationLayer.GENOMIC, - sequence_id, - align_result, - False, - offset=0, - ) - if not pre_map_allele or not post_map_allele: - msg = "Genomic variations missing" - raise VrsMapError(msg) - variations.append( - MappedScore( - accession_id=row.accession, - annotation_layer=AnnotationLayer.GENOMIC, - pre_mapped=pre_map_allele, - post_mapped=post_map_allele, - score=row.score, + + hgvs_nt_mappings = _map_genomic(row, sequence_id, align_result) + + if hgvs_nt_mappings: + variations.append(hgvs_nt_mappings) + else: + _logger.warning( + "Encountered apparently invalid genomic variants in %s: %s", + row.accession, + row.hgvs_nt, ) - ) + return variations @@ -360,90 +566,34 @@ def _rle_to_lse( return LiteralSequenceExpression(sequence=derived_sequence) -def _get_variation( +def _construct_vrs_allele( hgvs_strings: list[str], layer: AnnotationLayer, - sequence_id: str, - alignment: AlignmentResult, + sequence_id: str | None, pre_map: bool, - offset: int = 0, -) -> Allele | Haplotype | None: - """Create variation (allele). - - :param hgvs_strings: The HGVS suffix that represents a variant - :param layer: annotation layer - :param sequence_id: target sequence digest eg ``"ga4gh:SQ.jUOcLPDjSqWFEo9kSOG8ITe1dr9QK3h6"`` - :param alignment: The AlignmentResult object for a score set - :param pre_map: if True, return object for pre mapping stage. Otherwise return for - post-mapping. - :param offset: The offset to adjust the start and end positions in allele. 
This - parameter is used if the annotation layer is protein. For genomic variants, the - offset is computed with respect to the alignment block. - :return: an allele or haplotype - """ - if sequence_id.startswith("ga4gh:"): - sequence_id = sequence_id[6:] +) -> Allele | Haplotype: alleles: list[Allele] = [] for hgvs_string in hgvs_strings: - # Generate VRS Allele structure. Set VA digests and SL digests to None allele = translate_hgvs_to_vrs(hgvs_string) - allele.id = None - allele.digest = None - allele.location.id = None - allele.location.digest = None - if "dup" in hgvs_string: - allele.state.sequence = SequenceString(2 * _get_allele_sequence(allele)) if pre_map: + if sequence_id is None: + msg = "Must provide sequence id to construct pre-mapped VRS allele" + raise ValueError(msg) allele.location.sequenceReference.refgetAccession = sequence_id - if "dup" in hgvs_string: - allele.state.sequence = SequenceString(2 * _get_allele_sequence(allele)) - else: - if layer == AnnotationLayer.PROTEIN: - allele.location.start += offset - allele.location.end += offset - else: - start: int = allele.location.start - if ( - len(alignment.query_subranges) == 1 - and alignment.strand == Strand.POSITIVE - ): - subrange_start = alignment.query_subranges[0].start - diff = start - subrange_start - diff2: int = allele.location.end - start - allele.location.start = alignment.hit_subranges[0].start + diff - allele.location.end = allele.location.start + diff2 - else: - for query_subrange, hit_subrange in zip( # noqa: B007 # TODO remove hit_subrange? 
- alignment.query_subranges, alignment.hit_subranges, strict=False - ): - if start >= query_subrange.start and start < query_subrange.end: - break - diff = start - query_subrange.start - diff2: int = allele.location.end - start - if alignment.strand == Strand.POSITIVE: # positive orientation - allele.location.start = hit_subrange.start + diff - allele.location.end = allele.location.start + diff2 - if "dup" in hgvs_string: - allele.state.sequence = SequenceString( - 2 * _get_allele_sequence(allele) - ) - else: - allele.location.start = hit_subrange.end - diff - diff2 - allele.location.end = allele.location.start + diff2 - if "dup" in hgvs_string: - allele.state.sequence = SequenceString( - 2 * _get_allele_sequence(allele) - ) - temp_str = str( - Seq(str(allele.state.sequence.root)).reverse_complement() - ) - allele.state.sequence = SequenceString(temp_str) + + if "dup" in hgvs_string: + allele.state.sequence = SequenceString(2 * _get_allele_sequence(allele)) + + # TODO check assumption that c.= leads to an "N" in the sequence.root if allele.state.sequence.root == "N" and layer == AnnotationLayer.GENOMIC: allele.state.sequence = SequenceString(_get_allele_sequence(allele)) + if "=" in hgvs_string and layer == AnnotationLayer.PROTEIN: allele.state.sequence = SequenceString(_get_allele_sequence(allele)) + allele = normalize(allele, data_proxy=get_seqrepo()) + if isinstance(allele.state, ReferenceLengthExpression): _logger.debug( "Coercing state for %s into LSE: %s", @@ -457,10 +607,13 @@ def _get_variation( alleles.append(allele) if not alleles: - return None - if len(alleles) == 1: - return alleles[0] - return Haplotype(members=alleles) + msg = f"Input variant hgvs_string(s) could not be translated to an allele: {hgvs_strings}." 
+ raise ValueError(msg) + + if len(alleles) > 1: + return Haplotype(members=alleles) + + return alleles[0] def vrs_map( diff --git a/tests/fixtures/urn:mavedb:00000002-a-2_scores.csv b/tests/fixtures/urn:mavedb:00000002-a-2_scores.csv index c521ae8..393b270 100644 --- a/tests/fixtures/urn:mavedb:00000002-a-2_scores.csv +++ b/tests/fixtures/urn:mavedb:00000002-a-2_scores.csv @@ -1,5 +1,2 @@ accession,hgvs_nt,hgvs_splice,hgvs_pro,score,SE,epsilon,SE_101208,score_101208,SE_110307,score_110307 urn:mavedb:00000002-a-2#1,NA,NA,p.[Ala11Gly;His23Asp],-2.195822360043693,0.039522549654755226,0.00016271557241353792,0.26275009188313664,-2.3668204254880316,0.34288362080122936,-1.9070053558248752 -urn:mavedb:00000002-a-2#2679,NA,NA,p.Gln26?,-1.8006447618878303,1.7178767640945949,0.0,0.9011033521625522,-3.2403734638769706,0.3427674463875434,-0.6266255940903953 -urn:mavedb:00000002-a-2#3096,NA,NA,p.[Ser14Arg;Asp25?],-0.06470918003032197,3.161101902092387e-50,7.208644543045662e-98,1.714769079515034,-0.1929322833638718,2.0614706290730704,0.12060527606508133 -urn:mavedb:00000002-a-2#26248,NA,NA,p.[Asp25?;Gln26Pro;Thr27Ala],-2.108552505131139,1.4081755357724763e-57,2.743447640839061e-112,0.2910737465254972,-2.1041883371997407,1.6185941310591887,-2.2435020128308185 diff --git a/tests/test_vrs_map.py b/tests/test_vrs_map.py index cce0944..37133c2 100644 --- a/tests/test_vrs_map.py +++ b/tests/test_vrs_map.py @@ -100,37 +100,11 @@ def test_2_a_2( "ga4gh:VA.aF9h1d9DvWWGlkhRAbdz1Ni9DQUOXIhL", ], }, - ("urn:mavedb:00000002-a-2#2679", AnnotationLayer.PROTEIN): { - "pre_mapped": "ga4gh:VA.5Jf_a17Q6ySEpDvHr1FR1kmE6L1RWpGK", - "post_mapped": "ga4gh:VA.PWfyP7Ktd3L2IT564-h9FVyqv9NvnnEJ", - }, - ("urn:mavedb:00000002-a-2#3096", AnnotationLayer.PROTEIN): { - "pre_mapped": [ - "ga4gh:VA.A4nh1CUx6gUy0pCePT9RxZQDrY9BzEoa", - "ga4gh:VA.H6BdObvEycBGJPqnASVYOPwf9bHboT6w", - ], - "post_mapped": [ - "ga4gh:VA.PLOa58Eo06IGBGQbrsOPBpXcuw4mDAFH", - "ga4gh:VA.xi7XqR9LSoq0n8B3W2ufPEg12MqEZ3jD", - ], - }, - 
("urn:mavedb:00000002-a-2#26248", AnnotationLayer.PROTEIN): { - "pre_mapped": [ - "ga4gh:VA.M_mxkauLTyizIeufKNmOk9vplL9N8Svn", - "ga4gh:VA.krtCaV7JjlvM4esBW0XzUnnsQgixnmyV", - "ga4gh:VA.H6BdObvEycBGJPqnASVYOPwf9bHboT6w", - ], - "post_mapped": [ - "ga4gh:VA.Z1CFy03R9dyAEfWj_G4dsyRHkj7dbJto", - "ga4gh:VA.sr_W-vpBZbM1ItYhaqFw3m_O08EEqtqg", - "ga4gh:VA.xi7XqR9LSoq0n8B3W2ufPEg12MqEZ3jD", - ], - }, } mappings = vrs_map(metadata, align_result, records, transcript=tx_result) assert mappings is not None - assert len(mappings) == 4 + assert len(mappings) == 1 for m in mappings: _assert_correct_vrs_map(m, expected_mappings_data)