Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
b66b66a
Added ClinGen allele IDs to the variant data model.
jstone-dev Jan 27, 2025
f4d14b0
Added a ClinGen allele ID variant lookup endpoint to the API.
jstone-dev Jan 27, 2025
04526f7
Added a "get variant" endpoint; added permission checks to the varian…
jstone-dev Jan 28, 2025
b7fc08d
Include mapped variants in get/lookup variant responses.
jstone-dev Jan 28, 2025
0d5b1eb
ClinVar Variant Table
bencap Jan 29, 2025
4582b75
fixup
bencap Jan 29, 2025
89edb37
Minimal Script for ClinVar Variant Data Refresh
bencap Jan 30, 2025
7ec9672
View model updates to support clinvar variants
bencap Jan 30, 2025
47baa03
Add check for Nonetype clingen allele ids
bencap Jan 30, 2025
ea69e68
HACK: Expedient solution for surfacing clinsig/reviewstat in CSV scor…
bencap Jan 30, 2025
569d40c
Fixed HTTP method typo.
jstone-dev Jan 28, 2025
56b05ae
Include full score set details in "get variant" response.
jstone-dev Jan 30, 2025
0063f72
Revised the temporary hack that presents ClinVar significances as sco…
jstone-dev Jan 31, 2025
b5b599e
Bug fixes
jstone-dev Jan 31, 2025
8a40392
Genericize ClinVar Variants Table
bencap Feb 18, 2025
7e10d7f
Move clingen_allele_id column to mapped_variants table
bencap Feb 19, 2025
0b1a432
fixup
bencap Feb 19, 2025
aa74796
Clinical control router tests and code fixup from tests output
bencap Feb 19, 2025
a906eb8
Undo demo hack, refactors variant data to csv function into a single …
bencap Feb 20, 2025
df91ffb
Replace id with URN for mapped variant view models, inherit create vi…
bencap Feb 20, 2025
453c7a7
Add route for generating pairwise db_name/db_version clinical control…
bencap Feb 20, 2025
bd0d389
Format changes to clinical control options
bencap Feb 24, 2025
9071ad4
Fix foreign key downgrade constraint name
bencap Feb 25, 2025
8f1002b
Rebase alembic revision ordering
bencap Mar 4, 2025
cbfb139
Current and 2.0 filter for mapped variants
bencap Mar 4, 2025
f88b856
tmp: Staging filter. Drop this commit prior to release.
bencap Mar 4, 2025
6b3d8e1
Make allele ID column alembic upgrade revise stats mat view
bencap Mar 24, 2025
e436b49
wip: clingen ldh submission
bencap Mar 26, 2025
db47a69
wip: clingen ldh submission job
bencap Mar 28, 2025
ddd5079
wip: clingen submission script
bencap Apr 8, 2025
733b086
Alembic revision reorder from rebase
bencap Apr 8, 2025
81bf500
wip: clingen submission and linkage scripts
bencap Apr 9, 2025
cee36d2
Script and LDH Connection Fixes
bencap Apr 17, 2025
d062761
Create distinction between Slack errors and messages
bencap Apr 17, 2025
bd17d1a
Refactor clingen variation function into clingen lib
bencap Apr 17, 2025
73ab329
Only submit mapped variants with a defined post mapped object
bencap Apr 17, 2025
1543d0a
Ensure post mapped metadata is not empty
bencap Apr 17, 2025
b4ba745
Flesh out retry logic for linking clingen allele ids
bencap Apr 18, 2025
d0c2f85
Handle Haplotype VRS Objects
bencap Apr 22, 2025
a9ace94
Add unlinked flag to linking job
bencap Apr 22, 2025
1d4d6b5
wip: docs and tests for clingen jobs
bencap Apr 22, 2025
2410696
Add enqueue linking error to exceptions classes
bencap Apr 22, 2025
6e67d1a
Add new clingen functions to worker
bencap Apr 22, 2025
4f19304
Tests for VRS Variation Extraction Utilities
bencap Apr 23, 2025
631ae0f
Tests for ClinGen Library Methods
bencap Apr 24, 2025
8f53861
Test cases for ClinGen submission
bencap Apr 26, 2025
467fe3b
Tests for Clingen Linkage Job
bencap Apr 28, 2025
82d5970
Test case fixes
bencap Apr 28, 2025
ae8ad7c
Use ubuntu-latest for 3.9 Tests
bencap Apr 28, 2025
b84b17a
Make Mapping Job Enqueue ClinGen Submission Job
bencap Apr 29, 2025
903461e
Dont backoff clingen submission requests
bencap Apr 29, 2025
f3c56fd
Defer linking job in seconds
bencap Apr 29, 2025
95beefd
Mock Requests.put in tests rather than outdate request_with_backoff
bencap Apr 29, 2025
7082958
Use Named Function for Linkage Job
bencap Apr 29, 2025
43ed08b
Define clingen fetch at top level of module
bencap Apr 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/run-tests-on-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ env:

jobs:
run-tests-3_9:
runs-on: ubuntu-20.04
name: Pytest on Python 3.9 / Ubuntu 20.04
runs-on: ubuntu-latest
name: Pytest on Python 3.9
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
Expand Down
58 changes: 58 additions & 0 deletions alembic/versions/34026092c7f8_clinvar_variant_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""clinvar variant table

Revision ID: 34026092c7f8
Revises: e8a3b5d8f885
Create Date: 2025-01-28 21:48:42.532346

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "34026092c7f8"
down_revision = "e8a3b5d8f885"
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``clinvar_variants`` table and reference it from ``mapped_variants``.

    Adds an indexed, nullable ``clinvar_variant_id`` column to ``mapped_variants``
    together with a named foreign key pointing at ``clinvar_variants.id``.
    """
    clinvar_table = "clinvar_variants"
    mapped_table = "mapped_variants"

    clinvar_columns = [
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("allele_id", sa.Integer(), nullable=False),
        sa.Column("gene_symbol", sa.String(), nullable=False),
        sa.Column("clinical_significance", sa.String(), nullable=False),
        sa.Column("clinical_review_status", sa.String(), nullable=False),
        sa.Column("clinvar_db_version", sa.String(), nullable=False),
        sa.Column("creation_date", sa.Date(), nullable=False),
        sa.Column("modification_date", sa.Date(), nullable=False),
    ]
    op.create_table(clinvar_table, *clinvar_columns, sa.PrimaryKeyConstraint("id"))
    op.create_index(op.f("ix_clinvar_variants_allele_id"), clinvar_table, ["allele_id"], unique=False)

    # Link mapped variants to their (optional) ClinVar variant.
    op.add_column(mapped_table, sa.Column("clinvar_variant_id", sa.Integer(), nullable=True))
    op.create_index(
        op.f("ix_mapped_variants_clinvar_variant_id"),
        mapped_table,
        ["clinvar_variant_id"],
        unique=False,
    )
    # The constraint is named explicitly so that downgrade() can drop it by name.
    op.create_foreign_key(
        "mapped_variant_clinvar_variant_id_foreign_key_constraint",
        mapped_table,
        clinvar_table,
        ["clinvar_variant_id"],
        ["id"],
    )


def downgrade():
    """Remove the ClinVar link from ``mapped_variants`` and drop ``clinvar_variants``.

    Operations run in the reverse order of upgrade(): foreign key, index and
    column on ``mapped_variants`` first, then the index and table for
    ``clinvar_variants``.
    """
    mapped_table = "mapped_variants"
    clinvar_table = "clinvar_variants"

    op.drop_constraint(
        "mapped_variant_clinvar_variant_id_foreign_key_constraint",
        mapped_table,
        type_="foreignkey",
    )
    op.drop_index(op.f("ix_mapped_variants_clinvar_variant_id"), table_name=mapped_table)
    op.drop_column(mapped_table, "clinvar_variant_id")

    op.drop_index(op.f("ix_clinvar_variants_allele_id"), table_name=clinvar_table)
    op.drop_table(clinvar_table)
101 changes: 101 additions & 0 deletions alembic/versions/695b73abe581_genericize_clinvar_variants_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""genericize clinvar variants table

Revision ID: 695b73abe581
Revises: 34026092c7f8
Create Date: 2025-02-18 11:54:15.243078

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "695b73abe581"
down_revision = "34026092c7f8"
branch_labels = None
depends_on = None


def upgrade():
    """Turn the ClinVar-specific table into a generic ``clinical_controls`` table.

    Renames the table (plus its sequence and primary key index), renames the
    ClinVar-specific columns, adds a ``db_name`` column, and replaces the
    ``mapped_variants.clinvar_variant_id`` column with the
    ``mapped_variants_clinical_controls`` association table.
    """
    controls = "clinical_controls"
    association = "mapped_variants_clinical_controls"

    op.rename_table("clinvar_variants", controls)
    op.execute("ALTER SEQUENCE clinvar_variants_id_seq RENAME TO clinical_controls_id_seq")
    op.execute("ALTER INDEX clinvar_variants_pkey RENAME TO clinical_controls_pkey")

    op.alter_column(controls, "clinvar_db_version", nullable=False, new_column_name="db_version")
    op.alter_column(controls, "allele_id", nullable=False, new_column_name="db_identifier")
    # Added as nullable; tightened to NOT NULL once existing rows are backfilled below.
    op.add_column(controls, sa.Column("db_name", sa.String(), nullable=True))

    for indexed_column in ("gene_symbol", "db_name", "db_identifier", "db_version"):
        op.create_index(f"ix_clinical_controls_{indexed_column}", controls, [indexed_column])

    op.create_table(
        association,
        sa.Column("mapped_variant_id", sa.Integer(), nullable=False),
        sa.Column("clinical_control_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(["mapped_variant_id"], ["mapped_variants.id"]),
        sa.ForeignKeyConstraint(["clinical_control_id"], ["clinical_controls.id"]),
        sa.PrimaryKeyConstraint("mapped_variant_id", "clinical_control_id"),
    )

    # Convert any existing ClinVar variants into clinical control variants. Since
    # this table is being updated from a ClinVar-specific table, we assume all
    # existing controls are from ClinVar.
    op.execute(
        """
        INSERT INTO mapped_variants_clinical_controls (
            mapped_variant_id,
            clinical_control_id
        )
        SELECT id, clinvar_variant_id
        FROM mapped_variants
        WHERE clinvar_variant_id IS NOT NULL
        """
    )

    op.execute("UPDATE clinical_controls SET db_name='ClinVar'")
    op.alter_column(controls, "db_name", nullable=False)

    # The one-to-many link is now represented by the association table.
    op.drop_index("ix_mapped_variants_clinvar_variant_id", "mapped_variants")
    op.drop_column("mapped_variants", "clinvar_variant_id")


def downgrade():
    """Revert ``clinical_controls`` to the ClinVar-specific ``clinvar_variants`` schema.

    Renames the table, sequence and primary key index back, drops the indexes and
    the ``db_name`` column added by upgrade(), restores the original column names,
    and collapses the ``mapped_variants_clinical_controls`` association table back
    into a single ``mapped_variants.clinvar_variant_id`` column.

    The association is many-to-many while the restored column holds a single
    value, so a mapped variant linked to several controls keeps only one of them.
    """
    clinvar_table = "clinvar_variants"

    op.rename_table("clinical_controls", clinvar_table)
    op.execute("ALTER SEQUENCE clinical_controls_id_seq RENAME TO clinvar_variants_id_seq")
    op.execute("ALTER INDEX clinical_controls_pkey RENAME TO clinvar_variants_pkey")

    # The table has already been renamed above, so the indexes are dropped against
    # its current name rather than the no-longer-existing ``clinical_controls``.
    for indexed_column in ("gene_symbol", "db_name", "db_identifier", "db_version"):
        op.drop_index(f"ix_clinical_controls_{indexed_column}", clinvar_table)

    op.alter_column(clinvar_table, "db_version", nullable=False, new_column_name="clinvar_db_version")
    op.alter_column(clinvar_table, "db_identifier", nullable=False, new_column_name="allele_id")
    op.drop_column(clinvar_table, "db_name")

    # Name the foreign key exactly as the previous revision created it; that
    # revision's downgrade() drops the constraint by this name and would fail
    # against a database-generated default name.
    op.add_column(
        "mapped_variants",
        sa.Column(
            "clinvar_variant_id",
            sa.Integer(),
            sa.ForeignKey(
                "clinvar_variants.id",
                name="mapped_variant_clinvar_variant_id_foreign_key_constraint",
            ),
            nullable=True,
        ),
    )

    # Downgrades a many-to-many relationship to a one-to-many. This will result in data loss.
    op.execute(
        """
        UPDATE mapped_variants
        SET clinvar_variant_id=mapped_variants_clinical_controls.clinical_control_id
        FROM mapped_variants_clinical_controls
        WHERE mapped_variants_clinical_controls.mapped_variant_id=mapped_variants.id
        """
    )

    op.create_index("ix_mapped_variants_clinvar_variant_id", "mapped_variants", ["clinvar_variant_id"])
    op.drop_table("mapped_variants_clinical_controls")
53 changes: 53 additions & 0 deletions alembic/versions/d6e5a9fde3c9_move_clingen_allele_id_to_mapped_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""move clingen allele id to mapped variants table

Revision ID: d6e5a9fde3c9
Revises: 695b73abe581
Create Date: 2025-02-19 10:51:07.319962

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "d6e5a9fde3c9"
down_revision = "695b73abe581"
branch_labels = None
depends_on = None


def upgrade():
    """Move ``clingen_allele_id`` from ``variants`` to ``mapped_variants``.

    The new column is populated from the old one (joined on
    ``mapped_variants.variant_id``) before the old column is dropped.
    """
    column_name = "clingen_allele_id"
    copy_to_mapped_variants = """
        UPDATE mapped_variants
        SET clingen_allele_id=variants.clingen_allele_id
        FROM variants
        WHERE variants.id=mapped_variants.variant_id
        """

    op.drop_index("ix_variants_clingen_allele_id", table_name="variants")
    op.add_column("mapped_variants", sa.Column(column_name, sa.String(), nullable=True))
    # Copy existing values across before the source column disappears.
    op.execute(copy_to_mapped_variants)
    op.drop_column("variants", column_name)
    op.create_index(
        op.f("ix_mapped_variants_clingen_allele_id"),
        "mapped_variants",
        [column_name],
        unique=False,
    )


def downgrade():
    """Move ``clingen_allele_id`` back from ``mapped_variants`` to ``variants``.

    The restored column is populated from ``mapped_variants`` (joined on
    ``mapped_variants.variant_id``) before that column is dropped.
    """
    column_name = "clingen_allele_id"
    copy_to_variants = """
        UPDATE variants
        SET clingen_allele_id=mapped_variants.clingen_allele_id
        FROM mapped_variants
        WHERE variants.id=mapped_variants.variant_id
        """

    op.drop_index(op.f("ix_mapped_variants_clingen_allele_id"), table_name="mapped_variants")
    op.add_column("variants", sa.Column(column_name, sa.String(), nullable=True))
    # Copy existing values across before the source column disappears.
    op.execute(copy_to_variants)
    op.drop_column("mapped_variants", column_name)
    op.create_index("ix_variants_clingen_allele_id", "variants", [column_name], unique=False)
27 changes: 27 additions & 0 deletions alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Add ClinGen allele IDs

Revision ID: e8a3b5d8f885
Revises: 4726e4dddde8
Create Date: 2025-01-27 18:55:09.283855

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "e8a3b5d8f885"
down_revision = "4726e4dddde8"
branch_labels = None
depends_on = None


def upgrade():
    """Add an indexed, nullable ``clingen_allele_id`` string column to ``variants``."""
    allele_id_column = sa.Column("clingen_allele_id", sa.String(), nullable=True)
    op.add_column("variants", allele_id_column)
    op.create_index(
        op.f("ix_variants_clingen_allele_id"),
        "variants",
        ["clingen_allele_id"],
        unique=False,
    )


def downgrade():
    """Drop the ``clingen_allele_id`` index and column from ``variants``."""
    table = "variants"
    # The index goes first, mirroring upgrade() in reverse.
    op.drop_index(op.f("ix_variants_clingen_allele_id"), table_name=table)
    op.drop_column(table, "clingen_allele_id")
4 changes: 4 additions & 0 deletions src/mavedb/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import os

# URL of the MaveDB API source repository on GitHub.
MAVEDB_BASE_GIT = "https://github.com/VariantEffect/mavedb-api"
# Base URL of the MaveDB frontend. Read from the `MAVE_FRONTEND_URL` environment
# variable (note: `MAVE_`, not `MAVEDB_`), falling back to the public site.
MAVEDB_FRONTEND_URL = os.getenv("MAVE_FRONTEND_URL", "https://mavedb.org")
Empty file.
17 changes: 17 additions & 0 deletions src/mavedb/lib/clingen/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import os

# Genboree account credentials, read from the environment. Both are None when unset.
GENBOREE_ACCOUNT_NAME = os.getenv("GENBOREE_ACCOUNT_NAME")
GENBOREE_ACCOUNT_PASSWORD = os.getenv("GENBOREE_ACCOUNT_PASSWORD")

# Tenant identifiers interpolated into the Genboree URLs below. Both are None when unset.
CLIN_GEN_TENANT = os.getenv("CLIN_GEN_TENANT")
LDH_TENANT = os.getenv("LDH_TENANT")

# Fixed identifiers for the LDH submission type and entity.
LDH_SUBMISSION_TYPE = "cg-ldh-ld-submission"
LDH_ENTITY_NAME = "MaveDBMapping"
# The submission URL path segment differs from LDH_ENTITY_NAME (reason not documented here).
LDH_ENTITY_ENDPOINT = "maveDb"  # for some reason, not the same :/

# NOTE(review): presumably the number of submissions sent per request — confirm against callers.
DEFAULT_LDH_SUBMISSION_BATCH_SIZE = 100
# NOTE(review): these URLs are built once at import time. If CLIN_GEN_TENANT or LDH_TENANT
# is unset, the f-strings interpolate the literal text "None" into the URL.
LDH_SUBMISSION_URL = f"https://genboree.org/mq/brdg/pulsar/{CLIN_GEN_TENANT}/ldh/submissions/{LDH_ENTITY_ENDPOINT}"
LDH_LINKED_DATA_URL = f"https://genboree.org/{LDH_TENANT}/{LDH_ENTITY_NAME}/id"

# NOTE(review): looks like a fractional threshold used by the linked-data retry logic;
# the exact semantics are not visible in this module — confirm against the linking job.
LINKED_DATA_RETRY_THRESHOLD = 0.95
66 changes: 66 additions & 0 deletions src/mavedb/lib/clingen/content_constructors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from datetime import datetime
from uuid import uuid4

from mavedb import __version__
from mavedb.constants import MAVEDB_BASE_GIT, MAVEDB_FRONTEND_URL
from mavedb.lib.types.clingen import LdhContentLinkedData, LdhContentSubject, LdhEvent, LdhSubmission
from mavedb.lib.clingen.constants import LDH_ENTITY_NAME, LDH_SUBMISSION_TYPE
from mavedb.models.mapped_variant import MappedVariant
from mavedb.models.variant import Variant


def construct_ldh_submission_event(sbj: LdhContentSubject) -> LdhEvent:
    """Build the event portion of an LDH submission for the given subject.

    The event gets a freshly generated UUID, references the subject by its HGVS
    string, and records the MaveDB release tag and the current local time as
    the trigger.
    """
    event_uuid = str(uuid4())
    subject_reference = {
        "id": sbj["Variant"]["hgvs"],
        "type": "Variant",
        "format": "hgvs",
        "add": True,
    }
    trigger_source = {
        "host": MAVEDB_BASE_GIT,
        "id": "resource_published",
        "iri": f"{MAVEDB_BASE_GIT}/releases/tag/v{__version__}",
    }
    # Timestamp is the naive local time in ISO 8601 form.
    triggered = {"by": trigger_source, "at": datetime.now().isoformat()}

    return {
        "type": LDH_SUBMISSION_TYPE,
        "name": LDH_ENTITY_NAME,
        "uuid": event_uuid,
        "sbj": subject_reference,
        "triggered": triggered,
    }


def construct_ldh_submission_subject(hgvs: str) -> LdhContentSubject:
    """Wrap an HGVS string in the subject structure used by LDH submissions."""
    variant_subject = {"hgvs": hgvs}
    return {"Variant": variant_subject}


def construct_ldh_submission_entity(variant: Variant, mapped_variant: MappedVariant) -> LdhContentLinkedData:
    """Build the linked-data portion of an LDH submission for one mapped variant.

    The result holds a single ``MaveDBMapping`` entry whose content combines the
    variant's URN and score with the mapped variant's pre-/post-mapped objects
    and mapping API version. The variant URN doubles as the entity ID and is
    appended to the frontend URL to form the entity IRI.
    """
    urn = variant.urn  # type: ignore

    # TODO#372: We try to make all possible fields that are non-nullable represented that way.
    entity_content = {
        "mavedb_id": urn,
        "pre_mapped": mapped_variant.pre_mapped,  # type: ignore
        "post_mapped": mapped_variant.post_mapped,  # type: ignore
        "mapping_api_version": mapped_variant.mapping_api_version,  # type: ignore
        "score": variant.data["score_data"]["score"],  # type: ignore
    }
    mapping_entry = {
        "entContent": entity_content,
        "entId": urn,
        "entIri": f"{MAVEDB_FRONTEND_URL}/{urn}",
    }
    return {"MaveDBMapping": [mapping_entry]}


def construct_ldh_submission(variant_content: list[tuple[str, Variant, MappedVariant]]) -> list[LdhSubmission]:
    """Build one LDH submission per ``(hgvs, variant, mapped_variant)`` tuple.

    Each submission pairs an event with its content; the content carries the
    subject and the linked-data entity. Output order follows input order.
    """
    submissions: list[LdhSubmission] = []
    for hgvs_string, source_variant, mapping in variant_content:
        sbj = construct_ldh_submission_subject(hgvs_string)
        submissions.append(
            {
                # The event is derived from the same subject included in the content.
                "event": construct_ldh_submission_event(sbj),
                "content": {
                    "sbj": sbj,
                    "ld": construct_ldh_submission_entity(source_variant, mapping),
                },
            }
        )
    return submissions
Loading