Merged
Commits (70)
bcbb0de
logged in users can now see 'public' datasets
tcnichol Apr 24, 2023
66268af
remove print statements
tcnichol Apr 24, 2023
0cae59c
public datasets visible
tcnichol Apr 24, 2023
23e117b
we can view file, but does not show up in dataset view
tcnichol Apr 24, 2023
543d7be
files visible for public datasets
tcnichol Apr 24, 2023
12a0658
fixing indentation
tcnichol Apr 24, 2023
b91bb1c
formatting
tcnichol Apr 24, 2023
9e937e7
adding new modal for dataset, just a copy of existing one, will change
tcnichol Apr 25, 2023
5011f53
we can now see the right status, button works, but status does not ch…
tcnichol Apr 25, 2023
50e4b55
status change now works, page reloads and shows new status
tcnichol Apr 25, 2023
db67278
change status instead of make public
tcnichol Apr 25, 2023
778403b
Merge branch 'main' into 464-public-datasets
tcnichol Apr 26, 2023
90910e5
import DatasetStatus
tcnichol Apr 26, 2023
ef59f00
Merge branch 'main' into 464-public-datasets
tcnichol Apr 27, 2023
09eab84
formatting
tcnichol Apr 27, 2023
e466e42
codegen
tcnichol Apr 27, 2023
28e6235
Merge branch 'main' into 464-public-datasets
tcnichol Apr 28, 2023
bccae8a
anonymous user added
tcnichol Apr 28, 2023
44efee4
as todos,
tcnichol May 1, 2023
92ec5ef
dataset page does not load if not logged in, uncaught error in consol…
tcnichol May 1, 2023
535354b
now redirects, does not let you see the page
tcnichol May 1, 2023
104c42b
reverting
tcnichol May 3, 2023
97025bf
adding a check file status
tcnichol May 3, 2023
60d6414
using checkfilestatus in files
tcnichol May 3, 2023
0423744
check public status for dataset metadata
tcnichol May 3, 2023
21ece1b
fixing capital PUBLIC
tcnichol May 3, 2023
5372627
formatting
tcnichol May 3, 2023
df0d7e5
matching main
tcnichol May 3, 2023
0a8bfba
new dependency uses anonymous user
tcnichol May 3, 2023
17d0d39
anonymous user can access public datasets
tcnichol May 3, 2023
0ec7553
formatting
tcnichol May 3, 2023
bd286fc
using get user or anonymous in method
tcnichol May 4, 2023
b24328f
formatting
tcnichol May 4, 2023
618eb33
TODO for later
tcnichol May 4, 2023
a5a405d
isAuthorized checks if dataset can be accessed without login
tcnichol May 5, 2023
53d764e
ispublic method works, but page reloads on dataset
tcnichol May 9, 2023
3138924
does not work on page
tcnichol May 10, 2023
c539705
Merge branch 'main' into 464-public-datasets
tcnichol May 18, 2023
f769831
cleanup after merge
tcnichol May 19, 2023
daa1418
no more anonymous user
tcnichol May 19, 2023
c612991
no more anonymous user
tcnichol May 19, 2023
5126224
no more anonymous user
tcnichol May 19, 2023
ee5064e
remove unused imports that were deleted in merge
tcnichol May 22, 2023
f282874
Merge branch 'main' into 464-public-datasets
tcnichol Jun 7, 2023
b19c58d
sharing tab disabled for public only access
tcnichol Jun 7, 2023
2784b92
add metadata button removed
tcnichol Jun 7, 2023
c93f5ee
the delete button is gone if user is viewer or dataset is public
tcnichol Jun 8, 2023
c66c27b
edit button now removed for all kinds of metadata
tcnichol Jun 8, 2023
538da6d
need to pass role to widget
tcnichol Jun 8, 2023
a09d267
Merge branch 'main' into 464-public-datasets
tcnichol Jun 20, 2023
3eb1090
MERGE
tcnichol Jun 20, 2023
7064316
error if no role on file, return public auth if does have role on file
tcnichol Jun 20, 2023
cb263ae
we will now see public datasets in explore
tcnichol Jun 20, 2023
816caa8
formatting
tcnichol Jun 20, 2023
00741d7
Merge branch 'main' into 464-public-datasets
tcnichol Jun 23, 2023
d39e200
Merge branch 'main' into 464-public-datasets
tcnichol Jun 26, 2023
5e122cd
fixing package log to match main
tcnichol Oct 10, 2023
7497d05
Merge branch 'main' into 464-public-datasets
tcnichol Oct 10, 2023
17cb71e
fixing changes lost in merge
tcnichol Oct 10, 2023
b7ded11
add metadata button disabled for users who see it as public dataset
tcnichol Oct 10, 2023
8c4b35a
Merge branch 'main' into 464-public-datasets
tcnichol Oct 19, 2023
91e72f4
Merge branch 'main' into 464-public-datasets
tcnichol Nov 1, 2023
762f482
fixing conflict
tcnichol Nov 1, 2023
5ee8728
adding datasetRole, fixing indent
tcnichol Nov 1, 2023
2e03ab7
remove published, not currently an option
tcnichol Nov 1, 2023
5ebad1e
no share or submit extractor public datasets
tcnichol Nov 1, 2023
1bc2c02
using 'authenticated' instead of 'public' for datasets all authentica…
tcnichol Nov 6, 2023
7949d3d
formatting
tcnichol Nov 6, 2023
a63cde2
changing name, fits better auth
tcnichol Nov 8, 2023
cd68945
Moved submission button to bottom dialog actions in line
lmarini Nov 13, 2023
110 changes: 105 additions & 5 deletions backend/app/deps/authorization_deps.py
@@ -5,7 +5,7 @@

 from app.keycloak_auth import get_current_username
 from app.models.authorization import RoleType, AuthorizationDB
-from app.models.datasets import DatasetDB
+from app.models.datasets import DatasetDB, DatasetStatus
 from app.models.files import FileOut, FileDB
 from app.models.groups import GroupOut, GroupDB
 from app.models.metadata import MetadataDB
@@ -40,6 +40,24 @@ async def get_role_by_file(
                 AuthorizationDB.user_ids == current_user,
             ),
         )
+        if authorization is None:
+            if (
+                dataset := await DatasetDB.get(PydanticObjectId(file.dataset_id))
+            ) is not None:
+                if dataset.status == DatasetStatus.AUTHENTICATED.name:
+                    auth_dict = {
+                        "creator": dataset.author.email,
+                        "dataset_id": file.dataset_id,
+                        "user_ids": [current_user],
+                        "role": RoleType.VIEWER,
+                    }
+                    authenticated_auth = AuthorizationDB(**auth_dict)
+                    return authenticated_auth
+                else:
+                    raise HTTPException(
+                        status_code=403,
+                        detail=f"User `{current_user}` does not have role on file {file_id}",
+                    )
         return authorization.role
     raise HTTPException(status_code=404, detail=f"File {file_id} not found")

@@ -96,6 +114,28 @@ async def get_role_by_group(
     raise HTTPException(status_code=404, detail=f"Group {group_id} not found")


+async def is_public_dataset(
+    dataset_id: str,
+) -> bool:
+    """Checks if a dataset is public."""
+    if (dataset_out := await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
+        if dataset_out.status == DatasetStatus.PUBLIC:
+            return True
+        else:
+            return False
+
+
+async def is_authenticated_dataset(
+    dataset_id: str,
+) -> bool:
+    """Checks if a dataset is authenticated."""
+    if (dataset_out := await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
+        if dataset_out.status == DatasetStatus.AUTHENTICATED:
+            return True
+        else:
+            return False
+
+
 class Authorization:
     """We use class dependency so that we can provide the `permission` parameter to the dependency.
     For more info see https://fastapi.tiangolo.com/advanced/advanced-dependencies/."""
@@ -125,10 +165,24 @@ async def __call__(
                     detail=f"User `{current_user}` does not have `{self.role}` permission on dataset {dataset_id}",
                 )
         else:
-            raise HTTPException(
-                status_code=403,
-                detail=f"User `{current_user}` does not have `{self.role}` permission on dataset {dataset_id}",
-            )
+            if (
+                current_dataset := await DatasetDB.get(PydanticObjectId(dataset_id))
+            ) is not None:
+                if (
+                    current_dataset.status == DatasetStatus.AUTHENTICATED.name
+                    and self.role == "viewer"
+                ):
+                    return True
+                else:
+                    raise HTTPException(
+                        status_code=403,
+                        detail=f"User `{current_user}` does not have `{self.role}` permission on dataset {dataset_id}",
+                    )
+            else:
+                raise HTTPException(
+                    status_code=404,
+                    detail=f"The dataset {dataset_id} is not found",
+                )


class FileAuthorization:
@@ -251,6 +305,52 @@ async def __call__(
     raise HTTPException(status_code=404, detail=f"Group {group_id} not found")


+class CheckStatus:
+    """We use class dependency so that we can provide the `status` parameter to the dependency.
+    For more info see https://fastapi.tiangolo.com/advanced/advanced-dependencies/."""
+
+    def __init__(self, status: str):
+        self.status = status
+
+    async def __call__(
+        self,
+        dataset_id: str,
+    ):
+        if (dataset := await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
+            if dataset.status == self.status:
+                return True
+            else:
+                return False
+        else:
+            return False
+
+
+class CheckFileStatus:
+    """We use class dependency so that we can provide the `status` parameter to the dependency.
+    For more info see https://fastapi.tiangolo.com/advanced/advanced-dependencies/."""
+
+    def __init__(self, status: str):
+        self.status = status
+
+    async def __call__(
+        self,
+        file_id: str,
+    ):
+        if (file_out := await FileDB.get(PydanticObjectId(file_id))) is not None:
+            dataset_id = file_out.dataset_id
+            if (
+                dataset := await DatasetDB.get(PydanticObjectId(dataset_id))
+            ) is not None:
+                if dataset.status == self.status:
+                    return True
+                else:
+                    return False
+            else:
+                return False
+        else:
+            return False
+
+
 def access(user_role: RoleType, role_required: RoleType) -> bool:
     """Enforce implied role hierarchy OWNER > EDITOR > UPLOADER > VIEWER"""
     if user_role == RoleType.OWNER:
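The docstrings on these classes reference FastAPI's class-dependency pattern: instantiating the class binds a parameter, and FastAPI then calls the instance once per request. Below is a minimal, self-contained sketch of how a `CheckStatus`-style dependency might be wired into a route; the app, route path, and stub lookup are illustrative, not part of this PR.

```python
from fastapi import Depends, FastAPI, HTTPException

app = FastAPI()


class CheckStatus:
    """Class dependency: __init__ captures a parameter, __call__ runs per request."""

    def __init__(self, status: str):
        self.status = status

    async def __call__(self, dataset_id: str) -> bool:
        # Stand-in for the real Mongo lookup in authorization_deps.py:
        # pretend datasets whose id starts with "pub" are PUBLIC.
        return self.status == "PUBLIC" and dataset_id.startswith("pub")


# Instantiating the class binds the parameter; Depends() receives a callable.
dataset_is_public = CheckStatus(status="PUBLIC")


@app.get("/datasets/{dataset_id}/files")
async def list_files(dataset_id: str, is_public: bool = Depends(dataset_is_public)):
    if not is_public:
        raise HTTPException(status_code=403, detail="Dataset is not public")
    return {"dataset_id": dataset_id, "files": []}
```

FastAPI resolves the `dataset_id` path parameter into the dependency's `__call__` by name, which is why the real classes take only `dataset_id` or `file_id` as their call argument.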
128 changes: 128 additions & 0 deletions backend/app/heartbeat_listener_sync.py
@@ -0,0 +1,128 @@
import logging
import pika
import json
from packaging import version
from pymongo import MongoClient

from app.config import settings
from app.models.search import SearchCriteria
from app.routers.feeds import FeedIn, FeedListener, FeedOut, FeedDB, associate_listener
from app.models.listeners import EventListenerDB, EventListenerOut, ExtractorInfo

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def callback(ch, method, properties, body):
    """Receive messages from RabbitMQ and process them.
    The extractor info is parsed from the message, and if the extractor is new
    or a newer version, the database is updated.
    """
    msg = json.loads(body.decode("utf-8"))

    extractor_info = msg["extractor_info"]
    extractor_name = extractor_info["name"]
    extractor_db = EventListenerDB(
        **extractor_info, properties=ExtractorInfo(**extractor_info)
    )

    mongo_client = MongoClient(settings.MONGODB_URL)
    db = mongo_client[settings.MONGO_DATABASE]

    # check to see if the extractor already exists
    existing_extractor = db["listeners"].find_one({"name": msg["queue"]})
    if existing_extractor is not None:
        # Update existing listener
        existing_version = existing_extractor["version"]
        new_version = extractor_db.version
        if version.parse(new_version) > version.parse(existing_version):
            # if this is a new version, add it to the database
            new_extractor = db["listeners"].insert_one(extractor_db.to_mongo())
            found = db["listeners"].find_one({"_id": new_extractor.inserted_id})
            # TODO - for now we are not deleting an older version of the extractor, just adding a new one
            # removed = db["listeners"].delete_one({"_id": existing_extractor["_id"]})
            extractor_out = EventListenerOut.from_mongo(found)
            logger.info(
                "%s updated from %s to %s"
                % (extractor_name, existing_version, new_version)
            )
            return extractor_out
    else:
        # Register new listener
        new_extractor = db["listeners"].insert_one(extractor_db.to_mongo())
        found = db["listeners"].find_one({"_id": new_extractor.inserted_id})
        extractor_out = EventListenerOut.from_mongo(found)
        logger.info("New extractor registered: " + extractor_name)

        # Assign MIME-based listener if needed
        if extractor_out.properties and extractor_out.properties.process:
            process = extractor_out.properties.process
            if "file" in process:
                # Create a MIME-based feed for this v1 extractor
                criteria_list = []
                for mime in process["file"]:
                    main_type = mime.split("/")[0] if mime.find("/") > -1 else mime
                    sub_type = mime.split("/")[1] if mime.find("/") > -1 else None
                    if sub_type:
                        if sub_type == "*":
                            # If a wildcard, just match on main type
                            criteria_list.append(
                                SearchCriteria(
                                    field="content_type_main", value=main_type
                                )
                            )
                        else:
                            # Otherwise match the whole string
                            criteria_list.append(
                                SearchCriteria(field="content_type", value=mime)
                            )
                    else:
                        criteria_list.append(
                            SearchCriteria(field="content_type", value=mime)
                        )

                # TODO: Who should the author be for an auto-generated feed? Currently None.
                new_feed = FeedDB(
                    name=extractor_name,
                    search={
                        "index_name": "file",
                        "criteria": criteria_list,
                        "mode": "or",
                    },
                    listeners=[
                        FeedListener(listener_id=extractor_out.id, automatic=True)
                    ],
                )
                db["feeds"].insert_one(new_feed.to_mongo())

        return extractor_out


def listen_for_heartbeats():
    """Run continuously, listening for extractor heartbeats sent over RabbitMQ."""
    credentials = pika.PlainCredentials(settings.RABBITMQ_USER, settings.RABBITMQ_PASS)
    parameters = pika.ConnectionParameters(
        settings.RABBITMQ_HOST, 5672, "/", credentials
    )
    connection = pika.BlockingConnection(parameters)
    channel = connection.channel()

    channel.exchange_declare(
        exchange=settings.HEARTBEAT_EXCHANGE, exchange_type="fanout", durable=True
    )
    result = channel.queue_declare(queue="", exclusive=True)
    queue_name = result.method.queue
    channel.queue_bind(exchange=settings.HEARTBEAT_EXCHANGE, queue=queue_name)

    logger.info(" [*] Waiting for heartbeats. To exit press CTRL+C")
    channel.basic_consume(queue=queue_name, on_message_callback=callback, auto_ack=True)
    channel.start_consuming()


if __name__ == "__main__":
    listen_for_heartbeats()
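For context on the message shape: `callback` expects a JSON body with a `queue` name and an `extractor_info` object carrying at least `name` and `version`. Here is a hedged sketch of a publisher emitting one such heartbeat to the same fanout exchange; the host, exchange name, and extra fields are assumptions, not values taken from this PR.

```python
import json

import pika

# Assumed values; in the app these come from settings.*
RABBITMQ_HOST = "localhost"
HEARTBEAT_EXCHANGE = "extractors.heartbeats"

connection = pika.BlockingConnection(pika.ConnectionParameters(RABBITMQ_HOST))
channel = connection.channel()
channel.exchange_declare(
    exchange=HEARTBEAT_EXCHANGE, exchange_type="fanout", durable=True
)

# Shape inferred from callback(): it reads msg["queue"] and msg["extractor_info"]
# and compares extractor_info["version"] against the registered version.
heartbeat = {
    "queue": "ncsa.wordcount",
    "extractor_info": {
        "name": "ncsa.wordcount",
        "version": "2.1.0",
        "description": "Counts words in text files",
        "process": {"file": ["text/*"]},
    },
}
channel.basic_publish(
    exchange=HEARTBEAT_EXCHANGE, routing_key="", body=json.dumps(heartbeat)
)
connection.close()
```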
5 changes: 1 addition & 4 deletions backend/app/main.py
@@ -122,10 +122,7 @@
     dependencies=[Depends(get_current_username)],
 )
 api_router.include_router(
-    metadata_datasets.router,
-    prefix="/datasets",
-    tags=["metadata"],
-    dependencies=[Depends(get_current_username)],
+    metadata_datasets.router, prefix="/datasets", tags=["metadata"]
 )
 api_router.include_router(
     folders.router,
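This change removes the router-wide `get_current_username` dependency from the metadata routes, so anonymous requests can reach them and access control moves into per-route dependencies. A small sketch of the difference, with an illustrative stand-in for the real Keycloak check:

```python
from typing import Optional

from fastapi import APIRouter, Depends, FastAPI, Header, HTTPException


async def get_current_username(authorization: Optional[str] = Header(default=None)):
    # Stand-in for the real Keycloak-backed dependency.
    if authorization is None:
        raise HTTPException(status_code=401, detail="Not authenticated")
    return "someuser"


router = APIRouter()


@router.get("/{dataset_id}/metadata")
async def get_dataset_metadata(dataset_id: str):
    # Reachable anonymously now; public/authenticated checks happen in
    # route-level dependencies such as CheckStatus instead.
    return {"dataset_id": dataset_id, "metadata": []}


app = FastAPI()

# Before this PR, every metadata route required a login:
# app.include_router(router, prefix="/datasets", dependencies=[Depends(get_current_username)])

# After: the router is included without the blanket dependency.
app.include_router(router, prefix="/datasets", tags=["metadata"])
```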
3 changes: 3 additions & 0 deletions backend/app/models/datasets.py
@@ -19,6 +19,7 @@ def _generate_next_value_(name, start, count, last_values):
 class DatasetStatus(AutoName):
     PRIVATE = auto()
     PUBLIC = auto()
+    AUTHENTICATED = auto()
     DEFAULT = auto()
     TRIAL = auto()

@@ -35,6 +36,7 @@ class DatasetIn(DatasetBase):
 class DatasetPatch(BaseModel):
     name: Optional[str]
     description: Optional[str]
+    status: Optional[str]


 class DatasetDB(Document, DatasetBase):

@@ -63,6 +65,7 @@ class DatasetDBViewList(View, DatasetBase):
     modified: datetime = Field(default_factory=datetime.utcnow)
     auth: List[AuthorizationDB]
     thumbnail_id: Optional[PydanticObjectId] = None
+    status: Optional[str]

     class Settings:
         source = DatasetDB
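The status checks elsewhere in this PR compare the stored status string sometimes against `DatasetStatus.X.name` and sometimes against the enum member itself; which comparison holds depends on whether the field was deserialized as a plain string or as the enum. A short sketch of the `AutoName` behavior, assuming `_generate_next_value_` returns the member name as the pattern conventionally does:

```python
from enum import Enum, auto


class AutoName(Enum):
    # Conventional AutoName: each member's value is its own name.
    def _generate_next_value_(name, start, count, last_values):
        return name


class DatasetStatus(AutoName):
    PRIVATE = auto()
    PUBLIC = auto()
    AUTHENTICATED = auto()
    DEFAULT = auto()
    TRIAL = auto()


stored = "AUTHENTICATED"  # what a plain string field would hold in Mongo
assert stored == DatasetStatus.AUTHENTICATED.name
assert stored == DatasetStatus.AUTHENTICATED.value
# A raw string never equals the enum member itself:
assert (stored == DatasetStatus.AUTHENTICATED) is False
```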
22 changes: 18 additions & 4 deletions backend/app/routers/authorization.py
@@ -3,7 +3,6 @@
 from bson import ObjectId
 from fastapi import APIRouter, Depends
 from fastapi.exceptions import HTTPException
-
 from app.dependencies import get_elasticsearchclient
 from app.deps.authorization_deps import (
     Authorization,

@@ -25,6 +24,7 @@
     DatasetRoles,
     DatasetDB,
     DatasetOut,
+    DatasetStatus,
 )
 from app.models.groups import GroupDB
 from app.models.pyobjectid import PyObjectId

@@ -80,9 +80,23 @@ async def get_dataset_role(
             ),
         )
     ) is None:
-        raise HTTPException(
-            status_code=404, detail=f"No authorization found for dataset: {dataset_id}"
-        )
+        if (
+            current_dataset := await DatasetDB.get(PydanticObjectId(dataset_id))
+        ) is not None:
+            if current_dataset.status == DatasetStatus.AUTHENTICATED.name:
+                public_authorization_in = {
+                    "dataset_id": PydanticObjectId(dataset_id),
+                    "role": RoleType.VIEWER,
+                }
+                authorization = AuthorizationDB(
+                    **public_authorization_in, creator=current_dataset.creator.email
+                )
+                return authorization.dict()
+            else:
+                raise HTTPException(
+                    status_code=404,
+                    detail=f"No authorization found for dataset: {dataset_id}",
+                )
     else:
         return auth_db.dict()
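Note that the new branch above never persists the synthesized authorization: when no stored record exists but the dataset is AUTHENTICATED, it builds a transient VIEWER grant in memory and returns its dict. A stripped-down sketch of that pattern, using simplified stand-in models (field names follow the diff; everything else is illustrative):

```python
from typing import List

from pydantic import BaseModel


class AuthorizationOut(BaseModel):
    dataset_id: str
    creator: str
    role: str = "viewer"
    user_ids: List[str] = []


def role_for(dataset_status, dataset_id, owner_email, auth_record=None):
    """Return a stored authorization if present, else synthesize a VIEWER grant."""
    if auth_record is not None:
        return auth_record
    if dataset_status == "AUTHENTICATED":
        # Built on the fly and returned; nothing is written to the database.
        return AuthorizationOut(dataset_id=dataset_id, creator=owner_email).dict()
    raise LookupError(f"No authorization found for dataset: {dataset_id}")


print(role_for("AUTHENTICATED", "6530f1a2", "owner@example.com"))
```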