Merged
Commits (70)
bcbb0de
logged in users can now see 'public' datasets
tcnichol Apr 24, 2023
66268af
remove print statements
tcnichol Apr 24, 2023
0cae59c
public datasets visible
tcnichol Apr 24, 2023
23e117b
we can view file, but does not show up in dataset view
tcnichol Apr 24, 2023
543d7be
files visible for public datasets
tcnichol Apr 24, 2023
12a0658
fixing indentation
tcnichol Apr 24, 2023
b91bb1c
formatting
tcnichol Apr 24, 2023
9e937e7
adding new modal for dataset, just a copy of existing one, will change
tcnichol Apr 25, 2023
5011f53
we can now see the right status, button works, but status does not ch…
tcnichol Apr 25, 2023
50e4b55
status change now works, page reloads and shows new status
tcnichol Apr 25, 2023
db67278
change status instead of make public
tcnichol Apr 25, 2023
778403b
Merge branch 'main' into 464-public-datasets
tcnichol Apr 26, 2023
90910e5
import DatasetStatus
tcnichol Apr 26, 2023
ef59f00
Merge branch 'main' into 464-public-datasets
tcnichol Apr 27, 2023
09eab84
formatting
tcnichol Apr 27, 2023
e466e42
codegen
tcnichol Apr 27, 2023
28e6235
Merge branch 'main' into 464-public-datasets
tcnichol Apr 28, 2023
bccae8a
anonymous user added
tcnichol Apr 28, 2023
44efee4
as todos,
tcnichol May 1, 2023
92ec5ef
dataset page does not load if not logged in, uncaught error in consol…
tcnichol May 1, 2023
535354b
now redirects, does not let you see the page
tcnichol May 1, 2023
104c42b
reverting
tcnichol May 3, 2023
97025bf
adding a check file status
tcnichol May 3, 2023
60d6414
using checkfilestatus in files
tcnichol May 3, 2023
0423744
check public status for dataset metadata
tcnichol May 3, 2023
21ece1b
fixing capital PUBLIC
tcnichol May 3, 2023
5372627
formatting
tcnichol May 3, 2023
df0d7e5
matching main
tcnichol May 3, 2023
0a8bfba
new dependency uses anonymous user
tcnichol May 3, 2023
17d0d39
anonymous user can access public datasets
tcnichol May 3, 2023
0ec7553
formatting
tcnichol May 3, 2023
bd286fc
using get user or anonymous in method
tcnichol May 4, 2023
b24328f
formatting
tcnichol May 4, 2023
618eb33
TODO for later
tcnichol May 4, 2023
a5a405d
isAuthorized checks if dataset can be accessed without login
tcnichol May 5, 2023
53d764e
ispublic method works, but page reloads on dataset
tcnichol May 9, 2023
3138924
does not work on page
tcnichol May 10, 2023
c539705
Merge branch 'main' into 464-public-datasets
tcnichol May 18, 2023
f769831
cleanup after merge
tcnichol May 19, 2023
daa1418
no more anonymous user
tcnichol May 19, 2023
c612991
no more anonymous user
tcnichol May 19, 2023
5126224
no more anonymous user
tcnichol May 19, 2023
ee5064e
remove unused imports that were deleted in merge
tcnichol May 22, 2023
f282874
Merge branch 'main' into 464-public-datasets
tcnichol Jun 7, 2023
b19c58d
sharing tab disabled for public only access
tcnichol Jun 7, 2023
2784b92
add metadata button removed
tcnichol Jun 7, 2023
c93f5ee
the delete button is gone if user is viewer or dataset is public
tcnichol Jun 8, 2023
c66c27b
edit button now removed for all kinds of metadata
tcnichol Jun 8, 2023
538da6d
need to pass role to widget
tcnichol Jun 8, 2023
a09d267
Merge branch 'main' into 464-public-datasets
tcnichol Jun 20, 2023
3eb1090
MERGE
tcnichol Jun 20, 2023
7064316
error if no role on file, return public auth if does have role on file
tcnichol Jun 20, 2023
cb263ae
we will now see public datasets in explore
tcnichol Jun 20, 2023
816caa8
formatting
tcnichol Jun 20, 2023
00741d7
Merge branch 'main' into 464-public-datasets
tcnichol Jun 23, 2023
d39e200
Merge branch 'main' into 464-public-datasets
tcnichol Jun 26, 2023
5e122cd
fixing package log to match main
tcnichol Oct 10, 2023
7497d05
Merge branch 'main' into 464-public-datasets
tcnichol Oct 10, 2023
17cb71e
fixing changes lost in merge
tcnichol Oct 10, 2023
b7ded11
add metadata button disabled for users who see it as public dataset
tcnichol Oct 10, 2023
8c4b35a
Merge branch 'main' into 464-public-datasets
tcnichol Oct 19, 2023
91e72f4
Merge branch 'main' into 464-public-datasets
tcnichol Nov 1, 2023
762f482
fixing conflict
tcnichol Nov 1, 2023
5ee8728
adding datasetRole, fixing indent
tcnichol Nov 1, 2023
2e03ab7
remove published, not currently an option
tcnichol Nov 1, 2023
5ebad1e
no share or submit extractor public datasets
tcnichol Nov 1, 2023
1bc2c02
using 'authenticated' instead of 'public' for datasets all authentica…
tcnichol Nov 6, 2023
7949d3d
formatting
tcnichol Nov 6, 2023
a63cde2
changing name, fits better auth
tcnichol Nov 8, 2023
cd68945
Moved submission button to bottom dialog actions in line
lmarini Nov 13, 2023
110 changes: 105 additions & 5 deletions backend/app/deps/authorization_deps.py
@@ -5,7 +5,7 @@

 from app.keycloak_auth import get_current_username
 from app.models.authorization import RoleType, AuthorizationDB
-from app.models.datasets import DatasetDB
+from app.models.datasets import DatasetDB, DatasetStatus
 from app.models.files import FileOut, FileDB
 from app.models.groups import GroupOut, GroupDB
 from app.models.metadata import MetadataDB
@@ -40,6 +40,24 @@ async def get_role_by_file(
                 AuthorizationDB.user_ids == current_user,
             ),
         )
+        if authorization is None:
+            if (
+                dataset := await DatasetDB.get(PydanticObjectId(file.dataset_id))
+            ) is not None:
+                if dataset.status == DatasetStatus.AUTHENTICATED.name:
+                    auth_dict = {
+                        "creator": dataset.author.email,
+                        "dataset_id": file.dataset_id,
+                        "user_ids": [current_user],
+                        "role": RoleType.VIEWER,
+                    }
+                    authenticated_auth = AuthorizationDB(**auth_dict)
+                    return authenticated_auth
+                else:
+                    raise HTTPException(
+                        status_code=403,
+                        detail=f"User `{current_user}` does not have role on file {file_id}",
+                    )
         return authorization.role
     raise HTTPException(status_code=404, detail=f"File {file_id} not found")

@@ -96,6 +114,28 @@ async def get_role_by_group(
     raise HTTPException(status_code=404, detail=f"Group {group_id} not found")


+async def is_public_dataset(
+    dataset_id: str,
+) -> bool:
+    """Checks if a dataset is public."""
+    if (dataset_out := await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
+        if dataset_out.status == DatasetStatus.PUBLIC:
+            return True
+        else:
+            return False
+
+
+async def is_authenticated_dataset(
+    dataset_id: str,
+) -> bool:
+    """Checks if a dataset is authenticated."""
+    if (dataset_out := await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
+        if dataset_out.status == DatasetStatus.AUTHENTICATED:
+            return True
+        else:
+            return False
+
+
 class Authorization:
     """We use class dependency so that we can provide the `permission` parameter to the dependency.
     For more info see https://fastapi.tiangolo.com/advanced/advanced-dependencies/."""
@@ -125,10 +165,24 @@ async def __call__(
                     detail=f"User `{current_user}` does not have `{self.role}` permission on dataset {dataset_id}",
                 )
         else:
-            raise HTTPException(
-                status_code=403,
-                detail=f"User `{current_user}` does not have `{self.role}` permission on dataset {dataset_id}",
-            )
+            if (
+                current_dataset := await DatasetDB.get(PydanticObjectId(dataset_id))
+            ) is not None:
+                if (
+                    current_dataset.status == DatasetStatus.AUTHENTICATED.name
+                    and self.role == "viewer"
+                ):
+                    return True
+                else:
+                    raise HTTPException(
+                        status_code=403,
+                        detail=f"User `{current_user}` does not have `{self.role}` permission on dataset {dataset_id}",
+                    )
+            else:
+                raise HTTPException(
+                    status_code=404,
+                    detail=f"The dataset {dataset_id} is not found",
+                )


class FileAuthorization:
@@ -251,6 +305,52 @@ async def __call__(
     raise HTTPException(status_code=404, detail=f"Group {group_id} not found")


+class CheckStatus:
+    """We use class dependency so that we can provide the `status` parameter to the dependency.
+    For more info see https://fastapi.tiangolo.com/advanced/advanced-dependencies/."""
+
+    def __init__(self, status: str):
+        self.status = status
+
+    async def __call__(
+        self,
+        dataset_id: str,
+    ):
+        if (dataset := await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
+            if dataset.status == self.status:
+                return True
+            else:
+                return False
+        else:
+            return False
+
+
+class CheckFileStatus:
+    """We use class dependency so that we can provide the `status` parameter to the dependency.
+    For more info see https://fastapi.tiangolo.com/advanced/advanced-dependencies/."""
+
+    def __init__(self, status: str):
+        self.status = status
+
+    async def __call__(
+        self,
+        file_id: str,
+    ):
+        if (file_out := await FileDB.get(PydanticObjectId(file_id))) is not None:
+            dataset_id = file_out.dataset_id
+            if (
+                dataset := await DatasetDB.get(PydanticObjectId(dataset_id))
+            ) is not None:
+                if dataset.status == self.status:
+                    return True
+                else:
+                    return False
+            else:
+                return False
+        else:
+            return False
+
+
 def access(user_role: RoleType, role_required: RoleType) -> bool:
     """Enforce implied role hierarchy OWNER > EDITOR > UPLOADER > VIEWER"""
     if user_role == RoleType.OWNER:
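The docstrings on these classes reference FastAPI's class-dependency pattern: instantiating the class binds a parameter, and FastAPI then calls the instance once per request. Below is a minimal, self-contained sketch of how a `CheckStatus`-style dependency might be wired into a route; the app, route path, and stub lookup are illustrative, not part of this PR.

```python
from fastapi import Depends, FastAPI, HTTPException

app = FastAPI()


class CheckStatus:
    """Class dependency: __init__ captures a parameter, __call__ runs per request."""

    def __init__(self, status: str):
        self.status = status

    async def __call__(self, dataset_id: str) -> bool:
        # Stand-in for the real Mongo lookup in authorization_deps.py:
        # pretend datasets whose id starts with "pub" are PUBLIC.
        return self.status == "PUBLIC" and dataset_id.startswith("pub")


# Instantiating the class binds the parameter; Depends() receives a callable.
dataset_is_public = CheckStatus(status="PUBLIC")


@app.get("/datasets/{dataset_id}/files")
async def list_files(dataset_id: str, is_public: bool = Depends(dataset_is_public)):
    if not is_public:
        raise HTTPException(status_code=403, detail="Dataset is not public")
    return {"dataset_id": dataset_id, "files": []}
```

FastAPI resolves the `dataset_id` path parameter into the dependency's `__call__` by name, which is why the real classes take only `dataset_id` or `file_id` as their call argument.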
128 changes: 128 additions & 0 deletions backend/app/heartbeat_listener_sync.py
@@ -0,0 +1,128 @@
import logging
import pika
import json
from packaging import version
from pymongo import MongoClient

from app.config import settings
from app.models.search import SearchCriteria
from app.routers.feeds import FeedIn, FeedListener, FeedOut, FeedDB, associate_listener
from app.models.listeners import EventListenerDB, EventListenerOut, ExtractorInfo

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def callback(ch, method, properties, body):
    """Receive messages from RabbitMQ and process them.
    The extractor info is parsed from the message, and if the extractor is new
    or a newer version, the database is updated.
    """
    msg = json.loads(body.decode("utf-8"))

    extractor_info = msg["extractor_info"]
    extractor_name = extractor_info["name"]
    extractor_db = EventListenerDB(
        **extractor_info, properties=ExtractorInfo(**extractor_info)
    )

    mongo_client = MongoClient(settings.MONGODB_URL)
    db = mongo_client[settings.MONGO_DATABASE]

    # check to see if the extractor already exists
    existing_extractor = db["listeners"].find_one({"name": msg["queue"]})
    if existing_extractor is not None:
        # Update existing listener
        existing_version = existing_extractor["version"]
        new_version = extractor_db.version
        if version.parse(new_version) > version.parse(existing_version):
            # if this is a new version, add it to the database
            new_extractor = db["listeners"].insert_one(extractor_db.to_mongo())
            found = db["listeners"].find_one({"_id": new_extractor.inserted_id})
            # TODO - for now we are not deleting an older version of the extractor, just adding a new one
            # removed = db["listeners"].delete_one({"_id": existing_extractor["_id"]})
            extractor_out = EventListenerOut.from_mongo(found)
            logger.info(
                "%s updated from %s to %s"
                % (extractor_name, existing_version, new_version)
            )
            return extractor_out
    else:
        # Register new listener
        new_extractor = db["listeners"].insert_one(extractor_db.to_mongo())
        found = db["listeners"].find_one({"_id": new_extractor.inserted_id})
        extractor_out = EventListenerOut.from_mongo(found)
        logger.info("New extractor registered: " + extractor_name)

        # Assign MIME-based listener if needed
        if extractor_out.properties and extractor_out.properties.process:
            process = extractor_out.properties.process
            if "file" in process:
                # Create a MIME-based feed for this v1 extractor
                criteria_list = []
                for mime in process["file"]:
                    main_type = mime.split("/")[0] if mime.find("/") > -1 else mime
                    sub_type = mime.split("/")[1] if mime.find("/") > -1 else None
                    if sub_type:
                        if sub_type == "*":
                            # If a wildcard, just match on main type
                            criteria_list.append(
                                SearchCriteria(
                                    field="content_type_main", value=main_type
                                )
                            )
                        else:
                            # Otherwise match the whole string
                            criteria_list.append(
                                SearchCriteria(field="content_type", value=mime)
                            )
                    else:
                        criteria_list.append(
                            SearchCriteria(field="content_type", value=mime)
                        )

                # TODO: Who should the author be for an auto-generated feed? Currently None.
                new_feed = FeedDB(
                    name=extractor_name,
                    search={
                        "index_name": "file",
                        "criteria": criteria_list,
                        "mode": "or",
                    },
                    listeners=[
                        FeedListener(listener_id=extractor_out.id, automatic=True)
                    ],
                )
                db["feeds"].insert_one(new_feed.to_mongo())

        return extractor_out


def listen_for_heartbeats():
    """Run continuously, listening for extractor heartbeats sent over RabbitMQ."""
    credentials = pika.PlainCredentials(settings.RABBITMQ_USER, settings.RABBITMQ_PASS)
    parameters = pika.ConnectionParameters(
        settings.RABBITMQ_HOST, 5672, "/", credentials
    )
    connection = pika.BlockingConnection(parameters)
    channel = connection.channel()

    channel.exchange_declare(
        exchange=settings.HEARTBEAT_EXCHANGE, exchange_type="fanout", durable=True
    )
    result = channel.queue_declare(queue="", exclusive=True)
    queue_name = result.method.queue
    channel.queue_bind(exchange=settings.HEARTBEAT_EXCHANGE, queue=queue_name)

    logger.info(" [*] Waiting for heartbeats. To exit press CTRL+C")
    channel.basic_consume(queue=queue_name, on_message_callback=callback, auto_ack=True)
    channel.start_consuming()


if __name__ == "__main__":
    listen_for_heartbeats()
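For context on the message shape: `callback` expects a JSON body with a `queue` name and an `extractor_info` object carrying at least `name` and `version`. Here is a hedged sketch of a publisher emitting one such heartbeat to the same fanout exchange; the host, exchange name, and extra fields are assumptions, not values taken from this PR.

```python
import json

import pika

# Assumed values; in the app these come from settings.*
RABBITMQ_HOST = "localhost"
HEARTBEAT_EXCHANGE = "extractors.heartbeats"

connection = pika.BlockingConnection(pika.ConnectionParameters(RABBITMQ_HOST))
channel = connection.channel()
channel.exchange_declare(
    exchange=HEARTBEAT_EXCHANGE, exchange_type="fanout", durable=True
)

# Shape inferred from callback(): it reads msg["queue"] and msg["extractor_info"]
# and compares extractor_info["version"] against the registered version.
heartbeat = {
    "queue": "ncsa.wordcount",
    "extractor_info": {
        "name": "ncsa.wordcount",
        "version": "2.1.0",
        "description": "Counts words in text files",
        "process": {"file": ["text/*"]},
    },
}
channel.basic_publish(
    exchange=HEARTBEAT_EXCHANGE, routing_key="", body=json.dumps(heartbeat)
)
connection.close()
```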
5 changes: 1 addition & 4 deletions backend/app/main.py
@@ -122,10 +122,7 @@
     dependencies=[Depends(get_current_username)],
 )
 api_router.include_router(
-    metadata_datasets.router,
-    prefix="/datasets",
-    tags=["metadata"],
-    dependencies=[Depends(get_current_username)],
+    metadata_datasets.router, prefix="/datasets", tags=["metadata"]
 )
 api_router.include_router(
     folders.router,
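This change removes the router-wide `get_current_username` dependency from the metadata routes, so anonymous requests can reach them and access control moves into per-route dependencies. A small sketch of the difference, with an illustrative stand-in for the real Keycloak check:

```python
from typing import Optional

from fastapi import APIRouter, Depends, FastAPI, Header, HTTPException


async def get_current_username(authorization: Optional[str] = Header(default=None)):
    # Stand-in for the real Keycloak-backed dependency.
    if authorization is None:
        raise HTTPException(status_code=401, detail="Not authenticated")
    return "someuser"


router = APIRouter()


@router.get("/{dataset_id}/metadata")
async def get_dataset_metadata(dataset_id: str):
    # Reachable anonymously now; public/authenticated checks happen in
    # route-level dependencies such as CheckStatus instead.
    return {"dataset_id": dataset_id, "metadata": []}


app = FastAPI()

# Before this PR, every metadata route required a login:
# app.include_router(router, prefix="/datasets", dependencies=[Depends(get_current_username)])

# After: the router is included without the blanket dependency.
app.include_router(router, prefix="/datasets", tags=["metadata"])
```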
3 changes: 3 additions & 0 deletions backend/app/models/datasets.py
@@ -19,6 +19,7 @@ def _generate_next_value_(name, start, count, last_values):
 class DatasetStatus(AutoName):
     PRIVATE = auto()
     PUBLIC = auto()
+    AUTHENTICATED = auto()
     DEFAULT = auto()
     TRIAL = auto()

@@ -35,6 +36,7 @@ class DatasetIn(DatasetBase):
 class DatasetPatch(BaseModel):
     name: Optional[str]
     description: Optional[str]
+    status: Optional[str]


 class DatasetDB(Document, DatasetBase):

@@ -63,6 +65,7 @@ class DatasetDBViewList(View, DatasetBase):
     modified: datetime = Field(default_factory=datetime.utcnow)
     auth: List[AuthorizationDB]
     thumbnail_id: Optional[PydanticObjectId] = None
+    status: Optional[str]

     class Settings:
         source = DatasetDB
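The status checks elsewhere in this PR compare the stored status string sometimes against `DatasetStatus.X.name` and sometimes against the enum member itself; which comparison holds depends on whether the field was deserialized as a plain string or as the enum. A short sketch of the `AutoName` behavior, assuming `_generate_next_value_` returns the member name as the pattern conventionally does:

```python
from enum import Enum, auto


class AutoName(Enum):
    # Conventional AutoName: each member's value is its own name.
    def _generate_next_value_(name, start, count, last_values):
        return name


class DatasetStatus(AutoName):
    PRIVATE = auto()
    PUBLIC = auto()
    AUTHENTICATED = auto()
    DEFAULT = auto()
    TRIAL = auto()


stored = "AUTHENTICATED"  # what a plain string field would hold in Mongo
assert stored == DatasetStatus.AUTHENTICATED.name
assert stored == DatasetStatus.AUTHENTICATED.value
# A raw string never equals the enum member itself:
assert (stored == DatasetStatus.AUTHENTICATED) is False
```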
22 changes: 18 additions & 4 deletions backend/app/routers/authorization.py
@@ -3,7 +3,6 @@
 from bson import ObjectId
 from fastapi import APIRouter, Depends
 from fastapi.exceptions import HTTPException
-
 from app.dependencies import get_elasticsearchclient
 from app.deps.authorization_deps import (
     Authorization,

@@ -25,6 +24,7 @@
     DatasetRoles,
     DatasetDB,
     DatasetOut,
+    DatasetStatus,
 )
 from app.models.groups import GroupDB
 from app.models.pyobjectid import PyObjectId

@@ -80,9 +80,23 @@ async def get_dataset_role(
             ),
         )
     ) is None:
-        raise HTTPException(
-            status_code=404, detail=f"No authorization found for dataset: {dataset_id}"
-        )
+        if (
+            current_dataset := await DatasetDB.get(PydanticObjectId(dataset_id))
+        ) is not None:
+            if current_dataset.status == DatasetStatus.AUTHENTICATED.name:
+                public_authorization_in = {
+                    "dataset_id": PydanticObjectId(dataset_id),
+                    "role": RoleType.VIEWER,
+                }
+                authorization = AuthorizationDB(
+                    **public_authorization_in, creator=current_dataset.creator.email
+                )
+                return authorization.dict()
+            else:
+                raise HTTPException(
+                    status_code=404,
+                    detail=f"No authorization found for dataset: {dataset_id}",
+                )
     else:
         return auth_db.dict()
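Note that the new branch above never persists the synthesized authorization: when no stored record exists but the dataset is AUTHENTICATED, it builds a transient VIEWER grant in memory and returns its dict. A stripped-down sketch of that pattern, using simplified stand-in models (field names follow the diff; everything else is illustrative):

```python
from typing import List

from pydantic import BaseModel


class AuthorizationOut(BaseModel):
    dataset_id: str
    creator: str
    role: str = "viewer"
    user_ids: List[str] = []


def role_for(dataset_status, dataset_id, owner_email, auth_record=None):
    """Return a stored authorization if present, else synthesize a VIEWER grant."""
    if auth_record is not None:
        return auth_record
    if dataset_status == "AUTHENTICATED":
        # Built on the fly and returned; nothing is written to the database.
        return AuthorizationOut(dataset_id=dataset_id, creator=owner_email).dict()
    raise LookupError(f"No authorization found for dataset: {dataset_id}")


print(role_for("AUTHENTICATED", "6530f1a2", "owner@example.com"))
```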