74 changes: 0 additions & 74 deletions backend/app/heartbeat_listener_sync.py

This file was deleted.

2 changes: 1 addition & 1 deletion backend/app/models/feeds.py
@@ -25,7 +25,7 @@ class FeedIn(JobFeed):


 class FeedDB(JobFeed, MongoModel):
-    author: UserOut
+    author: Optional[UserOut] = None
     updated: datetime = Field(default_factory=datetime.utcnow)


17 changes: 14 additions & 3 deletions backend/app/models/files.py
@@ -1,20 +1,31 @@
 from datetime import datetime
 from typing import Optional

-from pydantic import Field
+from pydantic import Field, BaseModel

 from app.models.mongomodel import MongoModel
 from app.models.pyobjectid import PyObjectId
 from app.models.users import UserOut


+class FileContentType(BaseModel):
+    """This model describes the content type of a file uploaded to Clowder. A typical example is "text/plain" for .txt.
+    In Clowder v1 extractors, "text/*" syntax is acceptable for wildcard matches. To support this, the content type is
+    split into main ("text") and secondary ("plain") parts so the dynamic matching with * can still be done.
+
+    """
+
+    content_type: str = "N/A"
+    main_type: str = "N/A"
+
+
 class FileVersion(MongoModel):
     version_id: str
     version_num: int = 1
     file_id: PyObjectId
     creator: UserOut
     bytes: int = 0
-    content_type: str = "N/A"
+    content_type: FileContentType = FileContentType()
     created: datetime = Field(default_factory=datetime.utcnow)


@@ -36,7 +47,7 @@ class FileDB(FileBase):
     views: int = 0
     downloads: int = 0
     bytes: int = 0
-    content_type: str = "N/A"
+    content_type: FileContentType = FileContentType()


 class FileOut(FileDB):
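As a minimal sketch of the split this model enables (the helper below is hypothetical, not part of this PR), a MIME string maps onto FileContentType like so:

from app.models.files import FileContentType

def to_content_type(mime: str) -> FileContentType:
    # "text/plain" -> main_type "text"; a bare "text" keeps the whole string
    main = mime.split("/")[0] if "/" in mime else mime
    return FileContentType(content_type=mime, main_type=main)

ct = to_content_type("text/plain")
assert ct.main_type == "text"  # a v1 "text/*" rule can match on main_type alone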
11 changes: 0 additions & 11 deletions backend/app/models/search.py
@@ -3,17 +3,6 @@
 from typing import Optional, List


-# TODO: may eventually be split by index (resource type)
-class SearchIndexContents(BaseModel):
-    """This describes what is indexed in Elasticsearch for a given resource."""
-
-    id: str
-    name: str
-    creator: str  # currently just email
-    created: datetime
-    download: int
-
-
 class SearchCriteria(BaseModel):
     field: str
     operator: str = "=="
44 changes: 44 additions & 0 deletions backend/app/rabbitmq/heartbeat_listener_sync.py
@@ -5,6 +5,8 @@
 from pymongo import MongoClient

 from app.config import settings
+from app.models.search import SearchCriteria
+from app.routers.feeds import FeedIn, FeedListener, FeedOut, FeedDB, associate_listener
 from app.models.listeners import EventListenerDB, EventListenerOut, ExtractorInfo

 logging.basicConfig(level=logging.INFO)
@@ -51,6 +53,48 @@ def callback(ch, method, properties, body):
         found = db["listeners"].find_one({"_id": new_extractor.inserted_id})
         extractor_out = EventListenerOut.from_mongo(found)
         logger.info("New extractor registered: " + extractor_name)
+
+        # Assign MIME-based listener if needed
+        if extractor_out.properties and extractor_out.properties.process:
+            process = extractor_out.properties.process
+            if "file" in process:
+                # Create a MIME-based feed for this v1 extractor
+                criteria_list = []
+                for mime in process["file"]:
+                    main_type = mime.split("/")[0] if mime.find("/") > -1 else mime
+                    sub_type = mime.split("/")[1] if mime.find("/") > -1 else None
+                    if sub_type:
+                        if sub_type == "*":
+                            # If a wildcard, just match on main type
+                            criteria_list.append(
+                                SearchCriteria(
+                                    field="content_type_main", value=main_type
+                                )
+                            )
+                        else:
+                            # Otherwise match the whole string
+                            criteria_list.append(
+                                SearchCriteria(field="content_type", value=mime)
+                            )
+                    else:
+                        criteria_list.append(
+                            SearchCriteria(field="content_type", value=mime)
+                        )
+
+                # TODO: Who should the author be for an auto-generated feed? Currently None.
+                new_feed = FeedDB(
+                    name=extractor_name,
+                    search={
+                        "index_name": "file",
+                        "criteria": criteria_list,
+                        "mode": "or",
+                    },
+                    listeners=[
+                        FeedListener(listener_id=extractor_out.id, automatic=True)
+                    ],
+                )
+                db["feeds"].insert_one(new_feed.to_mongo())
+
         return extractor_out

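To see what the registration loop above produces, here is a minimal sketch assuming a hypothetical v1 extractor whose extractor_info lists two MIME rules; only the wildcard rule falls back to the main-type field:

from app.models.search import SearchCriteria

# Hypothetical v1 extractor_info fragment; "text/*" is a wildcard on the main type.
process = {"file": ["text/*", "application/json"]}

criteria_list = []
for mime in process["file"]:
    main_type, _, sub_type = mime.partition("/")
    if sub_type == "*":
        # Wildcard: match only the indexed main type ("text")
        criteria_list.append(SearchCriteria(field="content_type_main", value=main_type))
    else:
        # No "/" or a concrete subtype: match the full MIME string
        criteria_list.append(SearchCriteria(field="content_type", value=mime))

# criteria_list -> ["content_type_main" == "text", "content_type" == "application/json"],
# joined with mode="or" in the auto-generated feed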
1 change: 0 additions & 1 deletion backend/app/routers/feeds.py
@@ -17,7 +17,6 @@
     FeedDB,
     FeedOut,
 )
-from app.models.search import SearchIndexContents
 from app.search.connect import check_search_result
 from app.rabbitmq.listeners import submit_file_job

12 changes: 7 additions & 5 deletions backend/app/routers/files.py
@@ -31,7 +31,7 @@
     update_record,
     delete_document_by_query,
 )
-from app.models.files import FileIn, FileOut, FileVersion, FileDB
+from app.models.files import FileIn, FileOut, FileVersion, FileContentType, FileDB
 from app.models.users import UserOut
 from app.routers.feeds import check_feed_listeners
 from app.keycloak_auth import get_user, get_current_user, get_token
@@ -70,6 +70,8 @@ async def add_file_entry(
     if content_type is None:
         content_type = mimetypes.guess_type(file_db.name)
         content_type = content_type[0] if len(content_type) > 1 else content_type
+    type_main = content_type.split("/")[0] if type(content_type) is str else "N/A"
+    content_type_obj = FileContentType(content_type=content_type, main_type=type_main)

     # Use unique ID as key for Minio and get initial version ID
     response = fs.put_object(
@@ -87,7 +89,7 @@
     file_db.version_id = version_id
     file_db.version_num = 1
     file_db.bytes = bytes
-    file_db.content_type = content_type if type(content_type) is str else "N/A"
+    file_db.content_type = content_type_obj
     await db["files"].replace_one({"_id": ObjectId(new_file_id)}, file_db.to_mongo())
     file_out = FileOut(**file_db.dict())

@@ -97,7 +99,7 @@
         file_id=new_file_id,
         creator=user,
         bytes=bytes,
-        content_type=file_db.content_type,
+        content_type=content_type_obj,
     )
     await db["file_versions"].insert_one(new_version.to_mongo())

@@ -110,7 +112,8 @@
         "dataset_id": str(file_db.dataset_id),
         "folder_id": str(file_db.folder_id),
         "bytes": file_db.bytes,
-        "content_type": file_db.content_type,
+        "content_type": content_type_obj.content_type,
+        "content_type_main": content_type_obj.main_type,
     }
     insert_record(es, "file", doc, file_db.id)

@@ -210,7 +213,6 @@ async def update_file(
             {"resource.resource_id": ObjectId(updated_file.id)}
         )
     ) is not None:
-        print("updating metadata")
         doc = {
             "doc": {
                 "name": updated_file.name,
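For reference, a small sketch of the upload path touched above (standard-library mimetypes only; the file name is made up). guess_type returns a (type, encoding) tuple, and anything unguessable degrades to the "N/A" defaults:

import mimetypes

guessed, _encoding = mimetypes.guess_type("notes.txt")  # ("text/plain", None)
content_type = guessed if isinstance(guessed, str) else "N/A"
main_type = content_type.split("/")[0] if "/" in content_type else "N/A"
# FileContentType(content_type="text/plain", main_type="text") then feeds both the
# Mongo document and the "content_type"/"content_type_main" Elasticsearch fields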