-
Notifications
You must be signed in to change notification settings - Fork 116
Refactor fulltext search #2358
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Refactor fulltext search #2358
Changes from all commits
Commits
Show all changes
38 commits
Select commit
Hold shift + click to select a range
126b5e7
update
shengquan-ni d671646
update
shengquan-ni eeda580
Merge branch 'master' into shengquan-fix-fulltext-search
shengquan-ni 34d65bc
update
shengquan-ni 9495239
update
shengquan-ni ee3beb8
Update DashboardResource.scala
shengquan-ni 9dcb28c
refactor: use union query
shengquan-ni d8578bd
Update FileSearchQueryBuilder.scala
shengquan-ni 712bf25
Merge branch 'master' into shengquan-fix-fulltext-search
shengquan-ni 7bc812d
Merge branch 'master' into shengquan-fix-fulltext-search
shengquan-ni ad8ae30
reformat
shengquan-ni 8c57a31
Update DashboardResource.scala
shengquan-ni f9dbe68
renaming
shengquan-ni 5ea3040
Merge branch 'master' into shengquan-fix-fulltext-search
shengquan-ni 38ecd86
Merge branch 'master' into shengquan-fix-fulltext-search
shengquan-ni 3d3ad5c
fixing tests
shengquan-ni e822204
Merge branch 'master' into shengquan-fix-fulltext-search
shengquan-ni d61cca4
fix test
shengquan-ni 5c377b5
Merge branch 'shengquan-fix-fulltext-search' of https://github.com/Te…
shengquan-ni 6d58a9f
fix test
shengquan-ni 72963bb
fix
shengquan-ni e55f176
Update UnifiedResourceSchema.scala
shengquan-ni 70c1122
update
shengquan-ni 182d0b6
fix
shengquan-ni 8d6b5e7
Update SearchQueryBuilder.scala
shengquan-ni cd7b1d4
Update WorkflowWorker.scala
shengquan-ni 37cbb58
Merge branch 'master' into shengquan-fix-fulltext-search
shengquan-ni 3caf919
Merge branch 'master' into shengquan-fix-fulltext-search
shengquan-ni 47b58e6
partial update
shengquan-ni 240dad2
fix
shengquan-ni 1519dbe
Merge branch 'master' into shengquan-fix-fulltext-search
shengquan-ni e981601
update
shengquan-ni 52fdaa3
Merge branch 'master' into shengquan-fix-fulltext-search
shengquan-ni ce5fe39
Update UnifiedResourceSchema.scala
shengquan-ni b3f8daf
Update UnifiedResourceSchema.scala
shengquan-ni 43c41f9
Update FulltextSearchQueryUtils.scala
shengquan-ni ae5f3ec
Merge branch 'master' into shengquan-fix-fulltext-search
shengquan-ni d4472e0
update
shengquan-ni File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
804 changes: 118 additions & 686 deletions
804
core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/DashboardResource.scala
Large diffs are not rendered by default.
Oops, something went wrong.
87 changes: 87 additions & 0 deletions
87
...ber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/FileSearchQueryBuilder.scala
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| package edu.uci.ics.texera.web.resource.dashboard | ||
| import edu.uci.ics.texera.web.model.jooq.generated.Tables.{FILE, USER, USER_FILE_ACCESS} | ||
| import edu.uci.ics.texera.web.model.jooq.generated.enums.UserFileAccessPrivilege | ||
| import edu.uci.ics.texera.web.model.jooq.generated.tables.pojos.File | ||
| import edu.uci.ics.texera.web.resource.dashboard.DashboardResource.DashboardClickableFileEntry | ||
| import edu.uci.ics.texera.web.resource.dashboard.FulltextSearchQueryUtils.{ | ||
| getFullTextSearchFilter, | ||
| getSubstringSearchFilter, | ||
| getContainsFilter, | ||
| getDateFilter | ||
| } | ||
| import edu.uci.ics.texera.web.resource.dashboard.user.file.UserFileResource.DashboardFile | ||
| import org.jooq.{Condition, GroupField, Record, TableLike} | ||
| import org.jooq.impl.DSL | ||
| import org.jooq.types.UInteger | ||
|
|
||
| import scala.jdk.CollectionConverters.CollectionHasAsScala | ||
/**
  * [[SearchQueryBuilder]] implementation for the file resource type.
  *
  * Maps FILE / USER / USER_FILE_ACCESS columns onto the unified resource schema and
  * builds the FROM and WHERE parts of the dashboard search query for files.
  */
object FileSearchQueryBuilder extends SearchQueryBuilder {

  /** Column mapping from the file-related tables to the unified search schema. */
  override val mappedResourceSchema: UnifiedResourceSchema = UnifiedResourceSchema(
    resourceType = DSL.inline(SearchQueryBuilder.FILE_RESOURCE_TYPE),
    name = FILE.NAME,
    description = FILE.DESCRIPTION,
    // FILE.UPLOAD_TIME is used for both the creation and the last-modified slot.
    creationTime = FILE.UPLOAD_TIME,
    fid = FILE.FID,
    ownerId = FILE.OWNER_UID,
    lastModifiedTime = FILE.UPLOAD_TIME,
    filePath = FILE.PATH,
    fileSize = FILE.SIZE,
    userEmail = USER.EMAIL,
    fileUserAccess = USER_FILE_ACCESS.PRIVILEGE
  )

  /**
    * Joins FILE with its access entries and with the owning user, restricted to the
    * access entries whose UID equals `uid`.
    *
    * @param uid    id of the user issuing the search
    * @param params search parameters (not consulted here)
    */
  override protected def constructFromClause(
      uid: UInteger,
      params: DashboardResource.SearchQueryParams
  ): TableLike[_] = {
    val fileWithAccess = FILE
      .leftJoin(USER_FILE_ACCESS)
      .on(USER_FILE_ACCESS.FID.eq(FILE.FID))
    val fileWithAccessAndOwner = fileWithAccess
      .leftJoin(USER)
      .on(FILE.OWNER_UID.eq(USER.UID))
    fileWithAccessAndOwner.where(USER_FILE_ACCESS.UID.eq(uid))
  }

  /**
    * Combines the upload-date range, the owner-email filter and the keyword filter
    * (full-text match OR substring match on file name / description) with AND.
    *
    * @param uid    id of the user issuing the search (not consulted here)
    * @param params search parameters supplying keywords, date range and owners
    */
  override protected def constructWhereClause(
      uid: UInteger,
      params: DashboardResource.SearchQueryParams
  ): Condition = {
    // Break every raw keyword apart at the listed operator characters and
    // drop the empty fragments that the split leaves behind.
    val splitKeywords = (for {
      rawKeyword <- params.keywords.asScala
      fragment <- rawKeyword.split("[+\\-()<>~*@\"]")
      if fragment.nonEmpty
    } yield fragment).toSeq

    val uploadedInRange = getDateFilter(
      params.creationStartDate,
      params.creationEndDate,
      FILE.UPLOAD_TIME
    )
    val ownedByRequestedUsers = getContainsFilter(params.owners, USER.EMAIL)
    val fullTextMatch = getFullTextSearchFilter(splitKeywords, List(FILE.NAME, FILE.DESCRIPTION))
    val substringMatch = getSubstringSearchFilter(splitKeywords, List(FILE.NAME, FILE.DESCRIPTION))

    uploadedInRange
      .and(ownedByRequestedUsers)
      .and(fullTextMatch.or(substringMatch))
  }

  /** File search does not group its rows. */
  override protected def getGroupByFields: Seq[GroupField] = Seq.empty

  /**
    * Converts a result record into a dashboard entry carrying a [[DashboardFile]].
    *
    * @param uid    id of the user issuing the search (not consulted here)
    * @param record record holding USER, USER_FILE_ACCESS and FILE columns
    */
  override def toEntryImpl(
      uid: UInteger,
      record: Record
  ): DashboardResource.DashboardClickableFileEntry = {
    val ownerEmail = record.into(USER).getEmail
    val accessLevel = record
      .get(USER_FILE_ACCESS.PRIVILEGE, classOf[UserFileAccessPrivilege])
      .toString
    val filePojo = record.into(FILE).into(classOf[File])
    val dashboardFile = DashboardFile(ownerEmail, accessLevel, filePojo)
    DashboardClickableFileEntry(SearchQueryBuilder.FILE_RESOURCE_TYPE, file = Some(dashboardFile))
  }
}
127 changes: 127 additions & 0 deletions
127
...r/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/FulltextSearchQueryUtils.scala
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,127 @@ | ||
| package edu.uci.ics.texera.web.resource.dashboard | ||
|
|
||
| import org.jooq.{Condition, Field} | ||
| import org.jooq.impl.DSL.{condition, noCondition} | ||
|
|
||
| import java.sql.Timestamp | ||
| import java.text.{ParseException, SimpleDateFormat} | ||
| import java.util.concurrent.TimeUnit | ||
| import scala.jdk.CollectionConverters.CollectionHasAsScala | ||
|
|
||
/**
  * Helpers that build jOOQ [[Condition]]s for the dashboard search queries.
  *
  * User-supplied values are always passed to the database as bind variables and never
  * concatenated into the SQL text.
  */
object FulltextSearchQueryUtils {

  /**
    * Trims every keyword and drops the ones that are empty afterwards.
    *
    * Trimming has to happen before the emptiness check: otherwise a whitespace-only
    * keyword survives as "" and produces a degenerate search term.
    */
  private def normalizeKeywords(keywords: Seq[String]): Seq[String] =
    keywords.map(_.trim).filter(_.nonEmpty)

  /**
    * Builds a `MATCH(fields) AGAINST(... IN BOOLEAN MODE)` condition per keyword and
    * AND-s them together.
    *
    * Each keyword is split on whitespace and every resulting word is prefixed with `+`,
    * so a keyword "foo bar" becomes the boolean-mode query "+foo +bar".
    *
    * @param keywords search keywords; blank entries are ignored
    * @param fields   columns to match against; their rendered names (double quotes removed)
    *                 are joined with "," inside MATCH(...)
    * @return the conjunction of all per-keyword conditions, or `noCondition()` when there
    *         are no fields or no usable keywords
    */
  def getFullTextSearchFilter(
      keywords: Seq[String],
      fields: List[Field[String]]
  ): Condition = {
    if (fields.isEmpty) {
      noCondition()
    } else {
      val indexedCompoundFields = fields.map(_.toString.replace("\"", "")).mkString(",")
      normalizeKeywords(keywords).foldLeft(noCondition()) { (acc, key) =>
        val booleanQuery = key.split("\\s+").mkString("+", " +", "")
        // The search term is a bind variable; only the column list is part of the SQL text.
        acc.and(
          condition(
            s"MATCH($indexedCompoundFields) AGAINST(? IN BOOLEAN MODE)",
            booleanQuery
          )
        )
      }
    }
  }

  /**
    * Builds a substring (`LIKE '%keyword%'`) filter: a row matches when at least one of
    * the fields contains every keyword.
    *
    * `%` and `_` inside a keyword keep their LIKE wildcard meaning.
    *
    * @param keywords search keywords; blank entries are ignored
    * @param fields   columns to search; their rendered names (double quotes removed) are
    *                 used in the SQL text
    * @return an OR over the fields of an AND over the keywords, or `noCondition()` when
    *         there are no fields or no usable keywords
    */
  def getSubstringSearchFilter(
      keywords: Seq[String],
      fields: List[Field[String]]
  ): Condition = {
    val trimmedKeywords = normalizeKeywords(keywords)
    fields
      .map(_.toString.replace("\"", ""))
      .foldLeft(noCondition()) { (anyField, fieldName) =>
        val containsAllKeywords = trimmedKeywords.foldLeft(noCondition()) { (acc, key) =>
          // The pattern is a bind variable, so quotes in the keyword cannot alter the SQL.
          acc.and(condition(s"$fieldName LIKE ?", s"%$key%"))
        }
        anyField.or(containsAllKeywords)
      }
  }

  /**
    * Builds a condition that holds when `field` equals any of the given values.
    *
    * @tparam T type of the field and of the values
    * @param values values to compare against; duplicates are removed first
    * @param field  the field to compare
    * @return an OR of `field = value` for every distinct value, or `noCondition()` when
    *         `values` is empty
    */
  def getContainsFilter[T](values: java.util.List[T], field: Field[T]): Condition =
    values.asScala.toSet.foldLeft(noCondition()) { (acc, value) =>
      acc.or(field.eq(value))
    }

  /**
    * Builds a BETWEEN condition on `fieldToFilterOn` for the given date range.
    *
    * @param startDate       start of the range in "yyyy-MM-dd" format; when empty,
    *                        "1970-01-01" is used
    * @param endDate         end of the range in "yyyy-MM-dd" format; when empty,
    *                        "9999-12-31" is used
    * @param fieldToFilterOn the timestamp field the range is applied to
    * @return the range condition, or `noCondition()` when both dates are empty
    * @throws ParseException if a non-empty date cannot be parsed
    */
  @throws[ParseException]
  def getDateFilter(
      startDate: String,
      endDate: String,
      fieldToFilterOn: Field[Timestamp]
  ): Condition = {
    if (startDate.isEmpty && endDate.isEmpty) {
      noCondition()
    } else {
      val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
      val start = if (startDate.nonEmpty) startDate else "1970-01-01"
      val end = if (endDate.nonEmpty) endDate else "9999-12-31"

      val startTimestamp = new Timestamp(dateFormat.parse(start).getTime)
      // A concrete end date is extended to the last millisecond of that day; the
      // "9999-12-31" default is used as parsed.
      val endOfDayOffset = if (end == "9999-12-31") 0L else TimeUnit.DAYS.toMillis(1) - 1
      val endTimestamp = new Timestamp(dateFormat.parse(end).getTime + endOfDayOffset)

      fieldToFilterOn.between(startTimestamp, endTimestamp)
    }
  }

  /**
    * Builds a condition that holds when `field` contains, ignoring case, the text
    * `"operatorType":"<operator>"` for any of the given operators.
    *
    * @param operators operator type names to look for; duplicates are removed first
    * @param field     the field searched for the operator markers
    * @return an OR of the per-operator LIKE conditions, or `noCondition()` when
    *         `operators` is empty
    */
  def getOperatorsFilter(
      operators: java.util.List[String],
      field: Field[String]
  ): Condition =
    operators.asScala.toSet.foldLeft(noCondition()) { (acc, operator) =>
      val searchKey = s"""%"operatorType":"$operator"%"""
      acc.or(field.likeIgnoreCase(searchKey))
    }

}
74 changes: 74 additions & 0 deletions
74
.../src/main/scala/edu/uci/ics/texera/web/resource/dashboard/ProjectSearchQueryBuilder.scala
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| package edu.uci.ics.texera.web.resource.dashboard | ||
| import edu.uci.ics.texera.web.model.jooq.generated.Tables.{PROJECT, PROJECT_USER_ACCESS} | ||
| import edu.uci.ics.texera.web.model.jooq.generated.tables.pojos.Project | ||
| import edu.uci.ics.texera.web.resource.dashboard.DashboardResource.DashboardClickableFileEntry | ||
| import edu.uci.ics.texera.web.resource.dashboard.FulltextSearchQueryUtils.{ | ||
| getFullTextSearchFilter, | ||
| getSubstringSearchFilter, | ||
| getContainsFilter, | ||
| getDateFilter | ||
| } | ||
| import org.jooq.{Condition, GroupField, Record, TableLike} | ||
| import org.jooq.impl.DSL | ||
| import org.jooq.types.UInteger | ||
|
|
||
| import scala.jdk.CollectionConverters.CollectionHasAsScala | ||
/**
  * [[SearchQueryBuilder]] implementation for the project resource type.
  *
  * Maps PROJECT columns onto the unified resource schema and builds the FROM and
  * WHERE parts of the dashboard search query for projects.
  */
object ProjectSearchQueryBuilder extends SearchQueryBuilder {

  /** Column mapping from the PROJECT table to the unified search schema. */
  override val mappedResourceSchema: UnifiedResourceSchema = UnifiedResourceSchema(
    resourceType = DSL.inline(SearchQueryBuilder.PROJECT_RESOURCE_TYPE),
    name = PROJECT.NAME,
    description = PROJECT.DESCRIPTION,
    // PROJECT.CREATION_TIME is used for both the creation and the last-modified slot.
    creationTime = PROJECT.CREATION_TIME,
    lastModifiedTime = PROJECT.CREATION_TIME,
    pid = PROJECT.PID,
    ownerId = PROJECT.OWNER_ID,
    projectColor = PROJECT.COLOR
  )

  /**
    * Joins PROJECT with its access entries, restricted to the entries whose UID
    * equals `uid`.
    *
    * @param uid    id of the user issuing the search
    * @param params search parameters (not consulted here)
    */
  override protected def constructFromClause(
      uid: UInteger,
      params: DashboardResource.SearchQueryParams
  ): TableLike[_] = {
    val projectWithAccess = PROJECT
      .leftJoin(PROJECT_USER_ACCESS)
      .on(PROJECT_USER_ACCESS.PID.eq(PROJECT.PID))
    projectWithAccess.where(PROJECT_USER_ACCESS.UID.eq(uid))
  }

  /**
    * Combines the creation-date range, the project-id filter and the keyword filter
    * (full-text match OR substring match on project name / description) with AND.
    *
    * @param uid    id of the user issuing the search (not consulted here)
    * @param params search parameters supplying keywords, date range and project ids
    */
  override protected def constructWhereClause(
      uid: UInteger,
      params: DashboardResource.SearchQueryParams
  ): Condition = {
    // Break every raw keyword apart at the listed operator characters and
    // drop the empty fragments that the split leaves behind.
    val splitKeywords = (for {
      rawKeyword <- params.keywords.asScala
      fragment <- rawKeyword.split("[+\\-()<>~*@\"]")
      if fragment.nonEmpty
    } yield fragment).toSeq

    val createdInRange = getDateFilter(
      params.creationStartDate,
      params.creationEndDate,
      PROJECT.CREATION_TIME
    )
    val inRequestedProjects = getContainsFilter(params.projectIds, PROJECT.PID)
    val fullTextMatch =
      getFullTextSearchFilter(splitKeywords, List(PROJECT.NAME, PROJECT.DESCRIPTION))
    val substringMatch =
      getSubstringSearchFilter(splitKeywords, List(PROJECT.NAME, PROJECT.DESCRIPTION))

    createdInRange
      .and(inRequestedProjects)
      .and(fullTextMatch.or(substringMatch))
  }

  /** Project search does not group its rows. */
  override protected def getGroupByFields: Seq[GroupField] = Seq.empty

  /**
    * Converts a result record into a dashboard entry carrying a [[Project]].
    *
    * @param uid    id of the user issuing the search (not consulted here)
    * @param record record holding the PROJECT columns
    */
  override def toEntryImpl(
      uid: UInteger,
      record: Record
  ): DashboardResource.DashboardClickableFileEntry = {
    val projectPojo = record.into(PROJECT).into(classOf[Project])
    DashboardClickableFileEntry(
      SearchQueryBuilder.PROJECT_RESOURCE_TYPE,
      project = Some(projectPojo)
    )
  }
}
54 changes: 54 additions & 0 deletions
54
core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/SearchQueryBuilder.scala
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| package edu.uci.ics.texera.web.resource.dashboard | ||
|
|
||
| import edu.uci.ics.texera.web.SqlServer | ||
| import edu.uci.ics.texera.web.resource.dashboard.DashboardResource.{ | ||
| DashboardClickableFileEntry, | ||
| SearchQueryParams | ||
| } | ||
| import edu.uci.ics.texera.web.resource.dashboard.SearchQueryBuilder.context | ||
| import org.jooq.types.UInteger | ||
| import org.jooq.{Condition, GroupField, Record, SelectGroupByStep, SelectHavingStep, TableLike} | ||
/** Shared state and resource-type identifiers for all [[SearchQueryBuilder]] implementations. */
object SearchQueryBuilder {

  /** DSL context obtained from [[SqlServer]] on first access and reused afterwards. */
  final lazy val context = SqlServer.createDSLContext()

  /** Resource-type tag for files. */
  val FILE_RESOURCE_TYPE: String = "file"

  /** Resource-type tag for workflows. */
  val WORKFLOW_RESOURCE_TYPE: String = "workflow"

  /** Resource-type tag for projects. */
  val PROJECT_RESOURCE_TYPE: String = "project"

  /** Empty tag; presumably means "no restriction on resource type" (confirm against callers). */
  val ALL_RESOURCE_TYPE: String = ""
}
|
|
||
/**
  * Template for building one resource type's dashboard search query.
  *
  * Implementations supply the schema mapping, the FROM and WHERE parts and the
  * record-to-entry conversion; [[constructQuery]] assembles them into a select.
  */
trait SearchQueryBuilder {

  /** Mapping of this resource type's columns onto the unified resource schema. */
  protected val mappedResourceSchema: UnifiedResourceSchema

  /** Table expression the search selects from. */
  protected def constructFromClause(uid: UInteger, params: SearchQueryParams): TableLike[_]

  /** Filter condition applied to the search. */
  protected def constructWhereClause(uid: UInteger, params: SearchQueryParams): Condition

  /** Fields to group by; an empty sequence (the default) means no GROUP BY is added. */
  protected def getGroupByFields: Seq[GroupField] = Seq.empty

  /** Converts a record, already translated by the schema, into a dashboard entry. */
  protected def toEntryImpl(uid: UInteger, record: Record): DashboardClickableFileEntry

  /**
    * Converts a raw result record into a dashboard entry.
    *
    * The record is first passed through `mappedResourceSchema.translateRecord` and the
    * result is handed to [[toEntryImpl]].
    */
  def toEntry(uid: UInteger, record: Record): DashboardClickableFileEntry =
    toEntryImpl(uid, mappedResourceSchema.translateRecord(record))

  /**
    * Assembles the select statement for this resource type.
    *
    * @param uid    id of the user issuing the search
    * @param params search parameters forwarded to the FROM and WHERE builders
    * @return the select over all schema fields, with a GROUP BY only when
    *         [[getGroupByFields]] is non-empty
    */
  final def constructQuery(
      uid: UInteger,
      params: SearchQueryParams
  ): SelectHavingStep[Record] = {
    val fromClause = constructFromClause(uid, params)
    val whereClause = constructWhereClause(uid, params)
    val baseQuery: SelectGroupByStep[Record] = context
      .select(mappedResourceSchema.allFields: _*)
      .from(fromClause)
      .where(whereClause)

    getGroupByFields match {
      case Seq()  => baseQuery
      case fields => baseQuery.groupBy(fields: _*)
    }
  }

}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.