Skip to content

Commit

Permalink
MODINV-1114 - Extend matching records endpoint to support multiple ma…
Browse files Browse the repository at this point in the history
…rc-bib match results processing (#670)

* Implemented support for multiple criteria in the record matching endpoint

* Removed no longer needed fallback query logic
  • Loading branch information
RuslanLavrov authored Feb 16, 2025
1 parent 7a525da commit 59b04d5
Show file tree
Hide file tree
Showing 8 changed files with 278 additions and 198 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
* [MODDATAIMP-1133](https://folio-org.atlassian.net/browse/MODDATAIMP-1133) Second update of the same MARC authority / MARC holdings record completes with errors
* [MODSOURCE-832](https://folio-org.atlassian.net/browse/MODSOURCE-832) Add consistent handling and updating for same Marc Bib records linked to Authority by two or more consumers
* [MODSOURCE-860](https://folio-org.atlassian.net/browse/MODSOURCE-860) "Numerics only" option of existing record section does not work during MARC-BIB to MARC-BIB matching
* [MODINV-1114](https://folio-org.atlassian.net/browse/MODINV-1114) Extend matching records endpoint to support multiple marc-bib match results processing
* [MODSOURCE-863](https://folio-org.atlassian.net/browse/MODSOURCE-863) Add index to speed up the querying of a composite record

## 2024-10-28 5.9.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import java.util.function.Function;

import net.sf.jsqlparser.JSQLParserException;
import org.folio.dao.util.CompositeMatchField;
import org.folio.dao.util.IdType;
import org.folio.dao.util.MatchField;
import org.folio.dao.util.RecordType;
Expand Down Expand Up @@ -79,10 +80,12 @@ public interface RecordDao {
Future<StrippedParsedRecordCollection> getStrippedParsedRecords(List<String> externalIds, IdType idType, RecordType recordType, Boolean includeDeleted, String tenantId);

/**
* Searches for {@link Record} by {@link MatchField} with offset and limit
* Searches for {@link Record} by {@link MatchField} with offset and limit
*
* @param matchField Marc field that needs to be matched
* @param comparisonPartType describes type of comparison part
* @param matchedRecordIds list of records IDs that will be used as additional criteria for filtering records
* that match the specified {@code MatchField} criteria
* @param recordType record type
* @param externalIdRequired specifies whether necessary not to consider records with {@code externalId == null} while searching
* @param offset starting index in a list of results
Expand All @@ -99,7 +102,6 @@ Future<List<Record>> getMatchedRecords(MatchField matchField, Filter.ComparisonP
* and returns {@link RecordsIdentifiersCollection} representing list of pairs of recordId and externalId
*
* @param matchedField describes searching condition
* @param comparisonPartType describes type of comparison part
* @param returnTotalRecords indicates that amount of total records should/shouldn't be calculated
* and populated into {@link RecordsIdentifiersCollection#totalRecords}
* @param typeConnection record type
Expand All @@ -109,9 +111,9 @@ Future<List<Record>> getMatchedRecords(MatchField matchField, Filter.ComparisonP
* @param tenantId tenant id
* @return {@link Future} of {@link RecordsIdentifiersCollection}
*/
Future<RecordsIdentifiersCollection> getMatchedRecordsIdentifiers(MatchField matchedField, Filter.ComparisonPartType comparisonPartType,
boolean returnTotalRecords, TypeConnection typeConnection,
boolean externalIdRequired, int offset, int limit, String tenantId);
Future<RecordsIdentifiersCollection> getMatchedRecordsIdentifiers(CompositeMatchField matchedField, boolean returnTotalRecords,
TypeConnection typeConnection, boolean externalIdRequired,
int offset, int limit, String tenantId);

/**
* Streams {@link Record} by {@link Condition} and ordered by collection of {@link OrderField}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,18 @@
import static org.folio.dao.util.RawRecordDaoUtil.RAW_RECORD_CONTENT;
import static org.folio.dao.util.RecordDaoUtil.RECORD_NOT_FOUND_TEMPLATE;
import static org.folio.dao.util.RecordDaoUtil.ensureRecordForeignKeys;
import static org.folio.dao.util.RecordDaoUtil.filterRecordByExternalHridValuesWithQualifier;
import static org.folio.dao.util.RecordDaoUtil.filterRecordByMultipleIds;
import static org.folio.dao.util.RecordDaoUtil.filterRecordByExternalIdNonNull;
import static org.folio.dao.util.RecordDaoUtil.filterRecordByState;
import static org.folio.dao.util.RecordDaoUtil.filterRecordByType;
import static org.folio.dao.util.RecordDaoUtil.getExternalHrid;
import static org.folio.dao.util.RecordDaoUtil.getExternalId;
import static org.folio.dao.util.RecordDaoUtil.getExternalIdType;
import static org.folio.dao.util.RecordDaoUtil.getExternalIdsConditionWithQualifier;
import static org.folio.dao.util.SnapshotDaoUtil.SNAPSHOT_NOT_FOUND_TEMPLATE;
import static org.folio.dao.util.SnapshotDaoUtil.SNAPSHOT_NOT_STARTED_MESSAGE_TEMPLATE;
import static org.folio.rest.jaxrs.model.RecordMatchingDto.LogicalOperator.AND;
import static org.folio.rest.jooq.Tables.ERROR_RECORDS_LB;
import static org.folio.rest.jooq.Tables.MARC_RECORDS_LB;
import static org.folio.rest.jooq.Tables.MARC_RECORDS_TRACKING;
Expand Down Expand Up @@ -81,6 +84,7 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.folio.dao.util.CompositeMatchField;
import org.folio.dao.util.ErrorRecordDaoUtil;
import org.folio.dao.util.IdType;
import org.folio.dao.util.MatchField;
Expand All @@ -107,6 +111,7 @@
import org.folio.rest.jaxrs.model.Record;
import org.folio.rest.jaxrs.model.RecordCollection;
import org.folio.rest.jaxrs.model.RecordIdentifiersDto;
import org.folio.rest.jaxrs.model.RecordMatchingDto;
import org.folio.rest.jaxrs.model.RecordsBatchResponse;
import org.folio.rest.jaxrs.model.RecordsIdentifiersCollection;
import org.folio.rest.jaxrs.model.SourceRecord;
Expand Down Expand Up @@ -220,9 +225,6 @@ public class RecordDaoImpl implements RecordDao {
private final PostgresClientFactory postgresClientFactory;
private final RecordDomainEventPublisher recordDomainEventPublisher;

@org.springframework.beans.factory.annotation.Value("${srs.record.matching.fallback-query.enable:false}")
private boolean enableFallbackQuery;

@Autowired
public RecordDaoImpl(final PostgresClientFactory postgresClientFactory,
final RecordDomainEventPublisher recordDomainEventPublisher) {
Expand Down Expand Up @@ -309,44 +311,10 @@ public Future<List<Record>> getMatchedRecords(MatchField matchedField, Filter.Co
.offset(offset)
.limit(limit > 0 ? limit : DEFAULT_LIMIT_FOR_GET_RECORDS);
}
)).compose(queryResult -> handleMatchedRecordsSearchResult(queryResult, matchedField, comparisonPartType, typeConnection, externalIdRequired, offset, limit, tenantId));
}

private Future<List<Record>> handleMatchedRecordsSearchResult(QueryResult queryResult, MatchField matchedField, Filter.ComparisonPartType comparisonPartType,
TypeConnection typeConnection,
boolean externalIdRequired, int offset, int limit, String tenantId) {
if (enableFallbackQuery && !queryResult.hasResults()) {
return getMatchedRecordsWithoutIndexersVersionUsage(matchedField, comparisonPartType, typeConnection, externalIdRequired, offset, limit, tenantId);
}
return Future.succeededFuture(queryResult.stream().map(res -> asRow(res.unwrap())).map(this::toRecord).toList());
}

public Future<List<Record>> getMatchedRecordsWithoutIndexersVersionUsage(MatchField matchedField, Filter.ComparisonPartType comparisonPartType,
TypeConnection typeConnection, boolean externalIdRequired,
int offset, int limit, String tenantId) {
Name prt = name(typeConnection.getDbType().getTableName());
Table<org.jooq.Record> marcIndexersPartitionTable = table(name(MARC_INDEXERS_PARTITION_PREFIX + matchedField.getTag()));
return getQueryExecutor(tenantId).transaction(txQE -> txQE.query(dsl -> dsl
.select(getAllRecordFields(prt))
.distinctOn(RECORDS_LB.ID)
.from(RECORDS_LB)
.leftJoin(table(prt)).on(RECORDS_LB.ID.eq(field(TABLE_FIELD_TEMPLATE, UUID.class, prt, name(ID))))
.leftJoin(RAW_RECORDS_LB).on(RECORDS_LB.ID.eq(RAW_RECORDS_LB.ID))
.leftJoin(ERROR_RECORDS_LB).on(RECORDS_LB.ID.eq(ERROR_RECORDS_LB.ID))
.innerJoin(marcIndexersPartitionTable).on(RECORDS_LB.ID.eq(field(TABLE_FIELD_TEMPLATE, UUID.class, marcIndexersPartitionTable, name(MARC_ID))))
.where(
filterRecordByType(typeConnection.getRecordType().value())
.and(filterRecordByState(Record.State.ACTUAL.value()))
.and(externalIdRequired ? filterRecordByExternalIdNonNull() : DSL.noCondition())
.and(getMatchedFieldCondition(matchedField, comparisonPartType, marcIndexersPartitionTable.getName()))
)
.offset(offset)
.limit(limit > 0 ? limit : DEFAULT_LIMIT_FOR_GET_RECORDS)
)).map(queryResult -> queryResult.stream()
.map(res -> asRow(res.unwrap()))
.map(this::toRecord)
.toList()
);
.toList());
}

private Condition getMatchedFieldCondition(MatchField matchedField, Filter.ComparisonPartType comparisonPartType, String partition) {
Expand Down Expand Up @@ -377,17 +345,17 @@ private Condition getMatchedFieldCondition(MatchField matchedField, Filter.Compa
@SuppressWarnings("squid:S125")
private static String getComparisonValue(Filter.ComparisonPartType comparisonPartType) {

String DEFAULT_VALUE = "\"{partition}\".\"value\"";
String defaultValue = "\"{partition}\".\"value\"";
if (comparisonPartType == null) {
return DEFAULT_VALUE;
return defaultValue;
}

return switch (comparisonPartType) {
//case ALPHANUMERICS_ONLY -> "regexp_replace(\"{partition}\".\"value\", '[^[:alnum:]]', '', 'g')";
case ALPHANUMERICS_ONLY -> "regexp_replace(\"{partition}\".\"value\", '[^\\w]|_', '', 'g')";
// case NUMERICS_ONLY -> "regexp_replace(\"{partition}\".\"value\", '[^[:digit:]]', '', 'g')";
case NUMERICS_ONLY -> "regexp_replace(\"{partition}\".\"value\", '[^\\d]', '', 'g')";
default -> DEFAULT_VALUE;
default -> defaultValue;
};
}

Expand All @@ -410,7 +378,7 @@ private String getSqlQualifier(MatchField.QualifierMatch qualifierMatch) {
};
}

private String getValueInSqlFormat(Value value) {
private String getValueInSqlFormat(Value<?> value) {
if (Value.ValueType.STRING.equals(value.getType())) {
return format(VALUE_IN_SINGLE_QUOTES, value.getValue());
}
Expand Down Expand Up @@ -521,7 +489,7 @@ private String buildCteWhereCondition(String whereExpression) throws JSQLParserE
public Flowable<Row> streamMarcRecordIds(ParseLeaderResult parseLeaderResult, ParseFieldsResult parseFieldsResult,
RecordSearchParameters searchParameters, String tenantId) throws JSQLParserException {
/* Building a search query */
//TODO: adjust bracets in condtion statements
//TODO: adjust brackets in condition statements
CommonTableExpression commonTableExpression = null;
if (parseFieldsResult.isEnabled()) {
String cteWhereExpression = buildCteWhereCondition(parseFieldsResult.getWhereExpression());
Expand Down Expand Up @@ -603,47 +571,36 @@ private void appendWhere(SelectJoinStep step, ParseLeaderResult parseLeaderResul
}

@Override
public Future<RecordsIdentifiersCollection> getMatchedRecordsIdentifiers(MatchField matchedField, Filter.ComparisonPartType comparisonPartType,
boolean returnTotalRecords, TypeConnection typeConnection,
boolean externalIdRequired, int offset, int limit, String tenantId) {
Table<org.jooq.Record> marcIndexersPartitionTable = table(name(MARC_INDEXERS_PARTITION_PREFIX + matchedField.getTag()));
if (matchedField.getValue() instanceof MissingValue) {
return Future.succeededFuture(new RecordsIdentifiersCollection().withTotalRecords(0));
}

public Future<RecordsIdentifiersCollection> getMatchedRecordsIdentifiers(CompositeMatchField matchedField, boolean returnTotalRecords,
TypeConnection typeConnection, boolean externalIdRequired,
int offset, int limit, String tenantId) {
return getQueryExecutor(tenantId).transaction(txQE -> txQE.query(dsl -> {
TableLike<Record1<Integer>> countQuery;
if (returnTotalRecords) {
countQuery = select(countDistinct(RECORDS_LB.ID))
.from(RECORDS_LB)
.innerJoin(marcIndexersPartitionTable)
.on(RECORDS_LB.ID.eq(field(TABLE_FIELD_TEMPLATE, UUID.class, marcIndexersPartitionTable, name(MARC_ID))))
.innerJoin(MARC_RECORDS_TRACKING)
.on(MARC_RECORDS_TRACKING.MARC_ID.eq(field(TABLE_FIELD_TEMPLATE, UUID.class, marcIndexersPartitionTable, name(MARC_ID)))
.and(MARC_RECORDS_TRACKING.VERSION.eq(field(TABLE_FIELD_TEMPLATE, Integer.class, marcIndexersPartitionTable, name(VERSION)))))
SelectJoinStep<Record1<Integer>> baseCountQuery = select(countDistinct(RECORDS_LB.ID)).from(RECORDS_LB);
joinOnTablesForSearchByMarcFields(baseCountQuery, matchedField);
countQuery = baseCountQuery
.where(filterRecordByType(typeConnection.getRecordType().value())
.and(filterRecordByState(Record.State.ACTUAL.value()))
.and(externalIdRequired ? filterRecordByExternalIdNonNull() : DSL.noCondition())
.and(getMatchedFieldCondition(matchedField, comparisonPartType, marcIndexersPartitionTable.getName())));
.and(getCompositeMatchedFieldCondition(matchedField)));
} else {
countQuery = select(inline(null, Integer.class).as(COUNT));
}

SelectConditionStep<org.jooq.Record> searchQuery = dsl
SelectJoinStep<org.jooq.Record> baseSearchQuery = dsl
.select(List.of(RECORDS_LB.ID, RECORDS_LB.EXTERNAL_ID))
.distinctOn(RECORDS_LB.ID)
.from(RECORDS_LB)
.innerJoin(marcIndexersPartitionTable)
.on(RECORDS_LB.ID.eq(field(TABLE_FIELD_TEMPLATE, UUID.class, marcIndexersPartitionTable, name(MARC_ID))))
.innerJoin(MARC_RECORDS_TRACKING)
.on(MARC_RECORDS_TRACKING.MARC_ID.eq(field(TABLE_FIELD_TEMPLATE, UUID.class, marcIndexersPartitionTable, name(MARC_ID)))
.and(MARC_RECORDS_TRACKING.VERSION.eq(field(TABLE_FIELD_TEMPLATE, Integer.class, marcIndexersPartitionTable, name(VERSION)))))
.from(RECORDS_LB);

joinOnTablesForSearchByMarcFields(baseSearchQuery, matchedField);
SelectConditionStep<org.jooq.Record> searchQuery = baseSearchQuery
.where(filterRecordByType(typeConnection.getRecordType().value())
.and(filterRecordByState(Record.State.ACTUAL.value()))
.and(externalIdRequired ? filterRecordByExternalIdNonNull() : DSL.noCondition())
.and(getMatchedFieldCondition(matchedField, comparisonPartType, marcIndexersPartitionTable.getName())));
.and(getCompositeMatchedFieldCondition(matchedField)));

return DSL.select()
return dsl.select()
.from(searchQuery)
.rightJoin(countQuery).on(DSL.trueCondition())
.orderBy(searchQuery.field(ID).asc())
Expand All @@ -652,6 +609,57 @@ public Future<RecordsIdentifiersCollection> getMatchedRecordsIdentifiers(MatchFi
})).map(result -> toRecordsIdentifiersCollection(result, returnTotalRecords));
}

/**
 * Appends the joins required to search by MARC field values to the given select step.
 * <p>
 * Nothing is joined when every match field of the composite is a "default" field
 * ({@link CompositeMatchField#isDefaultField()}). Otherwise {@code MARC_RECORDS_TRACKING} is inner-joined
 * on the record id, followed by one inner join per distinct MARC tag to the corresponding
 * {@code MARC_INDEXERS_PARTITION_PREFIX + tag} table, matched on both the tracked MARC id and version.
 * <p>
 * NOTE(review): the method returns nothing, so the caller observes the joins only if the jOOQ step
 * objects mutate the underlying query in place — confirm against the jOOQ version in use.
 *
 * @param selectJoinStep      select step (already positioned after {@code from(RECORDS_LB)}) to extend with joins
 * @param compositeMatchField match fields whose MARC tags determine which partition tables are joined
 */
private void joinOnTablesForSearchByMarcFields(SelectJoinStep<?> selectJoinStep, CompositeMatchField compositeMatchField) {
  if (compositeMatchField.isDefaultField()) {
    return;
  }

  SelectJoinStep<?> joinedStep = selectJoinStep
    .innerJoin(MARC_RECORDS_TRACKING)
    .on(RECORDS_LB.ID.eq(field(MARC_RECORDS_TRACKING.MARC_ID)));

  // Each MARC tag maps to exactly one partition table, so a tag is joined only once.
  Set<String> joinedTags = new HashSet<>();
  for (MatchField matchField : compositeMatchField.getMatchFields()) {
    // Set.add returns false for a tag that has already been joined.
    if (matchField.isDefaultField() || !joinedTags.add(matchField.getTag())) {
      continue;
    }

    Table<org.jooq.Record> partitionTable = table(name(MARC_INDEXERS_PARTITION_PREFIX + matchField.getTag()));
    Condition sameMarcId = MARC_RECORDS_TRACKING.MARC_ID
      .eq(field(TABLE_FIELD_TEMPLATE, UUID.class, partitionTable, name(MARC_ID)));
    Condition sameVersion = MARC_RECORDS_TRACKING.VERSION
      .eq(field(TABLE_FIELD_TEMPLATE, Integer.class, partitionTable, name(VERSION)));

    joinedStep = joinedStep.innerJoin(partitionTable).on(sameMarcId.and(sameVersion));
  }
}

/**
 * Builds a single condition out of all match fields of the given composite field.
 * <p>
 * Starting from {@code DSL.noCondition()}, the condition of each match field is appended with
 * {@code and} when the composite's logical operator is {@code AND}, and with {@code or} for any
 * other operator value (including {@code null}). Default fields are translated by
 * {@link #getDefaultMatchFieldCondition(MatchField)}; all other fields are matched against the
 * {@code MARC_INDEXERS_PARTITION_PREFIX + tag} partition table.
 *
 * @param compositeMatchField match fields and the logical operator that combines them
 * @return the combined condition; {@code DSL.noCondition()} when there are no match fields
 */
private Condition getCompositeMatchedFieldCondition(CompositeMatchField compositeMatchField) {
  boolean combineWithAnd = compositeMatchField.getLogicalOperator() == AND;
  Condition combined = DSL.noCondition();

  for (MatchField matchField : compositeMatchField.getMatchFields()) {
    Condition current;
    if (matchField.isDefaultField()) {
      current = getDefaultMatchFieldCondition(matchField);
    } else {
      // The partition name is needed only when matching against the MARC indexers table.
      String partitionName = table(name(MARC_INDEXERS_PARTITION_PREFIX + matchField.getTag())).getName();
      current = getMatchedFieldCondition(matchField, matchField.getComparisonPartType(), partitionName);
    }

    combined = combineWithAnd ? combined.and(current) : combined.or(current);
  }
  return combined;
}

/**
 * Translates a "default" match field (matched id, external id or external hrid) into a condition
 * built by the corresponding {@code RecordDaoUtil} helper.
 * <p>
 * The match field value is cast to {@link ListValue} unconditionally, so a value of any other type
 * results in a {@link ClassCastException}.
 *
 * @param matchField default match field carrying the list of values and the qualifier
 * @return condition for the recognised field kind, or {@code DSL.noCondition()} when the field is
 *         none of matched id, external id or external hrid
 */
private Condition getDefaultMatchFieldCondition(MatchField matchField) {
  List<String> matchValues = ((ListValue) matchField.getValue()).getValue();
  MatchField.QualifierMatch qualifierMatch = matchField.getQualifierMatch();

  // ComparisonPartType is deliberately ignored here: it is illogical to apply such filters to UUID values.
  if (matchField.isMatchedId()) {
    return getExternalIdsConditionWithQualifier(matchValues, IdType.RECORD, qualifierMatch);
  }
  if (matchField.isExternalId()) {
    return getExternalIdsConditionWithQualifier(matchValues, IdType.EXTERNAL, qualifierMatch);
  }
  if (matchField.isExternalHrid()) {
    return filterRecordByExternalHridValuesWithQualifier(matchValues, qualifierMatch);
  }
  return DSL.noCondition();
}

private RecordsIdentifiersCollection toRecordsIdentifiersCollection(QueryResult result, boolean returnTotalRecords) {
Integer countResult = asRow(result.unwrap()).getInteger(COUNT);
if (returnTotalRecords && (countResult == null || countResult == 0)) {
Expand Down Expand Up @@ -1413,11 +1421,9 @@ private static void formUpdateConditions(Record aRecord, List<Record> records, L
}
}

if (Objects.nonNull(additionalInfo)) {
if (Objects.nonNull(additionalInfo.getSuppressDiscovery())) {
updateStep = (Objects.isNull(updateStep) ? updateFirstStep : updateStep)
.set(RECORDS_LB.SUPPRESS_DISCOVERY, additionalInfo.getSuppressDiscovery());
}
if (Objects.nonNull(additionalInfo) && Objects.nonNull(additionalInfo.getSuppressDiscovery())) {
updateStep = (Objects.isNull(updateStep) ? updateFirstStep : updateStep)
.set(RECORDS_LB.SUPPRESS_DISCOVERY, additionalInfo.getSuppressDiscovery());
}

if (Objects.nonNull(metadata)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package org.folio.dao.util;

import org.folio.rest.jaxrs.model.RecordMatchingDto;

import java.util.List;

/**
 * The model of a field containing multiple {@link MatchField}s that need to be matched,
 * together with the logical operator used to combine them.
 * <p>
 * Instances are immutable: the list of match fields is copied on construction and exposed
 * as an unmodifiable list.
 */
public class CompositeMatchField {

  private final List<MatchField> matchFields;
  private final RecordMatchingDto.LogicalOperator logicalOperator;

  /**
   * Creates a composite match field.
   *
   * @param matchFields     match fields to combine; must not be {@code null} and must not contain {@code null} elements
   * @param logicalOperator operator used to combine the conditions of the individual match fields
   * @throws NullPointerException if {@code matchFields} is {@code null} or contains a {@code null} element
   */
  public CompositeMatchField(List<MatchField> matchFields, RecordMatchingDto.LogicalOperator logicalOperator) {
    // Snapshot the list so that later changes to the caller's list cannot alter this instance.
    this.matchFields = List.copyOf(matchFields);
    this.logicalOperator = logicalOperator;
  }

  /**
   * @return unmodifiable list of the match fields, in the order they were supplied
   */
  public List<MatchField> getMatchFields() {
    return matchFields;
  }

  /**
   * @return logical operator used to combine the match fields
   */
  public RecordMatchingDto.LogicalOperator getLogicalOperator() {
    return logicalOperator;
  }

  /**
   * Tells whether every contained match field is a default field.
   *
   * @return {@code true} if all match fields report {@link MatchField#isDefaultField()};
   *         also {@code true} when there are no match fields at all
   */
  public boolean isDefaultField() {
    return matchFields.stream().allMatch(MatchField::isDefaultField);
  }

}
Loading

0 comments on commit 59b04d5

Please sign in to comment.