From aeacca480b243c87f4eb88d6ec1d097ea580d2d6 Mon Sep 17 00:00:00 2001 From: Aron Roberts Date: Mon, 17 Nov 2014 18:38:54 -0800 Subject: [PATCH] CSPACE-6258: Fixed bug that prevented non-preferred terms from being returned in partial term matching queries that used wildcards and/or anchor characters. --- .../AuthorityItemDocumentModelHandler.java | 108 ++++++++++++++++-- .../client/java/RepositoryJavaClientImpl.java | 10 +- 2 files changed, 107 insertions(+), 11 deletions(-) diff --git a/services/authority/service/src/main/java/org/collectionspace/services/common/vocabulary/nuxeo/AuthorityItemDocumentModelHandler.java b/services/authority/service/src/main/java/org/collectionspace/services/common/vocabulary/nuxeo/AuthorityItemDocumentModelHandler.java index 8f75f91ac..d8d031d4e 100644 --- a/services/authority/service/src/main/java/org/collectionspace/services/common/vocabulary/nuxeo/AuthorityItemDocumentModelHandler.java +++ b/services/authority/service/src/main/java/org/collectionspace/services/common/vocabulary/nuxeo/AuthorityItemDocumentModelHandler.java @@ -67,6 +67,7 @@ import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; //import org.collectionspace.services.common.authority.AuthorityItemRelations; /** @@ -90,6 +91,7 @@ public abstract class AuthorityItemDocumentModelHandler // Used to determine when the displayName changes as part of the update. protected String oldDisplayNameOnUpdate = null; private final static String LIST_SUFFIX = "List"; + private final static String ZERO_OR_MORE_ANY_CHAR_REGEX = ".*"; public AuthorityItemDocumentModelHandler(String authorityItemCommonSchemaName) { this.authorityItemCommonSchemaName = authorityItemCommonSchemaName; @@ -557,18 +559,108 @@ public abstract class AuthorityItemDocumentModelHandler } } + /** + * Returns the items in a list of term display names whose names contain + * a partial term (as might be submitted in a search query, for instance). + * @param termDisplayNameList a list of term display names. + * @param partialTerm a partial term display name; that is, a portion + * of a display name that might be expected to match 0-n terms in the list. + * @return a list of term display names that matches the partial term. + * Matches are case-insensitive. As well, before matching is performed, any + * special-purpose characters that may appear in the partial term (such as + * wildcards and anchor characters) are filtered out from both compared terms. + */ protected List getPartialTermDisplayNameMatches(List termDisplayNameList, String partialTerm) { - List result = new ArrayList(); - - for (String termDisplayName : termDisplayNameList) { - if (termDisplayName.toLowerCase().contains(partialTerm.toLowerCase()) == true) { - result.add(termDisplayName); - } - } - + List result = new ArrayList<>(); + String partialTermMatchExpression = filterAnchorAndWildcardChars(partialTerm).toLowerCase(); + try { + for (String termDisplayName : termDisplayNameList) { + if (termDisplayName.toLowerCase() + .matches(partialTermMatchExpression) == true) { + result.add(termDisplayName); + } + } + } catch (PatternSyntaxException pse) { + logger.warn("Error in regex match pattern '%s' for term display names: %s", + partialTermMatchExpression, pse.getMessage()); + } return result; } + /** + * Filters user-supplied anchor and wildcard characters in a string, + * replacing them with equivalent regular expressions. + * @param term a term in which to filter anchor and wildcard characters. + * @return the term with those characters filtered. + */ + protected String filterAnchorAndWildcardChars(String term) { + if (Tools.isBlank(term)) { + return term; + } + if (term.length() < 3) { + return term; + } + if (logger.isTraceEnabled()) { + logger.trace(String.format("Term = %s", term)); + } + Boolean anchorAtStart = false; + Boolean anchorAtEnd = false; + String filteredTerm; + StringBuilder filteredTermBuilder = new StringBuilder(term); + // Term contains no anchor or wildcard characters. + if ( (! term.contains(RepositoryJavaClientImpl.USER_SUPPLIED_ANCHOR_CHAR)) + && (! term.contains(RepositoryJavaClientImpl.USER_SUPPLIED_WILDCARD)) ) { + filteredTerm = term; + } else { + // Term contains at least one such character. + try { + // Filter the starting anchor or wildcard character, if any. + String firstChar = filteredTermBuilder.substring(0,1); + switch (firstChar) { + case RepositoryJavaClientImpl.USER_SUPPLIED_ANCHOR_CHAR: + anchorAtStart = true; + break; + case RepositoryJavaClientImpl.USER_SUPPLIED_WILDCARD: + filteredTermBuilder.deleteCharAt(0); + break; + } + if (logger.isTraceEnabled()) { + logger.trace(String.format("After first char filtering = %s", filteredTermBuilder.toString())); + } + // Filter the ending anchor or wildcard character, if any. + int lastPos = filteredTermBuilder.length() - 1; + String lastChar = filteredTermBuilder.substring(lastPos); + switch (lastChar) { + case RepositoryJavaClientImpl.USER_SUPPLIED_ANCHOR_CHAR: + filteredTermBuilder.deleteCharAt(lastPos); + filteredTermBuilder.insert(filteredTermBuilder.length(), RepositoryJavaClientImpl.ENDING_ANCHOR_CHAR); + anchorAtEnd = true; + break; + case RepositoryJavaClientImpl.USER_SUPPLIED_WILDCARD: + filteredTermBuilder.deleteCharAt(lastPos); + break; + } + if (logger.isTraceEnabled()) { + logger.trace(String.format("After last char filtering = %s", filteredTermBuilder.toString())); + } + filteredTerm = filteredTermBuilder.toString(); + // Filter all other wildcards, if any. + filteredTerm = filteredTerm.replaceAll(RepositoryJavaClientImpl.USER_SUPPLIED_WILDCARD_REGEX, ZERO_OR_MORE_ANY_CHAR_REGEX); + if (logger.isTraceEnabled()) { + logger.trace(String.format("After replacing user wildcards = %s", filteredTerm)); + } + } catch (Exception e) { + logger.warn(String.format("Error filtering anchor and wildcard characters from string: %s", e.getMessage())); + return term; + } + } + // Wrap the term in beginning and ending regex wildcards, unless a + // starting or ending anchor character was present. + return (anchorAtStart ? "" : ZERO_OR_MORE_ANY_CHAR_REGEX) + + filteredTerm + + (anchorAtEnd ? "" : ZERO_OR_MORE_ANY_CHAR_REGEX); + } + @SuppressWarnings("unchecked") private List getPartialTermDisplayNameMatches(DocumentModel docModel, // REM - CSPACE-5133 String schema, ListResultField field, String partialTerm) { diff --git a/services/common/src/main/java/org/collectionspace/services/nuxeo/client/java/RepositoryJavaClientImpl.java b/services/common/src/main/java/org/collectionspace/services/nuxeo/client/java/RepositoryJavaClientImpl.java index c8bb4a62e..0b8cdc93d 100644 --- a/services/common/src/main/java/org/collectionspace/services/nuxeo/client/java/RepositoryJavaClientImpl.java +++ b/services/common/src/main/java/org/collectionspace/services/nuxeo/client/java/RepositoryJavaClientImpl.java @@ -105,9 +105,13 @@ public class RepositoryJavaClientImpl implements RepositoryClient