git.aero2k.de Git - tmp/jakarta-migration.git/commitdiff
CSPACE-4302 Add support for booleans (AND, OR, NOT) and phrase search (latter waiting...
author Patrick Schmitz <pschmitz@berkeley.edu>
Sat, 3 Dec 2011 00:29:30 +0000 (00:29 +0000)
committer Patrick Schmitz <pschmitz@berkeley.edu>
Sat, 3 Dec 2011 00:29:30 +0000 (00:29 +0000)
services/common/src/main/java/org/collectionspace/services/common/query/nuxeo/QueryManagerNuxeoImpl.java

index 242f715b3c7fb1b6e5f0d9c53b4fe6d05486d2fa..151f745fb9e09b8558142a4377bd3dc0bbf0fdda 100644 (file)
@@ -29,6 +29,7 @@ package org.collectionspace.services.common.query.nuxeo;
 import org.slf4j.Logger;\r
 import org.slf4j.LoggerFactory;\r
 \r
+import java.util.regex.Matcher;\r
 import java.util.regex.Pattern;\r
 \r
 import org.nuxeo.ecm.core.api.DocumentModel;\r
@@ -61,10 +62,10 @@ public class QueryManagerNuxeoImpl implements IQueryManager {
        // words\r
        private static Pattern nonWordChars = Pattern\r
                        .compile("[^\\p{L}\\p{M}\\p{N}_']");\r
+       private static Pattern kwdTokenizer = Pattern.compile("(?:(['\"])(.*?)(?<!\\\\)(?>\\\\\\\\)*\\1|([^ ]+))"); /* One token = either a quoted phrase (opened by ' or ", lazily extended to the same quote character preceded by an even number of backslashes, i.e. not escaped) or a run of non-space characters. */\r
        private static Pattern unescapedDblQuotes = Pattern.compile("(?<!\\\\)\"");\r
        private static Pattern unescapedSingleQuote = Pattern.compile("(?<!\\\\)'");\r
-       private static Pattern kwdSearchProblemChars = Pattern\r
-                       .compile("[\\:\\(\\)]");\r
+       private static Pattern kwdSearchProblemChars = Pattern.compile("[\\:\\(\\)]");\r
        private static Pattern kwdSearchHyphen = Pattern.compile(" - ");\r
 \r
        private static String getLikeForm() {\r
@@ -155,6 +156,80 @@ public class QueryManagerNuxeoImpl implements IQueryManager {
        // in NXQL\r
        @Override\r
        public String createWhereClauseFromKeywords(String keywords) {\r
+               String result = null;\r
+               StringBuffer fullTextWhereClause = new StringBuffer(SEARCH_GROUP_OPEN);\r
+               // StringBuffer phraseWhereClause = new StringBuffer(SEARCH_GROUP_OPEN);\r
+               // Split on unescaped double quotes to handle phrases\r
+               Matcher regexMatcher = kwdTokenizer.matcher(keywords.trim());\r
+               boolean addNOT = false;\r
+               boolean newWordSet = true;\r
+               while (regexMatcher.find()) {\r
+                       String phrase = regexMatcher.group();\r
+                       // Not needed - already trimmed by split: \r
+                       // String trimmed = phrase.trim();\r
+                       // Ignore empty strings from match, or goofy input\r
+                       if (phrase.isEmpty())\r
+                               continue;\r
+                       // Note we let OR through as is\r
+                       if("AND".equalsIgnoreCase(phrase)) {\r
+                               continue;       // AND is default\r
+                       } else if("NOT".equalsIgnoreCase(phrase)) {\r
+                               addNOT = true;\r
+                               continue;\r
+                       }\r
+                       if (newWordSet) {\r
+                               fullTextWhereClause.append(ECM_FULLTEXT_LIKE + "'");\r
+                               newWordSet = false;\r
+                       } else {\r
+                               fullTextWhereClause.append(SEARCH_TERM_SEPARATOR);\r
+                       }\r
+                       if(addNOT) {\r
+                               fullTextWhereClause.append("-");        // Negate the next term\r
+                               addNOT = false;\r
+                       }\r
+                       // Next comment block of questionable value...\r
+                       \r
+                       // ignore the special chars except single quote here - can't hurt\r
+                       // TODO this should become a special function that strips things the\r
+                       // fulltext will ignore, including non-word chars and too-short\r
+                       // words,\r
+                       // and escaping single quotes. Can return a boolean for anything\r
+                       // stripped,\r
+                       // which triggers the back-up search. We can think about whether\r
+                       // stripping\r
+                       // short words not in a quoted phrase should trigger the backup.\r
+                       phrase = unescapedSingleQuote.matcher(phrase).replaceAll("\\\\'");\r
+                       // If there are non-word chars in the phrase, we need to match the\r
+                       // phrase exactly against the fulltext table for this object\r
+                       // if(nonWordChars.matcher(trimmed).matches()) {\r
+                       // }\r
+                       // Replace problem chars with spaces. Patches CSPACE-4147,\r
+                       // CSPACE-4106\r
+                       phrase = kwdSearchProblemChars.matcher(phrase).replaceAll(" ");\r
+                       phrase = kwdSearchHyphen.matcher(phrase).replaceAll(" ");\r
+\r
+                       fullTextWhereClause.append(phrase);\r
+                       \r
+                       if (logger.isTraceEnabled() == true) {\r
+                               logger.trace("Current built whereClause is: "\r
+                                               + fullTextWhereClause.toString());\r
+                       }\r
+               }\r
+               if (fullTextWhereClause.length()==0) {\r
+                       throw new RuntimeException(\r
+                                       "No usable keywords specified in string:[" + keywords + "]");\r
+               }\r
+               fullTextWhereClause.append("'" + SEARCH_GROUP_CLOSE);\r
+\r
+               result = fullTextWhereClause.toString();\r
+               if (logger.isDebugEnabled()) {\r
+                       logger.debug("Final built WHERE clause is: " + result);\r
+               }\r
+\r
+               return result;\r
+       }\r
+\r
+       public String createWhereClauseFromKeywordsOld(String keywords) {\r
                String result = null;\r
                StringBuffer fullTextWhereClause = new StringBuffer(SEARCH_GROUP_OPEN);\r
                // StringBuffer phraseWhereClause = new StringBuffer(SEARCH_GROUP_OPEN);\r