import org.slf4j.Logger;\r
import org.slf4j.LoggerFactory;\r
\r
+import java.util.regex.Matcher;\r
import java.util.regex.Pattern;\r
\r
import org.nuxeo.ecm.core.api.DocumentModel;\r
// words\r
private static Pattern nonWordChars = Pattern\r
.compile("[^\\p{L}\\p{M}\\p{N}_']");\r
+ private static Pattern kwdTokenizer = Pattern.compile("(?:(['\"])(.*?)(?<!\\\\)(?>\\\\\\\\)*\\1|([^ ]+))");\r
private static Pattern unescapedDblQuotes = Pattern.compile("(?<!\\\\)\"");\r
private static Pattern unescapedSingleQuote = Pattern.compile("(?<!\\\\)'");\r
- private static Pattern kwdSearchProblemChars = Pattern\r
- .compile("[\\:\\(\\)]");\r
+ private static Pattern kwdSearchProblemChars = Pattern.compile("[\\:\\(\\)]");\r
private static Pattern kwdSearchHyphen = Pattern.compile(" - ");\r
\r
private static String getLikeForm() {\r
// in NXQL\r
@Override\r
public String createWhereClauseFromKeywords(String keywords) {\r
+ String result = null;\r
+ StringBuffer fullTextWhereClause = new StringBuffer(SEARCH_GROUP_OPEN);\r
+ // StringBuffer phraseWhereClause = new StringBuffer(SEARCH_GROUP_OPEN);\r
+ // Split on unescaped double quotes to handle phrases\r
+ Matcher regexMatcher = kwdTokenizer.matcher(keywords.trim());\r
+ boolean addNOT = false;\r
+ boolean newWordSet = true;\r
+ while (regexMatcher.find()) {\r
+ String phrase = regexMatcher.group();\r
+ // Not needed - already trimmed by split: \r
+ // String trimmed = phrase.trim();\r
+ // Ignore empty strings from match, or goofy input\r
+ if (phrase.isEmpty())\r
+ continue;\r
+ // Note we let OR through as is\r
+ if("AND".equalsIgnoreCase(phrase)) {\r
+ continue; // AND is default\r
+ } else if("NOT".equalsIgnoreCase(phrase)) {\r
+ addNOT = true;\r
+ continue;\r
+ }\r
+ if (newWordSet) {\r
+ fullTextWhereClause.append(ECM_FULLTEXT_LIKE + "'");\r
+ newWordSet = false;\r
+ } else {\r
+ fullTextWhereClause.append(SEARCH_TERM_SEPARATOR);\r
+ }\r
+ if(addNOT) {\r
+ fullTextWhereClause.append("-"); // Negate the next term\r
+ addNOT = false;\r
+ }\r
+ // Next comment block of questionable value...\r
+ \r
+ // ignore the special chars except single quote here - can't hurt\r
+ // TODO this should become a special function that strips things the\r
+ // fulltext will ignore, including non-word chars and too-short\r
+ // words,\r
+ // and escaping single quotes. Can return a boolean for anything\r
+ // stripped,\r
+ // which triggers the back-up search. We can think about whether\r
+ // stripping\r
+ // short words not in a quoted phrase should trigger the backup.\r
+ phrase = unescapedSingleQuote.matcher(phrase).replaceAll("\\\\'");\r
+ // If there are non-word chars in the phrase, we need to match the\r
+ // phrase exactly against the fulltext table for this object\r
+ // if(nonWordChars.matcher(trimmed).matches()) {\r
+ // }\r
+ // Replace problem chars with spaces. Patches CSPACE-4147,\r
+ // CSPACE-4106\r
+ phrase = kwdSearchProblemChars.matcher(phrase).replaceAll(" ");\r
+ phrase = kwdSearchHyphen.matcher(phrase).replaceAll(" ");\r
+\r
+ fullTextWhereClause.append(phrase);\r
+ \r
+ if (logger.isTraceEnabled() == true) {\r
+ logger.trace("Current built whereClause is: "\r
+ + fullTextWhereClause.toString());\r
+ }\r
+ }\r
+ if (fullTextWhereClause.length()==0) {\r
+ throw new RuntimeException(\r
+ "No usable keywords specified in string:[" + keywords + "]");\r
+ }\r
+ fullTextWhereClause.append("'" + SEARCH_GROUP_CLOSE);\r
+\r
+ result = fullTextWhereClause.toString();\r
+ if (logger.isDebugEnabled()) {\r
+ logger.debug("Final built WHERE clause is: " + result);\r
+ }\r
+\r
+ return result;\r
+ }\r
+\r
+ public String createWhereClauseFromKeywordsOld(String keywords) {\r
String result = null;\r
StringBuffer fullTextWhereClause = new StringBuffer(SEARCH_GROUP_OPEN);\r
// StringBuffer phraseWhereClause = new StringBuffer(SEARCH_GROUP_OPEN);\r