CSPACE-5870 Added more efficient support for pagination of refObjs results.

author Patrick Schmitz <pschmitz@berkeley.edu>

Fri, 8 Feb 2013 22:19:44 +0000 (14:19 -0800)

committer Patrick Schmitz <pschmitz@berkeley.edu>

Fri, 8 Feb 2013 22:19:44 +0000 (14:19 -0800)
author Patrick Schmitz <pschmitz@berkeley.edu>
Fri, 8 Feb 2013 22:19:44 +0000 (14:19 -0800)
committer Patrick Schmitz <pschmitz@berkeley.edu>
Fri, 8 Feb 2013 22:19:44 +0000 (14:19 -0800)
diff --git a/services/common/src/main/java/org/collectionspace/services/common/vocabulary/RefNameServiceUtils.java b/services/common/src/main/java/org/collectionspace/services/common/vocabulary/RefNameServiceUtils.java

index bd60deed5c1cc48e48aa2ef180e842c64aa09fb3..96a50c23ecf9251c8878bce3580c9f3d36cc379a 100644 (file)
--- a/services/common/src/main/java/org/collectionspace/services/common/vocabulary/RefNameServiceUtils.java
+++ b/services/common/src/main/java/org/collectionspace/services/common/vocabulary/RefNameServiceUtils.java
@@ -303,18 +303,24 @@ public class RefNameServiceUtils {
              // Strip off displayName and only match the base, so we get references to all \r
              // the NPTs as well as the PT.\r
                 String strippedRefName = RefNameUtils.stripAuthorityTermDisplayName(refName);\r
-            int nRefsFound = processRefObjsDocList(docList, ctx.getTenantId(), strippedRefName, true, queriedServiceBindings, authRefFieldsByService, // the actual list size needs to be updated to the size of "list"\r
-                    list, null);\r
-\r
+               \r
+               // Pass in the pagination info here, so the processing step can build just the requested page.\r
+            int nRefsFound = processRefObjsDocListForList(docList, ctx.getTenantId(), strippedRefName, \r
+                       queriedServiceBindings, authRefFieldsByService, // the actual list size needs to be updated to the size of "list"\r
+                    list, pageSize, pageNum);\r
+               \r
              commonList.setPageSize(pageSize);\r
              \r
              // Values returned in the pagination block above the list items\r
              // need to reflect the number of references to authority items\r
              // returned, rather than the number of documents originally scanned\r
              // to find such references.\r
+            // This will be an estimate only...\r
              commonList.setPageNum(pageNum);\r
-            commonList.setTotalItems(list.size());\r
+               commonList.setTotalItems(nRefsFound);   // Accurate if total was scanned, otherwise, just an estimate\r
+            commonList.setItemsInPage(list.size());\r
  \r
+            /* Pagination is now handled in the processing step\r
              // Slice the list to return only the specified page of items\r
              // in the list results.\r
              //\r
@@ -359,6 +365,7 @@ public class RefNameServiceUtils {
              wrapperList.getAuthorityRefDocItem().clear();\r
              wrapperList.getAuthorityRefDocItem().addAll(currentPageList);\r
              commonList.setItemsInPage(currentPageList.size());\r
+            */\r
              \r
              if (logger.isDebugEnabled() && (nRefsFound < docList.size())) {\r
                  logger.debug("Internal curiosity: got fewer matches of refs than # docs matched..."); // We found a ref to ourself and have excluded it.\r
@@ -433,8 +440,9 @@ public class RefNameServiceUtils {
  \r
                  // Only match complete refNames - unless and until we decide how to resolve changes\r
                  // to NPTs we will defer that and only change PTs or refNames as passed in.\r
-                int nRefsFoundThisPage = processRefObjsDocList(docList, ctx.getTenantId(), oldRefName, false, queriedServiceBindings, authRefFieldsByService, // Perform the refName updates on the list of document models\r
-                        null, newRefName);\r
+                int nRefsFoundThisPage = processRefObjsDocListForUpdate(docList, ctx.getTenantId(), oldRefName, \r
+                               queriedServiceBindings, authRefFieldsByService, // Perform the refName updates on the list of document models\r
+                        newRefName);\r
                  if (nRefsFoundThisPage > 0) {\r
                      ((RepositoryJavaClientImpl) repoClient).saveDocListWithoutHandlerProcessing(ctx, repoSession, docList, true); // Flush the document model list out to Nuxeo storage\r
                      nRefsFound += nRefsFoundThisPage;\r
@@ -566,8 +574,31 @@ public class RefNameServiceUtils {
                 return result;\r
         }\r
  \r
+    private static int processRefObjsDocListForUpdate(\r
+            DocumentModelList docList,\r
+            String tenantId,\r
+            String refName,\r
+            Map<String, ServiceBindingType> queriedServiceBindings,\r
+            Map<String, List<AuthRefConfigInfo>> authRefFieldsByService,\r
+            String newAuthorityRefName) {\r
+       return processRefObjsDocList(docList, tenantId, refName, false, queriedServiceBindings,\r
+                       authRefFieldsByService, null, 0, 0, newAuthorityRefName);\r
+    }\r
+                       \r
+    private static int processRefObjsDocListForList(\r
+            DocumentModelList docList,\r
+            String tenantId,\r
+            String refName,\r
+            Map<String, ServiceBindingType> queriedServiceBindings,\r
+            Map<String, List<AuthRefConfigInfo>> authRefFieldsByService,\r
+            List<AuthorityRefDocList.AuthorityRefDocItem> list, \r
+            int pageSize, int pageNum) {\r
+       return processRefObjsDocList(docList, tenantId, refName, true, queriedServiceBindings,\r
+                       authRefFieldsByService, list, pageSize, pageNum, null);\r
+    }\r
+                       \r
  \r
-    /*\r
+       /*\r
       * Runs through the list of found docs, processing them. If list is\r
       * non-null, then processing means gather the info for items. If list is\r
       * null, and newRefName is non-null, then processing means replacing and\r
@@ -583,9 +614,19 @@ public class RefNameServiceUtils {
              Map<String, ServiceBindingType> queriedServiceBindings,\r
              Map<String, List<AuthRefConfigInfo>> authRefFieldsByService,\r
              List<AuthorityRefDocList.AuthorityRefDocItem> list,\r
+            int pageSize, int pageNum, // Only used when constructing a list.\r
              String newAuthorityRefName) {\r
+        UriTemplateRegistry registry = ServiceMain.getInstance().getUriTemplateRegistry();\r
          Iterator<DocumentModel> iter = docList.iterator();\r
          int nRefsFoundTotal = 0;\r
+        boolean foundSelf = false;\r
+\r
+        // When paginating results, we have to guess at the total. The first guess is the number of docs returned\r
+        // by the query. However, the query returns some false positives, so this guess may be high.\r
+        // In addition, we can match multiple fields per doc, so the guess may also be low. Fun, eh?\r
+        int nDocsReturnedInQuery = (int)docList.totalSize();\r
+        int nDocsProcessed = 0;\r
+        int firstItemInPage = pageNum*pageSize;\r
          while (iter.hasNext()) {\r
              DocumentModel docModel = iter.next();\r
              AuthorityRefDocList.AuthorityRefDocItem ilistItem;\r
@@ -603,10 +644,17 @@ public class RefNameServiceUtils {
                      throw new InternalError("processRefObjsDocList() called with neither an itemList nor a new RefName!");\r
                  }\r
                  ilistItem = null;\r
+                pageSize = 0;\r
+                firstItemInPage = 0;   // Do not paginate if updating, rather than building list\r
              } else {    // Have a list - refObjs case\r
                  if (newAuthorityRefName != null) {\r
                      throw new InternalError("processRefObjsDocList() called with both an itemList and a new RefName!");\r
                  }\r
+                if(firstItemInPage > 100) {\r
+                       logger.warn("Processing a large offset (size:{}, num:{}) for refObjs - will be expensive!!!",\r
+                                               pageSize, pageNum);\r
+                }\r
+                // Note that we have to go through and check all the fields to determine the actual page start\r
                  ilistItem = new AuthorityRefDocList.AuthorityRefDocItem();\r
                  String csid = NuxeoUtils.getCsid(docModel);//NuxeoUtils.extractId(docModel.getPathAsString());\r
                  try {\r
@@ -619,7 +667,6 @@ public class RefNameServiceUtils {
                  }\r
                  ilistItem.setDocId(csid);\r
                  String uri = "";\r
-                UriTemplateRegistry registry = ServiceMain.getInstance().getUriTemplateRegistry();\r
                  UriTemplateRegistryKey key = new UriTemplateRegistryKey(tenantId, docType);\r
                  StoredValuesUriTemplate template = registry.get(key);\r
                  if (template != null) {\r
@@ -672,38 +719,70 @@ public class RefNameServiceUtils {
              //String authRefAncestorField = "";\r
              //String authRefDescendantField = "";\r
              //String sourceField = "";\r
-            int nRefsFoundInDoc = 0;\r
  \r
              ArrayList<RefNameServiceUtils.AuthRefInfo> foundProps = new ArrayList<RefNameServiceUtils.AuthRefInfo>();\r
              try {\r
                  findAuthRefPropertiesInDoc(docModel, matchingAuthRefFields, refName, matchBaseOnly, foundProps); // REM - side effect that foundProps is set\r
-                for (RefNameServiceUtils.AuthRefInfo ari : foundProps) {\r
-                    if (ilistItem != null) {\r
-                        if (nRefsFoundInDoc == 0) {    // First one?\r
-                            ilistItem.setSourceField(ari.getQualifiedDisplayName());\r
-                        } else {    // duplicates from one object\r
-                            ilistItem = cloneAuthRefDocItem(ilistItem, ari.getQualifiedDisplayName());\r
-                        }\r
-                        list.add(ilistItem);\r
-                    } else {    // update refName case\r
-                        Property propToUpdate = ari.getProperty();\r
-                        propToUpdate.setValue(newAuthorityRefName);\r
-                    }\r
-                    nRefsFoundInDoc++;\r
+                if(!foundProps.isEmpty()) {\r
+                    int nRefsFoundInDoc = 0;\r
+                       for (RefNameServiceUtils.AuthRefInfo ari : foundProps) {\r
+                               if (ilistItem != null) {\r
+                                       // So this is a true positive, and not a false one. We have to consider pagination now.\r
+                                       if(nRefsFoundTotal >= firstItemInPage) {        // skipped enough already\r
+                                               if (nRefsFoundInDoc == 0) {    // First one?\r
+                                                       ilistItem.setSourceField(ari.getQualifiedDisplayName());\r
+                                               } else {    // duplicates from one object\r
+                                                       ilistItem = cloneAuthRefDocItem(ilistItem, ari.getQualifiedDisplayName());\r
+                                               }\r
+                                               list.add(ilistItem);\r
+                                       nRefsFoundInDoc++;      // Only increment if processed, or clone logic above will fail\r
+                                       }\r
+                               } else {    // update refName case\r
+                                       Property propToUpdate = ari.getProperty();\r
+                                       propToUpdate.setValue(newAuthorityRefName);\r
+                               }\r
+                               nRefsFoundTotal++;              // Whether we processed or not, we found - essential to pagination logic\r
+                       }\r
+                } else if(ilistItem != null) {\r
+                       String docRefName = ilistItem.getRefName();\r
+                    if (matchBaseOnly?\r
+                                       (docRefName!=null && docRefName.startsWith(refName))\r
+                                       :refName.equals(docRefName)) {\r
+                               // We found the self for an item\r
+                               foundSelf = true;\r
+                               logger.debug("getAuthorityRefDocs: Result: "\r
+                                                               + docType + " [" + NuxeoUtils.getCsid(docModel)\r
+                                                               + "] appears to be self for: ["\r
+                                                               + refName + "]");\r
+                       } else {\r
+                               logger.debug("getAuthorityRefDocs: Result: "\r
+                                                               + docType + " [" + NuxeoUtils.getCsid(docModel)\r
+                                                               + "] does not reference ["\r
+                                                               + refName + "]");\r
+                       }\r
                  }\r
              } catch (ClientException ce) {\r
-                throw new RuntimeException(\r
-                        "getAuthorityRefDocs: Problem fetching values from repo: " + ce.getLocalizedMessage());\r
+               throw new RuntimeException(\r
+                               "getAuthorityRefDocs: Problem fetching values from repo: " + ce.getLocalizedMessage());\r
              }\r
-            if (nRefsFoundInDoc == 0) {\r
-                logger.warn(\r
-                        "getAuthorityRefDocs: Result: "\r
-                        + docType + " [" + NuxeoUtils.getCsid(docModel)\r
-                        + "] does not reference ["\r
-                        + refName + "]");\r
+            nDocsProcessed++;\r
+            // Done processing that doc. Are we done with the whole page?\r
+            // Note pageSize <=0 means do them all\r
+            if((pageSize > 0) && ((nRefsFoundTotal-firstItemInPage)>=pageSize)) {\r
+               // Quitting early, so we need to estimate the total. Assume one per doc\r
+               // for the rest of the docs we matched in the query\r
+               int unprocessedDocs = nDocsReturnedInQuery - nDocsProcessed;\r
+               if(unprocessedDocs>0) {\r
+                       // We generally match ourselves in the keyword search. If we already saw ourselves\r
+                       // then do not try to correct for this. Otherwise, decrement the total.\r
+                       // Yes, this is fairly goofy, but the whole estimation mechanism is goofy. \r
+                       if(!foundSelf)\r
+                               unprocessedDocs--;\r
+                       nRefsFoundTotal += unprocessedDocs;\r
+               }\r
+               break;\r
              }\r
-            nRefsFoundTotal += nRefsFoundInDoc;\r
-        }\r
+        } // close while(iterator)\r
          return nRefsFoundTotal;\r
      }\r
  \r
author	Patrick Schmitz <pschmitz@berkeley.edu>
	Fri, 8 Feb 2013 22:19:44 +0000 (14:19 -0800)
committer	Patrick Schmitz <pschmitz@berkeley.edu>
	Fri, 8 Feb 2013 22:19:44 +0000 (14:19 -0800)