From: Ray Lee Date: Mon, 13 Jan 2014 23:44:17 +0000 (-0800) Subject: CSPACE-6286: Fetch full text search result pages lazily when finding refobjs. X-Git-Url: https://git.aero2k.de/?a=commitdiff_plain;h=6a1fd4f181f49c4186c522bb64a3a8837b6f2b9f;p=tmp%2Fjakarta-migration.git CSPACE-6286: Fetch full text search result pages lazily when finding refobjs. --- diff --git a/services/common/src/main/java/org/collectionspace/services/common/vocabulary/LazyAuthorityRefDocList.java b/services/common/src/main/java/org/collectionspace/services/common/vocabulary/LazyAuthorityRefDocList.java new file mode 100644 index 000000000..e1810a3e4 --- /dev/null +++ b/services/common/src/main/java/org/collectionspace/services/common/vocabulary/LazyAuthorityRefDocList.java @@ -0,0 +1,208 @@ +package org.collectionspace.services.common.vocabulary; + +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.collectionspace.services.client.PoxPayloadIn; +import org.collectionspace.services.client.PoxPayloadOut; +import org.collectionspace.services.common.context.ServiceContext; +import org.collectionspace.services.common.document.DocumentException; +import org.collectionspace.services.common.document.DocumentNotFoundException; +import org.collectionspace.services.common.repository.RepositoryClient; +import org.collectionspace.services.common.vocabulary.RefNameServiceUtils.AuthRefConfigInfo; +import org.collectionspace.services.config.service.ServiceBindingType; +import org.nuxeo.ecm.core.api.DocumentModel; +import org.nuxeo.ecm.core.api.DocumentModelList; +import org.nuxeo.ecm.core.api.impl.DocumentModelListImpl; +import org.nuxeo.ecm.core.api.repository.RepositoryInstance; + +import com.google.common.collect.AbstractIterator; + +/** + * A DocumentModelList representing all of the documents that potentially reference an + * authority item, found via full text search. This list must be post-processed to + * eliminate false positives. 
+ * + * Documents in this list are lazily fetched one page at a time, as they are accessed through + * the list's Iterator, retrieved with the iterator() method. List items may not be accessed + * through any other means, including the get() method, and the ListIterator retrieved + * with listIterator(). Attempts to do so will result in unspecified behavior. + * + */ +public class LazyAuthorityRefDocList extends DocumentModelListImpl { + private static final long serialVersionUID = 1L; + + private ServiceContext ctx; + private RepositoryClient repoClient; + private RepositoryInstance repoSession; + private List serviceTypes; + private String refName; + private String refPropName; + private Map queriedServiceBindings; + private Map> authRefFieldsByService; + private String whereClauseAdditions; + private String orderByClause; + private int pageSize; + + private DocumentModelList firstPageDocList; + + /** + * Creates a LazyAuthorityRefDocList. The method signature is modeled after + * RefNameServiceUtils.findAuthorityRefDocs (the non-lazy way of doing this). 
+ * + * @param ctx + * @param repoClient + * @param repoSession + * @param serviceTypes + * @param refName + * @param refPropName + * @param queriedServiceBindings + * @param authRefFieldsByService + * @param whereClauseAdditions + * @param orderByClause + * @param pageSize The number of documents to retrieve in each page + * @param computeTotal Passed to the fetch of the first page, which happens in this constructor + * @throws DocumentException May be thrown by the fetch of the first page + * @throws DocumentNotFoundException May be thrown by the fetch of the first page + */ + public LazyAuthorityRefDocList( + ServiceContext ctx, + RepositoryClient repoClient, + RepositoryInstance repoSession, List serviceTypes, + String refName, + String refPropName, + Map queriedServiceBindings, + Map> authRefFieldsByService, + String whereClauseAdditions, + String orderByClause, + int pageSize, + boolean computeTotal) throws DocumentException, DocumentNotFoundException { + + this.ctx = ctx; + this.repoClient = repoClient; + this.repoSession = repoSession; + this.serviceTypes = serviceTypes; + this.refName = refName; + this.refPropName = refPropName; + this.queriedServiceBindings = queriedServiceBindings; + this.authRefFieldsByService = authRefFieldsByService; + this.whereClauseAdditions = whereClauseAdditions; + this.orderByClause = orderByClause; + this.pageSize = pageSize; + + // Fetch the first page immediately. This is necessary so that calls + // to totalSize() will work immediately. The computeTotal flag is passed + // into this initial page fetch. There's no need to compute totals + // when fetching subsequent pages. + + firstPageDocList = fetchPage(0, computeTotal); + } + + /** + * Retrieves a page of authority references. 
+ * + * @param pageNum The page number + * @param computeTotal Passed through unchanged to RefNameServiceUtils.findAuthorityRefDocs + * @return The list returned by RefNameServiceUtils.findAuthorityRefDocs for the requested page + * @throws DocumentNotFoundException + * @throws DocumentException + */ + private DocumentModelList fetchPage(int pageNum, boolean computeTotal) throws DocumentNotFoundException, DocumentException { + return RefNameServiceUtils.findAuthorityRefDocs(ctx, repoClient, repoSession, + serviceTypes, refName, refPropName, queriedServiceBindings, authRefFieldsByService, + whereClauseAdditions, orderByClause, pageSize, pageNum, computeTotal); + } + + @Override + public long totalSize() { + // Return the totalSize from the first page of documents. + return firstPageDocList.totalSize(); + } + + @Override + public Iterator iterator() { + // Create a new iterator that starts with the first page of documents. + return new Itr(0, firstPageDocList); + } + + /** + * An iterator over a LazyAuthorityRefDocList. The iterator keeps one + * page of documents in memory at a time, and traverses that page until + * no items remain. A new page is fetched only when the current page is + * exhausted. + * + */ + private class Itr extends AbstractIterator { + private int currentPageNum = 0; + private DocumentModelList currentPageDocList; + private Iterator currentPageIterator; + + /** + * Creates a new iterator. + * + * @param pageNum The initial page number + * @param pageDocList The documents in the initial page + */ + protected Itr(int pageNum, DocumentModelList pageDocList) { + setCurrentPage(pageNum, pageDocList); + } + + /** + * Changes the current page. + * + * @param pageNum The new page number + * @param pageDocList The documents in the new page + */ + private void setCurrentPage(int pageNum, DocumentModelList pageDocList) { + this.currentPageNum = pageNum; + this.currentPageDocList = pageDocList; + this.currentPageIterator = pageDocList.iterator(); + } + + @Override + protected DocumentModel computeNext() { + // Find the next document to return, looking first in the current + // page. 
If the current page is exhausted, fetch the next page. + + if (currentPageIterator.hasNext()) { + // There is still an element to return from the current page. + return currentPageIterator.next(); + } + + // The current page is exhausted. + + if (currentPageDocList.size() < pageSize) { + // There are no more pages. + return endOfData(); + } + + // There may be more pages. Try to fetch the next one. + + int nextPageNum = currentPageNum + 1; + DocumentModelList nextPageDocList = null; + + try { + nextPageDocList = fetchPage(nextPageNum, false); + } + catch(DocumentException e) { /* NOTE(review): the exception is swallowed; nextPageDocList stays null, so a failed fetch ends the iteration exactly as if no pages remained. Confirm this is intended. */ } + + if (nextPageDocList == null || nextPageDocList.size() == 0) { + // There are no more pages. + return endOfData(); + } + + // There is another page. Make it the current page. + + setCurrentPage(nextPageNum, nextPageDocList); + + if (currentPageIterator.hasNext()) { + return currentPageIterator.next(); + } + + // Shouldn't get here. + + return endOfData(); + } + } +} diff --git a/services/common/src/main/java/org/collectionspace/services/common/vocabulary/RefNameServiceUtils.java b/services/common/src/main/java/org/collectionspace/services/common/vocabulary/RefNameServiceUtils.java index 96a50c23e..ec2d63d84 100644 --- a/services/common/src/main/java/org/collectionspace/services/common/vocabulary/RefNameServiceUtils.java +++ b/services/common/src/main/java/org/collectionspace/services/common/vocabulary/RefNameServiceUtils.java @@ -266,9 +266,16 @@ public class RefNameServiceUtils { // the following call, as they pertain to the list of authority // references to be returned, not to the list of documents to be // scanned for those references. - DocumentModelList docList = findAuthorityRefDocs(ctx, repoClient, repoSession, + + // Get a list of possibly referencing documents. This list is + // lazily loaded, page by page. Ideally, only one page will + // need to be loaded to fill one page of results. 
Some number + // of possibly referencing documents will be false positives, + // so use a page size of double the requested page size to + // account for those. + DocumentModelList docList = findAllAuthorityRefDocs(ctx, repoClient, repoSession, serviceTypes, refName, refPropName, queriedServiceBindings, authRefFieldsByService, - filter.getWhereClause(), null, 0 /* pageSize */, 0 /* pageNum */, computeTotal); + filter.getWhereClause(), null, 2*pageSize, computeTotal); if (docList == null) { // found no authRef fields - nothing to process return wrapperList; @@ -465,7 +472,25 @@ public class RefNameServiceUtils { return nRefsFound; } - private static DocumentModelList findAuthorityRefDocs( + private static DocumentModelList findAllAuthorityRefDocs( + ServiceContext ctx, + RepositoryClient repoClient, + RepositoryInstance repoSession, List serviceTypes, + String refName, + String refPropName, + Map queriedServiceBindings, + Map> authRefFieldsByService, + String whereClauseAdditions, + String orderByClause, + int pageSize, + boolean computeTotal) throws DocumentException, DocumentNotFoundException { + + return new LazyAuthorityRefDocList(ctx, repoClient, repoSession, + serviceTypes, refName, refPropName, queriedServiceBindings, authRefFieldsByService, + whereClauseAdditions, orderByClause, pageSize, computeTotal); + } + + protected static DocumentModelList findAuthorityRefDocs( ServiceContext ctx, RepositoryClient repoClient, RepositoryInstance repoSession, List serviceTypes,