1 package org.collectionspace.services.batch.nuxeo;
3 import java.net.URISyntaxException;
4 import java.util.ArrayList;
5 import java.util.Arrays;
6 import java.util.Collections;
7 import java.util.HashMap;
8 import java.util.HashSet;
9 import java.util.Iterator;
10 import java.util.LinkedHashMap;
11 import java.util.LinkedHashSet;
12 import java.util.List;
16 import org.apache.commons.lang.StringUtils;
18 import org.collectionspace.services.client.PayloadOutputPart;
19 import org.collectionspace.services.client.PoxPayloadOut;
20 import org.collectionspace.services.client.RelationClient;
21 import org.collectionspace.services.client.workflow.WorkflowClient;
22 import org.collectionspace.services.common.NuxeoBasedResource;
23 import org.collectionspace.services.common.api.RefNameUtils;
24 import org.collectionspace.services.common.api.RefNameUtils.AuthorityTermInfo;
25 import org.collectionspace.services.common.authorityref.AuthorityRefDocList;
26 import org.collectionspace.services.common.invocable.InvocationContext.Params.Param;
27 import org.collectionspace.services.common.invocable.InvocationResults;
28 import org.collectionspace.services.common.relation.RelationResource;
29 import org.collectionspace.services.common.vocabulary.AuthorityResource;
30 import org.collectionspace.services.relation.RelationsCommonList;
32 import org.dom4j.Document;
33 import org.dom4j.DocumentException;
34 import org.dom4j.DocumentHelper;
35 import org.dom4j.Element;
36 import org.dom4j.Node;
38 import org.slf4j.Logger;
39 import org.slf4j.LoggerFactory;
42 * A batch job that merges authority items. The single and list contexts are supported.
45 * The merge target is a record into which one or more source records will be
46 * merged. A merge source is a record that will be merged into the target, as
47 * follows: Each term in a source record is added to the target as a non-
48 * preferred term, if that term does not already exist in the target. If a term
49 * in the source already exists in the target, each non-blank term field is
50 * copied to the target, if that field is empty in the target. If the field is
51 * non-empty in the target, and differs from the source field, a warning is
52 * emitted and no action is taken. If a source is successfully merged into the
53 * target, all references to the source are transferred to the target, and the
54 * source record is soft-deleted.
56 * The context (singleCSID or listCSIDs of the batch invocation payload)
57 * specifies the source record(s).
59 * The following parameters are allowed:
61 * targetCSID: The csid of the target record. Only one target may be supplied.
65 public class MergeAuthorityItemsBatchJob extends AbstractBatchJob {
// Logger used by the methods of this class for debug tracing of each merge step.
66 final Logger logger = LoggerFactory.getLogger(MergeAuthorityItemsBatchJob.class);
/**
 * Creates the batch job and registers the single and list invocation modes as the
 * modes it supports.
 */
68 public MergeAuthorityItemsBatchJob() {
69 setSupportedInvocationModes(Arrays.asList(INVOCATION_MODE_SINGLE, INVOCATION_MODE_LIST));
// Body of the job's entry point (the method header is not visible in this extract): collects the
// source csids and the target from the invocation context and parameters, runs the merge, and
// records the completion status.
74 setCompletionStatus(STATUS_MIN_PROGRESS);
// A LinkedHashSet drops duplicate csids while keeping the order in which they were supplied.
78 Set<String> sourceCsids = new LinkedHashSet<String>();
79 String docType = this.getDocType();
// Seed the sources from the invocation context: one csid in single mode, all listed csids in list mode.
81 if (this.requestIsForInvocationModeSingle()) {
82 String singleCsid = this.getSingleCsid();
84 if (singleCsid != null) {
85 sourceCsids.add(singleCsid);
87 } else if (this.requestIsForInvocationModeList()) {
88 sourceCsids.addAll(this.getListCsids());
// Parameters can override the docType, name the target, and contribute additional source csids.
91 for (Param param : this.getParams()) {
92 String key = param.getKey();
94 // I don't want this batch job to appear in the UI, since it won't run successfully without parameters.
95 // That means it can't be registered with any docType. But if the invocation payload contains a docType,
96 // it will be checked against the null registered docType, and will fail. So docType should be passed as a
// (the "docType" key handled next replaces the value obtained from getDocType() above)
99 if (key.equals("docType")) {
100 docType = param.getValue();
// "target" and "targetCSID" both set the same variable; if both are supplied, the one seen last wins.
// NOTE(review): the declaration of target is not visible here; the null/empty check below suggests
// it starts out null -- confirm.
102 else if (key.equals("target")) {
103 target = param.getValue();
105 else if (key.equals("targetCSID")) {
106 target = param.getValue();
// Each "sourceCSID" parameter adds one more source on top of those taken from the context.
108 else if (key.equals("sourceCSID")) {
109 sourceCsids.add(param.getValue());
// Validate the inputs before doing any work: a target and at least one source are mandatory.
113 if (target == null || target.equals("")) {
114 throw new Exception("a target or targetCSID parameter must be supplied");
117 if (sourceCsids.size() == 0) {
118 throw new Exception("a source csid must be supplied");
121 InvocationResults results = merge(docType, target, sourceCsids);
124 setCompletionStatus(STATUS_COMPLETE);
// Any exception raised above marks the job as failed and records its message as the error info.
126 catch (Exception e) {
127 setCompletionStatus(STATUS_ERROR);
128 setErrorInfo(new InvocationError(INT_ERROR_STATUS, e.getMessage()));
/**
 * Convenience overload for merging a single source record: wraps the csid in a
 * one-element set and delegates to the Set-based overload.
 *
 * @param docType    passed through unchanged to the Set-based overload
 * @param target     passed through unchanged to the Set-based overload
 * @param sourceCsid the csid of the single source record
 * @return whatever the Set-based overload returns
 */
132 public InvocationResults merge(String docType, String target, String sourceCsid) throws URISyntaxException, DocumentException, Exception {
133 return merge(docType, target, new LinkedHashSet<String>(Arrays.asList(sourceCsid)));
/**
 * Resolves the target and the sources to item payloads and delegates to the
 * payload-based merge.
 *
 * @param docType     used to look up the authority service name, and passed on to the payload-based merge
 * @param target      identifies the target item; looked up by refname when
 *                    RefNameUtils.isTermRefname(target) is true, otherwise by csid
 * @param sourceCsids the csids of the source items
 * @return the results of the payload-based merge
 * @throws DocumentException if any source csid equals the csid of the resolved target
 */
136 public InvocationResults merge(String docType, String target, Set<String> sourceCsids) throws URISyntaxException, DocumentException, Exception {
137 logger.debug("Merging docType=" + docType + " target=" + target + " sourceCsids=" + StringUtils.join(sourceCsids, ","));
139 String serviceName = getAuthorityServiceNameForDocType(docType);
141 PoxPayloadOut targetItemPayload = RefNameUtils.isTermRefname(target)
142 ? findAuthorityItemByRefName(serviceName, target)
143 : findAuthorityItemByCsid(serviceName, target);
145 String targetItemCsid = getCsid(targetItemPayload);
// Reject a self-merge for every source before fetching any source payload.
147 for (String sourceCsid : sourceCsids) {
148 if (sourceCsid.equals(targetItemCsid)) {
149 throw new DocumentException("Can't merge a record into itself.");
153 List<PoxPayloadOut> sourceItemPayloads = new ArrayList<PoxPayloadOut>();
155 for (String sourceCsid : sourceCsids) {
156 sourceItemPayloads.add(findAuthorityItemByCsid(serviceName, sourceCsid));
159 return merge(docType, targetItemPayload, sourceItemPayloads);
/**
 * Merges the given source items into the target item. The work is done in separate passes:
 * the term groups of every source are merged into a copy of the target's term group list,
 * the target item is updated once with the merged list, references to each source are
 * rewritten to the target's refname, and finally each source item is deleted.
 * The user notes of all steps are joined with newlines into the result's user note.
 *
 * @param docType            the document type, used to look up the authority service
 * @param targetItemPayload  the payload of the target item
 * @param sourceItemPayloads the payloads of the source items
 * @throws RuntimeException if the term groups of a source cannot be merged; the message names
 *                          the source and target csids
 */
162 private InvocationResults merge(String docType, PoxPayloadOut targetItemPayload, List<PoxPayloadOut> sourceItemPayloads) throws URISyntaxException, DocumentException, Exception {
164 List<String> userNotes = new ArrayList<String>();
// Work on a copy so the original list element stays available for getUpdatePayload below.
// NOTE(review): getTermGroupListElement can return null, which would fail at createCopy().
166 Element targetTermGroupListElement = getTermGroupListElement(targetItemPayload);
167 Element mergedTermGroupListElement = targetTermGroupListElement.createCopy();
169 String targetCsid = getCsid(targetItemPayload);
170 String targetRefName = getRefName(targetItemPayload);
171 String inAuthority = getFieldValue(targetItemPayload, "inAuthority");
173 logger.debug("Merging term groups");
// Pass 1: fold every source's term groups into the copy. A failure here is rethrown before the
// target update below is reached.
175 for (PoxPayloadOut sourceItemPayload : sourceItemPayloads) {
176 String sourceCsid = getCsid(sourceItemPayload);
177 Element sourceTermGroupListElement = getTermGroupListElement(sourceItemPayload);
179 logger.debug("Merging term groups from source " + sourceCsid + " into target " + targetCsid);
182 mergeTermGroupLists(mergedTermGroupListElement, sourceTermGroupListElement);
184 catch(RuntimeException e) {
185 throw new RuntimeException("Error merging source record " + sourceCsid + " into target record " + targetCsid + ": " + e.getMessage(), e);
// Pass 2: write the merged term group list back to the target item.
189 logger.debug("Updating target: docType=" + docType + " inAuthority=" + inAuthority + " targetCsid=" + targetCsid);
191 updateAuthorityItem(docType, inAuthority, targetCsid, getUpdatePayload(targetTermGroupListElement, mergedTermGroupListElement));
193 String targetDisplayName = RefNameUtils.getDisplayName(targetRefName);
195 userNotes.add("Updated the target record, " + targetDisplayName + ".");
198 String serviceName = getAuthorityServiceNameForDocType(docType);
200 logger.debug("Updating references");
// Pass 3: point records that reference each source at the target instead.
// NOTE(review): the declaration of numAffected is not visible in this extract.
202 for (PoxPayloadOut sourceItemPayload : sourceItemPayloads) {
203 String sourceCsid = getCsid(sourceItemPayload);
204 String sourceRefName = getRefName(sourceItemPayload);
206 InvocationResults results = updateReferences(serviceName, inAuthority, sourceCsid, sourceRefName, targetRefName);
208 userNotes.add(results.getUserNote());
209 numAffected += results.getNumAffected();
212 logger.debug("Deleting source items");
// Pass 4: delete the sources. Each source's own inAuthority value is used here, not the target's.
214 for (PoxPayloadOut sourceItemPayload : sourceItemPayloads) {
215 String sourceCsid = getCsid(sourceItemPayload);
216 String sourceRefName = getRefName(sourceItemPayload);
218 InvocationResults results = deleteAuthorityItem(docType, getFieldValue(sourceItemPayload, "inAuthority"), sourceCsid, sourceRefName);
220 userNotes.add(results.getUserNote());
221 numAffected += results.getNumAffected();
224 InvocationResults results = new InvocationResults();
225 results.setNumAffected(numAffected);
226 results.setUserNote(StringUtils.join(userNotes, "\n"));
/**
 * Rewrites references to the source item, in the records that reference it, so that they
 * point at the target refname. Referencing fields are fetched repeatedly, grouped per
 * referencing record, and each record is updated, until a fetch returns no items.
 *
 * @param serviceName   the authority service name, passed to findReferencingFields
 * @param inAuthority   passed to findReferencingFields
 * @param sourceCsid    the csid of the source item whose references are rewritten
 * @param sourceRefName the refname of the source item
 * @param targetRefName the refname written in place of references to the source
 * @return results whose numAffected is the total reported by the per-record updates, with a
 *         user note summarising the outcome
 */
231 private InvocationResults updateReferences(String serviceName, String inAuthority, String sourceCsid, String sourceRefName, String targetRefName) throws URISyntaxException, DocumentException, Exception {
232 logger.debug("Updating references: serviceName=" + serviceName + " inAuthority=" + inAuthority + " sourceCsid=" + sourceCsid + " sourceRefName=" + sourceRefName + " targetRefName=" + targetRefName);
234 String sourceDisplayName = RefNameUtils.getDisplayName(sourceRefName);
// NOTE(review): the declarations of pageNum, pageSize, loopCount and numUpdated, and the opening
// of the loop closed by the while condition further down, are not visible in this extract.
238 List<AuthorityRefDocList.AuthorityRefDocItem> items;
243 logger.debug("Looping with pageSize=" + pageSize);
248 // The pageNum/pageSize parameters don't work properly for refobj requests!
249 // It should be safe to repeatedly fetch page 0 for a large-ish page size,
250 // and update that page, until no references are left.
252 items = findReferencingFields(serviceName, inAuthority, sourceCsid, null, pageNum, pageSize);
// Keyed by the referencing record's document id; insertion order is preserved.
253 Map<String, ReferencingRecord> referencingRecordsByCsid = new LinkedHashMap<String, ReferencingRecord>();
255 logger.debug("Loop " + loopCount + ": " + items.size() + " items found");
257 for (AuthorityRefDocList.AuthorityRefDocItem item : items) {
258 // If a record contains a reference to the record multiple times, multiple items are returned,
259 // but only the first has a non-null workflow state. A bug?
261 String itemCsid = item.getDocId();
262 ReferencingRecord record = referencingRecordsByCsid.get(itemCsid);
// A record is only created from an item that carries a workflow state other than "deleted".
// Items for which no record exists are ignored by the null check below.
264 if (record == null) {
265 if (item.getWorkflowState() != null && !item.getWorkflowState().equals(WorkflowClient.WORKFLOWSTATE_DELETED)) {
266 record = new ReferencingRecord(item.getUri());
267 referencingRecordsByCsid.put(itemCsid, record);
271 if (record != null) {
// The source field is split on ":" into a part name (index 0) and a field name (index 1).
272 String[] sourceFieldElements = item.getSourceField().split(":");
273 String partName = sourceFieldElements[0];
274 String fieldName = sourceFieldElements[1];
// Collect the referencing field names per part, creating the per-part set on first use.
276 Map<String, Set<String>> fields = record.getFields();
277 Set<String> fieldsInPart = fields.get(partName);
279 if (fieldsInPart == null) {
280 fieldsInPart = new HashSet<String>();
281 fields.put(partName, fieldsInPart);
284 fieldsInPart.add(fieldName);
288 List<ReferencingRecord> referencingRecords = new ArrayList<ReferencingRecord>(referencingRecordsByCsid.values());
290 logger.debug("Loop " + loopCount + ": updating " + referencingRecords.size() + " records");
292 for (ReferencingRecord record : referencingRecords) {
293 InvocationResults results = updateReferencingRecord(record, sourceRefName, targetRefName);
294 numUpdated += results.getNumAffected();
// NOTE(review): the loop ends only when a fetch returns no items. If findReferencingFields keeps
// returning items that are skipped above (e.g. for soft-deleted records), this condition would
// never become false -- confirm the query excludes them.
297 while (items.size() > 0);
299 InvocationResults results = new InvocationResults();
300 results.setNumAffected(numUpdated);
302 if (numUpdated > 0) {
306 + (numUpdated == 1 ? " record " : " records ")
307 + "that referenced the source record, "
308 + sourceDisplayName + "."
311 results.setUserNote("No records referenced the source record, " + sourceDisplayName + ".");
/**
 * Rewrites, in one referencing record, the fields that refer to the source item so that they
 * hold the target refname, then submits the modified document through updateUri.
 * A field value counts as a reference to the source when its refname with the display name
 * stripped equals the stripped source refname.
 *
 * @param record      the referencing record: its uri and, per part name, the field names to inspect
 * @param fromRefName the refname of the source item
 * @param toRefName   the refname written into matching fields
 * @return the results of updateUri
 */
317 private InvocationResults updateReferencingRecord(ReferencingRecord record, String fromRefName, String toRefName) throws URISyntaxException, DocumentException {
318 String fromRefNameStem = RefNameUtils.stripAuthorityTermDisplayName(fromRefName);
319 // String toRefNameStem = RefNameUtils.stripAuthorityTermDisplayName(toRefName);
321 logger.debug("Updating references: record.uri=" + record.getUri() + " fromRefName=" + fromRefName + " toRefName=" + toRefName);
323 Map<String, Set<String>> fields = record.getFields();
// Edit a clone of the fetched document, leaving the fetched payload's document untouched.
325 PoxPayloadOut recordPayload = findByUri(record.getUri());
326 Document recordDocument = recordPayload.getDOMDocument();
327 Document newDocument = (Document) recordDocument.clone();
328 Element rootElement = newDocument.getRootElement();
330 for (Element partElement : (List<Element>) rootElement.elements()) {
331 String partName = partElement.getName();
333 if (fields.containsKey(partName)) {
334 for (String fieldName : fields.get(partName)) {
// Match the field at any depth below the part element.
335 List<Node> nodes = partElement.selectNodes("descendant::" + fieldName);
337 for (Node node : nodes) {
338 String text = node.getText();
339 String refNameStem = null;
342 refNameStem = RefNameUtils.stripAuthorityTermDisplayName(text);
// Deliberately ignored: refNameStem stays null, so the null check below skips this node.
344 catch(IllegalArgumentException e) {}
346 if (refNameStem != null && refNameStem.equals(fromRefNameStem)) {
// NOTE(review): termInfo is not used by the visible lines; only the commented-out line refers to it.
347 AuthorityTermInfo termInfo = RefNameUtils.parseAuthorityTermInfo(text);
348 // String newRefName = toRefNameStem + "'" + termInfo.displayName + "'";
349 String newRefName = toRefName;
351 node.setText(newRefName);
// NOTE(review): the lines between the statement above and the one below are not visible;
// presumably this removal is the else branch for parts that have no referencing fields -- confirm.
357 rootElement.remove(partElement);
361 String payload = newDocument.asXML();
363 return updateUri(record.getUri(), payload);
/**
 * Sends an update payload to the resource addressed by a uri. The uri is split on "/":
 * three segments are treated as service name and csid, five segments as service name,
 * vocabulary csid, the literal "items", and item csid.
 *
 * @param uri     the uri of the record to update; indices 1 and up of the split are used, so the
 *                uri presumably starts with "/" (index 0 is never read)
 * @param payload the XML payload passed to the resource
 * @throws IllegalArgumentException with the message "Invalid uri ..." (the exact branch that
 *                                  throws is not fully visible in this extract)
 */
366 private InvocationResults updateUri(String uri, String payload) throws URISyntaxException {
367 String[] uriParts = uri.split("/");
// Three segments: update through the resource registered under the service name.
369 if (uriParts.length == 3) {
370 String serviceName = uriParts[1];
371 String csid = uriParts[2];
373 NuxeoBasedResource resource = (NuxeoBasedResource) getResourceMap().get(serviceName);
375 resource.update(getResourceMap(), createUriInfo(), csid, payload);
// Five segments: an authority item, updated through the authority resource.
377 else if (uriParts.length == 5) {
378 String serviceName = uriParts[1];
379 String vocabularyCsid = uriParts[2];
380 String items = uriParts[3];
381 String csid = uriParts[4];
383 if (items.equals("items")) {
384 AuthorityResource<?, ?> resource = (AuthorityResource<?, ?>) getResourceMap().get(serviceName);
386 resource.updateAuthorityItem(getResourceMap(), createUriInfo(), vocabularyCsid, csid, payload);
390 throw new IllegalArgumentException("Invalid uri " + uri);
393 logger.debug("Updated referencing record " + uri);
// The result always counts exactly one affected record.
395 InvocationResults results = new InvocationResults();
396 results.setNumAffected(1);
397 results.setUserNote("Updated referencing record " + uri);
/**
 * Updates an authority item through the authority resource registered for the doc type's
 * service name, using this job's service context.
 *
 * @param docType     used to look up the authority service name
 * @param inAuthority passed to the resource together with the item csid
 * @param csid        the csid of the item to update
 * @param payload     the XML update payload
 */
402 private void updateAuthorityItem(String docType, String inAuthority, String csid, String payload) throws URISyntaxException {
403 String serviceName = getAuthorityServiceNameForDocType(docType);
404 AuthorityResource<?, ?> resource = (AuthorityResource<?, ?>) getResourceMap().get(serviceName);
406 resource.updateAuthorityItem(getServiceContext(), getResourceMap(), createUriInfo(), inAuthority, csid, payload);
/**
 * Soft-deletes an authority item by applying the "delete" workflow transition, after deleting
 * its "hasBroader" relations. An item that has narrower items gets a user note saying it was
 * not deleted.
 *
 * @param docType     used to look up the authority service name
 * @param inAuthority passed to the resource together with the item csid
 * @param csid        the csid of the item to delete
 * @param refName     the item's refname; only its display name is used, in user notes
 * @return results carrying the user notes joined with newlines
 */
409 private InvocationResults deleteAuthorityItem(String docType, String inAuthority, String csid, String refName) throws URISyntaxException, Exception {
411 List<String> userNotes = new ArrayList<String>();
412 String displayName = RefNameUtils.getDisplayName(refName);
414 // If the item is the broader context of any items, warn and do nothing.
416 List<String> narrowerItemCsids = findNarrower(csid);
418 if (narrowerItemCsids.size() > 0) {
419 logger.debug("Item " + csid + " has narrower items -- not deleting");
421 userNotes.add("The source record, " + displayName + ", was not deleted because it has narrower items in its hierarchy.");
// NOTE(review): the branch structure after the statement above is not visible in this extract;
// per the comment above, the remaining steps presumably run only when there are no narrower items.
424 // If the item has a broader context, delete the relation.
// Copy the hasBroader relations found for this csid into a local list before deleting any of them.
426 List<RelationsCommonList.RelationListItem> relationItems = new ArrayList<RelationsCommonList.RelationListItem>();
428 for (RelationsCommonList.RelationListItem item : findRelated(csid, null, "hasBroader", null, null)) {
429 relationItems.add(item);
432 if (relationItems.size() > 0) {
433 RelationResource relationResource = (RelationResource) getResourceMap().get(RelationClient.SERVICE_NAME);
435 for (RelationsCommonList.RelationListItem item : relationItems) {
436 String relationCsid = item.getCsid();
// The subject and object display names are only used for the user note.
438 String subjectRefName = item.getSubject().getRefName();
439 String subjectDisplayName = RefNameUtils.getDisplayName(subjectRefName);
441 String objectRefName = item.getObject().getRefName();
442 String objectDisplayName = RefNameUtils.getDisplayName(objectRefName);
444 logger.debug("Deleting hasBroader relation " + relationCsid);
446 relationResource.deleteWithParentCtx(getServiceContext(), relationCsid);
448 userNotes.add("Deleted the \"has broader\" relation from " + subjectDisplayName + " to " + objectDisplayName + ".");
453 String serviceName = getAuthorityServiceNameForDocType(docType);
454 AuthorityResource<?, ?> resource = (AuthorityResource<?, ?>) getResourceMap().get(serviceName);
456 logger.debug("Soft deleting: docType=" + docType + " inAuthority=" + inAuthority + " csid=" + csid);
// The deletion is a workflow transition, not a removal of the record.
458 resource.updateItemWorkflowWithTransition(getServiceContext(), createUriInfo(), inAuthority, csid, "delete");
460 userNotes.add("Deleted the source record, " + displayName + ".");
// NOTE(review): the declaration and any updates of numAffected are not visible in this extract.
464 InvocationResults results = new InvocationResults();
465 results.setNumAffected(numAffected);
466 results.setUserNote(StringUtils.join(userNotes, "\n"));
472 * Returns a map of the term groups in a term group list, keyed by display name.
473 * If multiple groups have the same display name, an exception is thrown.
474 * @return The term groups.
476 private Map<String, Element> getTermGroups(Element termGroupListElement) {
// A LinkedHashMap keeps the groups in the order in which the child elements are iterated.
477 Map<String, Element> termGroups = new LinkedHashMap<String, Element>();
478 Iterator<Element> childIterator = termGroupListElement.elementIterator();
480 while (childIterator.hasNext()) {
481 Element termGroupElement = childIterator.next();
// getDisplayName yields "" for a group without a termDisplayName node, so two such groups
// also count as duplicates of each other.
482 String displayName = getDisplayName(termGroupElement);
484 if (termGroups.containsKey(displayName)) {
485 // Two term groups in the same item have identical display names.
487 throw new RuntimeException("multiple terms have display name \"" + displayName + "\"");
490 termGroups.put(displayName, termGroupElement);
/**
 * Reads the display name of a term group: the text of the node selected by
 * "termDisplayName", or the empty string when no such node is found.
 *
 * @param termGroupElement the term group element to read
 */
497 private String getDisplayName(Element termGroupElement) {
498 Node displayNameNode = termGroupElement.selectSingleNode("termDisplayName");
499 String displayName = (displayNameNode == null) ? "" : displayNameNode.getText();
/**
 * Locates the term group list element of an item payload by finding the common part first
 * and then searching that part.
 *
 * @param itemPayload the payload of an authority item
 * @return the term group list element, or null when the payload has no common part or the
 *         search within the common part yields nothing
 */
504 private Element getTermGroupListElement(PoxPayloadOut itemPayload) {
505 Element termGroupListElement = null;
506 Element commonPartElement = findCommonPartElement(itemPayload);
508 if (commonPartElement != null) {
509 termGroupListElement = findTermGroupListElement(commonPartElement);
512 return termGroupListElement;
/**
 * Searches the parts of a payload for one whose element name ends with "_common".
 *
 * @param itemPayload the payload whose parts are searched
 * @return a matching part element, or null when no part matches
 */
515 private Element findCommonPartElement(PoxPayloadOut itemPayload) {
516 Element commonPartElement = null;
518 for (PayloadOutputPart candidatePart : itemPayload.getParts()) {
519 Element candidatePartElement = candidatePart.asElement();
521 if (candidatePartElement.getName().endsWith("_common")) {
522 commonPartElement = candidatePartElement;
// NOTE(review): the lines after the assignment are not visible in this extract, so it is unclear
// whether the first or the last matching part wins -- confirm.
527 return commonPartElement;
/**
 * Searches the direct child elements of the given element for one whose name ends with
 * "TermGroupList".
 *
 * @param contextElement the element whose children are searched
 * @return a matching child element, or null when no child matches
 */
530 private Element findTermGroupListElement(Element contextElement) {
531 Element termGroupListElement = null;
532 Iterator<Element> childIterator = contextElement.elementIterator();
534 while (childIterator.hasNext()) {
535 Element candidateElement = childIterator.next();
537 if (candidateElement.getName().endsWith("TermGroupList")) {
538 termGroupListElement = candidateElement;
// NOTE(review): the lines after the assignment are not visible in this extract, so it is unclear
// whether the first or the last matching child wins -- confirm.
543 return termGroupListElement;
/**
 * Merges the term groups of a source list into a target list, modifying the target list
 * element in place. Groups are matched by display name: a source group whose display name
 * exists in the target is merged field by field into that target group, and every remaining
 * source group is appended to the target as a copy.
 *
 * @param targetTermGroupListElement the list element that receives the merged result
 * @param sourceTermGroupListElement the list element whose groups are merged in
 * @throws RuntimeException if the source groups cannot be indexed by display name, or if two
 *                          matching groups cannot be merged
 */
546 private void mergeTermGroupLists(Element targetTermGroupListElement, Element sourceTermGroupListElement) {
547 Map<String, Element> sourceTermGroups;
550 sourceTermGroups = getTermGroups(sourceTermGroupListElement);
552 catch(RuntimeException e) {
553 throw new RuntimeException("a problem was found in the source record: " + e.getMessage(), e);
// First handle the groups present on both sides, walking the target's groups.
556 for (Element targetTermGroupElement : (List<Element>) targetTermGroupListElement.elements()) {
557 String displayName = getDisplayName(targetTermGroupElement);
559 if (sourceTermGroups.containsKey(displayName)) {
560 logger.debug("Merging in existing term \"" + displayName + "\"");
563 mergeTermGroups(targetTermGroupElement, sourceTermGroups.get(displayName));
565 catch(RuntimeException e) {
566 throw new RuntimeException("could not merge term groups with display name \"" + displayName + "\": " + e.getMessage(), e);
// Remove the merged group so that only unmatched source groups remain for the loop below.
569 sourceTermGroups.remove(displayName);
// Whatever is left in the map exists only in the source; append copies to the target list.
573 for (Element sourceTermGroupElement : sourceTermGroups.values()) {
574 logger.debug("Adding new term \"" + getDisplayName(sourceTermGroupElement) + "\"");
576 targetTermGroupListElement.add(sourceTermGroupElement.createCopy());
/**
 * Merges the fields of a source term group into a target term group, modifying the target
 * in place. Only source fields with non-empty text are considered: a field missing from the
 * target is added as a copy, and a field whose target text differs from the source replaces
 * the target field unless the target text is non-empty, which is reported as a conflict.
 *
 * @param targetTermGroupElement the term group that receives the field values
 * @param sourceTermGroupElement the term group whose field values are merged in
 * @throws RuntimeException on a merge conflict, i.e. when both values are non-empty and differ
 */
580 private void mergeTermGroups(Element targetTermGroupElement, Element sourceTermGroupElement) {
581 // This function assumes there are no nested repeating groups.
583 for (Element sourceChildElement : (List<Element>) sourceTermGroupElement.elements()) {
584 String sourceValue = sourceChildElement.getText();
// NOTE(review): the handling of a null source value is not visible in this extract.
586 if (sourceValue == null) {
590 if (sourceValue.length() > 0) {
// Look up the target field by the source field's element name.
591 String name = sourceChildElement.getName();
592 Element targetChildElement = targetTermGroupElement.element(name);
594 if (targetChildElement == null) {
595 targetTermGroupElement.add(sourceChildElement.createCopy());
598 String targetValue = targetChildElement.getText();
// NOTE(review): the handling of a null target value is not visible in this extract.
600 if (targetValue == null) {
604 if (!targetValue.equals(sourceValue)) {
605 if (targetValue.length() > 0) {
606 throw new RuntimeException("merge conflict in field " + name + ": source value \"" + sourceValue + "\" differs from target value \"" + targetValue +"\"");
// Reached only when the target text is empty (a non-empty one threw above): swap in a copy of
// the source field.
609 targetTermGroupElement.remove(targetChildElement);
610 targetTermGroupElement.add(sourceChildElement.createCopy());
/**
 * Builds the XML update payload for a changed term group list: a new document that mirrors
 * the chain of elements from the root down to the parent of the original term group list
 * (each copied through copyElement), with the updated term group list added at the bottom.
 *
 * @param originalTermGroupListElement the term group list as found in the original document;
 *                                     used only to walk up to the root
 * @param updatedTermGroupListElement  the term group list to place into the payload; it is
 *                                     added itself, not as a copy
 */
617 private String getUpdatePayload(Element originalTermGroupListElement, Element updatedTermGroupListElement) {
618 List<Element> parents = new ArrayList<Element>();
// Walk from the original list element upwards until there is no parent.
// NOTE(review): the loop body is not visible in this extract; presumably it collects each
// element into parents -- confirm.
620 for (Element e = originalTermGroupListElement; e != null; e = e.getParent()) {
// After reversing, the list runs from the root down to the original term group list element.
624 Collections.reverse(parents);
626 // Remove the original termGroupList element
627 parents.remove(parents.size() - 1);
// Take the root off the front; it becomes the root of the new document.
630 Element rootElement = parents.remove(0);
632 // Copy the root to a new document
633 Document document = DocumentHelper.createDocument(copyElement(rootElement));
634 Element current = document.getRootElement();
636 // Copy the remaining parents
637 for (Element parent : parents) {
638 Element parentCopy = copyElement(parent);
// Nest each copy inside the previous one so the chain is rebuilt top-down.
640 current.add(parentCopy);
641 current = parentCopy;
644 // Add the updated termGroupList element
646 current.add(updatedTermGroupListElement);
648 String payload = document.asXML();
/**
 * Creates a new element with the same qualified name and attributes as the given element.
 * The visible lines do not copy any child content.
 *
 * @param element the element to copy
 */
653 private Element copyElement(Element element) {
654 Element copy = DocumentHelper.createElement(element.getQName());
655 copy.appendAttributes(element);
660 private class ReferencingRecord {
662 private Map<String, Set<String>> fields;
664 public ReferencingRecord(String uri) {
666 this.fields = new HashMap<String, Set<String>>();
669 public String getUri() {
673 public void setUri(String uri) {
677 public Map<String, Set<String>> getFields() {