From 09240b54c2b26cc12ca3b3243dd10b6cd1804ae4 Mon Sep 17 00:00:00 2001 From: Michael Ritter Date: Thu, 14 Dec 2023 21:58:57 -0500 Subject: [PATCH] DRYD-1249: UCB Structured Date Contrib (#380) Co-authored-by: Richard Millet --- .../structureddate/antlr/StructuredDate.g4 | 9 +-- .../antlr/ANTLRStructuredDateEvaluator.java | 59 +++++++++++++------ .../StructuredDateEvaluatorTest.java | 1 + .../src/test/resources/test-dates.yaml | 28 +++++++++ 4 files changed, 76 insertions(+), 21 deletions(-) diff --git a/services/structureddate/structureddate/src/main/antlr4/org/collectionspace/services/structureddate/antlr/StructuredDate.g4 b/services/structureddate/structureddate/src/main/antlr4/org/collectionspace/services/structureddate/antlr/StructuredDate.g4 index a4781ab44..b5082c149 100644 --- a/services/structureddate/structureddate/src/main/antlr4/org/collectionspace/services/structureddate/antlr/StructuredDate.g4 +++ b/services/structureddate/structureddate/src/main/antlr4/org/collectionspace/services/structureddate/antlr/StructuredDate.g4 @@ -63,7 +63,7 @@ date: numDate | dayFirstDate | dayOrYearFirstDate | invStrDateEraLastDate -| romanDate +| romanStringDate ; month: monthYear @@ -97,8 +97,7 @@ century: ( strCentury | numCentury ) era? ; millennium: nth MILLENNIUM era? ; -partialEraRange: num strMonth num era (DASH|HYPHEN) num strMonth num ; -romanDate: num (HYPHEN | SLASH) romanMonth (HYPHEN | SLASH) numYear era? ; +partialEraRange: num strMonth num era (DASH|HYPHEN) num strMonth num ; strDate: strMonth ( numDayOfMonth | nth ) COMMA? numYear era?; invStrDate: era num COMMA? strMonth num | era? num COMMA strMonth num ; @@ -136,7 +135,9 @@ numMonth: NUMBER ; numDayOfMonth: NUMBER ; num: NUMBER ; unknownDate: UNKNOWN ; -romanMonth: ROMANMONTH ; +romanMonth: ROMANMONTH ; +romanStringDate: num ( DOT | DASH | HYPHEN ) ( MONTH | SHORTMONTH | ROMANMONTH ) ( DOT | DASH | HYPHEN ) num era? 
+| ( MONTH | SHORTMONTH | ROMANMONTH ) ( DOT | DASH | HYPHEN ) num ( DOT | DASH | HYPHEN ) num era? ; /* * Lexer rules diff --git a/services/structureddate/structureddate/src/main/java/org/collectionspace/services/structureddate/antlr/ANTLRStructuredDateEvaluator.java b/services/structureddate/structureddate/src/main/java/org/collectionspace/services/structureddate/antlr/ANTLRStructuredDateEvaluator.java index 6328de05e..efadc12ea 100644 --- a/services/structureddate/structureddate/src/main/java/org/collectionspace/services/structureddate/antlr/ANTLRStructuredDateEvaluator.java +++ b/services/structureddate/structureddate/src/main/java/org/collectionspace/services/structureddate/antlr/ANTLRStructuredDateEvaluator.java @@ -1,7 +1,5 @@ package org.collectionspace.services.structureddate.antlr; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.Stack; import org.antlr.v4.runtime.ANTLRInputStream; @@ -35,9 +33,9 @@ import org.collectionspace.services.structureddate.DeferredQuarterCenturyEndDate import org.collectionspace.services.structureddate.DeferredQuarterCenturyStartDate; import org.collectionspace.services.structureddate.Era; import org.collectionspace.services.structureddate.Part; -import org.collectionspace.services.structureddate.StructuredDateInternal; import org.collectionspace.services.structureddate.StructuredDateEvaluator; import org.collectionspace.services.structureddate.StructuredDateFormatException; +import org.collectionspace.services.structureddate.StructuredDateInternal; import org.collectionspace.services.structureddate.antlr.StructuredDateParser.AllOrPartOfContext; import org.collectionspace.services.structureddate.antlr.StructuredDateParser.BeforeOrAfterDateContext; import org.collectionspace.services.structureddate.antlr.StructuredDateParser.CenturyContext; @@ -78,11 +76,11 @@ import org.collectionspace.services.structureddate.antlr.StructuredDateParser.Pa import 
org.collectionspace.services.structureddate.antlr.StructuredDateParser.PartialDecadeContext; import org.collectionspace.services.structureddate.antlr.StructuredDateParser.PartialEraRangeContext; import org.collectionspace.services.structureddate.antlr.StructuredDateParser.PartialYearContext; -import org.collectionspace.services.structureddate.antlr.StructuredDateParser.RomanDateContext; import org.collectionspace.services.structureddate.antlr.StructuredDateParser.QuarterCenturyContext; import org.collectionspace.services.structureddate.antlr.StructuredDateParser.QuarterInYearRangeContext; import org.collectionspace.services.structureddate.antlr.StructuredDateParser.QuarterYearContext; import org.collectionspace.services.structureddate.antlr.StructuredDateParser.RomanMonthContext; +import org.collectionspace.services.structureddate.antlr.StructuredDateParser.RomanStringDateContext; import org.collectionspace.services.structureddate.antlr.StructuredDateParser.SeasonYearContext; import org.collectionspace.services.structureddate.antlr.StructuredDateParser.StrCenturyContext; import org.collectionspace.services.structureddate.antlr.StructuredDateParser.StrDateContext; @@ -183,7 +181,7 @@ public class ANTLRStructuredDateEvaluator extends StructuredDateBaseListener imp Date latestDate = (Date) stack.pop(); Date earliestDate = (Date) stack.pop(); - if (earliestDate.getYear() != null || earliestDate.getYear() != null) { + if (earliestDate.getYear() != null) { int compareResult = DateUtils.compareDates(earliestDate, latestDate); if (compareResult == 1) { Date temp; @@ -589,10 +587,10 @@ public class ANTLRStructuredDateEvaluator extends StructuredDateBaseListener imp stack.push(dayOfMonth); if (dayOfMonth > 31 || dayOfMonth <= 0) { - throw new StructuredDateFormatException("unexpected day of month '" + Integer.toString(dayOfMonth) + "'"); + throw new StructuredDateFormatException("unexpected day of month '" + dayOfMonth + "'"); } if (year == 0) { - throw new 
StructuredDateFormatException("unexpected year '" + Integer.toString(year) + "'"); + throw new StructuredDateFormatException("unexpected year '" + year + "'"); } } @@ -1248,20 +1246,41 @@ public class ANTLRStructuredDateEvaluator extends StructuredDateBaseListener imp } @Override - public void exitRomanMonth(RomanMonthContext ctx) { - int num = DateUtils.romanToDecimal(ctx.ROMANMONTH().getText()); + public void exitRomanStringDate(RomanStringDateContext ctx) { + if (ctx.exception != null) { + return; + } - stack.push(num); - } + // Need to find out whether the month is a MONTH, SHORTMONTH, or ROMANMONTH token here in order to disambiguate the order + // First check whether it is a MONTH + Integer month = (ctx.MONTH() != null && ctx.SHORTMONTH() == null && ctx.ROMANMONTH() == null) ? + (Integer) DateUtils.getMonthByName(ctx.MONTH().getText()) : + null; - @Override - public void exitRomanDate(RomanDateContext ctx) { - if (ctx.exception != null) return; + // it was not a MONTH, so it will either be a SHORTMONTH or ROMANMONTH + if (month == null) { + month = ctx.ROMANMONTH() == null ? + (Integer) DateUtils.getMonthByName(ctx.SHORTMONTH().getText()) : + (Integer) DateUtils.romanToDecimal(ctx.ROMANMONTH().getText()); + } + // Expect the canonical year-month-day-era ordering Era era = (ctx.era() == null) ? 
null : (Era) stack.pop(); - Integer year = (Integer) stack.pop(); - Integer month = (Integer) stack.pop(); - Integer day = (Integer) stack.pop(); + + Integer num1 = (Integer) stack.pop(); + Integer num2 = (Integer) stack.pop(); + + Integer year = num1; + Integer day = num2; + + + if (DateUtils.isValidDate(num1, month, num2, era)) { + // it is a valid year-month-day-era, proceed + } else if (DateUtils.isValidDate(num2, month, num1, era)) { + // otherwise, check whether it is day-month-year-era + year = num2; + day = num1; + } stack.push(year); stack.push(month); @@ -1269,6 +1288,12 @@ public class ANTLRStructuredDateEvaluator extends StructuredDateBaseListener imp stack.push(era); } + @Override + public void exitRomanMonth(RomanMonthContext ctx) { + int num = DateUtils.romanToDecimal(ctx.ROMANMONTH().getText()); + stack.push(num); + } + @Override public void exitUnknownDate(UnknownDateContext ctx) { if (ctx.exception != null) return; diff --git a/services/structureddate/structureddate/src/test/java/org/collectionspace/services/structureddate/StructuredDateEvaluatorTest.java b/services/structureddate/structureddate/src/test/java/org/collectionspace/services/structureddate/StructuredDateEvaluatorTest.java index 1469d9124..9a14b1f30 100644 --- a/services/structureddate/structureddate/src/test/java/org/collectionspace/services/structureddate/StructuredDateEvaluatorTest.java +++ b/services/structureddate/structureddate/src/test/java/org/collectionspace/services/structureddate/StructuredDateEvaluatorTest.java @@ -47,6 +47,7 @@ public class StructuredDateEvaluatorTest { } Assert.assertEquals(actualStructuredDate, expectedStructuredDate); + logger.debug("{} was successfully parsed.", displayDate); } } diff --git a/services/structureddate/structureddate/src/test/resources/test-dates.yaml b/services/structureddate/structureddate/src/test/resources/test-dates.yaml index 6b58ea560..4e11dd38c 100644 --- a/services/structureddate/structureddate/src/test/resources/test-dates.yaml +++ 
b/services/structureddate/structureddate/src/test/resources/test-dates.yaml @@ -1296,7 +1296,35 @@ ' ([1997 }]] )': # enclosing mixed brackets earliestSingleDate: [1997, 1, 1, CE] latestDate: [1997, 12, 31, CE] + '2021.sep.03': + earliestSingleDate: [2021, 9, 3, CE] + '2021-oct-4': + earliestSingleDate: [2021, 10, 4, CE] + + '2021-november-5': + earliestSingleDate: [2021, 11, 5, CE] + + '2021.december.6': + earliestSingleDate: [2021, 12, 6, CE] + + '1908-IX-01': + earliestSingleDate: [1908, 9, 1, CE] + + 'IX-01-1908': + earliestSingleDate: [1908, 9, 1, CE] + + '01-IX-1908': + earliestSingleDate: [1908, 9, 1, CE] + + '1908.IX.01': + earliestSingleDate: [1908, 9, 1, CE] + + 'IX.01.1908': + earliestSingleDate: [1908, 9, 1, CE] + + '01.IX.1908': + earliestSingleDate: [1908, 9, 1, CE] # ------------------------------------------------------------------------------------------------------- # Invalid dates -- 2.47.3