]> git.aero2k.de Git - tmp/jakarta-migration.git/commitdiff
DRYD-1249: UCB Structured Date Contrib (#380)
authorMichael Ritter <mikejritter@users.noreply.github.com>
Fri, 15 Dec 2023 02:58:57 +0000 (21:58 -0500)
committerGitHub <noreply@github.com>
Fri, 15 Dec 2023 02:58:57 +0000 (21:58 -0500)
Co-authored-by: Richard Millet <richard.millet@berkeley.edu>
services/structureddate/structureddate/src/main/antlr4/org/collectionspace/services/structureddate/antlr/StructuredDate.g4
services/structureddate/structureddate/src/main/java/org/collectionspace/services/structureddate/antlr/ANTLRStructuredDateEvaluator.java
services/structureddate/structureddate/src/test/java/org/collectionspace/services/structureddate/StructuredDateEvaluatorTest.java
services/structureddate/structureddate/src/test/resources/test-dates.yaml

index a4781ab44f2d70d3ecfec5a283430c081353b276..b5082c149074b9ed48ac216d052c22d9b2286639 100644 (file)
@@ -63,7 +63,7 @@ date:                  numDate
 |                      dayFirstDate
 |                      dayOrYearFirstDate
 |                      invStrDateEraLastDate
-|                      romanDate
+|                      romanStringDate
 ;
 
 month:                 monthYear
@@ -97,8 +97,7 @@ century:               ( strCentury | numCentury ) era? ;
 
 millennium:            nth MILLENNIUM era? ;
 
-partialEraRange:      num strMonth num era (DASH|HYPHEN) num strMonth num ;
-romanDate:             num (HYPHEN | SLASH) romanMonth (HYPHEN | SLASH) numYear era? ;
+partialEraRange:       num strMonth num era (DASH|HYPHEN) num strMonth num ;
 strDate:               strMonth ( numDayOfMonth | nth ) COMMA? numYear era?;
 invStrDate:            era num COMMA? strMonth num
 |                      era? num COMMA strMonth num ;
@@ -136,7 +135,9 @@ numMonth:              NUMBER ;
 numDayOfMonth:         NUMBER ;
 num:                   NUMBER ;
 unknownDate:           UNKNOWN ;
-romanMonth:            ROMANMONTH ; 
+romanMonth:            ROMANMONTH ;
+romanStringDate:  num ( DOT | DASH | HYPHEN ) ( MONTH | SHORTMONTH | ROMANMONTH ) ( DOT | DASH | HYPHEN ) num era?
+|                 ( MONTH | SHORTMONTH | ROMANMONTH ) ( DOT | DASH | HYPHEN ) num ( DOT | DASH | HYPHEN ) num era? ;
 
 /*
  * Lexer rules
index 6328de05e5d5dd2005a12b0ce13cdafdff51778f..efadc12eaf15410fff86848d3b03a2babe784436 100644 (file)
@@ -1,7 +1,5 @@
 package org.collectionspace.services.structureddate.antlr;
 
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 import java.util.Stack;
 
 import org.antlr.v4.runtime.ANTLRInputStream;
@@ -35,9 +33,9 @@ import org.collectionspace.services.structureddate.DeferredQuarterCenturyEndDate
 import org.collectionspace.services.structureddate.DeferredQuarterCenturyStartDate;
 import org.collectionspace.services.structureddate.Era;
 import org.collectionspace.services.structureddate.Part;
-import org.collectionspace.services.structureddate.StructuredDateInternal;
 import org.collectionspace.services.structureddate.StructuredDateEvaluator;
 import org.collectionspace.services.structureddate.StructuredDateFormatException;
+import org.collectionspace.services.structureddate.StructuredDateInternal;
 import org.collectionspace.services.structureddate.antlr.StructuredDateParser.AllOrPartOfContext;
 import org.collectionspace.services.structureddate.antlr.StructuredDateParser.BeforeOrAfterDateContext;
 import org.collectionspace.services.structureddate.antlr.StructuredDateParser.CenturyContext;
@@ -78,11 +76,11 @@ import org.collectionspace.services.structureddate.antlr.StructuredDateParser.Pa
 import org.collectionspace.services.structureddate.antlr.StructuredDateParser.PartialDecadeContext;
 import org.collectionspace.services.structureddate.antlr.StructuredDateParser.PartialEraRangeContext;
 import org.collectionspace.services.structureddate.antlr.StructuredDateParser.PartialYearContext;
-import org.collectionspace.services.structureddate.antlr.StructuredDateParser.RomanDateContext;
 import org.collectionspace.services.structureddate.antlr.StructuredDateParser.QuarterCenturyContext;
 import org.collectionspace.services.structureddate.antlr.StructuredDateParser.QuarterInYearRangeContext;
 import org.collectionspace.services.structureddate.antlr.StructuredDateParser.QuarterYearContext;
 import org.collectionspace.services.structureddate.antlr.StructuredDateParser.RomanMonthContext;
+import org.collectionspace.services.structureddate.antlr.StructuredDateParser.RomanStringDateContext;
 import org.collectionspace.services.structureddate.antlr.StructuredDateParser.SeasonYearContext;
 import org.collectionspace.services.structureddate.antlr.StructuredDateParser.StrCenturyContext;
 import org.collectionspace.services.structureddate.antlr.StructuredDateParser.StrDateContext;
@@ -183,7 +181,7 @@ public class ANTLRStructuredDateEvaluator extends StructuredDateBaseListener imp
                Date latestDate = (Date) stack.pop();
                Date earliestDate = (Date) stack.pop();
 
-               if (earliestDate.getYear() != null || earliestDate.getYear() != null) {
+               if (earliestDate.getYear() != null) {
                        int compareResult = DateUtils.compareDates(earliestDate, latestDate);
                        if (compareResult == 1) {
                                Date temp;
@@ -589,10 +587,10 @@ public class ANTLRStructuredDateEvaluator extends StructuredDateBaseListener imp
                stack.push(dayOfMonth);
 
                if (dayOfMonth > 31 || dayOfMonth <= 0) {
-                       throw new StructuredDateFormatException("unexpected day of month '" + Integer.toString(dayOfMonth) + "'");
+                       throw new StructuredDateFormatException("unexpected day of month '" + dayOfMonth + "'");
                }
                if (year == 0) {
-                       throw new StructuredDateFormatException("unexpected year '" + Integer.toString(year) + "'");
+                       throw new StructuredDateFormatException("unexpected year '" + year + "'");
                }
        }
 
@@ -1248,20 +1246,41 @@ public class ANTLRStructuredDateEvaluator extends StructuredDateBaseListener imp
        }
 
        @Override
-       public void exitRomanMonth(RomanMonthContext ctx) {
-               int num = DateUtils.romanToDecimal(ctx.ROMANMONTH().getText());
+       public void exitRomanStringDate(RomanStringDateContext ctx) {
+               if (ctx.exception != null) {
+                       return;
+               }
 
-               stack.push(num);
-       }
+               // The month token may be a MONTH, SHORTMONTH, or ROMANMONTH; determine which one matched in order to resolve it.
+               // First, handle the case where it is a full MONTH name (neither SHORTMONTH nor ROMANMONTH is present)
+               Integer month = (ctx.MONTH() != null && ctx.SHORTMONTH() == null && ctx.ROMANMONTH() == null) ?
+                                               (Integer) DateUtils.getMonthByName(ctx.MONTH().getText()) :
+                                               null;
 
-       @Override
-       public void exitRomanDate(RomanDateContext ctx) {
-               if (ctx.exception != null) return;
+               // it was not a MONTH, so it will either be a SHORTMONTH or ROMANMONTH
+               if (month == null) {
+                       month = ctx.ROMANMONTH() == null ?
+                                       (Integer) DateUtils.getMonthByName(ctx.SHORTMONTH().getText()) :
+                                       (Integer) DateUtils.romanToDecimal(ctx.ROMANMONTH().getText());
+               }
 
+               // Expect the canonical year-month-day-era ordering
                Era era = (ctx.era() == null) ? null : (Era) stack.pop();
-               Integer year = (Integer) stack.pop();
-               Integer month = (Integer) stack.pop();
-               Integer day = (Integer) stack.pop();
+
+               Integer num1 = (Integer) stack.pop();
+               Integer num2 = (Integer) stack.pop();
+
+               Integer year = num1;
+               Integer day = num2;
+
+
+               if (DateUtils.isValidDate(num1, month, num2, era)) {
+                       // it is a valid year-month-day-era, proceed
+               } else if (DateUtils.isValidDate(num2, month, num1, era)) {
+                       // otherwise, check whether it's day-month-year-era
+                       year = num2;
+                       day = num1;
+               }
 
                stack.push(year);
                stack.push(month);
@@ -1269,6 +1288,12 @@ public class ANTLRStructuredDateEvaluator extends StructuredDateBaseListener imp
                stack.push(era);
        }
 
+       @Override
+       public void exitRomanMonth(RomanMonthContext ctx) {
+               int num = DateUtils.romanToDecimal(ctx.ROMANMONTH().getText());
+               stack.push(num);
+       }
+
        @Override
        public void exitUnknownDate(UnknownDateContext ctx) {
                if (ctx.exception != null) return;
index 1469d9124634b75d81f62caa6f36f199c235d978..9a14b1f3095fa048b65efd87477577a90fc59e44 100644 (file)
@@ -47,6 +47,7 @@ public class StructuredDateEvaluatorTest {
                        }
 
                        Assert.assertEquals(actualStructuredDate, expectedStructuredDate);
+                       logger.debug("{} was successfully parsed.", displayDate);
                }
        }
 
index 6b58ea5608f7a952844f3d18192df9466a2c95c5..4e11dd38c021b165e36750c29cda7a5c568a2056 100644 (file)
   ' ([1997 }]] )':                       # enclosing mixed brackets
                                          earliestSingleDate: [1997,  1,  1, CE]
                                          latestDate:         [1997, 12, 31, CE]
+  '2021.sep.03':
+                                         earliestSingleDate: [2021,  9, 3, CE]
 
+  '2021-oct-4':
+                                         earliestSingleDate: [2021,  10, 4, CE]
+
+  '2021-november-5':
+                                         earliestSingleDate: [2021,  11, 5, CE]
+
+  '2021.december.6':
+                                         earliestSingleDate: [2021,  12, 6, CE]
+
+  '1908-IX-01':
+                                         earliestSingleDate: [1908,  9, 1, CE]
+
+  'IX-01-1908':
+                                         earliestSingleDate: [1908,  9, 1, CE]
+
+  '01-IX-1908':
+                                         earliestSingleDate: [1908,  9, 1, CE]
+
+  '1908.IX.01':
+                                         earliestSingleDate: [1908,  9, 1, CE]
+
+  'IX.01.1908':
+                                         earliestSingleDate: [1908,  9, 1, CE]
+
+  '01.IX.1908':
+                                         earliestSingleDate: [1908,  9, 1, CE]
 
 # -------------------------------------------------------------------------------------------------------
 # Invalid dates