| certainDate
| beforeOrAfterDate
| unknownDate
+| uncalibratedDate
;
uncertainDate: CIRCA certainDate ;
beforeOrAfterDate: ( BEFORE | AFTER ) singleInterval ;
+uncalibratedDate: numYear PLUSMINUS num YEARSSTRING? BP ;
+
hyphenatedRange: singleInterval ( HYPHEN | DASH ) singleInterval
| nthCenturyRange
| monthInYearRange
/*
* Lexer rules
*/
-
WS: [ \t\r\n]+ -> skip;
+PLUSMINUS: '±' | '+/-' ;
CIRCA: ('c' | 'ca') DOT? | 'circa' ;
SPRING: 'spring' | 'spr' ;
SUMMER: 'summer' | 'sum' ;
MONTH: 'january' | 'february' | 'march' | 'april' | 'may' | 'june' | 'july' | 'august' | 'september' | 'october' | 'november' | 'december' ;
SHORTMONTH: 'jan' | 'feb' | 'mar' | 'apr' | 'jun' | 'jul' | 'aug' | 'sep' | 'sept' | 'oct' | 'nov' | 'dec' ;
BC: 'bc' | 'bce' | 'b.c.' | 'b.c.e.' ;
-AD: 'ad' | 'a.d.' | 'ce' | 'c.e.';
+AD: 'ad' | 'a.d.' | 'ce' | 'c.e.' ;
+BP: 'bp' | 'b.p.' | 'b.p' ;
NTHSTR: [0-9]*? ([0456789] 'th' | '1st' | '2nd' | '3rd' | '11th' | '12th' | '13th') ;
HUNDREDS: [0-9]*? '00' '\''? 's';
TENS: [0-9]*? '0' '\''? 's';
-NUMBER: [0-9]+ ;
+// Digits with optional embedded thousands-separator commas. The token must start and
+// end with a digit so that a comma adjacent to a number is still lexed as COMMA.
+NUMBER: [0-9] ([0-9,]* [0-9])? ;
COMMA: ',' ;
HYPHEN: '-' ;
DASH: [—–] ; /* EM DASH, EN DASH */
QUESTION: '?' ;
OTHER: . ;
UNKNOWN: 'unknown' ;
+YEARSSTRING: 'years' | 'year' ;
STRING: [a-z]+ ;
return currentDate;
}
- MutableDateTime currentDateTime = convertToDateTime(currentDate);
- MutableDateTime endDateTime = convertToDateTime(endDate);
-
- int comparisonResult = currentDateTime.compareTo(endDateTime);
+ int comparisonResult = compareDates(currentDate, endDate);
if (comparisonResult == 1 || comparisonResult == 0) {
return currentDate;
}
return null;
}
+ /**
+  * Compares two dates chronologically. Both dates are converted with
+  * convertToDateTime and the comparison is delegated to MutableDateTime's comparator.
+  *
+  * @param startDate The first date in the range; must be non-null and have a year
+  * @param endDate The last date in the range; must be non-null and have a year
+  * @return -1 if startDate is before, 0 if they are equal, 1 if startDate is after endDate
+  * @throws IllegalArgumentException if either date is null or has no year
+  */
+ public static int compareDates(Date startDate, Date endDate) {
+     // Reject null dates as well as missing years, so that callers get an
+     // IllegalArgumentException instead of a NullPointerException.
+     if (startDate == null || endDate == null
+             || startDate.getYear() == null || endDate.getYear() == null) {
+         throw new IllegalArgumentException("Must provide a start and end date to compare.");
+     }
+
+     MutableDateTime startDateTime = convertToDateTime(startDate);
+     MutableDateTime endDateTime = convertToDateTime(endDate);
+
+     return startDateTime.compareTo(endDateTime);
+ }
+
+ /**
+ * Returns a Date object based on the local date.
+ */
public static Date getCurrentDate() {
LocalDate localDate = new LocalDate();
Integer year = (Integer) localDate.getYear();
if (era == null) {
era = Date.DEFAULT_ERA;
}
+
+ if (era == Era.BCE) {
+ // Improved precision for BC dates
+ int interval = 0;
+
+ if (year % 1000 == 0) {
+ interval = 500;
+ } else if (year % 100 == 0) {
+ interval = 50;
+ } else if (year % 10 == 0) {
+ interval = 10;
+ } else if (year % 10 > 0 && year % 10 < 10) {
+ interval = 5;
+ }
+ return interval;
+ }
MutableDateTime dateTime = new MutableDateTime(chronology);
dateTime.era().set((era == Era.BCE) ? DateTimeConstants.BC : DateTimeConstants.AD);
int years = Years.yearsBetween(dateTime, circaBaseDateTime).getYears();
+
+ // return interval;
return ((int) Math.round(years * 0.05));
}
package org.collectionspace.services.structureddate.antlr;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import java.util.Stack;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.collectionspace.services.structureddate.antlr.StructuredDateParser.StrMonthContext;
import org.collectionspace.services.structureddate.antlr.StructuredDateParser.StrSeasonContext;
import org.collectionspace.services.structureddate.antlr.StructuredDateParser.StrSeasonInYearRangeContext;
+import org.collectionspace.services.structureddate.antlr.StructuredDateParser.UncalibratedDateContext;
import org.collectionspace.services.structureddate.antlr.StructuredDateParser.UncertainDateContext;
import org.collectionspace.services.structureddate.antlr.StructuredDateParser.UnknownDateContext;
import org.collectionspace.services.structureddate.antlr.StructuredDateParser.YearContext;
result = new StructuredDateInternal();
result.setDisplayDate(displayDate);
- // Instantiate a parser from the lowercased display date, so that parsing will be
- // case insensitive.
+ // Instantiate a parser from the lowercased display date, so that parsing will be case insensitive
ANTLRInputStream inputStream = new ANTLRInputStream(displayDate.toLowerCase());
StructuredDateLexer lexer = new StructuredDateLexer(inputStream);
CommonTokenStream tokenStream = new CommonTokenStream(lexer);
Date latestDate = (Date) stack.pop();
Date earliestDate = (Date) stack.pop();
+ // Only check for a reversed range when BOTH ends have a year; comparing a date
+ // without a year is not meaningful.
+ if (earliestDate.getYear() != null && latestDate.getYear() != null) {
+     int compareResult = DateUtils.compareDates(earliestDate, latestDate);
+     if (compareResult == 1) {
+         // The range was written latest-first, so swap the two ends.
+         Date temp = earliestDate;
+         earliestDate = latestDate;
+         latestDate = temp;
+
+         // Check to see if the dates were reversed AND calculated. If they were,
+         // then this probably means the absolute earliestDate should have month and day as "1"
+         // and the latestDate month 12, day 31.
+         if ((earliestDate.getMonth() == 12 && earliestDate.getDay() == 31) &&
+                 (latestDate.getMonth() == 1 && latestDate.getDay() == 1)) {
+             earliestDate.setMonth(1);
+             earliestDate.setDay(1);
+             latestDate.setMonth(12);
+             latestDate.setDay(31);
+         }
+     }
+ }
+
// If the earliest date and the latest date are the same, it's just a "single" date.
// There's no need to have the latest, so set it to null.
Date latestDate = (Date) stack.pop();
Date earliestDate = (Date) stack.pop();
+
int earliestInterval = DateUtils.getCircaIntervalYears(earliestDate.getYear(), earliestDate.getEra());
int latestInterval = DateUtils.getCircaIntervalYears(latestDate.getYear(), latestDate.getEra());
- // Express the circa interval as a qualifier.
-
- // stack.push(earliestDate.withQualifier(QualifierType.MINUS, earliestInterval, QualifierUnit.YEARS));
- // stack.push(latestDate.withQualifier(QualifierType.PLUS, latestInterval, QualifierUnit.YEARS));
+ // Express the circa interval as a qualifier.
- // OR:
+ // stack.push(earliestDate.withQualifier(QualifierType.MINUS, earliestInterval, QualifierUnit.YEARS));
+ // stack.push(latestDate.withQualifier(QualifierType.PLUS, latestInterval, QualifierUnit.YEARS));
+ // OR:
+
// Express the circa interval as an offset calculated into the year.
DateUtils.subtractYears(earliestDate, earliestInterval);
// Convert the string to a number,
// and push on the stack.
- Integer year = new Integer(ctx.NUMBER().getText());
+ Integer year = new Integer(ctx.getText().replaceAll(",", ""));
if (year == 0) {
throw new StructuredDateFormatException("unexpected year '" + ctx.NUMBER().getText() + "'");
// Convert the numeric string to an Integer,
// and push on the stack.
- Integer num = new Integer(ctx.NUMBER().getText());
+ Integer num = new Integer(ctx.getText().replaceAll(",", ""));
stack.push(num);
}
stack.push(new Date());
}
+ /**
+  * Handles an uncalibrated date of the form "year ± adjustment BP". Pops the
+  * adjustment and the main year from the stack, subtracts the resulting bounds
+  * from the current year, and pushes the earliest date (January 1) followed by
+  * the latest date (last day of December).
+  *
+  * NOTE(review): a computed year of exactly 0 is pushed as year 0 CE - confirm
+  * whether that is intended.
+  */
+ public void exitUncalibratedDate(UncalibratedDateContext ctx) {
+     if (ctx.exception != null) {
+         return;
+     }
+
+     // The adjustment sits on top of the stack, with the main year beneath it.
+     Integer margin = (Integer) stack.pop();
+     Integer yearsBeforePresent = (Integer) stack.pop();
+
+     int farthestBack = yearsBeforePresent + margin;
+     int nearestBack = yearsBeforePresent - margin;
+
+     int presentYear = DateUtils.getCurrentDate().getYear();
+
+     int earliestYear = presentYear - farthestBack;
+     int latestYear = presentYear - nearestBack;
+
+     // Non-negative years are CE; negative years are BCE.
+     Era earliestEra = (earliestYear >= 0) ? Era.CE : Era.BCE;
+     Era latestEra = (latestYear >= 0) ? Era.CE : Era.BCE;
+
+     // Earliest date: first day of the earliest year.
+     stack.push(new Date(Math.abs(earliestYear), 1, 1, earliestEra));
+
+     // Latest date: last day of December of the latest year.
+     int latestYearMagnitude = Math.abs(latestYear);
+     int lastDayOfDecember = DateUtils.getDaysInMonth(12, latestYearMagnitude, latestEra);
+     stack.push(new Date(latestYearMagnitude, 12, lastDayOfDecember, latestEra));
+ }
+
protected String getErrorMessage(RecognitionException re) {
String message = "";
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Stack;
import org.apache.commons.beanutils.PropertyUtils;
import org.slf4j.Logger;
if (structuredDateFields != null && structuredDateFields.containsKey("latestDate")) {
Object latestDate = structuredDateFields.get("latestDate");
if (latestDate instanceof String) {
+ Date currentDate = DateUtils.getCurrentDate();
+ ArrayList latestDateItems = new ArrayList<>();
if (latestDate.equals("current date")) {
- ArrayList items = new ArrayList<>();
- Date currentDate = DateUtils.getCurrentDate();
- items.add(currentDate.getYear());
- items.add(currentDate.getMonth());
- items.add(currentDate.getDay());
- items.add(currentDate.getEra() == Era.BCE ? "BCE" : "CE");
- structuredDateFields.put("latestDate", items);
+ latestDateItems.add(currentDate.getYear());
+ latestDateItems.add(currentDate.getMonth());
+ latestDateItems.add(currentDate.getDay());
+ latestDateItems.add(currentDate.getEra() == Era.BCE ? "BCE" : "CE");
+ structuredDateFields.put("latestDate", latestDateItems);
+ }
+ if (latestDate.equals("uncalibrated latest date")) {
+ Stack<ArrayList> results = calculateUncalibratedDate(displayDate, currentDate.getYear());
+ structuredDateFields.put("latestDate", results.pop());
+ structuredDateFields.put("earliestSingleDate", results.pop());
}
}
}
return structuredDate;
}
+
+ /**
+  * Calculates the expected dates for an uncalibrated date. The yaml expected dates
+  * need to be dynamic, because they are relative to the current year and so will
+  * change from year to year.
+  *
+  * @param displayDate The current test's display date, e.g. "3,100 +/- 150 years BP"
+  * @param currentYear The current year
+  *
+  * @return a stack consisting of two ArrayLists, each containing the expected dates;
+  *         the latest date is on top, with the earliest date beneath it
+  * @throws NumberFormatException if the main year or the offset is not a number
+  */
+ public Stack<ArrayList> calculateUncalibratedDate(String displayDate, Integer currentYear) {
+     Stack<ArrayList> stack = new Stack<ArrayList>();
+     ArrayList<Object> latestDate = new ArrayList<>();
+     ArrayList<Object> earliestDate = new ArrayList<>();
+
+     // Split "<main year> ± <offset> [years] BP" around the plus/minus marker.
+     String reg = "±|\\+/-";
+     String[] splitDateTokens = displayDate.split(reg);
+
+     // Trim before tokenizing so that optional whitespace after the marker does not
+     // produce an empty leading token; the offset is then always the first token.
+     String[] tokensPartTwo = splitDateTokens[1].trim().split("\\s+");
+
+     Integer mainYear = Integer.parseInt(splitDateTokens[0].replaceAll("\\s|,", ""));
+     Integer offset = Integer.parseInt(tokensPartTwo[0].replaceAll(",", ""));
+
+     Integer earliestYear = currentYear - (mainYear + offset);
+     Integer latestYear = currentYear - (mainYear - offset);
+
+     // Negative years are BCE; the year itself is stored as a magnitude.
+     String earliestEra = earliestYear < 0 ? "BCE" : "CE";
+     String latestEra = latestYear < 0 ? "BCE" : "CE";
+
+     earliestYear = Math.abs(earliestYear);
+     latestYear = Math.abs(latestYear);
+
+     latestDate.add(latestYear);
+     latestDate.add(12);
+     latestDate.add(DateUtils.getDaysInMonth(12, latestYear, null));
+     latestDate.add(latestEra);
+
+     earliestDate.add(earliestYear);
+     earliestDate.add(1);
+     earliestDate.add(1);
+     earliestDate.add(earliestEra);
+
+     // Push order matters: callers pop the latest date first, then the earliest.
+     stack.push(earliestDate);
+     stack.push(latestDate);
+
+     return stack;
+ }
+
private Date createDateFromYamlSpec(List<Object> dateFields) {
Date date = new Date();
Iterator<Object> fieldIterator = dateFields.iterator();
latestDate: [2013, 4, 5, CE]
'5/3/1962-4/5/2013 BC': # hyphenatedRange, date
- earliestSingleDate: [1962, 5, 3, BCE]
- latestDate: [2013, 4, 5, BCE]
+ earliestSingleDate: [2013, 4, 5, BCE]
+ latestDate: [1962, 5, 3, BCE]
'5/3/1962 BC-4/5/2013': # hyphenatedRange, date
earliestSingleDate: [1962, 5, 3, BCE]
# latestDate: [ 10, 12, 31, BCE, null, PLUS, 106, YEARS]
'Circa 10 BC': # uncertainDate, year - calculating the uncertainty into the year field
- earliestSingleDate: [ 115, 1, 1, BCE]
- latestDate: [ 96, 12, 31, CE]
+ earliestSingleDate: [ 20, 1, 1, BCE]
+ latestDate: [ 1, 12, 31, CE]
# 'Circa 10': # uncertainDate, year - using qualifier/value/unit fields
# earliestSingleDate: [ 10, 1, 1, CE, null, MINUS, 105, YEARS]
"13th april, 1995": # oneDisplayDate - singleInterval - dayFirstDate - Day (ordinal) Month Year
earliestSingleDate: [1995, 4, 13, CE]
- "13th april, 1995 - 5th may 1999": # oneDisplayDate - hyphenatedRange - dayFirstDate - Day (ordinal) Month Year
+ "13th april, 1995 - 5th may 1999": # oneDisplayDate - hyphenatedRange - dayFirstDate - Day (ordinal) Month Year
earliestSingleDate: [1995, 4, 13, CE]
latestDate: [1999, 5, 5, CE]
- "13 april 15": # oneDisplayDate - ambigous day and year - should be Year month day
+ "13 april 15": # oneDisplayDate - ambiguous day and year - should be Year month day
earliestSingleDate: [13, 4, 15, CE]
"before 13 april 1995": # beforeAfterDate - Empty earliestSingleDate - Day Month Year Format
latestDate: [2017, 6, 10, CE]
- "after 13 april 1995": # beforeAfterDate - Empty latestDate calculated as current date - Day Month Year Format
+ "after 13 april 1995": # beforeAfterDate - Empty latestDate calculated as current date - Day Month Year Format
earliestSingleDate: [1995, 4, 13, CE]
latestDate: "current date"
- "after april 13 1995": # beforeAfterDate - Empty latestDate calculated as current date - Month Day Year Format
+ "after april 13 1995": # beforeAfterDate - Empty latestDate calculated as current date - Month Day Year Format
earliestSingleDate: [1995, 4, 13, CE]
latestDate: "current date"
- "10/2005-12/2006": # Month/Year - Month/Year date
+ "10/2005-12/2006": # Month/Year - Month/Year date
earliestSingleDate: [2005, 10, 1, CE]
latestDate: [2006, 12, 31, CE]
- "04/1995-04/2018": # Month/Year - Month/Year date
+ "04/1995-04/2018": # Month/Year - Month/Year date
earliestSingleDate: [1995, 4, 1, CE]
latestDate: [2018, 4, 30, CE]
- "unknown": # Unknown date: Should result in empty fields
+ "unknown": # Unknown date: Should result in empty fields
earliestSingleDate: []
- "13 april 15": # oneDisplayDate - ambiguous day and year, intepreted as year month day
+ "13 april 15": # oneDisplayDate - ambiguous day and year, interpreted as year month day
earliestSingleDate: [13, 4, 15, CE]
- "04/5-6/2018": # Month/Day - Day/Year date
+ "04/5-6/2018": # Month/Day - Day/Year date
earliestSingleDate: [2018, 4, 5, CE]
latestDate: [2018, 4, 6, CE]
- "04/03-07/09": # Ambigious NumDayInMonthRange - should be interpreted as Month/Day - Day/Year date
+ "04/03-07/09": # Ambiguous NumDayInMonthRange - should be interpreted as Month/Day - Day/Year date
earliestSingleDate: [9, 4, 3, CE]
latestDate: [9, 4, 7, CE]
'04/1996-07/09': # Semi-ambigious NumDayInMonthRange - should be interpreted as Month/Year - Month/Year date
- earliestSingleDate: [1996, 4, 1, CE]
- latestDate: [9, 7, 31, CE]
+ earliestSingleDate: [9, 7, 31, CE]
+ latestDate: [1996, 4, 1, CE]
+
+ "1200±50 BP": # Uncalibrated date with ± symbol, with CE
+ earliestSingleDate: "uncalibrated earliest date"
+ latestDate: "uncalibrated latest date"
+
+ "3100 +/- 150 BP": # Uncalibrated date with +/- instead of ± symbol
+ earliestSingleDate: "uncalibrated earliest date"
+ latestDate: "uncalibrated latest date"
+
+ "3100+/-150 BP": # Uncalibrated date with +/- instead of ± symbol, no spaces
+ earliestSingleDate: "uncalibrated earliest date"
+ latestDate: "uncalibrated latest date"
+
+ "3100+/-150 years BP": # Uncalibrated date with 'years' in it
+ earliestSingleDate: "uncalibrated earliest date"
+ latestDate: "uncalibrated latest date"
+
+ "3,100+/-150 years BP": # Uncalibrated date with 'years' in it as well as with a comma
+ earliestSingleDate: "uncalibrated earliest date"
+ latestDate: "uncalibrated latest date"
+
+ "2000±100 BP": # Uncalibrated date with BCE and AD mix
+ earliestSingleDate: "uncalibrated earliest date"
+ latestDate: "uncalibrated latest date"
+
+ "5580-5460 BC": # Calibrated date without commas
+ earliestSingleDate: [5580, 1, 1, BCE]
+ latestDate: [5460, 12, 31, BCE]
+
+ "5,580 - 5,460 BC": # Calibrated date with commas and spaces
+ earliestSingleDate: [5580, 1, 1, BCE]
+ latestDate: [5460, 12, 31, BCE]
+
+ "5460-5580 BC": # Calibrated date with dates reversed
+ earliestSingleDate: [5580, 1, 1, BCE]
+ latestDate: [5460, 12, 31, BCE]
+
+ "c. 69 BC": # Circa date, ± 5 years
+ earliestSingleDate: [74, 1, 1, BCE]
+ latestDate: [64, 12, 31, BCE]
+
+ "ca. 60 BC": # Circa date, ± 10 years
+ earliestSingleDate: [70, 1, 1, BCE]
+ latestDate: [50, 12, 31, BCE]
+
+ "circa 200 BC": # Circa date, ± 50 years
+ earliestSingleDate: [250, 1, 1, BCE]
+ latestDate: [150, 12, 31, BCE]
+
+ "circa 1000 BC": # Circa date, ± 500 years
+ earliestSingleDate: [1500, 1, 1, BCE]
+ latestDate: [500, 12, 31, BCE]
+
+ '5/13/54,962 BC-4/5/2,019': # hyphenatedRange, date with comma'd numbers
+ earliestSingleDate: [54962, 5, 13, BCE]
+ latestDate: [2019, 4, 5, CE]
# -------------------------------------------------------------------------------------------------------
# Invalid dates
# -------------------------------------------------------------------------------------------------------