You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ku...@apache.org on 2019/08/09 08:53:21 UTC
[hive] branch master updated: HIVE-21579: Introduce more complex
SQL:2016 datetime formats (Karen Coppage via Marta Kuczora)
This is an automated email from the ASF dual-hosted git repository.
kuczoram pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 1729d77 HIVE-21579: Introduce more complex SQL:2016 datetime formats (Karen Coppage via Marta Kuczora)
1729d77 is described below
commit 1729d77b5e87d0bf5fae840ef670dd42999cff3b
Author: Karen Coppage <kc...@gmail.com>
AuthorDate: Fri Aug 9 10:52:34 2019 +0200
HIVE-21579: Introduce more complex SQL:2016 datetime formats (Karen Coppage via Marta Kuczora)
---
.../format/datetime/HiveSqlDateTimeFormatter.java | 287 +++++++++++++++++++--
.../datetime/TestHiveSqlDateTimeFormatter.java | 70 ++++-
2 files changed, 335 insertions(+), 22 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java b/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
index 102647e..9443e8e 100644
--- a/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
+++ b/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
@@ -21,22 +21,32 @@ package org.apache.hadoop.hive.common.format.datetime;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang.WordUtils;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.Timestamp;
import java.io.Serializable;
import java.time.DateTimeException;
+import java.time.DayOfWeek;
import java.time.Instant;
import java.time.LocalDateTime;
+import java.time.Month;
import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.time.format.TextStyle;
import java.time.temporal.ChronoField;
import java.time.temporal.ChronoUnit;
+import java.time.temporal.IsoFields;
import java.time.temporal.TemporalField;
import java.time.temporal.TemporalUnit;
+import java.time.temporal.WeekFields;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
/**
* Formatter using SQL:2016 datetime patterns.
@@ -107,7 +117,7 @@ import java.util.Map;
*
* MM
* Month (1-12)
- * - For string to datetime conversion, conflicts with DDD.
+ * - For string to datetime conversion, conflicts with DDD, MONTH, MON.
*
* DD
* Day of month (1-31)
@@ -167,6 +177,82 @@ import java.util.Map;
* output=2019-01-01 23:00:00
* - If FX is enabled, input length has to match the pattern's length. e.g. pattern=AM input=A.M.
* is not accepted, but input=pm is.
+ * - Not listed as a character temporal because of special status: does not get padded with spaces
+ * upon formatting, and case is handled differently at datetime to string conversion.
+ *
+ * D
+ * Day of week (1-7)
+ * - 1 means Sunday, 2 means Monday, and so on.
+ * - Not allowed in string to datetime conversion.
+ *
+ * Q
+ * Quarter of year (1-4)
+ * - Not allowed in string to datetime conversion.
+ *
+ * WW
+ * Aligned week of year (1-53)
+ * - 1st week begins on January 1st and ends on January 7th, and so on.
+ * - Not allowed in string to datetime conversion.
+ *
+ * W
+ * Aligned week of month (1-5)
+ * - 1st week starts on the 1st of the month and ends on the 7th, and so on.
+ * - Not allowed in string to datetime conversion.
+ *
+ * A.2. Character temporals
+ * Temporal elements, but spelled out.
+ * - For datetime to string conversion, the pattern's case must match one of the listed formats
+ * (e.g. mOnTh is not accepted) to avoid ambiguity. Output is right padded with trailing spaces
+ * unless the pattern is marked with the fill mode modifier (FM).
+ * - For string to datetime conversion, the case of the pattern does not matter.
+ *
+ * MONTH|Month|month
+ * Name of month of year
+ * - For datetime to string conversion, will include trailing spaces up to length 9 (length of
+ * longest month of year name: "September"). Case is taken into account according to the
+ * following example (pattern => output):
+ * - MONTH => JANUARY
+ * - Month => January
+ * - month => january
+ * - For string to datetime conversion, neither the case of the pattern nor the case of the input
+ * are taken into account.
+ * - For string to datetime conversion, conflicts with MM and MON.
+ *
+ *
+ * MON|Mon|mon
+ * Abbreviated name of month of year
+ * - For datetime to string conversion, case is taken into account according to the following
+ * example (pattern => output):
+ * - MON => JAN
+ * - Mon => Jan
+ * - mon => jan
+ * - For string to datetime conversion, neither the case of the pattern nor the case of the input
+ * are taken into account.
+ * - For string to datetime conversion, conflicts with MM and MONTH.
+ *
+ *
+ * DAY|Day|day
+ * Name of day of week
+ * - For datetime to string conversion, will include trailing spaces until length is 9 (length of
+ * longest day of week name: "Wednesday"). Case is taken into account according to the following
+ * example (pattern => output):
+ * - DAY = SUNDAY
+ * - Day = Sunday
+ * - day = sunday
+ * - For string to datetime conversion, neither the case of the pattern nor the case of the input
+ * are taken into account.
+ * - Not allowed in string to datetime conversion.
+ *
+ * DY|Dy|dy
+ * Abbreviated name of day of week
+ * - For datetime to string conversion, case is taken into account according to the following
+ * example (pattern => output):
+ * - DY = SUN
+ * - Dy = Sun
+ * - dy = sun
+ * - For string to datetime conversion, neither the case of the pattern nor the case of the input
+ * are taken into account.
+ * - Not allowed in string to datetime conversion.
*
* B. Time zone tokens
* TZH
@@ -259,16 +345,18 @@ public class HiveSqlDateTimeFormatter implements Serializable {
private static final int NANOS_MAX_LENGTH = 9;
public static final int AM = 0;
public static final int PM = 1;
+ private static final DateTimeFormatter MONTH_FORMATTER = DateTimeFormatter.ofPattern("MMM");
private String pattern;
private List<Token> tokens = new ArrayList<>();
private boolean formatExact = false;
- private static final Map<String, TemporalField> TEMPORAL_TOKENS =
+ private static final Map<String, TemporalField> NUMERIC_TEMPORAL_TOKENS =
ImmutableMap.<String, TemporalField>builder()
.put("yyyy", ChronoField.YEAR).put("yyy", ChronoField.YEAR)
.put("yy", ChronoField.YEAR).put("y", ChronoField.YEAR)
.put("rrrr", ChronoField.YEAR).put("rr", ChronoField.YEAR)
.put("mm", ChronoField.MONTH_OF_YEAR)
+ .put("d", WeekFields.SUNDAY_START.dayOfWeek())
.put("dd", ChronoField.DAY_OF_MONTH)
.put("ddd", ChronoField.DAY_OF_YEAR)
.put("hh", ChronoField.HOUR_OF_AMPM)
@@ -284,8 +372,18 @@ public class HiveSqlDateTimeFormatter implements Serializable {
.put("ff9", ChronoField.NANO_OF_SECOND).put("ff", ChronoField.NANO_OF_SECOND)
.put("a.m.", ChronoField.AMPM_OF_DAY).put("am", ChronoField.AMPM_OF_DAY)
.put("p.m.", ChronoField.AMPM_OF_DAY).put("pm", ChronoField.AMPM_OF_DAY)
+ .put("ww", ChronoField.ALIGNED_WEEK_OF_YEAR).put("w", ChronoField.ALIGNED_WEEK_OF_MONTH)
+ .put("q", IsoFields.QUARTER_OF_YEAR)
.build();
+ private static final Map<String, TemporalField> CHARACTER_TEMPORAL_TOKENS =
+ ImmutableMap.<String, TemporalField>builder()
+ .put("mon", ChronoField.MONTH_OF_YEAR)
+ .put("month", ChronoField.MONTH_OF_YEAR)
+ .put("day", ChronoField.DAY_OF_WEEK)
+ .put("dy", ChronoField.DAY_OF_WEEK)
+ .build();
+
private static final Map<String, TemporalUnit> TIME_ZONE_TOKENS =
ImmutableMap.<String, TemporalUnit>builder()
.put("tzh", ChronoUnit.HOURS).put("tzm", ChronoUnit.MINUTES).build();
@@ -303,13 +401,15 @@ public class HiveSqlDateTimeFormatter implements Serializable {
.put("hh12", 2).put("hh24", 2).put("tzm", 2).put("am", 4).put("pm", 4)
.put("ff1", 1).put("ff2", 2).put("ff3", 3).put("ff4", 4).put("ff5", 5)
.put("ff6", 6).put("ff7", 7).put("ff8", 8).put("ff9", 9).put("ff", 9)
+ .put("month", 9).put("day", 9).put("dy", 3)
.build();
/**
* Represents broad categories of tokens.
*/
public enum TokenType {
- TEMPORAL,
+ NUMERIC_TEMPORAL,
+ CHARACTER_TEMPORAL,
SEPARATOR,
TIMEZONE,
ISO_8601_DELIMITER,
@@ -327,8 +427,9 @@ public class HiveSqlDateTimeFormatter implements Serializable {
int length; // length (e.g. YYY: 3, FF8: 8)
boolean fillMode; //FM, applies to type TEMPORAL only (later should apply to TIMEZONE as well)
- public Token(TemporalField temporalField, String string, int length, boolean fillMode) {
- this(TokenType.TEMPORAL, temporalField, null, string, length, fillMode);
+ public Token(TokenType tokenType, TemporalField temporalField, String string, int length,
+ boolean fillMode) {
+ this(tokenType, temporalField, null, string, length, fillMode);
}
public Token(TemporalUnit temporalUnit, String string, int length, boolean fillMode) {
@@ -429,12 +530,18 @@ public class HiveSqlDateTimeFormatter implements Serializable {
begin = end;
break;
}
- if (isTemporalToken(candidate)) {
+ if (isNumericTemporalToken(candidate)) {
lastAddedToken = parseTemporalToken(originalPattern, candidate, fillMode, begin);
fillMode = false;
begin = end;
break;
}
+ if (isCharacterTemporalToken(candidate)) {
+ lastAddedToken = parseCharacterTemporalToken(originalPattern, candidate, fillMode, begin);
+ fillMode = false;
+ begin = end;
+ break;
+ }
if (isTimeZoneToken(candidate)) {
lastAddedToken = parseTimeZoneToken(candidate, fillMode, begin);
begin = end;
@@ -468,8 +575,12 @@ public class HiveSqlDateTimeFormatter implements Serializable {
return candidate.length() == 1 && VALID_ISO_8601_DELIMITERS.contains(candidate);
}
- private boolean isTemporalToken(String candidate) {
- return TEMPORAL_TOKENS.containsKey(candidate);
+ private boolean isNumericTemporalToken(String candidate) {
+ return NUMERIC_TEMPORAL_TOKENS.containsKey(candidate);
+ }
+
+ private boolean isCharacterTemporalToken(String candidate) {
+ return CHARACTER_TEMPORAL_TOKENS.containsKey(candidate);
}
private boolean isTimeZoneToken(String pattern) {
@@ -514,11 +625,24 @@ public class HiveSqlDateTimeFormatter implements Serializable {
private Token parseTemporalToken(String originalPattern, String candidate, boolean fillMode,
int begin) {
// for AM/PM, keep original case
- if (TEMPORAL_TOKENS.get(candidate) == ChronoField.AMPM_OF_DAY) {
+ if (NUMERIC_TEMPORAL_TOKENS.get(candidate) == ChronoField.AMPM_OF_DAY) {
int subStringEnd = begin + candidate.length();
candidate = originalPattern.substring(begin, subStringEnd);
}
- Token lastAddedToken = new Token(TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate,
+ Token lastAddedToken = new Token(TokenType.NUMERIC_TEMPORAL,
+ NUMERIC_TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate,
+ getTokenStringLength(candidate), fillMode);
+ tokens.add(lastAddedToken);
+ return lastAddedToken;
+ }
+
+ private Token parseCharacterTemporalToken(String originalPattern, String candidate,
+ boolean fillMode, int begin) {
+ // keep original case
+ candidate = originalPattern.substring(begin, begin + candidate.length());
+
+ Token lastAddedToken = new Token(TokenType.CHARACTER_TEMPORAL,
+ CHARACTER_TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate,
getTokenStringLength(candidate), fillMode);
tokens.add(lastAddedToken);
return lastAddedToken;
@@ -587,6 +711,24 @@ public class HiveSqlDateTimeFormatter implements Serializable {
timeZoneTemporalUnits.add(token.temporalUnit);
}
}
+
+ //check for illegal temporal fields
+ if (temporalFields.contains(IsoFields.QUARTER_OF_YEAR)) {
+ throw new IllegalArgumentException("Illegal field: q (" + IsoFields.QUARTER_OF_YEAR + ")");
+ }
+ if (temporalFields.contains(WeekFields.SUNDAY_START.dayOfWeek())) {
+ throw new IllegalArgumentException("Illegal field: d (" + WeekFields.SUNDAY_START.dayOfWeek() + ")");
+ }
+ if (temporalFields.contains(ChronoField.DAY_OF_WEEK)) {
+ throw new IllegalArgumentException("Illegal field: dy/day (" + ChronoField.DAY_OF_WEEK + ")");
+ }
+ if (temporalFields.contains(ChronoField.ALIGNED_WEEK_OF_MONTH)) {
+ throw new IllegalArgumentException("Illegal field: w (" + ChronoField.ALIGNED_WEEK_OF_MONTH + ")");
+ }
+ if (temporalFields.contains(ChronoField.ALIGNED_WEEK_OF_YEAR)) {
+ throw new IllegalArgumentException("Illegal field: ww (" + ChronoField.ALIGNED_WEEK_OF_YEAR + ")");
+ }
+
if (!(temporalFields.contains(ChronoField.YEAR))) {
throw new IllegalArgumentException("Missing year token.");
}
@@ -648,6 +790,14 @@ public class HiveSqlDateTimeFormatter implements Serializable {
throw new IllegalArgumentException(token.string.toUpperCase() + " not a valid format for "
+ "timestamp or date.");
}
+ if (token.type == TokenType.CHARACTER_TEMPORAL) {
+ String s = token.string;
+ if (!(s.equals(s.toUpperCase()) || s.equals(capitalize(s)) || s.equals(s.toLowerCase()))) {
+ throw new IllegalArgumentException(
+ "Ambiguous capitalization of token " + s + ". Accepted " + "forms are " + s
+ .toUpperCase() + ", " + capitalize(s) + ", or " + s.toLowerCase() + ".");
+ }
+ }
}
}
@@ -659,10 +809,15 @@ public class HiveSqlDateTimeFormatter implements Serializable {
LocalDateTime.ofEpochSecond(ts.toEpochSecond(), ts.getNanos(), ZoneOffset.UTC);
for (Token token : tokens) {
switch (token.type) {
- case TEMPORAL:
+ case NUMERIC_TEMPORAL:
+ case CHARACTER_TEMPORAL:
try {
value = localDateTime.get(token.temporalField);
- outputString = formatTemporal(value, token);
+ if (token.type == TokenType.NUMERIC_TEMPORAL) {
+ outputString = formatNumericTemporal(value, token);
+ } else {
+ outputString = formatCharacterTemporal(value, token);
+ }
} catch (DateTimeException e) {
throw new IllegalArgumentException(token.temporalField + " couldn't be obtained from "
+ "LocalDateTime " + localDateTime, e);
@@ -690,7 +845,7 @@ public class HiveSqlDateTimeFormatter implements Serializable {
return format(Timestamp.ofEpochSecond(date.toEpochSecond()));
}
- private String formatTemporal(int value, Token token) {
+ private String formatNumericTemporal(int value, Token token) {
String output;
if (token.temporalField == ChronoField.AMPM_OF_DAY) {
output = value == 0 ? "a" : "p";
@@ -713,6 +868,34 @@ public class HiveSqlDateTimeFormatter implements Serializable {
return output;
}
+ private String formatCharacterTemporal(int value, Token token) {
+ String output = null;
+ if (token.temporalField == ChronoField.MONTH_OF_YEAR) {
+ output = Month.of(value).getDisplayName(TextStyle.FULL, Locale.US);
+ } else if (token.temporalField == ChronoField.DAY_OF_WEEK) {
+ output = DayOfWeek.of(value).getDisplayName(TextStyle.FULL, Locale.US);
+ }
+ if (output == null) {
+ throw new IllegalStateException("TemporalField: " + token.temporalField + " not valid for "
+ + "character formatting.");
+ }
+
+ // set length
+ if (output.length() > token.length) {
+ output = output.substring(0, token.length); // truncate to length
+ } else if (!token.fillMode && output.length() < token.length) {
+ output = StringUtils.rightPad(output, token.length); //pad to size
+ }
+
+ // set case
+ if (Character.isUpperCase(token.string.charAt(1))) {
+ output = output.toUpperCase();
+ } else if (Character.isLowerCase(token.string.charAt(0))) {
+ output = output.toLowerCase();
+ }
+ return output;
+ }
+
/**
* To match token.length, pad left with zeroes or truncate.
* Omit padding if fill mode (FM) modifier on.
@@ -754,9 +937,15 @@ public class HiveSqlDateTimeFormatter implements Serializable {
for (Token token : tokens) {
switch (token.type) {
- case TEMPORAL:
- substring = getNextNumericSubstring(fullInput, index, token); // e.g. yy-m -> yy
- value = parseTemporal(substring, token); // e.g. 18->2018, July->07
+ case NUMERIC_TEMPORAL:
+ case CHARACTER_TEMPORAL:
+ if (token.type == TokenType.NUMERIC_TEMPORAL) {
+ substring = getNextNumericSubstring(fullInput, index, token); // e.g. yy-m -> yy
+ value = parseNumericTemporal(substring, token); // e.g. 18->2018
+ } else {
+ substring = getNextCharacterSubstring(fullInput, index, token); //e.g. Marcharch -> March
+ value = parseCharacterTemporal(substring, token); // e.g. July->07
+ }
try {
ldt = ldt.with(token.temporalField, value);
} catch (DateTimeException e){
@@ -844,9 +1033,13 @@ public class HiveSqlDateTimeFormatter implements Serializable {
return s;
}
}
- // next non-numeric character is a delimiter. Don't worry about AM/PM since we've already
- // handled that case.
- if ((token.type == TokenType.TEMPORAL || token.type == TokenType.TIMEZONE)
+ // if it's a character temporal, the first non-letter character is a delimiter
+ if (token.type == TokenType.CHARACTER_TEMPORAL && s.matches(".*[^A-Za-z].*")) {
+ s = s.split("[^A-Za-z]", 2)[0];
+
+ // if it's a numeric element, next non-numeric character is a delimiter. Don't worry about
+ // AM/PM since we've already handled that case.
+ } else if ((token.type == TokenType.NUMERIC_TEMPORAL || token.type == TokenType.TIMEZONE)
&& s.matches(".*\\D.*")) {
s = s.split("\\D", 2)[0];
}
@@ -857,7 +1050,7 @@ public class HiveSqlDateTimeFormatter implements Serializable {
/**
* Get the integer value of a temporal substring.
*/
- private int parseTemporal(String substring, Token token){
+ private int parseNumericTemporal(String substring, Token token){
checkFormatExact(substring, token);
// exceptions to the rule
@@ -898,6 +1091,53 @@ public class HiveSqlDateTimeFormatter implements Serializable {
}
}
+ private static final String MONTH_REGEX;
+ static {
+ StringBuilder sb = new StringBuilder();
+ String or = "";
+ for (Month month : Month.values()) {
+ sb.append(or).append(month);
+ or = "|";
+ }
+ MONTH_REGEX = sb.toString();
+ }
+
+ private String getNextCharacterSubstring(String fullInput, int index, Token token) {
+ int end = index + token.length;
+ if (end > fullInput.length()) {
+ end = fullInput.length();
+ }
+ String substring = fullInput.substring(index, end);
+ if (token.length == 3) { //dy, mon
+ return substring;
+ }
+
+ Matcher matcher = Pattern.compile(MONTH_REGEX, Pattern.CASE_INSENSITIVE).matcher(substring);
+ if (matcher.find()) {
+ return substring.substring(0, matcher.end());
+ }
+ throw new IllegalArgumentException(
+ "Couldn't find " + token.string + " in substring " + substring + " at index " + index);
+ }
+
+ private int parseCharacterTemporal(String substring, Token token) {
+ try {
+ if (token.temporalField == ChronoField.MONTH_OF_YEAR) {
+ if (token.length == 3) {
+ return Month.from(MONTH_FORMATTER.parse(capitalize(substring))).getValue();
+ } else {
+ return Month.valueOf(substring.toUpperCase()).getValue();
+ }
+ }
+ } catch (Exception e) {
+ throw new IllegalArgumentException(
+ "Couldn't parse substring \"" + substring + "\" with token " + token + " to integer."
+ + "Pattern is " + pattern, e);
+ }
+ throw new IllegalArgumentException(
+ "token: (" + token + ") isn't a valid character temporal. Pattern is " + pattern);
+ }
+
/**
* @throws IllegalArgumentException if input length doesn't match expected (token) length
*/
@@ -986,7 +1226,8 @@ public class HiveSqlDateTimeFormatter implements Serializable {
Token nextToken = tokens.get(tokens.indexOf(currentToken) + 1);
pattern = pattern.toLowerCase();
return (isTimeZoneToken(pattern) && TIME_ZONE_TOKENS.get(pattern) == nextToken.temporalUnit
- || isTemporalToken(pattern) && TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField);
+ || isNumericTemporalToken(pattern) && NUMERIC_TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField
+ || isCharacterTemporalToken(pattern) && CHARACTER_TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField);
}
public String getPattern() {
@@ -999,4 +1240,8 @@ public class HiveSqlDateTimeFormatter implements Serializable {
protected List<Token> getTokens() {
return new ArrayList<>(tokens);
}
+
+ private static String capitalize(String substring) {
+ return WordUtils.capitalize(substring.toLowerCase());
+ }
}
diff --git a/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java b/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
index c0f9f7b..ff41534 100644
--- a/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
+++ b/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
@@ -91,9 +91,26 @@ public class TestHiveSqlDateTimeFormatter {
verifyBadPattern("yyyy-mm-dd SSSSS AM", true);
verifyBadPattern("yyyy-mm-dd MI SSSSS", true);
verifyBadPattern("yyyy-mm-dd SS SSSSS", true);
+ verifyBadPattern("yyyy mm-MON dd", true);
+ verifyBadPattern("yyyy mm-MONTH dd", true);
+ verifyBadPattern("yyyy MON, month dd", true);
verifyBadPattern("tzm", false);
verifyBadPattern("tzh", false);
+
+ //ambiguous case for formatting
+ verifyBadPattern("MOnth", false);
+ verifyBadPattern("DaY", false);
+ verifyBadPattern("dAy", false);
+ verifyBadPattern("dY", false);
+
+ //illegal for parsing
+ verifyBadPattern("yyyy-mm-dd q", true);
+ verifyBadPattern("yyyy-mm-dd d", true);
+ verifyBadPattern("yyyy-mm-dd dy", true);
+ verifyBadPattern("yyyy-mm-dd day", true);
+ verifyBadPattern("yyyy-mm-dd w", true);
+ verifyBadPattern("yyyy-mm-dd ww", true);
}
@Test
@@ -106,6 +123,28 @@ public class TestHiveSqlDateTimeFormatter {
checkFormatTs("HH12 P.M.", "2019-01-01 00:15:10", "12 A.M.");
checkFormatTs("HH12 AM", "2019-01-01 12:15:10", "12 PM");
checkFormatTs("YYYY-MM-DD HH12PM", "2017-05-05 00:00:00", "2017-05-05 12AM");
+
+ checkFormatTs("YYYY-MONTH-DD", "2019-01-01 00:00:00", "2019-JANUARY -01"); //fill to length 9
+ checkFormatTs("YYYY-Month-DD", "2019-01-01 00:00:00", "2019-January -01");
+ checkFormatTs("YYYY-month-DD", "2019-01-01 00:00:00", "2019-january -01");
+ checkFormatTs("YYYY-MON-DD", "2019-01-01 00:00:00", "2019-JAN-01");
+ checkFormatTs("YYYY-Mon-DD", "2019-01-01 00:00:00", "2019-Jan-01");
+ checkFormatTs("YYYY-mon-DD", "2019-01-01 00:00:00", "2019-jan-01");
+
+ checkFormatTs("D: DAY", "2019-01-01 00:00:00", "3: TUESDAY "); //fill to length 9
+ checkFormatTs("D: Day", "2019-01-02 00:00:00", "4: Wednesday");
+ checkFormatTs("D: day", "2019-01-03 00:00:00", "5: thursday ");
+ checkFormatTs("D: DY", "2019-01-04 00:00:00", "6: FRI");
+ checkFormatTs("D: Dy", "2019-01-05 00:00:00", "7: Sat");
+ checkFormatTs("D: dy", "2019-01-06 00:00:00", "1: sun");
+ checkFormatTs("D: DAY", "2019-01-07 00:00:00", "2: MONDAY ");
+
+ checkFormatTs("YYYY-mm-dd: Q WW W", "2019-01-01 00:00:00", "2019-01-01: 1 01 1");
+ checkFormatTs("YYYY-mm-dd: Q WW W", "2019-01-07 00:00:00", "2019-01-07: 1 01 1");
+ checkFormatTs("YYYY-mm-dd: Q WW W", "2019-01-08 00:00:00", "2019-01-08: 1 02 2");
+ checkFormatTs("YYYY-mm-dd: Q WW W", "2019-03-31 00:00:00", "2019-03-31: 1 13 5");
+ checkFormatTs("YYYY-mm-dd: Q WW W", "2019-04-01 00:00:00", "2019-04-01: 2 13 1");
+ checkFormatTs("YYYY-mm-dd: Q WW W", "2019-12-31 00:00:00", "2019-12-31: 4 53 5");
}
private void checkFormatTs(String pattern, String input, String expectedOutput) {
@@ -188,6 +227,18 @@ public class TestHiveSqlDateTimeFormatter {
checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM+0515", "2018-12-31 08:00:00");
checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM0515", "2018-12-31 08:00:00");
checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM-0515", "2018-12-31 08:00:00");
+
+ //MONTH, MON : case really doesn't matter
+ checkParseTimestamp("yyyy-MONTH-dd", "2018-FEBRUARY-28", "2018-02-28 00:00:00");
+ checkParseTimestamp("yyyy-Month-dd", "2018-february-28", "2018-02-28 00:00:00");
+ checkParseTimestamp("yyyy-month-dd", "2018-FEBRUARY-28", "2018-02-28 00:00:00");
+ checkParseTimestamp("yyyy-montH-dd", "2018-febRuary-28", "2018-02-28 00:00:00");
+ checkParseTimestamp("yyyy-MON-dd", "2018-FEB-28", "2018-02-28 00:00:00");
+ checkParseTimestamp("yyyy-moN-dd", "2018-FeB-28", "2018-02-28 00:00:00");
+ checkParseTimestamp("yyyy-mon-dd", "2018-FEB-28", "2018-02-28 00:00:00");
+ //letters and numbers are delimiters to each other, respectively
+ checkParseDate("yyyy-ddMONTH", "2018-4March", "2018-03-04");
+ checkParseDate("yyyy-MONTHdd", "2018-March4", "2018-03-04");
}
private int getFirstTwoDigits() {
@@ -229,6 +280,9 @@ public class TestHiveSqlDateTimeFormatter {
checkParseDate("rrrr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03");
checkParseDate("yyyy-mm-dd hh mi ss.ff7", "2018/01/01 2.2.2.55", "2018-01-01");
+
+ checkParseDate("dd/MonthT/yyyy", "31/AugustT/2020", "2020-08-31");
+ checkParseDate("dd/MonthT/yyyy", "31/MarchT/2020", "2020-03-31");
}
private void checkParseDate(String pattern, String input, String expectedOutput) {
@@ -247,6 +301,11 @@ public class TestHiveSqlDateTimeFormatter {
verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +16:00"); //tzh out of range
verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +14:60"); //tzm out of range
verifyBadParseString("YYYY DDD", "2000 367"); //ddd out of range
+ verifyBadParseString("yyyy-month-dd", "2019-merch-23"); //invalid month of year
+ verifyBadParseString("yyyy-mon-dd", "2019-mer-23"); //invalid month of year
+ verifyBadParseString("yyyy-MON-dd", "2018-FEBRUARY-28"); // can't mix and match mon and month
+ verifyBadParseString("yyyy-MON-dd", "2018-FEBR-28");
+ verifyBadParseString("yyyy-MONTH-dd", "2018-FEB-28");
}
private void verifyBadPattern(String string, boolean forParsing) {
@@ -261,12 +320,16 @@ public class TestHiveSqlDateTimeFormatter {
@Test
public void testFm() {
- //fm
//year (019) becomes 19 even if pattern is yyy
checkFormatTs("FMyyy-FMmm-dd FMHH12:MI:FMSS", "2019-01-01 01:01:01", "19-1-01 1:01:1");
//ff[1-9] shouldn't be affected, because leading zeroes hold information
checkFormatTs("FF5/FMFF5", "2019-01-01 01:01:01.0333", "03330/03330");
checkFormatTs("FF/FMFF", "2019-01-01 01:01:01.0333", "0333/0333");
+ //omit trailing spaces from character temporal elements
+ checkFormatTs("YYYY-fmMonth-DD", "2019-01-01 00:00:00", "2019-January-01");
+ checkFormatTs("D: fmDAY", "2019-01-01 00:00:00", "3: TUESDAY");
+ checkFormatTs("D: fmDay", "2019-01-02 00:00:00", "4: Wednesday");
+
//only affects temporals that immediately follow
verifyBadPattern("yyy-mm-dd FM,HH12", false);
verifyBadPattern("yyy-mm-dd FM,HH12", true);
@@ -301,6 +364,8 @@ public class TestHiveSqlDateTimeFormatter {
checkParseTimestamp("FXDD-MM-YYYY hh12 A.M.", "01-01-1998 12 p.m.", "1998-01-01 12:00:00");
verifyBadParseString("FXDD-MM-YYYY hh12 am", "01-01-1998 12 p.m.");
verifyBadParseString("FXDD-MM-YYYY hh12 a.m.", "01-01-1998 12 pm");
+ //character temporals shouldn't have trailing spaces
+ checkParseTimestamp("FXDD-month-YYYY", "15-March-1998", "1998-03-15 00:00:00");
}
@Test
@@ -330,6 +395,9 @@ public class TestHiveSqlDateTimeFormatter {
// non-numeric characters in text counts as a delimiter
checkParseDate("yyyy\"m\"mm\"d\"dd", "19m1d1", LocalDate.now().getYear() / 100 + "19-01-01");
checkParseDate("yyyy\"[\"mm\"]\"dd", "19[1]1", LocalDate.now().getYear() / 100 + "19-01-01");
+ // parse character temporals correctly
+ checkParseDate("dd/Month\"arch\"/yyyy", "31/Marcharch/2020", "2020-03-31");
+ checkParseDate("dd/Month\"ember\"/yyyy", "31/Decemberember/2020", "2020-12-31");
// single quotes are separators and not text delimiters
checkParseTimestamp("\"Y\'ear \"YYYY \' \"month\" MM \"day\" DD.\"!\"",