You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ku...@apache.org on 2019/08/09 08:53:21 UTC

[hive] branch master updated: HIVE-21579: Introduce more complex SQL:2016 datetime formats (Karen Coppage via Marta Kuczora)

This is an automated email from the ASF dual-hosted git repository.

kuczoram pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 1729d77  HIVE-21579: Introduce more complex SQL:2016 datetime formats (Karen Coppage via Marta Kuczora)
1729d77 is described below

commit 1729d77b5e87d0bf5fae840ef670dd42999cff3b
Author: Karen Coppage <kc...@gmail.com>
AuthorDate: Fri Aug 9 10:52:34 2019 +0200

    HIVE-21579: Introduce more complex SQL:2016 datetime formats (Karen Coppage via Marta Kuczora)
---
 .../format/datetime/HiveSqlDateTimeFormatter.java  | 287 +++++++++++++++++++--
 .../datetime/TestHiveSqlDateTimeFormatter.java     |  70 ++++-
 2 files changed, 335 insertions(+), 22 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java b/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
index 102647e..9443e8e 100644
--- a/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
+++ b/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
@@ -21,22 +21,32 @@ package org.apache.hadoop.hive.common.format.datetime;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang.WordUtils;
 import org.apache.hadoop.hive.common.type.Date;
 import org.apache.hadoop.hive.common.type.Timestamp;
 
 import java.io.Serializable;
 import java.time.DateTimeException;
+import java.time.DayOfWeek;
 import java.time.Instant;
 import java.time.LocalDateTime;
+import java.time.Month;
 import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.time.format.TextStyle;
 import java.time.temporal.ChronoField;
 import java.time.temporal.ChronoUnit;
+import java.time.temporal.IsoFields;
 import java.time.temporal.TemporalField;
 import java.time.temporal.TemporalUnit;
+import java.time.temporal.WeekFields;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /**
  * Formatter using SQL:2016 datetime patterns.
@@ -107,7 +117,7 @@ import java.util.Map;
  *
  * MM
  * Month (1-12)
- * - For string to datetime conversion, conflicts with DDD.
+ * - For string to datetime conversion, conflicts with DDD, MONTH, MON.
  *
  * DD
  * Day of month (1-31)
@@ -167,6 +177,82 @@ import java.util.Map;
  *          output=2019-01-01 23:00:00
  *   - If FX is enabled, input length has to match the pattern's length. e.g. pattern=AM input=A.M.
  *     is not accepted, but input=pm is.
+ * - Not listed as a character temporal because of special status: does not get padded with spaces
+ *   upon formatting, and case is handled differently at datetime to string conversion.
+ *
+ * D
+ * Day of week (1-7)
+ * - 1 means Sunday, 2 means Monday, and so on.
+ * - Not allowed in string to datetime conversion.
+ *
+ * Q
+ * Quarter of year (1-4)
+ * - Not allowed in string to datetime conversion.
+ *
+ * WW
+ * Aligned week of year (1-53)
+ * - 1st week begins on January 1st and ends on January 7th, and so on.
+ * - Not allowed in string to datetime conversion.
+ *
+ * W
+ * Aligned week of month (1-5)
+ * - 1st week starts on the 1st of the month and ends on the 7th, and so on.
+ * - Not allowed in string to datetime conversion.
+ *
+ * A.2. Character temporals
+ * Temporal elements, but spelled out.
+ * - For datetime to string conversion, the pattern's case must match one of the listed formats
+ *   (e.g. mOnTh is not accepted) to avoid ambiguity. Output is right padded with trailing spaces
+ *   unless the pattern is marked with the fill mode modifier (FM).
+ * - For string to datetime conversion, the case of the pattern does not matter.
+ *
+ * MONTH|Month|month
+ * Name of month of year
+ * - For datetime to string conversion, will include trailing spaces up to length 9 (length of
+ *   longest month of year name: "September"). Case is taken into account according to the
+ *   following example (pattern => output):
+ *   - MONTH => JANUARY
+ *   - Month => January
+ *   - month => january
+ * - For string to datetime conversion, neither the case of the pattern nor the case of the input
+ *   are taken into account.
+ * - For string to datetime conversion, conflicts with MM and MON.
+ *
+ *
+ * MON|Mon|mon
+ * Abbreviated name of month of year
+ * - For datetime to string conversion, case is taken into account according to the following
+ *   example (pattern => output):
+ *   - MON => JAN
+ *   - Mon => Jan
+ *   - mon => jan
+ * - For string to datetime conversion, neither the case of the pattern nor the case of the input
+ *   are taken into account.
+ * - For string to datetime conversion, conflicts with MM and MONTH.
+ *
+ *
+ * DAY|Day|day
+ * Name of day of week
+ * - For datetime to string conversion, will include trailing spaces until length is 9 (length of
+ *   longest day of week name: "Wednesday"). Case is taken into account according to the following
+ *   example (pattern => output):
+ *   - DAY = SUNDAY
+ *   - Day = Sunday
+ *   - day = sunday
+ * - For string to datetime conversion, neither the case of the pattern nor the case of the input
+ *   are taken into account.
+ * - Not allowed in string to datetime conversion.
+ *
+ * DY|Dy|dy
+ * Abbreviated name of day of week
+ * - For datetime to string conversion, case is taken into account according to the following
+ *   example (pattern => output):
+ *   - DY = SUN
+ *   - Dy = Sun
+ *   - dy = sun
+ * - For string to datetime conversion, neither the case of the pattern nor the case of the input
+ *   are taken into account.
+ * - Not allowed in string to datetime conversion.
  *
  * B. Time zone tokens
  * TZH
@@ -259,16 +345,18 @@ public class HiveSqlDateTimeFormatter implements Serializable {
   private static final int NANOS_MAX_LENGTH = 9;
   public static final int AM = 0;
   public static final int PM = 1;
+  private static final DateTimeFormatter MONTH_FORMATTER = DateTimeFormatter.ofPattern("MMM");
   private String pattern;
   private List<Token> tokens = new ArrayList<>();
   private boolean formatExact = false;
 
-  private static final Map<String, TemporalField> TEMPORAL_TOKENS =
+  private static final Map<String, TemporalField> NUMERIC_TEMPORAL_TOKENS =
       ImmutableMap.<String, TemporalField>builder()
           .put("yyyy", ChronoField.YEAR).put("yyy", ChronoField.YEAR)
           .put("yy", ChronoField.YEAR).put("y", ChronoField.YEAR)
           .put("rrrr", ChronoField.YEAR).put("rr", ChronoField.YEAR)
           .put("mm", ChronoField.MONTH_OF_YEAR)
+          .put("d", WeekFields.SUNDAY_START.dayOfWeek())
           .put("dd", ChronoField.DAY_OF_MONTH)
           .put("ddd", ChronoField.DAY_OF_YEAR)
           .put("hh", ChronoField.HOUR_OF_AMPM)
@@ -284,8 +372,18 @@ public class HiveSqlDateTimeFormatter implements Serializable {
           .put("ff9", ChronoField.NANO_OF_SECOND).put("ff", ChronoField.NANO_OF_SECOND)
           .put("a.m.", ChronoField.AMPM_OF_DAY).put("am", ChronoField.AMPM_OF_DAY)
           .put("p.m.", ChronoField.AMPM_OF_DAY).put("pm", ChronoField.AMPM_OF_DAY)
+          .put("ww", ChronoField.ALIGNED_WEEK_OF_YEAR).put("w", ChronoField.ALIGNED_WEEK_OF_MONTH)
+          .put("q", IsoFields.QUARTER_OF_YEAR)
           .build();
 
+  private static final Map<String, TemporalField> CHARACTER_TEMPORAL_TOKENS =
+      ImmutableMap.<String, TemporalField>builder()
+          .put("mon", ChronoField.MONTH_OF_YEAR)
+          .put("month", ChronoField.MONTH_OF_YEAR)
+          .put("day", ChronoField.DAY_OF_WEEK)
+          .put("dy", ChronoField.DAY_OF_WEEK)
+      .build();
+
   private static final Map<String, TemporalUnit> TIME_ZONE_TOKENS =
       ImmutableMap.<String, TemporalUnit>builder()
           .put("tzh", ChronoUnit.HOURS).put("tzm", ChronoUnit.MINUTES).build();
@@ -303,13 +401,15 @@ public class HiveSqlDateTimeFormatter implements Serializable {
       .put("hh12", 2).put("hh24", 2).put("tzm", 2).put("am", 4).put("pm", 4)
       .put("ff1", 1).put("ff2", 2).put("ff3", 3).put("ff4", 4).put("ff5", 5)
       .put("ff6", 6).put("ff7", 7).put("ff8", 8).put("ff9", 9).put("ff", 9)
+      .put("month", 9).put("day", 9).put("dy", 3)
       .build();
 
   /**
    * Represents broad categories of tokens.
    */
   public enum TokenType {
-    TEMPORAL,
+    NUMERIC_TEMPORAL,
+    CHARACTER_TEMPORAL,
     SEPARATOR,
     TIMEZONE,
     ISO_8601_DELIMITER,
@@ -327,8 +427,9 @@ public class HiveSqlDateTimeFormatter implements Serializable {
     int length; // length (e.g. YYY: 3, FF8: 8)
     boolean fillMode; //FM, applies to type TEMPORAL only (later should apply to TIMEZONE as well)
 
-    public Token(TemporalField temporalField, String string, int length, boolean fillMode) {
-      this(TokenType.TEMPORAL, temporalField, null, string, length, fillMode);
+    public Token(TokenType tokenType, TemporalField temporalField, String string, int length,
+        boolean fillMode) {
+      this(tokenType, temporalField, null, string, length, fillMode);
     }
 
     public Token(TemporalUnit temporalUnit, String string, int length, boolean fillMode) {
@@ -429,12 +530,18 @@ public class HiveSqlDateTimeFormatter implements Serializable {
           begin = end;
           break;
         }
-        if (isTemporalToken(candidate)) {
+        if (isNumericTemporalToken(candidate)) {
           lastAddedToken = parseTemporalToken(originalPattern, candidate, fillMode, begin);
           fillMode = false;
           begin = end;
           break;
         }
+        if (isCharacterTemporalToken(candidate)) {
+          lastAddedToken = parseCharacterTemporalToken(originalPattern, candidate, fillMode, begin);
+          fillMode = false;
+          begin = end;
+          break;
+        }
         if (isTimeZoneToken(candidate)) {
           lastAddedToken = parseTimeZoneToken(candidate, fillMode, begin);
           begin = end;
@@ -468,8 +575,12 @@ public class HiveSqlDateTimeFormatter implements Serializable {
     return candidate.length() == 1 && VALID_ISO_8601_DELIMITERS.contains(candidate);
   }
 
-  private boolean isTemporalToken(String candidate) {
-    return TEMPORAL_TOKENS.containsKey(candidate);
+  private boolean isNumericTemporalToken(String candidate) {
+    return NUMERIC_TEMPORAL_TOKENS.containsKey(candidate);
+  }
+
+  private boolean isCharacterTemporalToken(String candidate) {
+    return CHARACTER_TEMPORAL_TOKENS.containsKey(candidate);
   }
 
   private boolean isTimeZoneToken(String pattern) {
@@ -514,11 +625,24 @@ public class HiveSqlDateTimeFormatter implements Serializable {
   private Token parseTemporalToken(String originalPattern, String candidate, boolean fillMode,
       int begin) {
     // for AM/PM, keep original case
-    if (TEMPORAL_TOKENS.get(candidate) == ChronoField.AMPM_OF_DAY) {
+    if (NUMERIC_TEMPORAL_TOKENS.get(candidate) == ChronoField.AMPM_OF_DAY) {
       int subStringEnd = begin + candidate.length();
       candidate = originalPattern.substring(begin, subStringEnd);
     }
-    Token lastAddedToken = new Token(TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate,
+    Token lastAddedToken = new Token(TokenType.NUMERIC_TEMPORAL,
+        NUMERIC_TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate,
+        getTokenStringLength(candidate), fillMode);
+    tokens.add(lastAddedToken);
+    return lastAddedToken;
+  }
+
+  private Token parseCharacterTemporalToken(String originalPattern, String candidate,
+      boolean fillMode, int begin) {
+    // keep original case
+    candidate = originalPattern.substring(begin, begin + candidate.length());
+
+    Token lastAddedToken = new Token(TokenType.CHARACTER_TEMPORAL,
+        CHARACTER_TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate,
         getTokenStringLength(candidate), fillMode);
     tokens.add(lastAddedToken);
     return lastAddedToken;
@@ -587,6 +711,24 @@ public class HiveSqlDateTimeFormatter implements Serializable {
         timeZoneTemporalUnits.add(token.temporalUnit);
       }
     }
+
+    //check for illegal temporal fields
+    if (temporalFields.contains(IsoFields.QUARTER_OF_YEAR)) {
+      throw new IllegalArgumentException("Illegal field: q (" + IsoFields.QUARTER_OF_YEAR + ")");
+    }
+    if (temporalFields.contains(WeekFields.SUNDAY_START.dayOfWeek())) {
+      throw new IllegalArgumentException("Illegal field: d (" + WeekFields.SUNDAY_START.dayOfWeek() + ")");
+    }
+    if (temporalFields.contains(ChronoField.DAY_OF_WEEK)) {
+      throw new IllegalArgumentException("Illegal field: dy/day (" + ChronoField.DAY_OF_WEEK + ")");
+    }
+    if (temporalFields.contains(ChronoField.ALIGNED_WEEK_OF_MONTH)) {
+      throw new IllegalArgumentException("Illegal field: w (" + ChronoField.ALIGNED_WEEK_OF_MONTH + ")");
+    }
+    if (temporalFields.contains(ChronoField.ALIGNED_WEEK_OF_YEAR)) {
+      throw new IllegalArgumentException("Illegal field: ww (" + ChronoField.ALIGNED_WEEK_OF_YEAR + ")");
+    }
+
     if (!(temporalFields.contains(ChronoField.YEAR))) {
       throw new IllegalArgumentException("Missing year token.");
     }
@@ -648,6 +790,14 @@ public class HiveSqlDateTimeFormatter implements Serializable {
         throw new IllegalArgumentException(token.string.toUpperCase() + " not a valid format for "
             + "timestamp or date.");
       }
+      if (token.type == TokenType.CHARACTER_TEMPORAL) {
+        String s = token.string;
+        if (!(s.equals(s.toUpperCase()) || s.equals(capitalize(s)) || s.equals(s.toLowerCase()))) {
+          throw new IllegalArgumentException(
+              "Ambiguous capitalization of token " + s + ". Accepted " + "forms are " + s
+                  .toUpperCase() + ", " + capitalize(s) + ", or " + s.toLowerCase() + ".");
+        }
+      }
     }
   }
 
@@ -659,10 +809,15 @@ public class HiveSqlDateTimeFormatter implements Serializable {
         LocalDateTime.ofEpochSecond(ts.toEpochSecond(), ts.getNanos(), ZoneOffset.UTC);
     for (Token token : tokens) {
       switch (token.type) {
-      case TEMPORAL:
+      case NUMERIC_TEMPORAL:
+      case CHARACTER_TEMPORAL:
         try {
           value = localDateTime.get(token.temporalField);
-          outputString = formatTemporal(value, token);
+          if (token.type == TokenType.NUMERIC_TEMPORAL) {
+            outputString = formatNumericTemporal(value, token);
+          } else {
+            outputString = formatCharacterTemporal(value, token);
+          }
         } catch (DateTimeException e) {
           throw new IllegalArgumentException(token.temporalField + " couldn't be obtained from "
               + "LocalDateTime " + localDateTime, e);
@@ -690,7 +845,7 @@ public class HiveSqlDateTimeFormatter implements Serializable {
     return format(Timestamp.ofEpochSecond(date.toEpochSecond()));
   }
 
-  private String formatTemporal(int value, Token token) {
+  private String formatNumericTemporal(int value, Token token) {
     String output;
     if (token.temporalField == ChronoField.AMPM_OF_DAY) {
       output = value == 0 ? "a" : "p";
@@ -713,6 +868,34 @@ public class HiveSqlDateTimeFormatter implements Serializable {
     return output;
   }
 
+  private String formatCharacterTemporal(int value, Token token) {
+    String output = null;
+    if (token.temporalField == ChronoField.MONTH_OF_YEAR) {
+      output = Month.of(value).getDisplayName(TextStyle.FULL, Locale.US);
+    } else if (token.temporalField == ChronoField.DAY_OF_WEEK) {
+      output = DayOfWeek.of(value).getDisplayName(TextStyle.FULL, Locale.US);
+    }
+    if (output == null) {
+      throw new IllegalStateException("TemporalField: " + token.temporalField + " not valid for "
+          + "character formatting.");
+    }
+
+    // set length
+    if (output.length() > token.length) {
+      output = output.substring(0, token.length); // truncate to length
+    } else if (!token.fillMode && output.length() < token.length) {
+      output = StringUtils.rightPad(output, token.length); //pad to size
+    }
+
+    // set case
+    if (Character.isUpperCase(token.string.charAt(1))) {
+      output = output.toUpperCase();
+    } else if (Character.isLowerCase(token.string.charAt(0))) {
+      output = output.toLowerCase();
+    }
+    return output;
+  }
+
   /**
    * To match token.length, pad left with zeroes or truncate.
    * Omit padding if fill mode (FM) modifier on.
@@ -754,9 +937,15 @@ public class HiveSqlDateTimeFormatter implements Serializable {
 
     for (Token token : tokens) {
       switch (token.type) {
-      case TEMPORAL:
-        substring = getNextNumericSubstring(fullInput, index, token); // e.g. yy-m -> yy
-        value = parseTemporal(substring, token); // e.g. 18->2018, July->07
+      case NUMERIC_TEMPORAL:
+      case CHARACTER_TEMPORAL:
+        if (token.type == TokenType.NUMERIC_TEMPORAL) {
+          substring = getNextNumericSubstring(fullInput, index, token); // e.g. yy-m -> yy
+          value = parseNumericTemporal(substring, token); // e.g. 18->2018
+        } else {
+          substring = getNextCharacterSubstring(fullInput, index, token); //e.g. Marcharch -> March
+          value = parseCharacterTemporal(substring, token); // e.g. July->07
+        }
         try {
           ldt = ldt.with(token.temporalField, value);
         } catch (DateTimeException e){
@@ -844,9 +1033,13 @@ public class HiveSqlDateTimeFormatter implements Serializable {
         return s;
       }
     }
-    // next non-numeric character is a delimiter. Don't worry about AM/PM since we've already
-    // handled that case.
-    if ((token.type == TokenType.TEMPORAL || token.type == TokenType.TIMEZONE)
+    // if it's a character temporal, the first non-letter character is a delimiter
+    if (token.type == TokenType.CHARACTER_TEMPORAL && s.matches(".*[^A-Za-z].*")) {
+      s = s.split("[^A-Za-z]", 2)[0];
+
+    // if it's a numeric element, next non-numeric character is a delimiter. Don't worry about
+    // AM/PM since we've already handled that case.
+    } else if ((token.type == TokenType.NUMERIC_TEMPORAL || token.type == TokenType.TIMEZONE)
         && s.matches(".*\\D.*")) {
       s = s.split("\\D", 2)[0];
     }
@@ -857,7 +1050,7 @@ public class HiveSqlDateTimeFormatter implements Serializable {
   /**
    * Get the integer value of a temporal substring.
    */
-  private int parseTemporal(String substring, Token token){
+  private int parseNumericTemporal(String substring, Token token){
     checkFormatExact(substring, token);
 
     // exceptions to the rule
@@ -898,6 +1091,53 @@ public class HiveSqlDateTimeFormatter implements Serializable {
     }
   }
 
+  private static final String MONTH_REGEX;
+  static {
+    StringBuilder sb = new StringBuilder();
+    String or = "";
+    for (Month month : Month.values()) {
+      sb.append(or).append(month);
+      or = "|";
+    }
+    MONTH_REGEX = sb.toString();
+  }
+
+  private String getNextCharacterSubstring(String fullInput, int index, Token token) {
+    int end = index + token.length;
+    if (end > fullInput.length()) {
+      end = fullInput.length();
+    }
+    String substring = fullInput.substring(index, end);
+    if (token.length == 3) { //dy, mon
+      return substring;
+    }
+
+    Matcher matcher = Pattern.compile(MONTH_REGEX, Pattern.CASE_INSENSITIVE).matcher(substring);
+    if (matcher.find()) {
+      return substring.substring(0, matcher.end());
+    }
+    throw new IllegalArgumentException(
+        "Couldn't find " + token.string + " in substring " + substring + " at index " + index);
+  }
+
+  private int parseCharacterTemporal(String substring, Token token) {
+    try {
+      if (token.temporalField == ChronoField.MONTH_OF_YEAR) {
+        if (token.length == 3) {
+          return Month.from(MONTH_FORMATTER.parse(capitalize(substring))).getValue();
+        } else {
+          return Month.valueOf(substring.toUpperCase()).getValue();
+        }
+      }
+    } catch (Exception e) {
+      throw new IllegalArgumentException(
+          "Couldn't parse substring \"" + substring + "\" with token " + token + " to integer."
+              + "Pattern is " + pattern, e);
+    }
+    throw new IllegalArgumentException(
+        "token: (" + token + ") isn't a valid character temporal. Pattern is " + pattern);
+  }
+
   /**
    * @throws IllegalArgumentException if input length doesn't match expected (token) length
    */
@@ -986,7 +1226,8 @@ public class HiveSqlDateTimeFormatter implements Serializable {
     Token nextToken = tokens.get(tokens.indexOf(currentToken) + 1);
     pattern = pattern.toLowerCase();
     return (isTimeZoneToken(pattern) && TIME_ZONE_TOKENS.get(pattern) == nextToken.temporalUnit
-        || isTemporalToken(pattern) && TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField);
+        || isNumericTemporalToken(pattern) && NUMERIC_TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField
+        || isCharacterTemporalToken(pattern) && CHARACTER_TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField);
   }
 
   public String getPattern() {
@@ -999,4 +1240,8 @@ public class HiveSqlDateTimeFormatter implements Serializable {
   protected List<Token> getTokens() {
     return new ArrayList<>(tokens);
   }
+
+  private static String capitalize(String substring) {
+    return WordUtils.capitalize(substring.toLowerCase());
+  }
 }
diff --git a/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java b/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
index c0f9f7b..ff41534 100644
--- a/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
+++ b/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
@@ -91,9 +91,26 @@ public class TestHiveSqlDateTimeFormatter {
     verifyBadPattern("yyyy-mm-dd SSSSS AM", true);
     verifyBadPattern("yyyy-mm-dd MI SSSSS", true);
     verifyBadPattern("yyyy-mm-dd SS SSSSS", true);
+    verifyBadPattern("yyyy mm-MON dd", true);
+    verifyBadPattern("yyyy mm-MONTH dd", true);
+    verifyBadPattern("yyyy MON, month dd", true);
 
     verifyBadPattern("tzm", false);
     verifyBadPattern("tzh", false);
+
+    //ambiguous case for formatting
+    verifyBadPattern("MOnth", false);
+    verifyBadPattern("DaY", false);
+    verifyBadPattern("dAy", false);
+    verifyBadPattern("dY", false);
+
+    //illegal for parsing
+    verifyBadPattern("yyyy-mm-dd q", true);
+    verifyBadPattern("yyyy-mm-dd d", true);
+    verifyBadPattern("yyyy-mm-dd dy", true);
+    verifyBadPattern("yyyy-mm-dd day", true);
+    verifyBadPattern("yyyy-mm-dd w", true);
+    verifyBadPattern("yyyy-mm-dd ww", true);
   }
 
   @Test
@@ -106,6 +123,28 @@ public class TestHiveSqlDateTimeFormatter {
     checkFormatTs("HH12 P.M.", "2019-01-01 00:15:10", "12 A.M.");
     checkFormatTs("HH12 AM", "2019-01-01 12:15:10", "12 PM");
     checkFormatTs("YYYY-MM-DD HH12PM", "2017-05-05 00:00:00", "2017-05-05 12AM");
+
+    checkFormatTs("YYYY-MONTH-DD", "2019-01-01 00:00:00", "2019-JANUARY  -01"); //fill to length 9
+    checkFormatTs("YYYY-Month-DD", "2019-01-01 00:00:00", "2019-January  -01");
+    checkFormatTs("YYYY-month-DD", "2019-01-01 00:00:00", "2019-january  -01");
+    checkFormatTs("YYYY-MON-DD", "2019-01-01 00:00:00", "2019-JAN-01");
+    checkFormatTs("YYYY-Mon-DD", "2019-01-01 00:00:00", "2019-Jan-01");
+    checkFormatTs("YYYY-mon-DD", "2019-01-01 00:00:00", "2019-jan-01");
+
+    checkFormatTs("D: DAY", "2019-01-01 00:00:00", "3: TUESDAY  "); //fill to length 9
+    checkFormatTs("D: Day", "2019-01-02 00:00:00", "4: Wednesday");
+    checkFormatTs("D: day", "2019-01-03 00:00:00", "5: thursday ");
+    checkFormatTs("D: DY", "2019-01-04 00:00:00", "6: FRI");
+    checkFormatTs("D: Dy", "2019-01-05 00:00:00", "7: Sat");
+    checkFormatTs("D: dy", "2019-01-06 00:00:00", "1: sun");
+    checkFormatTs("D: DAY", "2019-01-07 00:00:00", "2: MONDAY   ");
+
+    checkFormatTs("YYYY-mm-dd: Q WW W", "2019-01-01 00:00:00", "2019-01-01: 1 01 1");
+    checkFormatTs("YYYY-mm-dd: Q WW W", "2019-01-07 00:00:00", "2019-01-07: 1 01 1");
+    checkFormatTs("YYYY-mm-dd: Q WW W", "2019-01-08 00:00:00", "2019-01-08: 1 02 2");
+    checkFormatTs("YYYY-mm-dd: Q WW W", "2019-03-31 00:00:00", "2019-03-31: 1 13 5");
+    checkFormatTs("YYYY-mm-dd: Q WW W", "2019-04-01 00:00:00", "2019-04-01: 2 13 1");
+    checkFormatTs("YYYY-mm-dd: Q WW W", "2019-12-31 00:00:00", "2019-12-31: 4 53 5");
   }
 
   private void checkFormatTs(String pattern, String input, String expectedOutput) {
@@ -188,6 +227,18 @@ public class TestHiveSqlDateTimeFormatter {
     checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM+0515", "2018-12-31 08:00:00");
     checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM0515", "2018-12-31 08:00:00");
     checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM-0515", "2018-12-31 08:00:00");
+
+    //MONTH, MON : case really doesn't matter
+    checkParseTimestamp("yyyy-MONTH-dd", "2018-FEBRUARY-28", "2018-02-28 00:00:00");
+    checkParseTimestamp("yyyy-Month-dd", "2018-february-28", "2018-02-28 00:00:00");
+    checkParseTimestamp("yyyy-month-dd", "2018-FEBRUARY-28", "2018-02-28 00:00:00");
+    checkParseTimestamp("yyyy-montH-dd", "2018-febRuary-28", "2018-02-28 00:00:00");
+    checkParseTimestamp("yyyy-MON-dd", "2018-FEB-28", "2018-02-28 00:00:00");
+    checkParseTimestamp("yyyy-moN-dd", "2018-FeB-28", "2018-02-28 00:00:00");
+    checkParseTimestamp("yyyy-mon-dd", "2018-FEB-28", "2018-02-28 00:00:00");
+    //letters and numbers are delimiters to each other, respectively
+    checkParseDate("yyyy-ddMONTH", "2018-4March", "2018-03-04");
+    checkParseDate("yyyy-MONTHdd", "2018-March4", "2018-03-04");
   }
 
   private int getFirstTwoDigits() {
@@ -229,6 +280,9 @@ public class TestHiveSqlDateTimeFormatter {
     checkParseDate("rrrr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03");
 
     checkParseDate("yyyy-mm-dd hh mi ss.ff7", "2018/01/01 2.2.2.55", "2018-01-01");
+
+    checkParseDate("dd/MonthT/yyyy", "31/AugustT/2020", "2020-08-31");
+    checkParseDate("dd/MonthT/yyyy", "31/MarchT/2020", "2020-03-31");
   }
 
   private void checkParseDate(String pattern, String input, String expectedOutput) {
@@ -247,6 +301,11 @@ public class TestHiveSqlDateTimeFormatter {
     verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +16:00"); //tzh out of range
     verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +14:60"); //tzm out of range
     verifyBadParseString("YYYY DDD", "2000 367"); //ddd out of range
+    verifyBadParseString("yyyy-month-dd", "2019-merch-23"); //invalid month of year
+    verifyBadParseString("yyyy-mon-dd", "2019-mer-23"); //invalid month of year
+    verifyBadParseString("yyyy-MON-dd", "2018-FEBRUARY-28"); // can't mix and match mon and month
+    verifyBadParseString("yyyy-MON-dd", "2018-FEBR-28");
+    verifyBadParseString("yyyy-MONTH-dd", "2018-FEB-28");
   }
 
   private void verifyBadPattern(String string, boolean forParsing) {
@@ -261,12 +320,16 @@ public class TestHiveSqlDateTimeFormatter {
 
   @Test
   public void testFm() {
-    //fm
     //year (019) becomes 19 even if pattern is yyy
     checkFormatTs("FMyyy-FMmm-dd FMHH12:MI:FMSS", "2019-01-01 01:01:01", "19-1-01 1:01:1");
     //ff[1-9] shouldn't be affected, because leading zeroes hold information
     checkFormatTs("FF5/FMFF5", "2019-01-01 01:01:01.0333", "03330/03330");
     checkFormatTs("FF/FMFF", "2019-01-01 01:01:01.0333", "0333/0333");
+    //omit trailing spaces from character temporal elements
+    checkFormatTs("YYYY-fmMonth-DD", "2019-01-01 00:00:00", "2019-January-01");
+    checkFormatTs("D: fmDAY", "2019-01-01 00:00:00", "3: TUESDAY");
+    checkFormatTs("D: fmDay", "2019-01-02 00:00:00", "4: Wednesday");
+
     //only affects temporals that immediately follow
     verifyBadPattern("yyy-mm-dd FM,HH12", false);
     verifyBadPattern("yyy-mm-dd FM,HH12", true);
@@ -301,6 +364,8 @@ public class TestHiveSqlDateTimeFormatter {
     checkParseTimestamp("FXDD-MM-YYYY hh12 A.M.", "01-01-1998 12 p.m.", "1998-01-01 12:00:00");
     verifyBadParseString("FXDD-MM-YYYY hh12 am", "01-01-1998 12 p.m.");
     verifyBadParseString("FXDD-MM-YYYY hh12 a.m.", "01-01-1998 12 pm");
+    //character temporals shouldn't have trailing spaces
+    checkParseTimestamp("FXDD-month-YYYY", "15-March-1998", "1998-03-15 00:00:00");
   }
 
   @Test
@@ -330,6 +395,9 @@ public class TestHiveSqlDateTimeFormatter {
     // non-numeric characters in text counts as a delimiter
     checkParseDate("yyyy\"m\"mm\"d\"dd", "19m1d1", LocalDate.now().getYear() / 100 + "19-01-01");
     checkParseDate("yyyy\"[\"mm\"]\"dd", "19[1]1", LocalDate.now().getYear() / 100 + "19-01-01");
+    // parse character temporals correctly
+    checkParseDate("dd/Month\"arch\"/yyyy", "31/Marcharch/2020", "2020-03-31");
+    checkParseDate("dd/Month\"ember\"/yyyy", "31/Decemberember/2020", "2020-12-31");
 
     // single quotes are separators and not text delimiters
     checkParseTimestamp("\"Y\'ear \"YYYY \' \"month\" MM \"day\" DD.\"!\"",