You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ku...@apache.org on 2019/07/29 08:59:15 UTC

[hive] branch master updated: HIVE-21578: Introduce SQL:2016 formats FM, FX, and nested strings (Karen Coppage via Marta Kuczora)

This is an automated email from the ASF dual-hosted git repository.

kuczoram pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 03aae63  HIVE-21578: Introduce SQL:2016 formats FM, FX, and nested strings (Karen Coppage via Marta Kuczora)
03aae63 is described below

commit 03aae630d9fd1f4a82ec1d6422de1312ba325f6a
Author: Karen Coppage <kc...@gmail.com>
AuthorDate: Mon Jul 29 10:58:47 2019 +0200

    HIVE-21578: Introduce SQL:2016 formats FM, FX, and nested strings (Karen Coppage via Marta Kuczora)
---
 .../format/datetime/HiveSqlDateTimeFormatter.java  | 334 ++++++++++++++-------
 .../datetime/TestHiveSqlDateTimeFormatter.java     | 170 +++++++----
 .../cast_datetime_with_sql_2016_format.q           |   5 +
 .../cast_datetime_with_sql_2016_format.q.out       |  15 +
 4 files changed, 356 insertions(+), 168 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java b/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
index 998e5a2..102647e 100644
--- a/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
+++ b/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
@@ -26,7 +26,6 @@ import org.apache.hadoop.hive.common.type.Timestamp;
 
 import java.io.Serializable;
 import java.time.DateTimeException;
-import java.time.Duration;
 import java.time.Instant;
 import java.time.LocalDateTime;
 import java.time.ZoneOffset;
@@ -38,13 +37,13 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
-import java.util.TimeZone;
 
 /**
  * Formatter using SQL:2016 datetime patterns.
  *
  * For all tokens:
- * - Patterns are case-insensitive, except AM/PM and T/Z. See these sections for more details.
+ * - Patterns are case-insensitive, except AM/PM, T/Z and nested strings. See these sections for more
+ *   details.
  * - For string to datetime conversion, no duplicate format tokens are allowed, including tokens
  *   that have the same meaning but different lengths ("Y" and "YY" conflict) or different
  *   behaviors ("RR" and "YY" conflict).
@@ -54,12 +53,16 @@ import java.util.TimeZone;
  *   "Y": 1, and so on), with some exceptions (see map SPECIAL_LENGTHS).
  * - For string to datetime conversion, inputs of fewer digits than expected are accepted if
  *   followed by a delimiter, e.g. format="YYYY-MM-DD", input="19-1-1", output=2019-01-01 00:00:00.
+ *   This is modified by format modifier FX (format exact). See FX for details.
  * - For datetime to string conversion, output is left padded with zeros, e.g. format="DD SSSSS",
  *   input=2019-01-01 00:00:03, output="01 00003".
+ *   This is modified by format modifier FM (fill mode). See FM for details.
  *
  *
  * Accepted format tokens:
- * Note: "|" means "or". "Delimiter" means a separator, tokens T or Z, or end of input.
+ * Note: - "|" means "or".
+ *       - "Delimiter" for numeric tokens means any non-numeric character or end of input.
+ *       - The words token and pattern are used interchangeably.
  *
  * A. Temporal tokens
  * YYYY
@@ -68,7 +71,6 @@ import java.util.TimeZone;
  *   from current date
  *   E.g. input=‘9-01-01’, pattern =‘YYYY-MM-DD’, current year=2020, output=2029-01-01 00:00:00
  *
- *
  * YYY
  * Last 3 digits of a year
  * - Gets the prefix digit from current date.
@@ -149,23 +151,22 @@ import java.util.TimeZone;
  * - In string to datetime conversion, fewer digits than expected are accepted if followed by a
  *   delimiter. "FF" acts like "FF9".
  *
- * AM|A.M.
- * Meridiem indicator or AM/PM
+ * AM|A.M.|PM|P.M.
+ * Meridiem indicator (or AM/PM)
  * - Datetime to string conversion:
  *   - AM and PM mean the exact same thing in the pattern.
  *     e.g. input=2019-01-01 20:00, format=“AM”, output=“PM”.
  *   - Retains the exact format (capitalization and length) provided in the pattern string. If p.m.
  *     is in the pattern, we expect a.m. or p.m. in the output; if AM is in the pattern, we expect
- *     AM or PM in the output.
+ *     AM or PM in the output. If the case is mixed (Am or aM) then the output case will match the
+ *     case of the pattern's first character (Am => AM, aM => am).
  * - String to datetime conversion:
  *   - Conflicts with HH24 and SSSSS.
- *   - It doesn’t matter which meridian indicator is in the pattern.
+ *   - It doesn't matter which meridian indicator is in the pattern.
  *     E.g. input="2019-01-01 11:00 p.m.", pattern="YYYY-MM-DD HH12:MI AM",
  *          output=2019-01-01 23:00:00
- *
- * PM|P.M.
- * Meridiem indicator
- * See AM|A.M.
+ *   - If FX is enabled, input length has to match the pattern's length. e.g. pattern=AM input=A.M.
+ *     is not accepted, but input=pm is.
  *
  * B. Time zone tokens
  * TZH
@@ -198,30 +199,69 @@ import java.util.TimeZone;
  *   by a time zone hour (tzh) token, it's a negative sign and not counted as a separator, UNLESS
  *   this is the only possible separator character in the separator substring (in which case it is
  *   not counted as the tzh's negative sign).
+ * - If the whole pattern string is delimited by single quotes (''), then the apostrophe separator
+ *   (') must be escaped with a single backslash: (\').
  *
  * D. ISO 8601 delimiters
- * T
+ * T|Z
  * ISO 8601 delimiter
  * - Serves as a delimiter.
  * - Function is to support formats like “YYYY-MM-DDTHH24:MI:SS.FF9Z”, “YYYY-MM-DD-HH24:MI:SSZ”
  * - For datetime to string conversion, output is always capitalized ("T"), even if lowercase ("t")
  *   is provided in the pattern.
+ * - For string to datetime conversion, case of input and pattern may differ.
  *
- * Z
- * ISO 8601 delimiter
- * See T.
+ * E. Nested strings (Text)
+ * – Surround with double quotes (") in the pattern. Note, if the whole pattern string is delimited
+ *   by double quotes, then the double quotes must be escaped with a single backslash: (\").
+ * - In order to include a literal double quote character within the nested string, the double
+ *   quote character must be escaped with a double backslash: (\\”). If the whole pattern string is
+ *   delimited by double quotes, then escape with a triple backslash: (\\\")
+ * - If the whole pattern string is delimited by single quotes, literal single
+ *   quotes/apostrophes (') in the nested string must be escaped with a single backslash: (\')
+ * - For datetime to string conversion, we simply include the string in the output, preserving the
+ *   characters' case.
+ * - For string to datetime conversion, the information is lost as the nested string won’t be part
+ *   of the resulting datetime object. However, the nested string has to match the related part of
+ *   the input string, except case may differ.
+ *
+ * F. Format modifier tokens
+ * FM
+ * Fill mode modifier
+ * - Default for string to datetime conversion. Inputs of fewer digits than expected are accepted
+ *   if followed by a delimiter:
+ *   e.g. format="YYYY-MM-DD", input="19-1-1", output=2019-01-01 00:00:00
+ * - For datetime to string conversion, padding (trailing spaces for text data and leading zeroes
+ *   for numeric data) is omitted for the temporal element immediately following an "FM" in the
+ *   pattern string. If the element following is not a temporal element (for example, if "FM"
+ *   precedes a separator), an error will be thrown.
+ *   e.g. pattern=FMHH12:MI:FMSS, input=2019-01-01 01:01:01, output=1:01:1
+ * - Modifies FX so that lack of leading zeroes are accepted for the element immediately following
+ *   an "FM" in the pattern string.
+ *
+ * FX
+ * Format exact modifier
+ * - Default for datetime to string conversion. Numeric output is left padded with zeros, and
+ *   non-numeric output except for AM/PM is right padded with spaces up to expected length.
+ * - Applies to the whole pattern.
+ * - Rules applied at string to datetime conversion:
+ *   - Separators must match exactly, down to the character.
+ *   - Numeric input can't omit leading zeroes. This rule does not apply to elements (tokens)
+ *     immediately preceded by an "FM."
+ *   - AM/PM input length has to match the pattern's length. e.g. pattern=AM input=A.M. is not
+ *     accepted, but input=pm is.
  */
 
 public class HiveSqlDateTimeFormatter implements Serializable {
 
   private static final int LONGEST_TOKEN_LENGTH = 5;
   private static final int LONGEST_ACCEPTED_PATTERN = 100; // for sanity's sake
-  private static final long MINUTES_PER_HOUR = 60;
   private static final int NANOS_MAX_LENGTH = 9;
   public static final int AM = 0;
   public static final int PM = 1;
   private String pattern;
   private List<Token> tokens = new ArrayList<>();
+  private boolean formatExact = false;
 
   private static final Map<String, TemporalField> TEMPORAL_TOKENS =
       ImmutableMap.<String, TemporalField>builder()
@@ -256,6 +296,9 @@ public class HiveSqlDateTimeFormatter implements Serializable {
   private static final List<String> VALID_SEPARATORS =
       ImmutableList.of("-", ":", " ", ".", "/", ";", "\'", ",");
 
+  private static final List<String> VALID_FORMAT_MODIFIERS =
+      ImmutableList.of("fm", "fx");
+
   private static final Map<String, Integer> SPECIAL_LENGTHS = ImmutableMap.<String, Integer>builder()
       .put("hh12", 2).put("hh24", 2).put("tzm", 2).put("am", 4).put("pm", 4)
       .put("ff1", 1).put("ff2", 2).put("ff3", 3).put("ff4", 4).put("ff5", 5)
@@ -269,7 +312,8 @@ public class HiveSqlDateTimeFormatter implements Serializable {
     TEMPORAL,
     SEPARATOR,
     TIMEZONE,
-    ISO_8601_DELIMITER
+    ISO_8601_DELIMITER,
+    TEXT
   }
 
   /**
@@ -281,29 +325,31 @@ public class HiveSqlDateTimeFormatter implements Serializable {
     TemporalUnit temporalUnit; // for type TIMEZONE e.g. ChronoUnit.HOURS
     String string; // pattern string, e.g. "yyy"
     int length; // length (e.g. YYY: 3, FF8: 8)
+    boolean fillMode; //FM, applies to type TEMPORAL only (later should apply to TIMEZONE as well)
 
-    public Token(TemporalField temporalField, String string, int length) {
-      this(TokenType.TEMPORAL, temporalField, null, string, length);
+    public Token(TemporalField temporalField, String string, int length, boolean fillMode) {
+      this(TokenType.TEMPORAL, temporalField, null, string, length, fillMode);
     }
 
-    public Token(TemporalUnit temporalUnit, String string, int length) {
-      this(TokenType.TIMEZONE, null, temporalUnit, string, length);
+    public Token(TemporalUnit temporalUnit, String string, int length, boolean fillMode) {
+      this(TokenType.TIMEZONE, null, temporalUnit, string, length, fillMode);
     }
 
     public Token(TokenType tokenType, String string) {
-      this(tokenType, null, null, string, string.length());
+      this(tokenType, null, null, string, string.length(), false);
     }
 
     public Token(TokenType tokenType, TemporalField temporalField, TemporalUnit temporalUnit,
-        String string, int length) {
+        String string, int length, boolean fillMode) {
       this.type = tokenType;
       this.temporalField = temporalField;
       this.temporalUnit = temporalUnit;
       this.string = string;
       this.length = length;
+      this.fillMode = fillMode;
     }
 
-    public String toString() {
+    @Override public String toString() {
       StringBuilder sb = new StringBuilder();
       sb.append(string);
       sb.append(" type: ");
@@ -317,6 +363,11 @@ public class HiveSqlDateTimeFormatter implements Serializable {
       }
       return sb.toString();
     }
+
+    public void removeBackslashes() {
+      string = string.replaceAll("\\\\", "");
+      length = string.length();
+    }
   }
 
   public HiveSqlDateTimeFormatter(String pattern, boolean forParsing) {
@@ -343,7 +394,7 @@ public class HiveSqlDateTimeFormatter implements Serializable {
   /**
    * Parse pattern to list of tokens.
    */
-  private String parsePatternToTokens(String pattern) {
+  private void parsePatternToTokens(String pattern) {
     tokens.clear();
     String originalPattern = pattern;
     pattern = pattern.toLowerCase();
@@ -352,6 +403,7 @@ public class HiveSqlDateTimeFormatter implements Serializable {
     int begin=0, end=0;
     String candidate;
     Token lastAddedToken = null;
+    boolean fillMode = false;
 
     while (begin < pattern.length()) {
       // if begin hasn't progressed, then pattern is not parsable
@@ -368,28 +420,44 @@ public class HiveSqlDateTimeFormatter implements Serializable {
         }
         candidate = pattern.substring(begin, end);
         if (isSeparator(candidate)) {
-          lastAddedToken = parseSeparatorToken(candidate, lastAddedToken);
+          lastAddedToken = parseSeparatorToken(candidate, lastAddedToken, fillMode, begin);
           begin = end;
           break;
         }
         if (isIso8601Delimiter(candidate)) {
-          lastAddedToken = parseIso8601DelimiterToken(candidate);
+          lastAddedToken = parseIso8601DelimiterToken(candidate, fillMode, begin);
           begin = end;
           break;
         }
         if (isTemporalToken(candidate)) {
-          lastAddedToken = parseTemporalToken(originalPattern, begin, candidate);
+          lastAddedToken = parseTemporalToken(originalPattern, candidate, fillMode, begin);
+          fillMode = false;
           begin = end;
           break;
         }
         if (isTimeZoneToken(candidate)) {
-          lastAddedToken = parseTimeZoneToken(candidate);
+          lastAddedToken = parseTimeZoneToken(candidate, fillMode, begin);
+          begin = end;
+          break;
+        }
+        if (isTextToken(candidate)) {
+          lastAddedToken = parseTextToken(originalPattern, fillMode, begin);
+          end = begin + lastAddedToken.length + 2; // skip 2 quotation marks
+          lastAddedToken.removeBackslashes();
+          begin = end;
+          break;
+        }
+        if (isFormatModifierToken(candidate)) {
+          checkFillModeOff(fillMode, begin);
+          fillMode = isFm(candidate);
+          if (!fillMode) {
+            formatExact = true;
+          }
           begin = end;
           break;
         }
       }
     }
-    return pattern;
   }
 
   private boolean isSeparator(String candidate) {
@@ -408,7 +476,21 @@ public class HiveSqlDateTimeFormatter implements Serializable {
     return TIME_ZONE_TOKENS.containsKey(pattern);
   }
 
-  private Token parseSeparatorToken(String candidate, Token lastAddedToken) {
+  private boolean isTextToken(String candidate) {
+    return candidate.startsWith("\"");
+  }
+
+  private boolean isFormatModifierToken(String candidate) {
+    return candidate.length() == 2 && VALID_FORMAT_MODIFIERS.contains(candidate);
+  }
+
+  private boolean isFm(String candidate) {
+    return "fm".equals(candidate);
+  }
+
+  private Token parseSeparatorToken(String candidate, Token lastAddedToken, boolean fillMode,
+      int begin) {
+    checkFillModeOff(fillMode, begin);
     // try to clump separator with immediately preceding separators (e.g. "---" counts as one
     // separator)
     if (lastAddedToken != null && lastAddedToken.type == TokenType.SEPARATOR) {
@@ -421,37 +503,61 @@ public class HiveSqlDateTimeFormatter implements Serializable {
     return lastAddedToken;
   }
 
-  private Token parseIso8601DelimiterToken(String candidate) {
+  private Token parseIso8601DelimiterToken(String candidate, boolean fillMode, int begin) {
+    checkFillModeOff(fillMode, begin);
     Token lastAddedToken;
     lastAddedToken = new Token(TokenType.ISO_8601_DELIMITER, candidate.toUpperCase());
     tokens.add(lastAddedToken);
     return lastAddedToken;
   }
 
-  private Token parseTemporalToken(String originalPattern, int begin, String candidate) {
-    Token lastAddedToken;
-
+  private Token parseTemporalToken(String originalPattern, String candidate, boolean fillMode,
+      int begin) {
     // for AM/PM, keep original case
     if (TEMPORAL_TOKENS.get(candidate) == ChronoField.AMPM_OF_DAY) {
       int subStringEnd = begin + candidate.length();
       candidate = originalPattern.substring(begin, subStringEnd);
     }
-    lastAddedToken = new Token(TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate,
-        getTokenStringLength(candidate.toLowerCase()));
+    Token lastAddedToken = new Token(TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate,
+        getTokenStringLength(candidate), fillMode);
     tokens.add(lastAddedToken);
     return lastAddedToken;
   }
 
-  private Token parseTimeZoneToken(String candidate) {
-    Token lastAddedToken;
-    lastAddedToken = new Token(TIME_ZONE_TOKENS.get(candidate), candidate,
-        getTokenStringLength(candidate));
+  private Token parseTimeZoneToken(String candidate, boolean fillMode, int begin) {
+    checkFillModeOff(fillMode, begin);
+    Token lastAddedToken = new Token(TIME_ZONE_TOKENS.get(candidate), candidate,
+        getTokenStringLength(candidate), false);
+    tokens.add(lastAddedToken);
+    return lastAddedToken;
+  }
+
+  private Token parseTextToken(String fullPattern, boolean fillMode, int begin) {
+    checkFillModeOff(fillMode, begin);
+    int end = begin;
+    do {
+      end = fullPattern.indexOf('\"', end + 1);
+      if (end == -1) {
+        throw new IllegalArgumentException(
+            "Missing closing double quote (\") opened at index " + begin);
+      }
+    // if double quote is escaped with a backslash, keep looking for the closing quotation mark
+    } while ("\\".equals(fullPattern.substring(end - 1, end)));
+    Token lastAddedToken = new Token(TokenType.TEXT, fullPattern.substring(begin + 1, end));
     tokens.add(lastAddedToken);
     return lastAddedToken;
   }
 
+  private void checkFillModeOff(boolean fillMode, int index) {
+    if (fillMode) {
+      throw new IllegalArgumentException("Bad date/time conversion pattern: " + pattern +
+          ". Error at index " + index + ": Fill mode modifier (FM) must "
+          + "be followed by a temporal token.");
+    }
+  }
+
   private int getTokenStringLength(String candidate) {
-    Integer length = SPECIAL_LENGTHS.get(candidate);
+    Integer length = SPECIAL_LENGTHS.get(candidate.toLowerCase());
     if (length != null) {
       return length;
     }
@@ -566,6 +672,7 @@ public class HiveSqlDateTimeFormatter implements Serializable {
         throw new IllegalArgumentException(token.string.toUpperCase() + " not a valid format for "
             + "timestamp or date.");
       case SEPARATOR:
+      case TEXT:
         outputString = token.string;
         break;
       case ISO_8601_DELIMITER:
@@ -608,47 +715,34 @@ public class HiveSqlDateTimeFormatter implements Serializable {
 
   /**
    * To match token.length, pad left with zeroes or truncate.
+   * Omit padding if fill mode (FM) modifier on.
    */
   private String padOrTruncateNumericTemporal(Token token, String output) {
-    if (output.length() < token.length) {
-      output = StringUtils.leftPad(output, token.length, '0'); // pad left
-    } else if (output.length() > token.length) {
-      if (token.temporalField == ChronoField.NANO_OF_SECOND) {
-        output = output.substring(0, token.length); // truncate right
-      } else {
-        output = output.substring(output.length() - token.length); // truncate left
+    //exception
+    if (token.temporalField == ChronoField.NANO_OF_SECOND) {
+      output = StringUtils.leftPad(output, 9, '0'); // pad left to length 9
+      if (output.length() > token.length) {
+        output = output.substring(0, token.length); // truncate right to size
       }
-    }
-    if (token.temporalField == ChronoField.NANO_OF_SECOND
-        && token.string.equalsIgnoreCase("ff")) {
-      output = output.replaceAll("0*$", ""); //truncate trailing 0's
-      if (output.isEmpty()) {
-        output = "0";
+      if (token.string.equalsIgnoreCase("ff")) {
+        output = output.replaceAll("0*$", ""); //truncate trailing 0's
       }
-    }
-    return output;
-  }
 
-  /**
-   * Left here for timestamp with local time zone.
-   */
-  private String formatTimeZone(TimeZone timeZone, LocalDateTime localDateTime, Token token) {
-    ZoneOffset offset = timeZone.toZoneId().getRules().getOffset(localDateTime);
-    Duration seconds = Duration.of(offset.get(ChronoField.OFFSET_SECONDS), ChronoUnit.SECONDS);
-    if (token.string.equals("tzh")) {
-      long hours = seconds.toHours();
-      String s = (hours >= 0) ? "+" : "-";
-      s += (Math.abs(hours) < 10) ? "0" : "";
-      s += String.valueOf(Math.abs(hours));
-      return s;
+      // the rule
     } else {
-      long minutes = Math.abs(seconds.toMinutes() % MINUTES_PER_HOUR);
-      String s = String.valueOf(minutes);
-      if (s.length() == 1) {
-        s = "0" + s;
+      if (output.length() < token.length && !token.fillMode) {
+        output = StringUtils.leftPad(output, token.length, '0'); // pad left
+      } else if (output.length() > token.length) {
+        output = output.substring(output.length() - token.length); // truncate left
+      }
+      if (token.fillMode) {
+        output = output.replaceAll("^0*", ""); //truncate leading 0's
       }
-      return s;
     }
+    if (output.isEmpty()) {
+      output = "0";
+    }
+    return output;
   }
 
   public Timestamp parseTimestamp(String fullInput){
@@ -661,7 +755,7 @@ public class HiveSqlDateTimeFormatter implements Serializable {
     for (Token token : tokens) {
       switch (token.type) {
       case TEMPORAL:
-        substring = getNextSubstring(fullInput, index, token); // e.g. yy-m -> yy
+        substring = getNextNumericSubstring(fullInput, index, token); // e.g. yy-m -> yy
         value = parseTemporal(substring, token); // e.g. 18->2018, July->07
         try {
           ldt = ldt.with(token.temporalField, value);
@@ -679,7 +773,7 @@ public class HiveSqlDateTimeFormatter implements Serializable {
             index++;
           }
           // parse next two digits
-          substring = getNextSubstring(fullInput, index, index + 2, token);
+          substring = getNextNumericSubstring(fullInput, index, index + 2, token);
           try {
             timeZoneHours = Integer.parseInt(substring);
           } catch (NumberFormatException e) {
@@ -691,7 +785,7 @@ public class HiveSqlDateTimeFormatter implements Serializable {
                 "\" to TZH because TZH range is -15 to +15. Pattern is " + pattern);
           }
         } else { // time zone minutes
-          substring = getNextSubstring(fullInput, index, token);
+          substring = getNextNumericSubstring(fullInput, index, token);
           try {
             timeZoneMinutes = Integer.parseInt(substring);
           } catch (NumberFormatException e) {
@@ -709,7 +803,8 @@ public class HiveSqlDateTimeFormatter implements Serializable {
         index = parseSeparator(fullInput, index, token);
         break;
       case ISO_8601_DELIMITER:
-        index = parseIso8601Delimiter(fullInput, index, token);
+      case TEXT:
+        index = parseText(fullInput, index, token);
       default:
         //do nothing
       }
@@ -733,11 +828,11 @@ public class HiveSqlDateTimeFormatter implements Serializable {
    * "YYYY" we expect the next 4 characters to be 4 numbers. However, if it is "976/" then we
    * return "976" because a separator cuts it short.)
    */
-  private String getNextSubstring(String s, int begin, Token token) {
-    return getNextSubstring(s, begin, begin + token.length, token);
+  private String getNextNumericSubstring(String s, int begin, Token token) {
+    return getNextNumericSubstring(s, begin, begin + token.length, token);
   }
 
-  private String getNextSubstring(String s, int begin, int end, Token token) {
+  private String getNextNumericSubstring(String s, int begin, int end, Token token) {
     if (end > s.length()) {
       end = s.length();
     }
@@ -749,16 +844,11 @@ public class HiveSqlDateTimeFormatter implements Serializable {
         return s;
       }
     }
-    for (String sep : VALID_SEPARATORS) {
-      if (s.contains(sep)) {
-        s = s.substring(0, s.indexOf(sep));
-      }
-    }
-    // TODO this will cause problems with DAY (for example, Thursday starts with T)
-    for (String delimiter : VALID_ISO_8601_DELIMITERS) {
-      if (s.toLowerCase().contains(delimiter)) {
-        s = s.substring(0, s.toLowerCase().indexOf(delimiter));
-      }
+    // next non-numeric character is a delimiter. Don't worry about AM/PM since we've already
+    // handled that case.
+    if ((token.type == TokenType.TEMPORAL || token.type == TokenType.TIMEZONE)
+        && s.matches(".*\\D.*")) {
+      s = s.split("\\D", 2)[0];
     }
 
     return s;
@@ -768,6 +858,8 @@ public class HiveSqlDateTimeFormatter implements Serializable {
    * Get the integer value of a temporal substring.
    */
   private int parseTemporal(String substring, Token token){
+    checkFormatExact(substring, token);
+
     // exceptions to the rule
     if (token.temporalField == ChronoField.AMPM_OF_DAY) {
       return substring.toLowerCase().startsWith("a") ? AM : PM;
@@ -807,6 +899,23 @@ public class HiveSqlDateTimeFormatter implements Serializable {
   }
 
   /**
+   * @throws IllegalArgumentException if input length doesn't match expected (token) length
+   */
+  private void checkFormatExact(String substring, Token token) {
+    // AM/PM defaults to length 4 but make it 2 for FX check if the pattern actually has length 2
+    if (formatExact && token.temporalField == ChronoField.AMPM_OF_DAY) {
+      token.length = token.string.length();
+    }
+    if (formatExact
+        && !(token.fillMode || token.temporalField == ChronoField.NANO_OF_SECOND)
+        && token.length != substring.length()) {
+      throw new IllegalArgumentException(
+          "FX on and expected token length " + token.length + " for token " + token.toString()
+              + " does not match substring (" + substring + ") length " + substring.length());
+    }
+  }
+
+  /**
    * Parse the next separator(s). At least one separator character is expected. Separator
    * characters are interchangeable.
    *
@@ -815,38 +924,45 @@ public class HiveSqlDateTimeFormatter implements Serializable {
    *     separator, UNLESS this is the only separator character in the separator substring (in
    *     which case it is not counted as the negative sign).
    *
-   * @throws IllegalArgumentException if separator is missing
+   * @throws IllegalArgumentException if separator is missing or if FX is on and separator doesn't
+   * match the expected separator pattern exactly
    */
-  private int parseSeparator(String fullInput, int index, Token token){
-    int separatorsFound = 0;
+  private int parseSeparator(String fullInput, int index, Token token) {
     int begin = index;
+    String s;
+    StringBuilder separatorsFound = new StringBuilder();
 
     while (index < fullInput.length() &&
         VALID_SEPARATORS.contains(fullInput.substring(index, index + 1))) {
+      s = fullInput.substring(index, index + 1);
       if (!isLastCharacterOfSeparator(index, fullInput)
-          || !("-".equals(fullInput.substring(index, index + 1)) && (nextTokenIs("tzh", token)))
-          || separatorsFound == 0) {
-        separatorsFound++;
+          || !("-".equals(s) && (nextTokenIs("tzh", token)))
+          || separatorsFound.length() == 0) {
+        separatorsFound.append(s);
       }
       index++;
     }
 
-    if (separatorsFound == 0) {
+    if (separatorsFound.length() == 0) {
       throw new IllegalArgumentException("Missing separator at index " + index);
     }
-    return begin + separatorsFound;
+    if (formatExact && !token.string.equals(separatorsFound.toString())) {
+      throw new IllegalArgumentException("FX on and separator found: " + separatorsFound.toString()
+          + " doesn't match expected separator: " + token.string);
+    }
+
+    return begin + separatorsFound.length();
   }
 
-  private int parseIso8601Delimiter(String fullInput, int index, Token token) {
+  private int parseText(String fullInput, int index, Token token) {
     String substring;
-    substring = fullInput.substring(index, index + 1);
-    if (token.string.equalsIgnoreCase(substring)) {
-      index++;
-    } else {
+    substring = fullInput.substring(index, index + token.length);
+    if (!token.string.equalsIgnoreCase(substring)) {
       throw new IllegalArgumentException(
-          "Missing ISO 8601 delimiter " + token.string.toUpperCase());
+          "Wrong input at index " + index + ": Expected: \"" + token.string + "\" but got: \""
+              + substring + "\" for token: " + token);
     }
-    return index;
+    return index + token.length;
   }
 
   /**
diff --git a/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java b/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
index ac57842..c0f9f7b 100644
--- a/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
+++ b/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
@@ -19,28 +19,15 @@
 package org.apache.hadoop.hive.common.format.datetime;
 
 import com.sun.tools.javac.util.List;
-import junit.framework.TestCase;
 import org.apache.hadoop.hive.common.type.Date;
 import org.apache.hadoop.hive.common.type.Timestamp;
 
 import java.time.LocalDate;
 import java.time.LocalDateTime;
-import java.time.ZoneOffset;
-import java.time.format.DateTimeFormatter;
-import java.time.format.DateTimeFormatterBuilder;
-import java.time.format.ResolverStyle;
-import java.time.format.SignStyle;
 import java.time.temporal.ChronoField;
 import java.time.temporal.TemporalField;
 import java.util.ArrayList;
 
-import static java.time.temporal.ChronoField.DAY_OF_MONTH;
-import static java.time.temporal.ChronoField.HOUR_OF_DAY;
-import static java.time.temporal.ChronoField.MINUTE_OF_HOUR;
-import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
-import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
-import static java.time.temporal.ChronoField.YEAR;
-
 import org.junit.Test;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
@@ -80,6 +67,7 @@ public class TestHiveSqlDateTimeFormatter {
 
   @Test
   public void testSetPatternWithBadPatterns() {
+    verifyBadPattern("", true);
     verifyBadPattern("eyyyy-ddd", true);
     verifyBadPattern("1yyyy-mm-dd", true);
 
@@ -122,7 +110,8 @@ public class TestHiveSqlDateTimeFormatter {
 
   private void checkFormatTs(String pattern, String input, String expectedOutput) {
     formatter = new HiveSqlDateTimeFormatter(pattern, false);
-    assertEquals(expectedOutput, formatter.format(toTimestamp(input)));
+    assertEquals("Format timestamp to string failed with pattern: " + pattern,
+        expectedOutput, formatter.format(Timestamp.valueOf(input)));
   }
 
   @Test
@@ -138,7 +127,8 @@ public class TestHiveSqlDateTimeFormatter {
 
   private void checkFormatDate(String pattern, String input, String expectedOutput) {
     formatter = new HiveSqlDateTimeFormatter(pattern, false);
-    assertEquals(expectedOutput, formatter.format(toDate(input)));
+    assertEquals("Format date to string failed with pattern: " + pattern,
+        expectedOutput, formatter.format(Date.valueOf(input)));
   }
 
   @Test
@@ -211,7 +201,8 @@ public class TestHiveSqlDateTimeFormatter {
 
   private void checkParseTimestamp(String pattern, String input, String expectedOutput) {
     formatter = new HiveSqlDateTimeFormatter(pattern, true);
-    assertEquals(toTimestamp(expectedOutput), formatter.parseTimestamp(input));
+    assertEquals("Parse string to timestamp failed. Pattern: " + pattern,
+        Timestamp.valueOf(expectedOutput), formatter.parseTimestamp(input));
   }
 
   @Test
@@ -242,16 +233,16 @@ public class TestHiveSqlDateTimeFormatter {
 
   private void checkParseDate(String pattern, String input, String expectedOutput) {
     formatter = new HiveSqlDateTimeFormatter(pattern, true);
-    assertEquals(toDate(expectedOutput), formatter.parseDate(input));
+    assertEquals("Parse string to date failed. Pattern: " + pattern,
+        Date.valueOf(expectedOutput), formatter.parseDate(input));
   }
 
   @Test
   public void testParseTimestampError() {
-    verifyBadParseString("yyyy", "2019-02-03");
     verifyBadParseString("yyyy-mm-dd  ", "2019-02-03"); //separator missing
     verifyBadParseString("yyyy-mm-dd", "2019-02-03..."); //extra separators
     verifyBadParseString("yyyy-mm-dd hh12:mi:ss", "2019-02-03 14:00:00"); //hh12 out of range
-    verifyBadParseString("yyyy-dddsssss", "2019-912345");
+    verifyBadParseString("yyyy-dddsssss", "2019-912345"); //ddd out of range
     verifyBadParseString("yyyy-mm-dd", "2019-13-23"); //mm out of range
     verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +16:00"); //tzh out of range
     verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +14:60"); //tzm out of range
@@ -261,12 +252,105 @@ public class TestHiveSqlDateTimeFormatter {
   private void verifyBadPattern(String string, boolean forParsing) {
     try {
       formatter = new HiveSqlDateTimeFormatter(string, forParsing);
-      fail();
+      fail("Bad pattern " + string + " should have thrown IllegalArgumentException but didn't");
     } catch (Exception e) {
-      assertEquals(e.getClass().getName(), IllegalArgumentException.class.getName());
+      assertEquals("Expected IllegalArgumentException, got another exception.",
+          e.getClass().getName(), IllegalArgumentException.class.getName());
     }
   }
 
+  @Test
+  public void testFm() {
+    //fm
+    //year (019) becomes 19 even if pattern is yyy
+    checkFormatTs("FMyyy-FMmm-dd FMHH12:MI:FMSS", "2019-01-01 01:01:01", "19-1-01 1:01:1");
+    //ff[1-9] shouldn't be affected, because leading zeroes hold information
+    checkFormatTs("FF5/FMFF5", "2019-01-01 01:01:01.0333", "03330/03330");
+    checkFormatTs("FF/FMFF", "2019-01-01 01:01:01.0333", "0333/0333");
+    //only affects temporals that immediately follow
+    verifyBadPattern("yyy-mm-dd FM,HH12", false);
+    verifyBadPattern("yyy-mm-dd FM,HH12", true);
+    verifyBadPattern("yyy-mm-dd HH12 tzh:fmtzm", true);
+    verifyBadPattern("FMFMyyy-mm-dd", true);
+    verifyBadPattern("FMFXDD-MM-YYYY ff2", true);
+  }
+
+  @Test
+  public void testFx() {
+    checkParseDate("FXDD-MM-YYYY", "01-01-1998", "1998-01-01");
+    checkParseTimestamp("FXDD-MM-YYYY hh12:mi:ss.ff", "15-01-1998 11:12:13.0", "1998-01-15 11:12:13");
+    //ff[1-9] are exempt
+    checkParseTimestamp("FXDD-MM-YYYY hh12:mi:ss.ff6", "01-01-1998 00:00:00.4440", "1998-01-01 00:00:00.444");
+    //fx can be anywhere in the pattern string
+    checkParseTimestamp("DD-MM-YYYYFX", "01-01-1998", "1998-01-01 00:00:00");
+    verifyBadParseString("DD-MM-YYYYFX", "1-01-1998");
+    //same separators required
+    verifyBadParseString("FXDD-MM-YYYY", "15/01/1998");
+    //no filling in zeroes or year digits
+    verifyBadParseString("FXDD-MM-YYYY", "1-01-1998");
+    verifyBadParseString("FXDD-MM-YYYY", "01-01-98");
+    //no leading or trailing whitespace
+    verifyBadParseString("FXDD-MM-YYYY", "   01-01-1998   ");
+    //enforce correct amount of leading zeroes
+    verifyBadParseString("FXyyyy-mm-dd hh24:miss", "2018-01-01 17:005");
+    verifyBadParseString("FXyyyy-mm-dd sssss", "2019-01-01 003");
+    //text case does not matter
+    checkParseTimestamp("\"the DATE is\" yyyy-mm-dd", "the date is 2018-01-01", "2018-01-01 00:00:00");
+    //AM/PM length has to match, but case doesn't
+    checkParseTimestamp("FXDD-MM-YYYY hh12 am", "01-01-1998 12 PM", "1998-01-01 12:00:00");
+    checkParseTimestamp("FXDD-MM-YYYY hh12 A.M.", "01-01-1998 12 p.m.", "1998-01-01 12:00:00");
+    verifyBadParseString("FXDD-MM-YYYY hh12 am", "01-01-1998 12 p.m.");
+    verifyBadParseString("FXDD-MM-YYYY hh12 a.m.", "01-01-1998 12 pm");
+  }
+
+  @Test
+  public void testFmFx() {
+    checkParseTimestamp("FXDD-FMMM-YYYY hh12 am", "01-1-1998 12 PM", "1998-01-01 12:00:00");
+    checkParseTimestamp("FXFMDD-MM-YYYY hh12 am", "1-01-1998 12 PM", "1998-01-01 12:00:00");
+    //ff[1-9] unaffected
+    checkParseTimestamp("FXFMDD-MM-YYYY FMff2", "1-01-1998 4", "1998-01-01 00:00:00.4");
+    checkParseTimestamp("FXFMDD-MM-YYYY ff2", "1-01-1998 4", "1998-01-01 00:00:00.4");
+  }
+
+  @Test
+  public void testText() {
+    // keep exact text upon format
+    checkFormatTs("hh24:mi \" Is \" hh12 PM\".\"", "2008-01-01 17:00:00", "17:00  Is  05 PM.");
+    checkFormatDate("\" `the _year_ is` \" yyyy\".\"", "2008-01-01", " `the _year_ is`  2008.");
+    // empty text strings work
+    checkParseTimestamp("\"\"yyyy\"\"-mm-dd\"\"", "2019-01-01", "2019-01-01 00:00:00");
+    checkParseDate("\"\"yyyy\"\"-mm-dd\"\"", "2019-01-01", "2019-01-01");
+    // Case doesn't matter upon parsing
+    checkParseTimestamp("\"Year \"YYYY \"month\" MM \"day\" DD.\"!\"",
+        "YEaR 3000 mOnTh 3 DaY 1...!", "3000-03-01 00:00:00");
+    checkParseDate("\"Year \"YYYY \"month\" MM \"day\" DD.\"!\"",
+        "YEaR 3000 mOnTh 3 DaY 1...!", "3000-03-01");
+    // Characters matter upon parsing
+    verifyBadParseString("\"Year! \"YYYY \"m\" MM \"d\" DD.\"!\"", "Year 3000 m 3 d 1,!");
+    // non-numeric characters in text counts as a delimiter
+    checkParseDate("yyyy\"m\"mm\"d\"dd", "19m1d1", LocalDate.now().getYear() / 100 + "19-01-01");
+    checkParseDate("yyyy\"[\"mm\"]\"dd", "19[1]1", LocalDate.now().getYear() / 100 + "19-01-01");
+
+    // single quotes are separators and not text delimiters
+    checkParseTimestamp("\"Y\'ear \"YYYY \' \"month\" MM \"day\" DD.\"!\"",
+        "Y'EaR 3000 ' mOnTh 3 DaY 1...!", "3000-03-01 00:00:00");
+    checkParseDate("\"Y\'ear \"YYYY \' \"month\" MM \"day\" DD.\"!\"",
+        "Y'EaR 3000 ' mOnTh 3 DaY 1...!", "3000-03-01");
+    // literal double quotes are escaped
+    checkFormatTs("\"the \\\"DATE\\\" is\" yyyy-mm-dd",
+        "2018-01-01 00:00:00", "the \"DATE\" is 2018-01-01");
+    checkFormatTs("\"\\\"\\\"\\\"\"", "2018-01-01 00:00:00", "\"\"\"");
+    checkParseTimestamp("\"the \\\"DATE\\\" is\" yyyy-mm-dd",
+        "the \"date\" is 2018-01-01", "2018-01-01 00:00:00");
+    // Check variations of apostrophes, literal and non-literal double quotes
+    checkParseTimestamp("yyyy'\"\"mm-dd", "2019\'01-01", "2019-01-01 00:00:00");
+    checkParseTimestamp("yyyy\'\"\"mm-dd", "2019\'01-01", "2019-01-01 00:00:00");
+    checkParseTimestamp("yyyy'\"\"mm-dd", "2019'01-01", "2019-01-01 00:00:00");
+    checkParseTimestamp("yyyy\'\"\"mm-dd", "2019'01-01", "2019-01-01 00:00:00");
+    checkParseTimestamp("yyyy\'\"\\\"\"mm-dd", "2019'\"01-01", "2019-01-01 00:00:00");
+    checkParseTimestamp("yyyy\'\"\\\"\"mm-dd", "2019\'\"01-01", "2019-01-01 00:00:00");
+  }
+
   /**
    * Verify pattern is parsed correctly.
    * Check:
@@ -279,7 +363,7 @@ public class TestHiveSqlDateTimeFormatter {
   }
 
   private void verifyPatternParsing(String pattern, int expectedPatternLength,
-      String expectedPattern, ArrayList<TemporalField> temporalFields)  {
+      String expectedPattern, ArrayList<TemporalField> temporalFields) {
     formatter = new HiveSqlDateTimeFormatter(pattern, false);
     assertEquals(temporalFields.size(), formatter.getTokens().size());
     StringBuilder sb = new StringBuilder();
@@ -296,46 +380,14 @@ public class TestHiveSqlDateTimeFormatter {
   }
 
   private void verifyBadParseString(String pattern, String string) {
+    formatter = new HiveSqlDateTimeFormatter(pattern, true);
     try {
-      formatter = new HiveSqlDateTimeFormatter(pattern, true);
       formatter.parseTimestamp(string);
-      fail();
+      fail("Parse string to timestamp should have failed.\nString: " + string + "\nPattern: "
+          + pattern);
     } catch (Exception e) {
-      assertEquals(e.getClass().getName(), IllegalArgumentException.class.getName());
+      assertEquals("Expected IllegalArgumentException, got another exception.",
+          e.getClass().getName(), IllegalArgumentException.class.getName());
     }
   }
-
-
-  // Methods that construct datetime objects using java.time.DateTimeFormatter.
-
-  public static Date toDate(String s) {
-    LocalDate localDate = LocalDate.parse(s, DATE_FORMATTER);
-    return Date.ofEpochDay((int) localDate.toEpochDay());
-  }
-
-  /**
-   * This is effectively the old Timestamp.valueOf method.
-   */
-  public static Timestamp toTimestamp(String s) {
-    LocalDateTime localDateTime = LocalDateTime.parse(s.trim(), TIMESTAMP_FORMATTER);
-    return Timestamp.ofEpochSecond(
-        localDateTime.toEpochSecond(ZoneOffset.UTC), localDateTime.getNano());
-  }
-
-  private static final DateTimeFormatter DATE_FORMATTER =
-      DateTimeFormatter.ofPattern("yyyy-MM-dd");
-  private static final DateTimeFormatter TIMESTAMP_FORMATTER;
-  static {
-    DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
-    builder.appendValue(YEAR, 1, 10, SignStyle.NORMAL).appendLiteral('-')
-        .appendValue(MONTH_OF_YEAR, 1, 2, SignStyle.NORMAL).appendLiteral('-')
-        .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NORMAL)
-        .optionalStart().appendLiteral(" ")
-        .appendValue(HOUR_OF_DAY, 1, 2, SignStyle.NORMAL).appendLiteral(':')
-        .appendValue(MINUTE_OF_HOUR, 1, 2, SignStyle.NORMAL).appendLiteral(':')
-        .appendValue(SECOND_OF_MINUTE, 1, 2, SignStyle.NORMAL)
-        .optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 1, 9, true).optionalEnd()
-        .optionalEnd();
-    TIMESTAMP_FORMATTER = builder.toFormatter().withResolverStyle(ResolverStyle.LENIENT);
-  }
 }
diff --git a/ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q b/ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q
index 5a2a6d7..8a2b975 100644
--- a/ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q
+++ b/ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q
@@ -47,6 +47,11 @@ explain from strings    select cast (s as date        format "yyy.mm.dd");
 explain from timestamp1 select cast (t as string      format "yyyy");
 explain from timestamp1 select cast (t as varchar(12) format "yyyy");
 
+--quotation marks, apostrophes, and literal quotation marks are handled correctly
+select
+cast ("2019\' \' '' 01-01" as timestamp format "yyyy\'' \'' mm-dd"),
+cast ("2019\"01-01" as timestamp format "yyyy\"\\\"\"mm-dd"),
+cast ('2019\' " \' 01-01' as timestamp format 'yyyy\' "\\" \'" mm-dd');
 
 --vectorized
 set hive.vectorized.execution.enabled=true;
diff --git a/ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out b/ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out
index e1fd341..f245121 100644
--- a/ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out
+++ b/ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out
@@ -345,6 +345,21 @@ STAGE PLANS:
             Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
             ListSink
 
+PREHOOK: query: select
+cast ("2019\' \' '' 01-01" as timestamp format "yyyy\'' \'' mm-dd"),
+cast ("2019\"01-01" as timestamp format "yyyy\"\\\"\"mm-dd"),
+cast ('2019\' " \' 01-01' as timestamp format 'yyyy\' "\\" \'" mm-dd')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select
+cast ("2019\' \' '' 01-01" as timestamp format "yyyy\'' \'' mm-dd"),
+cast ("2019\"01-01" as timestamp format "yyyy\"\\\"\"mm-dd"),
+cast ('2019\' " \' 01-01' as timestamp format 'yyyy\' "\\" \'" mm-dd')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+2019-01-01 00:00:00	2019-01-01 00:00:00	2019-01-01 00:00:00
 PREHOOK: query: from timestamp1 select cast (t as string      format "yyyy")
 PREHOOK: type: QUERY
 PREHOOK: Input: default@timestamp1