You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by fj...@apache.org on 2019/06/10 03:08:38 UTC

[incubator-druid] branch master updated: Support more JODA time formats (#7857)

This is an automated email from the ASF dual-hosted git repository.

fjy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-druid.git


The following commit(s) were added to refs/heads/master by this push:
     new c612ddc  Support more JODA time formats (#7857)
c612ddc is described below

commit c612ddc0f4a4588e97e94173638f8cda286f7c0c
Author: Vadim Ogievetsky <va...@gmail.com>
AuthorDate: Sun Jun 9 20:08:29 2019 -0700

    Support more JODA time formats (#7857)
---
 .../src/components/header-bar/header-bar.tsx       |  2 -
 .../__snapshots__/joda-to-regexp.spec.ts.snap      | 15 ++++
 web-console/src/utils/druid-time.ts                | 51 +++++++-----
 web-console/src/utils/example-ingestion-spec.ts    | 97 ----------------------
 web-console/src/utils/ingestion-spec.tsx           | 15 +++-
 web-console/src/utils/joda-to-regexp.spec.ts       | 37 +++++++++
 web-console/src/utils/joda-to-regexp.ts            | 77 +++++++++++++++++
 7 files changed, 171 insertions(+), 123 deletions(-)

diff --git a/web-console/src/components/header-bar/header-bar.tsx b/web-console/src/components/header-bar/header-bar.tsx
index a229a8e..f48fab3 100644
--- a/web-console/src/components/header-bar/header-bar.tsx
+++ b/web-console/src/components/header-bar/header-bar.tsx
@@ -30,13 +30,11 @@ import {
   Position
 } from '@blueprintjs/core';
 import { IconNames } from '@blueprintjs/icons';
-import classNames from 'classnames';
 import React from 'react';
 
 import { AboutDialog } from '../../dialogs/about-dialog/about-dialog';
 import { CoordinatorDynamicConfigDialog } from '../../dialogs/coordinator-dynamic-config/coordinator-dynamic-config';
 import { OverlordDynamicConfigDialog } from '../../dialogs/overlord-dynamic-config/overlord-dynamic-config';
-import { getWikipediaSpec } from '../../utils/example-ingestion-spec';
 import {
   DRUID_DOCS,
   DRUID_GITHUB,
diff --git a/web-console/src/utils/__snapshots__/joda-to-regexp.spec.ts.snap b/web-console/src/utils/__snapshots__/joda-to-regexp.spec.ts.snap
new file mode 100644
index 0000000..7ee4beb
--- /dev/null
+++ b/web-console/src/utils/__snapshots__/joda-to-regexp.spec.ts.snap
@@ -0,0 +1,15 @@
+// Jest Snapshot v1, https://goo.gl/fbAQLP
+
+exports[`jodaFormatToRegExp works for common formats 1`] = `"/^(?:3[0-1]|[12][0-9]|[1-9])\\\\/(?:1[0-2]|[1-9])\\\\/[0-9]{4}$/i"`;
+
+exports[`jodaFormatToRegExp works for common formats 2`] = `"/^(?:1[0-2]|0[1-9])\\\\/(?:3[0-1]|[12][0-9]|0[1-9])\\\\/[0-9]{4}$/i"`;
+
+exports[`jodaFormatToRegExp works for common formats 3`] = `"/^(?:1[0-2]|[1-9])\\\\/(?:3[0-1]|[12][0-9]|[1-9])\\\\/[0-9]{2}$/i"`;
+
+exports[`jodaFormatToRegExp works for common formats 4`] = `"/^(?:3[0-1]|[12][0-9]|[1-9])-(?:1[0-2]|[1-9])-[0-9]{4} (?:1[0-2]|0[1-9]):[0-5][0-9]:[0-5][0-9] [ap]m$/i"`;
+
+exports[`jodaFormatToRegExp works for common formats 5`] = `"/^(?:1[0-2]|0[1-9])\\\\/(?:3[0-1]|[12][0-9]|0[1-9])\\\\/[0-9]{4} (?:1[0-2]|0[1-9]):[0-5][0-9]:[0-5][0-9] [ap]m$/i"`;
+
+exports[`jodaFormatToRegExp works for common formats 6`] = `"/^[0-9]{4}-(?:1[0-2]|0[1-9])-(?:3[0-1]|[12][0-9]|0[1-9]) (?:2[0-3]|1[0-9]|0[0-9]):[0-5][0-9]:[0-5][0-9]$/i"`;
+
+exports[`jodaFormatToRegExp works for common formats 7`] = `"/^[0-9]{4}-(?:1[0-2]|0[1-9])-(?:3[0-1]|[12][0-9]|0[1-9]) (?:2[0-3]|1[0-9]|0[0-9]):[0-5][0-9]:[0-5][0-9].[0-9]{1,3}$/i"`;
diff --git a/web-console/src/utils/druid-time.ts b/web-console/src/utils/druid-time.ts
index 44b1761..a3a4520 100644
--- a/web-console/src/utils/druid-time.ts
+++ b/web-console/src/utils/druid-time.ts
@@ -16,14 +16,33 @@
  * limitations under the License.
  */
 
-export type DruidTimestampFormat = 'iso' | 'millis' | 'posix' | 'auto' | 'd/M/yyyy' | 'dd-M-yyyy hh:mm:ss a' |
-  'MM/dd/YYYY' | 'M/d/YY' | 'MM/dd/YYYY hh:mm:ss a' | 'YYYY-MM-dd HH:mm:ss' | 'YYYY-MM-dd HH:mm:ss.S';
+import { jodaFormatToRegExp } from './joda-to-regexp';
 
-export const TIMESTAMP_FORMAT_VALUES: DruidTimestampFormat[] = [
-  'iso', 'millis', 'posix', 'MM/dd/YYYY hh:mm:ss a', 'MM/dd/YYYY', 'M/d/YY', 'd/M/yyyy',
-  'YYYY-MM-dd HH:mm:ss', 'YYYY-MM-dd HH:mm:ss.S'
+export const BASIC_FORMAT_VALUES: string[] = [ // named formats rather than Joda patterns; timeFormatMatches tests iso via ISO_MATCHER
+  'iso',
+  'millis',
+  'posix'
 ];
 
+export const DATE_FORMAT_VALUES: string[] = [ // Joda patterns that describe a calendar date only
+  'dd/MM/yyyy',
+  'MM/dd/yyyy',
+  'd/M/yy',
+  'M/d/yy',
+  'd/M/yyyy',
+  'M/d/yyyy'
+];
+
+export const DATE_TIME_FORMAT_VALUES: string[] = [ // Joda patterns that describe a date followed by a time of day
+  'd/M/yyyy H:mm:ss',
+  'M/d/yyyy H:mm:ss',
+  'MM/dd/yyyy hh:mm:ss a',
+  'yyyy-MM-dd HH:mm:ss',
+  'yyyy-MM-dd HH:mm:ss.S'
+];
+
+const ALL_FORMAT_VALUES: string[] = BASIC_FORMAT_VALUES.concat(DATE_FORMAT_VALUES, DATE_TIME_FORMAT_VALUES); // detection order: basic, then date-only, then date-time
+
 const EXAMPLE_DATE_ISO = '2015-10-29T23:00:00.000Z';
 const EXAMPLE_DATE_VALUE = Date.parse(EXAMPLE_DATE_ISO);
 const MIN_MILLIS = 3.15576e11; // 3 years in millis, so Tue Jan 01 1980
@@ -33,18 +52,9 @@ const MAX_POSIX = MAX_MILLIS / 1000;
 
 // copied from http://goo.gl/0ejHHW with small tweak to make dddd not pass on its own
 // tslint:disable-next-line:max-line-length
-export const ISO_MATCHER = new RegExp(/^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))(T((([01]\d|2[0-3])((:?)[0-5]\d)?|24:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)$/);
-export const JODA_TO_REGEXP_LOOKUP: Record<string, RegExp> = {
-  'd/M/yyyy': /^[12]?\d\/1?\d\/\d\d\d\d$/,
-  'MM/dd/YYYY': /^\d\d\/\d\d\/\d\d\d\d$/,
-  'M/d/YY': /^1?\d\/[12]?\d\/\d\d$/,
-  'd-M-yyyy hh:mm:ss a': /^[12]?\d-1?\d-\d\d\d\d \d\d:\d\d:\d\d [ap]m$/i,
-  'MM/dd/YYYY hh:mm:ss a' : /^\d\d\/\d\d\/\d\d\d\d \d\d:\d\d:\d\d [ap]m$/i,
-  'YYYY-MM-dd HH:mm:ss' : /^\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d$/,
-  'YYYY-MM-dd HH:mm:ss.S': /^\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\.\d\d\d$/
-};
+export const ISO_MATCHER = /^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))(T((([01]\d|2[0-3])((:?)[0-5]\d)?|24:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)$/;
 
-export function timeFormatMatches(format: DruidTimestampFormat, value: string | number): boolean {
+export function timeFormatMatches(format: string, value: string | number): boolean {
   if (format === 'iso') {
     return ISO_MATCHER.test(String(value));
   }
@@ -59,14 +69,11 @@ export function timeFormatMatches(format: DruidTimestampFormat, value: string |
     return MIN_POSIX < absValue && absValue < MAX_POSIX;
   }
 
-  const formatRegexp = JODA_TO_REGEXP_LOOKUP[format];
-  if (!formatRegexp) throw new Error(`unknown Druid format ${format}`);
-
-  return formatRegexp.test(String(value));
+  return jodaFormatToRegExp(format).test(String(value));
 }
 
-export function possibleDruidFormatForValues(values: any[]): DruidTimestampFormat | null {
-  return TIMESTAMP_FORMAT_VALUES.filter(format => {
+export function possibleDruidFormatForValues(values: any[]): string | null { // first known format that every value matches, or null if none does
+  return ALL_FORMAT_VALUES.filter(format => { // earlier list entries win when several formats match; empty values match all, so the first entry is returned
     return values.every(value => timeFormatMatches(format, value));
   })[0] || null;
 }
diff --git a/web-console/src/utils/example-ingestion-spec.ts b/web-console/src/utils/example-ingestion-spec.ts
deleted file mode 100644
index 0fd7614..0000000
--- a/web-console/src/utils/example-ingestion-spec.ts
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import { IngestionSpec } from './ingestion-spec';
-
-export function getWikipediaSpec(dataSourceSuffix: string): IngestionSpec {
-  return {
-    'type': 'index',
-    'dataSchema': {
-      'dataSource': 'wikipedia-' + dataSourceSuffix,
-      'parser': {
-        'type': 'string',
-        'parseSpec': {
-          'format': 'json',
-          'dimensionsSpec': {
-            'dimensions': [
-              'isRobot',
-              'channel',
-              'flags',
-              'isUnpatrolled',
-              'page',
-              'diffUrl',
-              {
-                'name': 'added',
-                'type': 'long'
-              },
-              'comment',
-              {
-                'name': 'commentLength',
-                'type': 'long'
-              },
-              'isNew',
-              'isMinor',
-              {
-                'name': 'delta',
-                'type': 'long'
-              },
-              'isAnonymous',
-              'user',
-              {
-                'name': 'deltaBucket',
-                'type': 'long'
-              },
-              {
-                'name': 'deleted',
-                'type': 'long'
-              },
-              'namespace'
-            ]
-          },
-          'timestampSpec': {
-            'column': 'timestamp',
-            'format': 'iso'
-          }
-        }
-      },
-      'granularitySpec': {
-        'type': 'uniform',
-        'segmentGranularity': 'DAY',
-        'rollup': false,
-        'queryGranularity': 'none'
-      },
-      'metricsSpec': []
-    },
-    'ioConfig': {
-      'type': 'index',
-      'firehose': {
-        'fetchTimeout': 300000,
-        'type': 'http',
-        'uris': [
-          'https://static.imply.io/data/wikipedia.json.gz'
-        ]
-      }
-    },
-    'tuningConfig': {
-      'type': 'index',
-      'forceExtendableShardSpecs': true,
-      'maxParseExceptions': 100,
-      'maxSavedParseExceptions': 10
-    }
-  };
-}
diff --git a/web-console/src/utils/ingestion-spec.tsx b/web-console/src/utils/ingestion-spec.tsx
index e9d92b5..8a29191 100644
--- a/web-console/src/utils/ingestion-spec.tsx
+++ b/web-console/src/utils/ingestion-spec.tsx
@@ -23,7 +23,7 @@ import React from 'react';
 import { Field } from '../components/auto-form/auto-form';
 import { ExternalLink } from '../components/external-link/external-link';
 
-import { TIMESTAMP_FORMAT_VALUES } from './druid-time';
+import { BASIC_FORMAT_VALUES, DATE_FORMAT_VALUES, DATE_TIME_FORMAT_VALUES } from './druid-time';
 import { deepGet, deepSet } from './object-change';
 
 // These constants are used to make sure that they are not constantly recreated thrashing the pure components
@@ -278,7 +278,18 @@ const TIMESTAMP_SPEC_FORM_FIELDS: Field<TimestampSpec>[] = [
     name: 'format',
     type: 'string',
     defaultValue: 'auto',
-    suggestions: ['auto'].concat(TIMESTAMP_FORMAT_VALUES),
+    suggestions: [
+      'auto',
+      ...BASIC_FORMAT_VALUES,
+      {
+        group: 'Date and time formats',
+        suggestions: DATE_TIME_FORMAT_VALUES
+      },
+      {
+        group: 'Date only formats',
+        suggestions: DATE_FORMAT_VALUES
+      }
+    ],
     isDefined: (timestampSpec: TimestampSpec) => isColumnTimestampSpec(timestampSpec),
     info: <p>
       Please specify your timestamp format by using the suggestions menu or typing in a <ExternalLink href="https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html">format string</ExternalLink>.
diff --git a/web-console/src/utils/joda-to-regexp.spec.ts b/web-console/src/utils/joda-to-regexp.spec.ts
new file mode 100644
index 0000000..5a44280
--- /dev/null
+++ b/web-console/src/utils/joda-to-regexp.spec.ts
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { jodaFormatToRegExp } from './joda-to-regexp';
+
+describe('jodaFormatToRegExp', () => {
+  it('works for common formats', () => { // pins the generated pattern text for each format through Jest snapshots
+    expect(jodaFormatToRegExp('d/M/yyyy').toString()).toMatchSnapshot();
+    expect(jodaFormatToRegExp('MM/dd/YYYY').toString()).toMatchSnapshot();
+    expect(jodaFormatToRegExp('M/d/YY').toString()).toMatchSnapshot();
+    expect(jodaFormatToRegExp('d-M-yyyy hh:mm:ss a').toString()).toMatchSnapshot();
+    expect(jodaFormatToRegExp('MM/dd/YYYY hh:mm:ss a' ).toString()).toMatchSnapshot();
+    expect(jodaFormatToRegExp('YYYY-MM-dd HH:mm:ss' ).toString()).toMatchSnapshot();
+    expect(jodaFormatToRegExp('YYYY-MM-dd HH:mm:ss.S').toString()).toMatchSnapshot();
+  });
+
+  it('matches dates when needed', () => { // one sample value: accepted by its own format, rejected by a different one
+    expect(jodaFormatToRegExp('d-M-yyyy hh:mm:ss a').test('26-4-1986 01:23:40 am')).toEqual(true);
+    expect(jodaFormatToRegExp('YYYY-MM-dd HH:mm:ss.S').test('26-4-1986 01:23:40 am')).toEqual(false);
+  });
+
+});
diff --git a/web-console/src/utils/joda-to-regexp.ts b/web-console/src/utils/joda-to-regexp.ts
new file mode 100644
index 0000000..d3d86b4
--- /dev/null
+++ b/web-console/src/utils/joda-to-regexp.ts
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Refer to https://www.joda.org/joda-time/key_format.html
+const TEXT = '\\w+'; // any run of word characters; used wherever Joda emits a name
+const NUMBER_2_DIGIT = '[0-9]{2}';
+const NUMBER_4_DIGIT = '[0-9]{4}';
+const JODA_FRAGMENT_TO_REG_EXP_STRING: Record<string, string> = { // Joda pattern fragment -> regex source; only the letter counts listed here are supported
+  C: '[0-9]{1,2}', // century of era
+  CC: NUMBER_2_DIGIT,
+  YY: NUMBER_2_DIGIT, // year of era
+  YYYY: NUMBER_4_DIGIT,
+
+  xx: NUMBER_2_DIGIT, // weekyear
+  xxxx: NUMBER_4_DIGIT,
+  w: '[0-9]{1,2}', // week of weekyear
+  ww: NUMBER_2_DIGIT,
+  e: '[0-7]', // day of week as a number
+  E: TEXT, // day of week as text
+  EEEE: TEXT,
+
+  yy: NUMBER_2_DIGIT, // year
+  yyyy: NUMBER_4_DIGIT,
+  D: '[0-9]{1,3}', // day of year
+  DD: '[0-9]{2,3}',
+  DDD: '[0-9]{3}',
+  M: '(?:1[0-2]|[1-9])', // month of year as a number, 1-12
+  MM: '(?:1[0-2]|0[1-9])',
+  MMM: TEXT, // month of year as text
+  MMMM: TEXT,
+  d: '(?:3[0-1]|[12][0-9]|[1-9])', // day of month, 1-31
+  dd: '(?:3[0-1]|[12][0-9]|0[1-9])',
+
+  a: '[ap]m', // halfday of day marker (am or pm)
+  K: '(?:1[01]|[0-9])', // hour of halfday, 0-11
+  KK: '(?:1[01]|0[0-9])',
+  h: '(?:1[0-2]|[1-9])', // clockhour of halfday, 1-12
+  hh: '(?:1[0-2]|0[1-9])',
+
+  H: '(?:2[0-3]|1[0-9]|[0-9])', // hour of day, 0-23
+  HH: '(?:2[0-3]|1[0-9]|0[0-9])',
+  k: '(?:2[0-4]|1[0-9]|[1-9])', // clockhour of day, 1-24
+  kk: '(?:2[0-4]|1[0-9]|0[1-9])',
+  m: '(?:[1-5][0-9]|[0-9])', // minute of hour, 0-59
+  mm: '[0-5][0-9]',
+  s: '(?:[1-5][0-9]|[0-9])', // second of minute, 0-59
+  ss: '[0-5][0-9]',
+  S: '[0-9]{1,3}', // fraction of second
+  SS: '[0-9]{2,3}',
+  SSS: '[0-9]{3}',
+  z: TEXT, // time zone as text
+  Z: TEXT // time zone offset or id; NOTE(review): \w+ cannot match a signed offset such as -0800 - confirm intent
+};
+
+export function jodaFormatToRegExp(jodaFormat: string): RegExp { // converts a Joda format string into an anchored, case-insensitive RegExp
+  const regExpStr = jodaFormat.replace(/([a-zA-Z])\1{0,3}/g, jodaPart => { // visits each run of 1-4 identical letters
+    const re = JODA_FRAGMENT_TO_REG_EXP_STRING[jodaPart];
+    if (!re) throw new Error(`could not convert ${jodaPart} to RegExp`); // the run has no entry in the lookup table
+    return re;
+  });
+  return new RegExp(`^${regExpStr}$`, 'i'); // non-letter characters are copied unescaped, so a dot in the format matches any character
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org