You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by fj...@apache.org on 2019/06/10 03:08:38 UTC
[incubator-druid] branch master updated: Support more JODA time formats (#7857)
This is an automated email from the ASF dual-hosted git repository.
fjy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-druid.git
The following commit(s) were added to refs/heads/master by this push:
new c612ddc Support more JODA time formats (#7857)
c612ddc is described below
commit c612ddc0f4a4588e97e94173638f8cda286f7c0c
Author: Vadim Ogievetsky <va...@gmail.com>
AuthorDate: Sun Jun 9 20:08:29 2019 -0700
Support more JODA time formats (#7857)
---
.../src/components/header-bar/header-bar.tsx | 2 -
.../__snapshots__/joda-to-regexp.spec.ts.snap | 15 ++++
web-console/src/utils/druid-time.ts | 51 +++++++-----
web-console/src/utils/example-ingestion-spec.ts | 97 ----------------------
web-console/src/utils/ingestion-spec.tsx | 15 +++-
web-console/src/utils/joda-to-regexp.spec.ts | 37 +++++++++
web-console/src/utils/joda-to-regexp.ts | 77 +++++++++++++++++
7 files changed, 171 insertions(+), 123 deletions(-)
diff --git a/web-console/src/components/header-bar/header-bar.tsx b/web-console/src/components/header-bar/header-bar.tsx
index a229a8e..f48fab3 100644
--- a/web-console/src/components/header-bar/header-bar.tsx
+++ b/web-console/src/components/header-bar/header-bar.tsx
@@ -30,13 +30,11 @@ import {
Position
} from '@blueprintjs/core';
import { IconNames } from '@blueprintjs/icons';
-import classNames from 'classnames';
import React from 'react';
import { AboutDialog } from '../../dialogs/about-dialog/about-dialog';
import { CoordinatorDynamicConfigDialog } from '../../dialogs/coordinator-dynamic-config/coordinator-dynamic-config';
import { OverlordDynamicConfigDialog } from '../../dialogs/overlord-dynamic-config/overlord-dynamic-config';
-import { getWikipediaSpec } from '../../utils/example-ingestion-spec';
import {
DRUID_DOCS,
DRUID_GITHUB,
diff --git a/web-console/src/utils/__snapshots__/joda-to-regexp.spec.ts.snap b/web-console/src/utils/__snapshots__/joda-to-regexp.spec.ts.snap
new file mode 100644
index 0000000..7ee4beb
--- /dev/null
+++ b/web-console/src/utils/__snapshots__/joda-to-regexp.spec.ts.snap
@@ -0,0 +1,15 @@
+// Jest Snapshot v1, https://goo.gl/fbAQLP
+
+exports[`jodaFormatToRegExp works for common formats 1`] = `"/^(?:3[0-1]|[12][0-9]|[1-9])\\\\/(?:1[0-2]|[1-9])\\\\/[0-9]{4}$/i"`;
+
+exports[`jodaFormatToRegExp works for common formats 2`] = `"/^(?:1[0-2]|0[1-9])\\\\/(?:3[0-1]|[12][0-9]|0[1-9])\\\\/[0-9]{4}$/i"`;
+
+exports[`jodaFormatToRegExp works for common formats 3`] = `"/^(?:1[0-2]|[1-9])\\\\/(?:3[0-1]|[12][0-9]|[1-9])\\\\/[0-9]{2}$/i"`;
+
+exports[`jodaFormatToRegExp works for common formats 4`] = `"/^(?:3[0-1]|[12][0-9]|[1-9])-(?:1[0-2]|[1-9])-[0-9]{4} (?:1[0-2]|0[1-9]):[0-5][0-9]:[0-5][0-9] [ap]m$/i"`;
+
+exports[`jodaFormatToRegExp works for common formats 5`] = `"/^(?:1[0-2]|0[1-9])\\\\/(?:3[0-1]|[12][0-9]|0[1-9])\\\\/[0-9]{4} (?:1[0-2]|0[1-9]):[0-5][0-9]:[0-5][0-9] [ap]m$/i"`;
+
+exports[`jodaFormatToRegExp works for common formats 6`] = `"/^[0-9]{4}-(?:1[0-2]|0[1-9])-(?:3[0-1]|[12][0-9]|0[1-9]) (?:2[0-3]|1[0-9]|0[0-9]):[0-5][0-9]:[0-5][0-9]$/i"`;
+
+exports[`jodaFormatToRegExp works for common formats 7`] = `"/^[0-9]{4}-(?:1[0-2]|0[1-9])-(?:3[0-1]|[12][0-9]|0[1-9]) (?:2[0-3]|1[0-9]|0[0-9]):[0-5][0-9]:[0-5][0-9].[0-9]{1,3}$/i"`;
diff --git a/web-console/src/utils/druid-time.ts b/web-console/src/utils/druid-time.ts
index 44b1761..a3a4520 100644
--- a/web-console/src/utils/druid-time.ts
+++ b/web-console/src/utils/druid-time.ts
@@ -16,14 +16,33 @@
* limitations under the License.
*/
-export type DruidTimestampFormat = 'iso' | 'millis' | 'posix' | 'auto' | 'd/M/yyyy' | 'dd-M-yyyy hh:mm:ss a' |
- 'MM/dd/YYYY' | 'M/d/YY' | 'MM/dd/YYYY hh:mm:ss a' | 'YYYY-MM-dd HH:mm:ss' | 'YYYY-MM-dd HH:mm:ss.S';
+import { jodaFormatToRegExp } from './joda-to-regexp';
-export const TIMESTAMP_FORMAT_VALUES: DruidTimestampFormat[] = [
- 'iso', 'millis', 'posix', 'MM/dd/YYYY hh:mm:ss a', 'MM/dd/YYYY', 'M/d/YY', 'd/M/yyyy',
- 'YYYY-MM-dd HH:mm:ss', 'YYYY-MM-dd HH:mm:ss.S'
+export const BASIC_FORMAT_VALUES: string[] = [
+ 'iso',
+ 'millis',
+ 'posix'
];
+export const DATE_FORMAT_VALUES: string[] = [
+ 'dd/MM/yyyy',
+ 'MM/dd/yyyy',
+ 'd/M/yy',
+ 'M/d/yy',
+ 'd/M/yyyy',
+ 'M/d/yyyy'
+];
+
+export const DATE_TIME_FORMAT_VALUES: string[] = [
+ 'd/M/yyyy H:mm:ss',
+ 'M/d/yyyy H:mm:ss',
+ 'MM/dd/yyyy hh:mm:ss a',
+ 'yyyy-MM-dd HH:mm:ss',
+ 'yyyy-MM-dd HH:mm:ss.S'
+];
+
+const ALL_FORMAT_VALUES: string[] = BASIC_FORMAT_VALUES.concat(DATE_FORMAT_VALUES, DATE_TIME_FORMAT_VALUES);
+
const EXAMPLE_DATE_ISO = '2015-10-29T23:00:00.000Z';
const EXAMPLE_DATE_VALUE = Date.parse(EXAMPLE_DATE_ISO);
const MIN_MILLIS = 3.15576e11; // 3 years in millis, so Tue Jan 01 1980
@@ -33,18 +52,9 @@ const MAX_POSIX = MAX_MILLIS / 1000;
// copied from http://goo.gl/0ejHHW with small tweak to make dddd not pass on its own
// tslint:disable-next-line:max-line-length
-export const ISO_MATCHER = new RegExp(/^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))(T((([01]\d|2[0-3])((:?)[0-5]\d)?|24:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)$/);
-export const JODA_TO_REGEXP_LOOKUP: Record<string, RegExp> = {
- 'd/M/yyyy': /^[12]?\d\/1?\d\/\d\d\d\d$/,
- 'MM/dd/YYYY': /^\d\d\/\d\d\/\d\d\d\d$/,
- 'M/d/YY': /^1?\d\/[12]?\d\/\d\d$/,
- 'd-M-yyyy hh:mm:ss a': /^[12]?\d-1?\d-\d\d\d\d \d\d:\d\d:\d\d [ap]m$/i,
- 'MM/dd/YYYY hh:mm:ss a' : /^\d\d\/\d\d\/\d\d\d\d \d\d:\d\d:\d\d [ap]m$/i,
- 'YYYY-MM-dd HH:mm:ss' : /^\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d$/,
- 'YYYY-MM-dd HH:mm:ss.S': /^\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\.\d\d\d$/
-};
+export const ISO_MATCHER = /^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))(T((([01]\d|2[0-3])((:?)[0-5]\d)?|24:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)$/;
-export function timeFormatMatches(format: DruidTimestampFormat, value: string | number): boolean {
+export function timeFormatMatches(format: string, value: string | number): boolean {
if (format === 'iso') {
return ISO_MATCHER.test(String(value));
}
@@ -59,14 +69,11 @@ export function timeFormatMatches(format: DruidTimestampFormat, value: string |
return MIN_POSIX < absValue && absValue < MAX_POSIX;
}
- const formatRegexp = JODA_TO_REGEXP_LOOKUP[format];
- if (!formatRegexp) throw new Error(`unknown Druid format ${format}`);
-
- return formatRegexp.test(String(value));
+ return jodaFormatToRegExp(format).test(String(value));
}
-export function possibleDruidFormatForValues(values: any[]): DruidTimestampFormat | null {
- return TIMESTAMP_FORMAT_VALUES.filter(format => {
+export function possibleDruidFormatForValues(values: any[]): string | null {
+ return ALL_FORMAT_VALUES.filter(format => {
return values.every(value => timeFormatMatches(format, value));
})[0] || null;
}
diff --git a/web-console/src/utils/example-ingestion-spec.ts b/web-console/src/utils/example-ingestion-spec.ts
deleted file mode 100644
index 0fd7614..0000000
--- a/web-console/src/utils/example-ingestion-spec.ts
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import { IngestionSpec } from './ingestion-spec';
-
-export function getWikipediaSpec(dataSourceSuffix: string): IngestionSpec {
- return {
- 'type': 'index',
- 'dataSchema': {
- 'dataSource': 'wikipedia-' + dataSourceSuffix,
- 'parser': {
- 'type': 'string',
- 'parseSpec': {
- 'format': 'json',
- 'dimensionsSpec': {
- 'dimensions': [
- 'isRobot',
- 'channel',
- 'flags',
- 'isUnpatrolled',
- 'page',
- 'diffUrl',
- {
- 'name': 'added',
- 'type': 'long'
- },
- 'comment',
- {
- 'name': 'commentLength',
- 'type': 'long'
- },
- 'isNew',
- 'isMinor',
- {
- 'name': 'delta',
- 'type': 'long'
- },
- 'isAnonymous',
- 'user',
- {
- 'name': 'deltaBucket',
- 'type': 'long'
- },
- {
- 'name': 'deleted',
- 'type': 'long'
- },
- 'namespace'
- ]
- },
- 'timestampSpec': {
- 'column': 'timestamp',
- 'format': 'iso'
- }
- }
- },
- 'granularitySpec': {
- 'type': 'uniform',
- 'segmentGranularity': 'DAY',
- 'rollup': false,
- 'queryGranularity': 'none'
- },
- 'metricsSpec': []
- },
- 'ioConfig': {
- 'type': 'index',
- 'firehose': {
- 'fetchTimeout': 300000,
- 'type': 'http',
- 'uris': [
- 'https://static.imply.io/data/wikipedia.json.gz'
- ]
- }
- },
- 'tuningConfig': {
- 'type': 'index',
- 'forceExtendableShardSpecs': true,
- 'maxParseExceptions': 100,
- 'maxSavedParseExceptions': 10
- }
- };
-}
diff --git a/web-console/src/utils/ingestion-spec.tsx b/web-console/src/utils/ingestion-spec.tsx
index e9d92b5..8a29191 100644
--- a/web-console/src/utils/ingestion-spec.tsx
+++ b/web-console/src/utils/ingestion-spec.tsx
@@ -23,7 +23,7 @@ import React from 'react';
import { Field } from '../components/auto-form/auto-form';
import { ExternalLink } from '../components/external-link/external-link';
-import { TIMESTAMP_FORMAT_VALUES } from './druid-time';
+import { BASIC_FORMAT_VALUES, DATE_FORMAT_VALUES, DATE_TIME_FORMAT_VALUES } from './druid-time';
import { deepGet, deepSet } from './object-change';
// These constants are used to make sure that they are not constantly recreated thrashing the pure components
@@ -278,7 +278,18 @@ const TIMESTAMP_SPEC_FORM_FIELDS: Field<TimestampSpec>[] = [
name: 'format',
type: 'string',
defaultValue: 'auto',
- suggestions: ['auto'].concat(TIMESTAMP_FORMAT_VALUES),
+ suggestions: [
+ 'auto',
+ ...BASIC_FORMAT_VALUES,
+ {
+ group: 'Date and time formats',
+ suggestions: DATE_TIME_FORMAT_VALUES
+ },
+ {
+ group: 'Date only formats',
+ suggestions: DATE_FORMAT_VALUES
+ }
+ ],
isDefined: (timestampSpec: TimestampSpec) => isColumnTimestampSpec(timestampSpec),
info: <p>
Please specify your timestamp format by using the suggestions menu or typing in a <ExternalLink href="https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html">format string</ExternalLink>.
diff --git a/web-console/src/utils/joda-to-regexp.spec.ts b/web-console/src/utils/joda-to-regexp.spec.ts
new file mode 100644
index 0000000..5a44280
--- /dev/null
+++ b/web-console/src/utils/joda-to-regexp.spec.ts
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { jodaFormatToRegExp } from './joda-to-regexp';
+
+describe('jodaFormatToRegExp', () => {
+ it('works for common formats', () => {
+ expect(jodaFormatToRegExp('d/M/yyyy').toString()).toMatchSnapshot();
+ expect(jodaFormatToRegExp('MM/dd/YYYY').toString()).toMatchSnapshot();
+ expect(jodaFormatToRegExp('M/d/YY').toString()).toMatchSnapshot();
+ expect(jodaFormatToRegExp('d-M-yyyy hh:mm:ss a').toString()).toMatchSnapshot();
+ expect(jodaFormatToRegExp('MM/dd/YYYY hh:mm:ss a' ).toString()).toMatchSnapshot();
+ expect(jodaFormatToRegExp('YYYY-MM-dd HH:mm:ss' ).toString()).toMatchSnapshot();
+ expect(jodaFormatToRegExp('YYYY-MM-dd HH:mm:ss.S').toString()).toMatchSnapshot();
+ });
+
+ it('matches dates when needed', () => {
+ expect(jodaFormatToRegExp('d-M-yyyy hh:mm:ss a').test('26-4-1986 01:23:40 am')).toEqual(true);
+ expect(jodaFormatToRegExp('YYYY-MM-dd HH:mm:ss.S').test('26-4-1986 01:23:40 am')).toEqual(false);
+ });
+
+});
diff --git a/web-console/src/utils/joda-to-regexp.ts b/web-console/src/utils/joda-to-regexp.ts
new file mode 100644
index 0000000..d3d86b4
--- /dev/null
+++ b/web-console/src/utils/joda-to-regexp.ts
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Refer to https://www.joda.org/joda-time/key_format.html
+const TEXT = '\\w+';
+const NUMBER_2_DIGIT = '[0-9]{2}';
+const NUMBER_4_DIGIT = '[0-9]{4}';
+const JODA_FRAGMENT_TO_REG_EXP_STRING: Record<string, string> = {
+ C: '[0-9]{1,2}',
+ CC: NUMBER_2_DIGIT,
+ YY: NUMBER_2_DIGIT,
+ YYYY: NUMBER_4_DIGIT,
+
+ xx: NUMBER_2_DIGIT,
+ xxxx: NUMBER_4_DIGIT,
+ w: '[0-9]{1,2}',
+ ww: NUMBER_2_DIGIT,
+ e: '[0-7]',
+ E: TEXT,
+ EEEE: TEXT,
+
+ yy: NUMBER_2_DIGIT,
+ yyyy: NUMBER_4_DIGIT,
+ D: '[0-9]{1,3}',
+ DD: '[0-9]{2,3}',
+ DDD: '[0-9]{3}',
+ M: '(?:1[0-2]|[1-9])',
+ MM: '(?:1[0-2]|0[1-9])',
+ MMM: TEXT,
+ MMMM: TEXT,
+ d: '(?:3[0-1]|[12][0-9]|[1-9])',
+ dd: '(?:3[0-1]|[12][0-9]|0[1-9])',
+
+ a: '[ap]m',
+ K: '(?:1[01]|[0-9])',
+ KK: '(?:1[01]|0[0-9])',
+ h: '(?:1[0-2]|[1-9])',
+ hh: '(?:1[0-2]|0[1-9])',
+
+ H: '(?:2[0-3]|1[0-9]|[0-9])',
+ HH: '(?:2[0-3]|1[0-9]|0[0-9])',
+ k: '(?:2[0-4]|1[0-9]|[1-9])',
+ kk: '(?:2[0-4]|1[0-9]|0[1-9])',
+ m: '(?:[1-5][0-9]|[0-9])',
+ mm: '[0-5][0-9]',
+ s: '(?:[1-5][0-9]|[0-9])',
+ ss: '[0-5][0-9]',
+ S: '[0-9]{1,3}',
+ SS: '[0-9]{2,3}',
+ SSS: '[0-9]{3}',
+ z: TEXT,
+ Z: TEXT
+};
+
+export function jodaFormatToRegExp(jodaFormat: string): RegExp {
+ const regExpStr = jodaFormat.replace(/([a-zA-Z])\1{0,3}/g, jodaPart => {
+ const re = JODA_FRAGMENT_TO_REG_EXP_STRING[jodaPart];
+ if (!re) throw new Error(`could not convert ${jodaPart} to RegExp`);
+ return re;
+ });
+ return new RegExp(`^${regExpStr}$`, 'i');
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org