You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by ab...@apache.org on 2023/05/03 08:33:19 UTC

[druid] branch 26.0.0 updated: Web console: allow stringly schemas in the data loader (#14189) (#14203)

This is an automated email from the ASF dual-hosted git repository.

abhishek pushed a commit to branch 26.0.0
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/26.0.0 by this push:
     new 3f58b6b726 Web console: allow stringly schemas in the data loader (#14189) (#14203)
3f58b6b726 is described below

commit 3f58b6b7268a8d30f731392ee6197b831d630a80
Author: Vadim Ogievetsky <va...@ogievetsky.com>
AuthorDate: Wed May 3 01:33:03 2023 -0700

    Web console: allow stringly schemas in the data loader (#14189) (#14203)
    
    Backport of #14189 to 26
---
 .../src/druid-models/execution/execution.ts        |   5 +-
 .../ingestion-spec/ingestion-spec.spec.ts          |   4 +-
 .../druid-models/ingestion-spec/ingestion-spec.tsx |  52 +++++---
 .../src/views/load-data-view/info-messages.tsx     |   8 +-
 .../src/views/load-data-view/load-data-view.tsx    | 135 ++++++++++++++++-----
 5 files changed, 144 insertions(+), 60 deletions(-)

diff --git a/web-console/src/druid-models/execution/execution.ts b/web-console/src/druid-models/execution/execution.ts
index 97ecc14b2f..fd2c0881dd 100644
--- a/web-console/src/druid-models/execution/execution.ts
+++ b/web-console/src/druid-models/execution/execution.ts
@@ -140,10 +140,11 @@ function formatPendingMessage(
 
   // If there are not enough slots free then there are two cases:
   if (totalNeeded <= totalTaskSlots) {
-    // (1) not enough free, but enough total: "Launched 2/4 tasks. Cluster is currently using 5/6 task slots. Waiting for 1 other task to finish."
+    // (1) not enough free, but enough total: "Launched 2/4 tasks. Cluster is currently using 5/6 task slots. Waiting for 1 task slot to become available."
     const tasksThatNeedToFinish = pendingTasks - availableTaskSlots;
     return (
-      baseMessage + ` Waiting for ${pluralIfNeeded(tasksThatNeedToFinish, 'other task')} to finish.`
+      baseMessage +
+      ` Waiting for ${pluralIfNeeded(tasksThatNeedToFinish, 'task slot')} to become available.`
     );
   } else {
     // (2) not enough total: "Launched 2/4 tasks. Cluster is currently using 2/2 task slots. Add more capacity or reduce maxNumTasks to 2 or lower."
diff --git a/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts b/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts
index 03a7cd1d26..79a4d93e30 100644
--- a/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts
+++ b/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts
@@ -757,7 +757,7 @@ describe('spec utils', () => {
   });
 
   it('updateSchemaWithSample', () => {
-    const withRollup = updateSchemaWithSample(ingestionSpec, JSON_SAMPLE, 'specific', true);
+    const withRollup = updateSchemaWithSample(ingestionSpec, JSON_SAMPLE, 'fixed', true);
 
     expect(withRollup).toMatchInlineSnapshot(`
       Object {
@@ -822,7 +822,7 @@ describe('spec utils', () => {
       }
     `);
 
-    const noRollup = updateSchemaWithSample(ingestionSpec, JSON_SAMPLE, 'specific', false);
+    const noRollup = updateSchemaWithSample(ingestionSpec, JSON_SAMPLE, 'fixed', false);
 
     expect(noRollup).toMatchInlineSnapshot(`
       Object {
diff --git a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
index de3e39e3fd..8741668b59 100644
--- a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
+++ b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
@@ -267,14 +267,17 @@ export interface DataSchema {
   metricsSpec?: MetricSpec[];
 }
 
-export type DimensionMode = 'specific' | 'auto-detect';
+export type SchemaMode = 'fixed' | 'string-only-discovery' | 'type-aware-discovery';
 
-export function getDimensionMode(spec: Partial<IngestionSpec>): DimensionMode {
+export function getSchemaMode(spec: Partial<IngestionSpec>): SchemaMode {
   if (deepGet(spec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery') === true) {
-    return 'auto-detect';
+    return 'type-aware-discovery';
+  }
+  if (deepGet(spec, 'spec.dataSchema.dimensionsSpec.includeAllDimensions') === true) {
+    return 'string-only-discovery';
   }
   const dimensions = deepGet(spec, 'spec.dataSchema.dimensionsSpec.dimensions') || EMPTY_ARRAY;
-  return Array.isArray(dimensions) && dimensions.length === 0 ? 'auto-detect' : 'specific';
+  return Array.isArray(dimensions) && dimensions.length === 0 ? 'string-only-discovery' : 'fixed';
 }
 
 export function getRollup(spec: Partial<IngestionSpec>): boolean {
@@ -2418,7 +2421,7 @@ function getTypeHintsFromSpec(spec: Partial<IngestionSpec>): Record<string, stri
 export function updateSchemaWithSample(
   spec: Partial<IngestionSpec>,
   sampleResponse: SampleResponse,
-  dimensionMode: DimensionMode,
+  schemaMode: SchemaMode,
   rollup: boolean,
   forcePartitionInitialization = false,
 ): Partial<IngestionSpec> {
@@ -2429,20 +2432,31 @@ export function updateSchemaWithSample(
 
   let newSpec = spec;
 
-  if (dimensionMode === 'auto-detect') {
-    newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery', true);
-    newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.includeAllDimensions', true);
-    newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.dimensionExclusions', []);
-    newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.dimensions');
-  } else {
-    newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery');
-    newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.includeAllDimensions');
-    newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.dimensionExclusions');
-    newSpec = deepSet(
-      newSpec,
-      'spec.dataSchema.dimensionsSpec.dimensions',
-      getDimensionSpecs(sampleResponse, typeHints, guessNumericStringsAsNumbers, rollup),
-    );
+  switch (schemaMode) {
+    case 'type-aware-discovery':
+      newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery', true);
+      newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.includeAllDimensions');
+      newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.dimensionExclusions', []);
+      newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.dimensions');
+      break;
+
+    case 'string-only-discovery':
+      newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery');
+      newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.includeAllDimensions');
+      newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.dimensionExclusions', []);
+      newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.dimensions');
+      break;
+
+    case 'fixed':
+      newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery');
+      newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.includeAllDimensions');
+      newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.dimensionExclusions');
+      newSpec = deepSet(
+        newSpec,
+        'spec.dataSchema.dimensionsSpec.dimensions',
+        getDimensionSpecs(sampleResponse, typeHints, guessNumericStringsAsNumbers, rollup),
+      );
+      break;
   }
 
   if (rollup) {
diff --git a/web-console/src/views/load-data-view/info-messages.tsx b/web-console/src/views/load-data-view/info-messages.tsx
index 5c93b7a107..b88cf8a70c 100644
--- a/web-console/src/views/load-data-view/info-messages.tsx
+++ b/web-console/src/views/load-data-view/info-messages.tsx
@@ -20,7 +20,7 @@ import { Button, Callout, Code, FormGroup, Intent } from '@blueprintjs/core';
 import React from 'react';
 
 import { ExternalLink, LearnMore } from '../../components';
-import type { DimensionMode, IngestionSpec } from '../../druid-models';
+import type { IngestionSpec, SchemaMode } from '../../druid-models';
 import { getIngestionDocLink } from '../../druid-models';
 import { getLink } from '../../links';
 import { deepGet, deepSet } from '../../utils';
@@ -129,11 +129,11 @@ export const FilterMessage = React.memo(function FilterMessage() {
 });
 
 export interface SchemaMessageProps {
-  dimensionMode: DimensionMode;
+  schemaMode: SchemaMode;
 }
 
 export const SchemaMessage = React.memo(function SchemaMessage(props: SchemaMessageProps) {
-  const { dimensionMode } = props;
+  const { schemaMode } = props;
 
   return (
     <FormGroup>
@@ -142,7 +142,7 @@ export const SchemaMessage = React.memo(function SchemaMessage(props: SchemaMess
           Each column in Druid must have an assigned type (string, long, float, double, complex,
           etc).
         </p>
-        {dimensionMode === 'specific' && (
+        {schemaMode === 'fixed' && (
           <p>
             Default primitive types have been automatically assigned to your columns. If you want to
             change the type, click on the column header.
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index 9d698fe1de..b332d277d3 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -56,7 +56,6 @@ import {
 } from '../../components';
 import { AsyncActionDialog } from '../../dialogs';
 import type {
-  DimensionMode,
   DimensionSpec,
   DruidFilter,
   FlattenField,
@@ -65,6 +64,7 @@ import type {
   InputFormat,
   IoConfig,
   MetricSpec,
+  SchemaMode,
   TimestampSpec,
   Transform,
   TuningConfig,
@@ -84,7 +84,6 @@ import {
   FILTER_FIELDS,
   FILTERS_FIELDS,
   FLATTEN_FIELD_FIELDS,
-  getDimensionMode,
   getDimensionSpecName,
   getIngestionComboType,
   getIngestionImage,
@@ -95,6 +94,7 @@ import {
   getMetricSpecName,
   getRequiredModule,
   getRollup,
+  getSchemaMode,
   getSecondaryPartitionRelatedFormFields,
   getSpecType,
   getTimestampExpressionFields,
@@ -367,7 +367,7 @@ export interface LoadDataViewState {
   continueToSpec: boolean;
   showResetConfirm: boolean;
   newRollup?: boolean;
-  newDimensionMode?: DimensionMode;
+  newSchemaMode?: SchemaMode;
 
   // welcome
   overlordModules?: string[];
@@ -414,6 +414,7 @@ export interface LoadDataViewState {
   selectedMetricSpec?: SelectedIndex<MetricSpec>;
 
   // for final step
+  existingDatasources?: string[];
   submitting: boolean;
 }
 
@@ -666,8 +667,10 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
       case 'partition':
       case 'publish':
       case 'tuning':
-      case 'spec':
         return;
+
+      case 'spec':
+        return this.queryForSpec();
     }
   }
 
@@ -1994,7 +1997,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
               newSpec = updateSchemaWithSample(
                 newSpec,
                 transformQueryState.data,
-                'specific',
+                'fixed',
                 typeof currentRollup === 'boolean' ? currentRollup : DEFAULT_ROLLUP_SETTING,
               );
             }
@@ -2197,7 +2200,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
               newSpec = updateSchemaWithSample(
                 newSpec,
                 filterQueryState.data,
-                'specific',
+                'fixed',
                 typeof currentRollup === 'boolean' ? currentRollup : DEFAULT_ROLLUP_SETTING,
               );
             }
@@ -2308,7 +2311,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
     const somethingSelected = Boolean(
       selectedAutoDimension || selectedDimensionSpec || selectedMetricSpec,
     );
-    const dimensionMode = getDimensionMode(spec);
+    const schemaMode = getSchemaMode(spec);
 
     let mainFill: JSX.Element | string;
     if (schemaQueryState.isInit()) {
@@ -2349,7 +2352,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
       <>
         <div className="main">{mainFill}</div>
         <div className="control">
-          <SchemaMessage dimensionMode={dimensionMode} />
+          <SchemaMessage schemaMode={schemaMode} />
           {!somethingSelected && (
             <>
               <FormGroupWithInfo
@@ -2375,16 +2378,16 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
                 }
               >
                 <Switch
-                  checked={dimensionMode === 'specific'}
+                  checked={schemaMode === 'fixed'}
                   onChange={() =>
                     this.setState({
-                      newDimensionMode: dimensionMode === 'specific' ? 'auto-detect' : 'specific',
+                      newSchemaMode: schemaMode === 'fixed' ? 'string-only-discovery' : 'fixed',
                     })
                   }
-                  label="Explicitly specify dimension list"
+                  label="Explicitly specify schema"
                 />
               </FormGroupWithInfo>
-              {dimensionMode === 'auto-detect' && (
+              {schemaMode !== 'fixed' && (
                 <AutoForm
                   fields={[
                     {
@@ -2470,7 +2473,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
               <FormGroup>
                 <Button
                   text="Add dimension"
-                  disabled={dimensionMode !== 'specific'}
+                  disabled={schemaMode !== 'fixed'}
                   onClick={() => {
                     this.setState({
                       selectedDimensionSpec: {
@@ -2606,7 +2609,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
         action={async () => {
           const sampleResponse = await sampleForTransform(spec, cacheRows);
           this.updateSpec(
-            updateSchemaWithSample(spec, sampleResponse, getDimensionMode(spec), newRollup, true),
+            updateSchemaWithSample(spec, sampleResponse, getSchemaMode(spec), newRollup, true),
           );
         }}
         confirmButtonText={`Yes - ${newRollup ? 'enable' : 'disable'} rollup`}
@@ -2622,32 +2625,55 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
   }
 
   renderChangeDimensionModeAction() {
-    const { newDimensionMode, spec, cacheRows } = this.state;
-    if (typeof newDimensionMode === 'undefined' || !cacheRows) return;
-    const autoDetect = newDimensionMode === 'auto-detect';
+    const { newSchemaMode, spec, cacheRows } = this.state;
+    if (!newSchemaMode || !cacheRows) return;
+    const autoDetect = newSchemaMode !== 'fixed';
 
     return (
       <AsyncActionDialog
         action={async () => {
           const sampleResponse = await sampleForTransform(spec, cacheRows);
           this.updateSpec(
-            updateSchemaWithSample(spec, sampleResponse, newDimensionMode, getRollup(spec)),
+            updateSchemaWithSample(spec, sampleResponse, newSchemaMode, getRollup(spec)),
           );
         }}
-        confirmButtonText={`Yes - ${autoDetect ? 'auto detect' : 'explicitly set'} columns`}
-        successText={`Dimension mode changes to ${
-          autoDetect ? 'auto detect' : 'specific list'
-        }. Schema has been updated.`}
-        failText="Could change dimension mode"
+        confirmButtonText={`Yes - ${autoDetect ? 'auto detect' : 'explicitly define'} schema`}
+        successText={`Schema mode changed to ${autoDetect ? 'auto detect' : 'explicitly defined'}.`}
+        failText="Could not change schema mode"
         intent={Intent.WARNING}
-        onClose={() => this.setState({ newDimensionMode: undefined })}
+        onClose={() => this.setState({ newSchemaMode: undefined })}
       >
         <p>
           {autoDetect
-            ? `Are you sure you don't want to explicitly specify a dimension list?`
-            : `Are you sure you want to explicitly specify a dimension list?`}
+            ? `Are you sure you want Druid to auto detect the data schema?`
+            : `Are you sure you want to explicitly specify a schema?`}
         </p>
-        <p>Making this change will reset any work you have done in this section.</p>
+        <p>Making this change will reset all schema configuration done so far.</p>
+        {autoDetect && (
+          <Switch
+            checked={newSchemaMode === 'type-aware-discovery'}
+            onChange={() => {
+              this.setState({
+                newSchemaMode:
+                  newSchemaMode === 'string-only-discovery'
+                    ? 'type-aware-discovery'
+                    : 'string-only-discovery',
+              });
+            }}
+          >
+            Use the new type-aware schema discovery capability. Avoid this if you are appending to a
+            datasource created with string-only schema discovery of Druid and want to preserve
+            schema compatibility. For more information see the{' '}
+            <ExternalLink
+              href={`${getLink(
+                'DOCS',
+              )}/ingestion/schema-design.html#schema-auto-discovery-for-dimensions`}
+            >
+              documentation
+            </ExternalLink>
+            .
+          </Switch>
+        )}
       </AsyncActionDialog>
     );
   }
@@ -2688,7 +2714,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
   renderDimensionSpecControls() {
     const { spec, selectedDimensionSpec } = this.state;
     if (!selectedDimensionSpec) return;
-    const dimensionMode = getDimensionMode(spec);
+    const schemaMode = getSchemaMode(spec);
 
     const dimensions = deepGet(spec, `spec.dataSchema.dimensionsSpec.dimensions`) || EMPTY_ARRAY;
 
@@ -2709,7 +2735,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
 
     const convertToMetric = (type: string, prefix: string) => {
       const specWithoutDimension =
-        dimensionMode === 'specific'
+        schemaMode === 'fixed'
           ? deepDelete(
               spec,
               `spec.dataSchema.dimensionsSpec.dimensions.${selectedDimensionSpec.index}`,
@@ -2811,7 +2837,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
   renderMetricSpecControls() {
     const { spec, selectedMetricSpec } = this.state;
     if (!selectedMetricSpec) return;
-    const dimensionMode = getDimensionMode(spec);
+    const schemaMode = getSchemaMode(spec);
     const selectedMetricSpecFieldName = selectedMetricSpec.value.fieldName;
 
     const convertToDimension = (type: string) => {
@@ -2862,7 +2888,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
         }
       >
         {selectedMetricSpec.index !== -1 &&
-          dimensionMode === 'specific' &&
+          schemaMode === 'fixed' &&
           selectedMetricSpecFieldName && (
             <FormGroup>
               <Popover2 content={convertToDimensionMenu}>
@@ -3242,9 +3268,24 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
     }
   };
 
+  async queryForSpec() {
+    let existingDatasources: string[];
+    try {
+      existingDatasources = (await Api.instance.get<string[]>('/druid/coordinator/v1/datasources'))
+        .data;
+    } catch {
+      return;
+    }
+
+    this.setState({
+      existingDatasources,
+    });
+  }
+
   renderSpecStep() {
-    const { spec, submitting } = this.state;
+    const { spec, existingDatasources, submitting } = this.state;
     const issueWithSpec = getIssueWithSpec(spec);
+    const datasource = deepGet(spec, 'spec.dataSchema.dataSource');
 
     return (
       <>
@@ -3267,6 +3308,34 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
               >{`There is an issue with the spec: ${issueWithSpec}`}</Callout>
             </FormGroup>
           )}
+          {getSchemaMode(spec) === 'type-aware-discovery' &&
+            existingDatasources?.includes(datasource) && (
+              <FormGroup>
+                <Callout intent={Intent.WARNING}>
+                  <p>
+                    You have enabled type-aware schema discovery (
+                    <Code>useSchemaDiscovery: true</Code>) to ingest data into the existing
+                    datasource <Code>{datasource}</Code>.
+                  </p>
+                  <p>
+                    If you used string-based schema discovery when first ingesting data to{' '}
+                    <Code>{datasource}</Code>, using type-aware schema discovery now can cause
+                    problems with the values multi-value string dimensions.
+                  </p>
+                  <p>
+                    For more information see the{' '}
+                    <ExternalLink
+                      href={`${getLink(
+                        'DOCS',
+                      )}/ingestion/schema-design.html#schema-auto-discovery-for-dimensions`}
+                    >
+                      documentation
+                    </ExternalLink>
+                    .
+                  </p>
+                </Callout>
+              </FormGroup>
+            )}
           <AppendToExistingIssue spec={spec} onChangeSpec={this.updateSpec} />
         </div>
         <div className="next-bar">
@@ -3279,7 +3348,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
             />
           )}
           <Button
-            text="Submit"
+            text={submitting ? 'Submitting...' : 'Submit'}
             rightIcon={IconNames.CLOUD_UPLOAD}
             intent={Intent.PRIMARY}
             disabled={submitting || Boolean(issueWithSpec)}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org