You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@echarts.apache.org by sh...@apache.org on 2021/07/13 14:30:18 UTC

[echarts] branch dataset-perf updated: refact(data): omit unused dimensions in createDimensions

This is an automated email from the ASF dual-hosted git repository.

shenyi pushed a commit to branch dataset-perf
in repository https://gitbox.apache.org/repos/asf/echarts.git


The following commit(s) were added to refs/heads/dataset-perf by this push:
     new 53113c4  refact(data): omit unused dimensions in createDimensions
53113c4 is described below

commit 53113c44a5141851fab313630c9da9a984d1fa9c
Author: pissang <bm...@gmail.com>
AuthorDate: Tue Jul 13 22:28:59 2021 +0800

    refact(data): omit unused dimensions in createDimensions
---
 src/chart/helper/createSeriesDataFromArray.ts |  50 ++++---
 src/data/DataStorage.ts                       |  10 +-
 src/data/helper/createDimensions.ts           | 180 ++++++++++++++++----------
 src/data/helper/dimensionHelper.ts            |  11 --
 src/data/helper/sourceManager.ts              |   4 +-
 5 files changed, 146 insertions(+), 109 deletions(-)

diff --git a/src/chart/helper/createSeriesDataFromArray.ts b/src/chart/helper/createSeriesDataFromArray.ts
index 02d7176..b84bf0e 100644
--- a/src/chart/helper/createSeriesDataFromArray.ts
+++ b/src/chart/helper/createSeriesDataFromArray.ts
@@ -19,8 +19,8 @@
 
 import * as zrUtil from 'zrender/src/core/util';
 import SeriesData from '../../data/SeriesData';
-import createDimensions, { CreateDimensionsParams } from '../../data/helper/createDimensions';
-import {getDimensionTypeByAxis, omitUnusedDimensions} from '../../data/helper/dimensionHelper';
+import createDimensions, { CreateDimensionsParams, getDimCount } from '../../data/helper/createDimensions';
+import {getDimensionTypeByAxis} from '../../data/helper/dimensionHelper';
 import {getDataItemValue} from '../../util/model';
 import CoordinateSystem from '../../core/CoordinateSystem';
 import {getCoordSysInfoBySeries} from '../../model/referHelper';
@@ -120,38 +120,46 @@ function createListFromArray(
         );
     }
 
+    const source = isDataStorage(sourceOrStore) ? sourceOrStore.getSource() : sourceOrStore;
     const coordSysInfo = getCoordSysInfoBySeries(seriesModel);
     const coordSysDimDefs = getCoordSysDimDefs(seriesModel, coordSysInfo);
     const useEncodeDefaulter = opt.useEncodeDefaulter;
 
-    // NOTE: don't call createDimensions on same source multiple times.
-    // It will break the encodeDefaulter which has sideeffects.
-    let dimInfoList = createDimensions(sourceOrStore, {
+    // Try to ignore unused dimensions when sharing a high-dimension data storage.
+    // The threshold of 10 is an empirical value.
+    const omitUnusedDimensions = isDataStorage(sourceOrStore) && sourceOrStore.getDimensionCount() > 10;
+    const encodeDefaulter = zrUtil.isFunction(useEncodeDefaulter)
+        ? useEncodeDefaulter
+        : useEncodeDefaulter
+        ? zrUtil.curry(makeSeriesEncodeForAxisCoordSys, coordSysDimDefs, seriesModel)
+        : null;
+
+    const createDimensionOptions = {
         coordDimensions: coordSysDimDefs,
         generateCoord: opt.generateCoord,
-        encodeDefine: seriesModel.getEncode(),
-        encodeDefaulter: zrUtil.isFunction(useEncodeDefaulter)
-            ? useEncodeDefaulter
-            : useEncodeDefaulter
-            ? zrUtil.curry(makeSeriesEncodeForAxisCoordSys, coordSysDimDefs, seriesModel)
-            : null
-    });
+        encodeDefine: seriesModel.getEncode()
+            // NOTE: Calling createDimensions on the same source multiple times
+            // would break the encodeDefaulter, which has side effects.
+            // So we prepare the default encode here instead of passing the encodeDefaulter function.
+            || (encodeDefaulter && encodeDefaulter(
+                source, getDimCount(source, coordSysDimDefs, source.dimensionsDefine || [])
+            )),
+        omitUnusedDimensions
+    };
+    let dimInfoList = createDimensions(sourceOrStore, createDimensionOptions);
     let firstCategoryDimIndex = injectOrdinalMeta(dimInfoList, opt.createInvertedIndices, coordSysInfo);
 
-    // Try to ignore unsed dimensions if sharing a high dimension datastorage
-    // 10 is an experience value.
-    if (isDataStorage(sourceOrStore) && sourceOrStore.getDimensionCount() > 10) {
-        const omitedDimInfoList = omitUnusedDimensions(dimInfoList);
+    if (omitUnusedDimensions) {
         // sourceOrStore
-        if (sourceOrStore.syncDimensionTypes(omitedDimInfoList)) {
-            dimInfoList = omitedDimInfoList;
-        }
-        else {
+        if (!(sourceOrStore as DataStorage).syncDimensionTypes(dimInfoList)) {
+            dimInfoList = createDimensions(sourceOrStore, zrUtil.extend(createDimensionOptions, {
+                omitUnusedDimensions: false // Sharing failed: rebuild with all dimensions for a standalone storage.
+            }));
             // Fallback
             firstCategoryDimIndex = injectOrdinalMeta(
                 dimInfoList, opt.createInvertedIndices, coordSysInfo
             );
-            sourceOrStore = sourceOrStore.getSource();
+            sourceOrStore = source;
         }
     }
 
diff --git a/src/data/DataStorage.ts b/src/data/DataStorage.ts
index c089e1b..7bc93f6 100644
--- a/src/data/DataStorage.ts
+++ b/src/data/DataStorage.ts
@@ -34,6 +34,12 @@ import { Source } from './Source';
 const UNDEFINED = 'undefined';
 /* global Float64Array, Int32Array, Uint32Array, Uint16Array */
 
+// Typed-array constructors, falling back to plain `Array` where typed arrays are unavailable.
+// Caution: MUST not use `new CtorUint32Array(arr, 0, len)`; `Array` and typed arrays take different ctor args.
+export const CtorUint32Array = typeof Uint32Array === UNDEFINED ? Array : Uint32Array;
+export const CtorUint16Array = typeof Uint16Array === UNDEFINED ? Array : Uint16Array;
+export const CtorInt32Array = typeof Int32Array === UNDEFINED ? Array : Int32Array;
+export const CtorFloat64Array = typeof Float64Array === UNDEFINED ? Array : Float64Array;
 /**
  * Multi dimensional data storage
  */
@@ -54,10 +60,6 @@ type DataTypedArray = Uint32Array | Int32Array | Uint16Array | Float64Array;
 type DataTypedArrayConstructor = typeof Uint32Array | typeof Int32Array | typeof Uint16Array | typeof Float64Array;
 type DataArrayLikeConstructor = typeof Array | DataTypedArrayConstructor;
 
-// Caution: MUST not use `new CtorUint32Array(arr, 0, len)`, because the Ctor of array is
-// different from the Ctor of typed array.
-const CtorUint32Array = typeof Uint32Array === UNDEFINED ? Array : Uint32Array;
-const CtorUint16Array = typeof Uint16Array === UNDEFINED ? Array : Uint16Array;
 
 type DataValueChunk = ArrayLike<ParsedValue>;
 
diff --git a/src/data/helper/createDimensions.ts b/src/data/helper/createDimensions.ts
index 57be8e9..efd1dfb 100644
--- a/src/data/helper/createDimensions.ts
+++ b/src/data/helper/createDimensions.ts
@@ -29,10 +29,10 @@ import {
 } from '../../util/types';
 import SeriesData from '../SeriesData';
 import DataDimensionInfo from '../DataDimensionInfo';
-import { clone, createHashMap, defaults, each, extend, HashMap, isObject, isString, keys } from 'zrender/src/core/util';
+import { clone, createHashMap, defaults, each, extend, HashMap, isObject, isString, keys, map } from 'zrender/src/core/util';
 import OrdinalMeta from '../OrdinalMeta';
 import { createSourceFromSeriesDataOption, isSourceInstance, Source } from '../Source';
-import DataStorage from '../DataStorage';
+import DataStorage, { CtorInt32Array, CtorUint32Array } from '../DataStorage';
 import { normalizeToArray } from '../../util/model';
 import { BE_ORDINAL, guessOrdinal } from './sourceHelper';
 
@@ -62,7 +62,13 @@ export type CreateDimensionsParams = {
      */
     encodeDefaulter?: EncodeDefaulter,
     generateCoord?: string,
-    generateCoordCount?: number
+    generateCoordCount?: number,
+
+    /**
+     * Whether to omit unused dimensions.
+     * Used to improve performance on high-dimensional data.
+     */
+    omitUnusedDimensions?: boolean
 };
 
 /**
@@ -96,37 +102,57 @@ export default function createDimensions(
     const dimsDef = (opt.dimensionsDefine || source.dimensionsDefine || []).slice();
     const dataDimNameMap = createHashMap<DimensionIndex, DimensionName>();
     const coordDimNameMap = createHashMap<true, DimensionName>();
-    // let valueCandidate;
     const result: DataDimensionInfo[] = [];
+    const omitUnusedDimensions = opt.omitUnusedDimensions;
 
     const dimCount = getDimCount(source, sysDims, dimsDef, opt.dimensionsCount);
 
+    let encodeDef = opt.encodeDefine;
+    if (!encodeDef && opt.encodeDefaulter) {
+        encodeDef = opt.encodeDefaulter(source, dimCount);
+    }
+    const encodeDefMap = createHashMap<DimensionIndex[] | false, DimensionName>(encodeDef as any);
+
+    const indicesMap = new CtorInt32Array(dimCount);
+    for (let i = 0; i < indicesMap.length; i++) {
+        indicesMap[i] = -1;
+    }
+    function getResultItem(dimIdx: number) { // Lazily creates (once) and returns the result item of a source dim.
+        const idx = indicesMap[dimIdx];
+        if (idx < 0) { // -1 marks a dimension that has no result item yet.
+            const dimDefItemRaw = dimsDef[dimIdx];
+            const dimDefItem = isObject(dimDefItemRaw) ? dimDefItemRaw : { name: dimDefItemRaw };
+            const resultItem = new DataDimensionInfo();
+            const userDimName = dimDefItem.name;
+            if (userDimName != null && dataDimNameMap.get(userDimName) != null) { // Skip the lookup for unnamed dims.
+                resultItem.name = resultItem.displayName = userDimName;
+            }
+            dimDefItem.type != null && (resultItem.type = dimDefItem.type);
+            dimDefItem.displayName != null && (resultItem.displayName = dimDefItem.displayName);
+            const newIdx = result.length;
+            indicesMap[dimIdx] = newIdx; // Remember the position in `result` for later calls.
+            result.push(resultItem);
+            return resultItem;
+        }
+        return result[idx];
+    }
+
     // Apply user defined dims (`name` and `type`) and init result.
     for (let i = 0; i < dimCount; i++) {
         const dimDefItemRaw = dimsDef[i];
-        const dimDefItem = dimsDef[i] = extend(
-            {}, isObject(dimDefItemRaw) ? dimDefItemRaw : { name: dimDefItemRaw }
-        );
-
-        const userDimName = dimDefItem.name;
-        const resultItem = result[i] = new DataDimensionInfo();
+        const userDimName = isObject(dimDefItemRaw) ? dimDefItemRaw.name : dimDefItemRaw;
         // Name will be applied later for avoiding duplication.
         if (userDimName != null && dataDimNameMap.get(userDimName) == null) {
             // Only if `series.dimensions` is defined in option
             // displayName, will be set, and dimension will be diplayed vertically in
             // tooltip by default.
-            resultItem.name = resultItem.displayName = userDimName;
             dataDimNameMap.set(userDimName, i);
         }
-        dimDefItem.type != null && (resultItem.type = dimDefItem.type);
-        dimDefItem.displayName != null && (resultItem.displayName = dimDefItem.displayName);
-    }
 
-    let encodeDef = opt.encodeDefine;
-    if (!encodeDef && opt.encodeDefaulter) {
-        encodeDef = opt.encodeDefaulter(source, dimCount);
+        if (!omitUnusedDimensions) {
+            getResultItem(i);
+        }
     }
-    const encodeDefMap = createHashMap<DimensionIndex[] | false, DimensionName>(encodeDef as any);
 
     // Set `coordDim` and `coordDimIndex` by `encodeDefMap` and normalize `encodeDefMap`.
     encodeDefMap.each(function (dataDimsRaw, coordDim) {
@@ -148,7 +174,7 @@ export default function createDimensions(
                 : resultDimIdxOrName;
             if (resultDimIdx != null && resultDimIdx < dimCount) {
                 validDataDims[idx] = resultDimIdx;
-                applyDim(result[resultDimIdx], coordDim, idx);
+                applyDim(getResultItem(resultDimIdx), coordDim, idx);
             }
         });
     });
@@ -190,16 +216,16 @@ export default function createDimensions(
         // dimensions provides default dim sequences.
         if (!dataDims.length) {
             for (let i = 0; i < (sysDimItemDimsDef && sysDimItemDimsDef.length || 1); i++) {
-                while (availDimIdx < result.length && result[availDimIdx].coordDim != null) {
+                while (availDimIdx < dimCount && getResultItem(availDimIdx).coordDim != null) {
                     availDimIdx++;
                 }
-                availDimIdx < result.length && dataDims.push(availDimIdx++);
+                availDimIdx < dimCount && dataDims.push(availDimIdx++);
             }
         }
 
         // Apply templates.
         each(dataDims, function (resultDimIdx, coordDimIndex) {
-            const resultItem = result[resultDimIdx];
+            const resultItem = getResultItem(resultDimIdx);
             applyDim(defaults(resultItem, sysDimItem), coordDim, coordDimIndex);
             if (resultItem.name == null && sysDimItemDimsDef) {
                 let sysDimItemDimsDefItem = sysDimItemDimsDef[coordDimIndex];
@@ -233,59 +259,71 @@ export default function createDimensions(
     let dataDimNameAutoIdx = 0;
 
     // Set dim `name` and other `coordDim` and other props.
-    for (let resultDimIdx = 0; resultDimIdx < dimCount; resultDimIdx++) {
-        const resultItem = result[resultDimIdx] = result[resultDimIdx] || new DataDimensionInfo();
-        const coordDim = resultItem.coordDim;
+    if (!omitUnusedDimensions) {
+        for (let resultDimIdx = 0; resultDimIdx < dimCount; resultDimIdx++) {
+            const resultItem = getResultItem(resultDimIdx);
+            const coordDim = resultItem.coordDim;
 
-        if (coordDim == null) {
-            const res = genName(
-                extra, coordDimNameMap, coordDimNameAutoIdx, fromZero
-            );
-            coordDimNameAutoIdx = res.autoIdx;
-            resultItem.coordDim = res.name;
-            resultItem.coordDimIndex = 0;
-            // Series specified generateCoord is using out.
-            if (!generateCoord || generateCoordCount <= 0) {
-                resultItem.isExtraCoord = true;
+            if (coordDim == null) {
+                const res = genName(
+                    extra, coordDimNameMap, coordDimNameAutoIdx, fromZero
+                );
+                coordDimNameAutoIdx = res.autoIdx;
+                resultItem.coordDim = res.name;
+                resultItem.coordDimIndex = 0;
+                // Series specified generateCoord is using out.
+                if (!generateCoord || generateCoordCount <= 0) {
+                    resultItem.isExtraCoord = true;
+                }
+                generateCoordCount--;
             }
-            generateCoordCount--;
-        }
 
-        if (resultItem.name == null) {
-            const res = genName(
-                resultItem.coordDim, dataDimNameMap, dataDimNameAutoIdx, false
-            );
-            resultItem.name = res.name;
-            dataDimNameAutoIdx = res.autoIdx;
-        }
+            if (resultItem.name == null) {
+                const res = genName(
+                    resultItem.coordDim, dataDimNameMap, dataDimNameAutoIdx, false
+                );
+                resultItem.name = res.name;
+                dataDimNameAutoIdx = res.autoIdx;
+            }
 
-        if (resultItem.type == null
-            && (
-                guessOrdinal(source, resultDimIdx) === BE_ORDINAL.Must
-                // Consider the case:
-                // {
-                //    dataset: {source: [
-                //        ['2001', 123],
-                //        ['2002', 456],
-                //        ...
-                //        ['The others', 987],
-                //    ]},
-                //    series: {type: 'pie'}
-                // }
-                // The first colum should better be treated as a "ordinal" although it
-                // might not able to be detected as an "ordinal" by `guessOrdinal`.
-                || (resultItem.isExtraCoord
-                    && (resultItem.otherDims.itemName != null
-                        || resultItem.otherDims.seriesName != null
+            if (resultItem.type == null
+                && (
+                    guessOrdinal(source, resultDimIdx) === BE_ORDINAL.Must
+                    // Consider the case:
+                    // {
+                    //    dataset: {source: [
+                    //        ['2001', 123],
+                    //        ['2002', 456],
+                    //        ...
+                    //        ['The others', 987],
+                    //    ]},
+                    //    series: {type: 'pie'}
+                    // }
+                    // The first colum should better be treated as a "ordinal" although it
+                    // might not able to be detected as an "ordinal" by `guessOrdinal`.
+                    || (resultItem.isExtraCoord
+                        && (resultItem.otherDims.itemName != null
+                            || resultItem.otherDims.seriesName != null
+                        )
                     )
                 )
-            )
-        ) {
-            resultItem.type = 'ordinal';
+            ) {
+                resultItem.type = 'ordinal';
+            }
         }
+        return result;
+    }
+    else {
+        // Sort dimensions
+        const toSort = [];
+        for (let i = 0; i < indicesMap.length; i++) {
+            if (indicesMap[i] >= 0) {
+                toSort.push({ i, o: result[indicesMap[i]]});
+            }
+        }
+        toSort.sort((a, b) => a.i - b.i);
+        return map(toSort, item => item.o);
     }
-
-    return result;
 }
 
 
@@ -299,11 +337,11 @@ export default function createDimensions(
 // (2) sometimes user need to calcualte bubble size or use visualMap
 // on other dimensions besides coordSys needed.
 // So, dims that is not used by system, should be shared in storage?
-function getDimCount(
+export function getDimCount(
     source: Source,
     sysDims: CoordDimensionDefinitionLoose[],
     dimsDef: DimensionDefinitionLoose[],
-    optDimCount: number
+    optDimCount?: number
 ): number {
     // Note that the result dimCount should not small than columns count
     // of data, otherwise `dataDimNameMap` checking will be incorrect.
@@ -335,8 +373,8 @@ function genName(
             i++;
         }
         name += i;
-        autoIdx = i;
+        autoIdx = i + 1;
     }
     map.set(name, true);
     return { name, autoIdx };
-}
+}
\ No newline at end of file
diff --git a/src/data/helper/dimensionHelper.ts b/src/data/helper/dimensionHelper.ts
index 492c488..9a97b9e 100644
--- a/src/data/helper/dimensionHelper.ts
+++ b/src/data/helper/dimensionHelper.ts
@@ -42,17 +42,6 @@ export type DimensionSummary = {
     encodeFirstDimNotExtra: {[coordDim: string]: DimensionName},
 };
 
-/**
- * Omit unused dimensions.
- * This will improve performance signifantly when multiple series
- * is sharing a extra high dimension dataset.
- */
-export function omitUnusedDimensions(dims: DataDimensionInfo[]) {
-    return filter(dims, (dim) => {
-        return !dim.isExtraCoord || keys(dim.otherDims).length > 0;
-    });
-}
-
 export function summarizeDimensions(data: SeriesData): DimensionSummary {
     const summary: DimensionSummary = {} as DimensionSummary;
     const encode = summary.encode = {} as DimensionSummaryEncode;
diff --git a/src/data/helper/sourceManager.ts b/src/data/helper/sourceManager.ts
index 8790bd7..da73f55 100644
--- a/src/data/helper/sourceManager.ts
+++ b/src/data/helper/sourceManager.ts
@@ -223,8 +223,8 @@ export class SourceManager {
             }
 
             // See [REQUIREMENT_MEMO], merge settings on series and parent dataset if it is root.
-            const newMetaRawOption = this._getSourceMetaRawOption();
-            const upMetaRawOption = upSource ? upSource.metaRawOption : {} as SourceMetaRawOption;
+            const newMetaRawOption = this._getSourceMetaRawOption() || {} as SourceMetaRawOption;
+            const upMetaRawOption = upSource && upSource.metaRawOption || {} as SourceMetaRawOption;
             const seriesLayoutBy = retrieve2(newMetaRawOption.seriesLayoutBy, upMetaRawOption.seriesLayoutBy) || null;
             const sourceHeader = retrieve2(newMetaRawOption.sourceHeader, upMetaRawOption.sourceHeader) || null;
             // Note here we should not use `upSource.dimensionsDefine`. Consider the case:

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@echarts.apache.org
For additional commands, e-mail: commits-help@echarts.apache.org