You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by jo...@apache.org on 2023/10/20 17:05:12 UTC

[superset] branch master updated: refactor: use DATE_TRUNC for Elasticsearch time grain (#25717)

This is an automated email from the ASF dual-hosted git repository.

johnbodley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 9972ac6908 refactor: use DATE_TRUNC for Elasticsearch time grain (#25717)
9972ac6908 is described below

commit 9972ac69088ae0bf4b72d2340ee7ba62a4760ac3
Author: Mikel Vuka <mi...@gmail.com>
AuthorDate: Fri Oct 20 19:05:05 2023 +0200

    refactor: use DATE_TRUNC for Elasticsearch time grain (#25717)
    
    Co-authored-by: Mikel Vuka <mi...@zalando.de>
---
 superset/db_engine_specs/elasticsearch.py          | 18 ++++++++------
 .../db_engine_specs/elasticsearch_tests.py         | 29 ++++++++++++----------
 2 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/superset/db_engine_specs/elasticsearch.py b/superset/db_engine_specs/elasticsearch.py
index 4f18397d8c..163bc640a6 100644
--- a/superset/db_engine_specs/elasticsearch.py
+++ b/superset/db_engine_specs/elasticsearch.py
@@ -40,15 +40,19 @@ class ElasticSearchEngineSpec(BaseEngineSpec):  # pylint: disable=abstract-metho
     allows_subqueries = True
     allows_sql_comments = False
 
+    _date_trunc_functions = {
+        "DATETIME": "DATE_TRUNC",
+    }
+
     _time_grain_expressions = {
         None: "{col}",
-        TimeGrain.SECOND: "HISTOGRAM({col}, INTERVAL 1 SECOND)",
-        TimeGrain.MINUTE: "HISTOGRAM({col}, INTERVAL 1 MINUTE)",
-        TimeGrain.HOUR: "HISTOGRAM({col}, INTERVAL 1 HOUR)",
-        TimeGrain.DAY: "HISTOGRAM({col}, INTERVAL 1 DAY)",
-        TimeGrain.WEEK: "DATE_TRUNC('week', {col})",
-        TimeGrain.MONTH: "HISTOGRAM({col}, INTERVAL 1 MONTH)",
-        TimeGrain.YEAR: "HISTOGRAM({col}, INTERVAL 1 YEAR)",
+        TimeGrain.SECOND: "{func}('second', {col})",
+        TimeGrain.MINUTE: "{func}('minute', {col})",
+        TimeGrain.HOUR: "{func}('hour', {col})",
+        TimeGrain.DAY: "{func}('day', {col})",
+        TimeGrain.WEEK: "{func}('week', {col})",
+        TimeGrain.MONTH: "{func}('month', {col})",
+        TimeGrain.YEAR: "{func}('year', {col})",
     }
 
     type_code_map: dict[int, str] = {}  # loaded from get_datatype only if needed
diff --git a/tests/integration_tests/db_engine_specs/elasticsearch_tests.py b/tests/integration_tests/db_engine_specs/elasticsearch_tests.py
index 7140e10ee1..8b07b2ebdd 100644
--- a/tests/integration_tests/db_engine_specs/elasticsearch_tests.py
+++ b/tests/integration_tests/db_engine_specs/elasticsearch_tests.py
@@ -14,27 +14,30 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+from parameterized import parameterized
 from sqlalchemy import column
 
+from superset.constants import TimeGrain
 from superset.db_engine_specs.elasticsearch import ElasticSearchEngineSpec
 from tests.integration_tests.db_engine_specs.base_tests import TestDbEngineSpec
 
 
 class TestElasticsearchDbEngineSpec(TestDbEngineSpec):
-    def test_time_grain_week_expression(self):
+    @parameterized.expand(
+        [
+            [TimeGrain.SECOND, "DATE_TRUNC('second', ts)"],
+            [TimeGrain.MINUTE, "DATE_TRUNC('minute', ts)"],
+            [TimeGrain.HOUR, "DATE_TRUNC('hour', ts)"],
+            [TimeGrain.DAY, "DATE_TRUNC('day', ts)"],
+            [TimeGrain.WEEK, "DATE_TRUNC('week', ts)"],
+            [TimeGrain.MONTH, "DATE_TRUNC('month', ts)"],
+            [TimeGrain.YEAR, "DATE_TRUNC('year', ts)"],
+        ]
+    )
+    def test_time_grain_expressions(self, time_grain, expected_time_grain_expression):
         col = column("ts")
-        col.type = "datetime"
-        expected_time_grain_expression = "DATE_TRUNC('week', ts)"
+        col.type = "DATETIME"
         actual = ElasticSearchEngineSpec.get_timestamp_expr(
-            col=col, pdf=None, time_grain="P1W"
-        )
-        self.assertEqual(str(actual), expected_time_grain_expression)
-
-    def test_time_grain_hour_expression(self):
-        col = column("ts")
-        col.type = "datetime"
-        expected_time_grain_expression = "HISTOGRAM(ts, INTERVAL 1 HOUR)"
-        actual = ElasticSearchEngineSpec.get_timestamp_expr(
-            col=col, pdf=None, time_grain="PT1H"
+            col=col, pdf=None, time_grain=time_grain
         )
         self.assertEqual(str(actual), expected_time_grain_expression)