You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@iceberg.apache.org by GitBox <gi...@apache.org> on 2022/11/17 07:45:02 UTC

[GitHub] [iceberg] aokolnychyi opened a new pull request, #6207: Spark 3.3: Add years transform function

aokolnychyi opened a new pull request, #6207:
URL: https://github.com/apache/iceberg/pull/6207

   This PR adds `years` transform function to Spark 3.3.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] aokolnychyi commented on a diff in pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
aokolnychyi commented on code in PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#discussion_r1026003401


##########
spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/sql/TestSparkYearsFunction.java:
##########
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.sql;
+
+import org.apache.iceberg.AssertHelpers;
+import org.apache.iceberg.spark.SparkTestBaseWithCatalog;
+import org.apache.spark.sql.AnalysisException;
+import org.assertj.core.api.Assertions;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestSparkYearsFunction extends SparkTestBaseWithCatalog {
+
+  @Before
+  public void useCatalog() {
+    sql("USE %s", catalogName);
+  }
+
+  @Test
+  public void testDates() {
+    Assert.assertEquals(
+        "Expected to produce 2017 - 1970 = 47",
+        47,
+        scalarSql("SELECT system.years(date('2017-12-01'))"));
+    Assert.assertEquals(
+        "Expected to produce 1970 - 1970 = 0",
+        0,
+        scalarSql("SELECT system.years(date('1970-01-01'))"));
+    Assert.assertEquals(
+        "Expected to produce 19690 - 1970 = -1",
+        -1,
+        scalarSql("SELECT system.years(date('1969-12-31'))"));
+    Assert.assertNull(scalarSql("SELECT system.years(CAST(null AS DATE))"));
+  }
+
+  @Test
+  public void testTimestamps() {
+    Assert.assertEquals(
+        "Expected to produce 2017 - 1970 = 47",
+        47,
+        scalarSql("SELECT system.years(TIMESTAMP '2017-12-01 10:12:55.038194 UTC+00:00')"));
+    Assert.assertEquals(
+        "Expected to produce 1970 - 1970 = 0",
+        0,
+        scalarSql("SELECT system.years(TIMESTAMP '1970-01-01 00:00:01.000001 UTC+00:00')"));
+    Assert.assertEquals(
+        "Expected to produce 19690 - 1970 = -1",
+        -1,
+        scalarSql("SELECT system.years(TIMESTAMP '1969-12-31 23:59:58.999999 UTC+00:00')"));
+    Assert.assertNull(scalarSql("SELECT system.years(CAST(null AS TIMESTAMP))"));
+  }
+
+  @Test
+  public void testWrongNumberOfArguments() {
+    AssertHelpers.assertThrows(
+        "Function resolution should not work with zero arguments",
+        AnalysisException.class,
+        "Function 'years' cannot process input: (): Wrong number of inputs",
+        () -> scalarSql("SELECT system.years()"));
+
+    AssertHelpers.assertThrows(
+        "Function resolution should not work with more than one argument",
+        AnalysisException.class,
+        "Function 'years' cannot process input: (date, date): Wrong number of inputs",
+        () -> scalarSql("SELECT system.years(date('1969-12-31'), date('1969-12-31'))"));
+  }
+
+  @Test
+  public void testInvalidInputTypes() {
+    AssertHelpers.assertThrows(
+        "Int type should not be coercible to date/timestamp",
+        AnalysisException.class,
+        "Function 'years' cannot process input: (int): Expected value to be date or timestamp",
+        () -> scalarSql("SELECT system.years(1)"));
+
+    AssertHelpers.assertThrows(
+        "Long type should not be coercible to date/timestamp",
+        AnalysisException.class,
+        "Function 'years' cannot process input: (bigint): Expected value to be date or timestamp",
+        () -> scalarSql("SELECT system.years(1L)"));
+  }
+
+  @Test
+  public void testThatMagicFunctionsAreInvoked() {
+    String dateValue = "date('2017-12-01')";
+    Assertions.assertThat(scalarSql("EXPLAIN EXTENDED SELECT system.years(%s)", dateValue))
+        .asString()
+        .isNotNull()
+        .contains(
+            "staticinvoke(class org.apache.iceberg.spark.functions.YearsFunction$DateToYearsFunction");

Review Comment:
   Sounds more reliable, I'll try.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] RussellSpitzer commented on a diff in pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
RussellSpitzer commented on code in PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#discussion_r1025452084


##########
spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/sql/TestSparkYearsFunction.java:
##########
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.sql;
+
+import org.apache.iceberg.AssertHelpers;
+import org.apache.iceberg.spark.SparkTestBaseWithCatalog;
+import org.apache.spark.sql.AnalysisException;
+import org.assertj.core.api.Assertions;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestSparkYearsFunction extends SparkTestBaseWithCatalog {
+
+  @Before
+  public void useCatalog() {
+    sql("USE %s", catalogName);
+  }
+
+  @Test
+  public void testDates() {
+    Assert.assertEquals(

Review Comment:
   Just a suggestion but I prefer to also have a property style test, maybe something like 
   ```
   for (i in 1 to 1000) {
     x = random()
     assert(0, system.years(add_months(epoch, 12 * x) + system.years(add_months(epoch, -12 * x))
   }
   ```
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] aokolnychyi commented on a diff in pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
aokolnychyi commented on code in PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#discussion_r1030910971


##########
spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/functions/YearsFunction.java:
##########
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.functions;
+
+import org.apache.iceberg.util.DateTimeUtil;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.connector.catalog.functions.BoundFunction;
+import org.apache.spark.sql.connector.catalog.functions.ScalarFunction;
+import org.apache.spark.sql.connector.catalog.functions.UnboundFunction;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.DateType;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.types.TimestampType;
+
+/**
+ * A Spark function implementation for the Iceberg year transform.
+ *
+ * <p>Example usage: {@code SELECT system.years('source_col')}.
+ */
+public class YearsFunction implements UnboundFunction {
+
+  @Override
+  public BoundFunction bind(StructType inputType) {
+    if (inputType.size() != 1) {
+      throw new UnsupportedOperationException("Wrong number of inputs (expected value)");
+    }
+
+    StructField valueField = inputType.fields()[0];
+    DataType valueType = valueField.dataType();
+
+    if (valueType instanceof DateType) {
+      return new DateToYearsFunction();
+    } else if (valueType instanceof TimestampType) {
+      return new TimestampToYearsFunction();
+    } else {
+      throw new UnsupportedOperationException("Expected value to be date or timestamp");
+    }
+  }
+
+  @Override
+  public String description() {
+    return name()
+        + "(col) - Call Iceberg's year transform\n"
+        + "  col :: source column (must be date or timestamp)";
+  }
+
+  @Override
+  public String name() {
+    return "years";
+  }
+
+  private abstract static class BaseToYearsFunction implements ScalarFunction<Integer> {
+    @Override
+    public String name() {
+      return "years";
+    }
+
+    @Override
+    public DataType resultType() {
+      return DataTypes.IntegerType;
+    }
+  }
+
+  public static class DateToYearsFunction extends BaseToYearsFunction {
+
+    // magic method used in codegen
+    public static int invoke(int days) {
+      return DateTimeUtil.daysToYears(days);
+    }
+
+    @Override
+    public DataType[] inputTypes() {
+      return new DataType[] {DataTypes.DateType};
+    }
+
+    @Override
+    public String canonicalName() {
+      return "iceberg.years(date)";
+    }
+
+    @Override
+    public Integer produceResult(InternalRow input) {
+      // return null for null input to match what Spark does in codegen
+      return input.isNullAt(0) ? null : invoke(input.getInt(0));
+    }
+  }
+
+  public static class TimestampToYearsFunction extends BaseToYearsFunction {
+
+    // magic method used in codegen
+    public static int invoke(long micros) {
+      return DateTimeUtil.microsToYears(micros);
+    }
+
+    @Override
+    public DataType[] inputTypes() {
+      return new DataType[] {DataTypes.TimestampType};
+    }
+
+    @Override
+    public String canonicalName() {
+      return "iceberg.years(timestamp)";
+    }
+
+    @Override
+    public Integer produceResult(InternalRow input) {

Review Comment:
   They are slightly different. One fetches integers and the other one fetches longs from `input`.



##########
spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/functions/YearsFunction.java:
##########
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.functions;
+
+import org.apache.iceberg.util.DateTimeUtil;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.connector.catalog.functions.BoundFunction;
+import org.apache.spark.sql.connector.catalog.functions.ScalarFunction;
+import org.apache.spark.sql.connector.catalog.functions.UnboundFunction;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.DateType;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.types.TimestampType;
+
+/**
+ * A Spark function implementation for the Iceberg year transform.
+ *
+ * <p>Example usage: {@code SELECT system.years('source_col')}.
+ */
+public class YearsFunction implements UnboundFunction {
+
+  @Override
+  public BoundFunction bind(StructType inputType) {
+    if (inputType.size() != 1) {
+      throw new UnsupportedOperationException("Wrong number of inputs (expected value)");
+    }
+
+    StructField valueField = inputType.fields()[0];
+    DataType valueType = valueField.dataType();
+
+    if (valueType instanceof DateType) {
+      return new DateToYearsFunction();
+    } else if (valueType instanceof TimestampType) {
+      return new TimestampToYearsFunction();
+    } else {
+      throw new UnsupportedOperationException("Expected value to be date or timestamp");

Review Comment:
   Good idea, added.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] aokolnychyi commented on a diff in pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
aokolnychyi commented on code in PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#discussion_r1026002989


##########
spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/sql/TestSparkYearsFunction.java:
##########
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.sql;
+
+import org.apache.iceberg.AssertHelpers;
+import org.apache.iceberg.spark.SparkTestBaseWithCatalog;
+import org.apache.spark.sql.AnalysisException;
+import org.assertj.core.api.Assertions;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestSparkYearsFunction extends SparkTestBaseWithCatalog {
+
+  @Before
+  public void useCatalog() {
+    sql("USE %s", catalogName);
+  }
+
+  @Test
+  public void testDates() {
+    Assert.assertEquals(
+        "Expected to produce 2017 - 1970 = 47",
+        47,
+        scalarSql("SELECT system.years(date('2017-12-01'))"));
+    Assert.assertEquals(
+        "Expected to produce 1970 - 1970 = 0",
+        0,
+        scalarSql("SELECT system.years(date('1970-01-01'))"));
+    Assert.assertEquals(
+        "Expected to produce 19690 - 1970 = -1",

Review Comment:
   Oops, will fix.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] aokolnychyi commented on a diff in pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
aokolnychyi commented on code in PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#discussion_r1030908641


##########
spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/sql/TestSparkYearsFunction.java:
##########
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.sql;
+
+import org.apache.iceberg.AssertHelpers;
+import org.apache.iceberg.spark.SparkTestBaseWithCatalog;
+import org.apache.spark.sql.AnalysisException;
+import org.assertj.core.api.Assertions;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestSparkYearsFunction extends SparkTestBaseWithCatalog {
+
+  @Before
+  public void useCatalog() {
+    sql("USE %s", catalogName);
+  }
+
+  @Test
+  public void testDates() {
+    Assert.assertEquals(
+        "Expected to produce 2017 - 1970 = 47",
+        47,
+        scalarSql("SELECT system.years(date('2017-12-01'))"));
+    Assert.assertEquals(
+        "Expected to produce 1970 - 1970 = 0",
+        0,
+        scalarSql("SELECT system.years(date('1970-01-01'))"));
+    Assert.assertEquals(
+        "Expected to produce 19690 - 1970 = -1",

Review Comment:
   Fixed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] RussellSpitzer commented on a diff in pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
RussellSpitzer commented on code in PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#discussion_r1025453770


##########
spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/functions/YearsFunction.java:
##########
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.functions;
+
+import org.apache.iceberg.util.DateTimeUtil;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.connector.catalog.functions.BoundFunction;
+import org.apache.spark.sql.connector.catalog.functions.ScalarFunction;
+import org.apache.spark.sql.connector.catalog.functions.UnboundFunction;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.DateType;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.types.TimestampType;
+
+/**
+ * A Spark function implementation for the Iceberg year transform.
+ *
+ * <p>Example usage: {@code SELECT system.years('source_col')}.
+ */
+public class YearsFunction implements UnboundFunction {
+
+  @Override
+  public BoundFunction bind(StructType inputType) {
+    if (inputType.size() != 1) {
+      throw new UnsupportedOperationException("Wrong number of inputs (expected value)");
+    }
+
+    StructField valueField = inputType.fields()[0];
+    DataType valueType = valueField.dataType();
+
+    if (valueType instanceof DateType) {
+      return new DateToYearsFunction();
+    } else if (valueType instanceof TimestampType) {
+      return new TimestampToYearsFunction();
+    } else {
+      throw new UnsupportedOperationException("Expected value to be date or timestamp");
+    }
+  }
+
+  @Override
+  public String description() {
+    return name()
+        + "(col) - Call Iceberg's year transform\n"
+        + "  col :: source column (must be date or timestamp)";
+  }
+
+  @Override
+  public String name() {
+    return "years";
+  }
+
+  private abstract static class BaseToYearsFunction implements ScalarFunction<Integer> {
+    @Override
+    public String name() {
+      return "years";
+    }
+
+    @Override
+    public DataType resultType() {
+      return DataTypes.IntegerType;
+    }
+  }
+
+  public static class DateToYearsFunction extends BaseToYearsFunction {
+
+    // magic method used in codegen
+    public static int invoke(int days) {
+      return DateTimeUtil.daysToYears(days);
+    }
+
+    @Override
+    public DataType[] inputTypes() {
+      return new DataType[] {DataTypes.DateType};
+    }
+
+    @Override
+    public String canonicalName() {
+      return "iceberg.years(date)";
+    }
+
+    @Override
+    public Integer produceResult(InternalRow input) {
+      // return null for null input to match what Spark does in codegen

Review Comment:
   Makes sense, as long as this is what Spark does 👍 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] RussellSpitzer commented on a diff in pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
RussellSpitzer commented on code in PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#discussion_r1025443474


##########
spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/sql/TestSparkYearsFunction.java:
##########
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.sql;
+
+import org.apache.iceberg.AssertHelpers;
+import org.apache.iceberg.spark.SparkTestBaseWithCatalog;
+import org.apache.spark.sql.AnalysisException;
+import org.assertj.core.api.Assertions;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestSparkYearsFunction extends SparkTestBaseWithCatalog {
+
+  @Before
+  public void useCatalog() {
+    sql("USE %s", catalogName);
+  }
+
+  @Test
+  public void testDates() {
+    Assert.assertEquals(
+        "Expected to produce 2017 - 1970 = 47",
+        47,
+        scalarSql("SELECT system.years(date('2017-12-01'))"));
+    Assert.assertEquals(
+        "Expected to produce 1970 - 1970 = 0",
+        0,
+        scalarSql("SELECT system.years(date('1970-01-01'))"));
+    Assert.assertEquals(
+        "Expected to produce 19690 - 1970 = -1",

Review Comment:
   typo



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] aokolnychyi merged pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
aokolnychyi merged PR #6207:
URL: https://github.com/apache/iceberg/pull/6207


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] nastra commented on a diff in pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
nastra commented on code in PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#discussion_r1031127542


##########
spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/functions/YearsFunction.java:
##########
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.functions;
+
+import org.apache.iceberg.util.DateTimeUtil;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.connector.catalog.functions.BoundFunction;
+import org.apache.spark.sql.connector.catalog.functions.ScalarFunction;
+import org.apache.spark.sql.connector.catalog.functions.UnboundFunction;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.DateType;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.types.TimestampType;
+
+/**
+ * A Spark function implementation for the Iceberg year transform.
+ *
+ * <p>Example usage: {@code SELECT system.years('source_col')}.
+ */
+public class YearsFunction implements UnboundFunction {
+
+  @Override
+  public BoundFunction bind(StructType inputType) {
+    if (inputType.size() != 1) {
+      throw new UnsupportedOperationException("Wrong number of inputs (expected value)");
+    }
+
+    StructField valueField = inputType.fields()[0];
+    DataType valueType = valueField.dataType();
+
+    if (valueType instanceof DateType) {
+      return new DateToYearsFunction();
+    } else if (valueType instanceof TimestampType) {
+      return new TimestampToYearsFunction();
+    } else {
+      throw new UnsupportedOperationException("Expected value to be date or timestamp");
+    }
+  }
+
+  @Override
+  public String description() {
+    return name()
+        + "(col) - Call Iceberg's year transform\n"
+        + "  col :: source column (must be date or timestamp)";
+  }
+
+  @Override
+  public String name() {
+    return "years";
+  }
+
+  private abstract static class BaseToYearsFunction implements ScalarFunction<Integer> {
+    @Override
+    public String name() {
+      return "years";
+    }
+
+    @Override
+    public DataType resultType() {
+      return DataTypes.IntegerType;
+    }
+  }
+
+  public static class DateToYearsFunction extends BaseToYearsFunction {
+
+    // magic method used in codegen
+    public static int invoke(int days) {
+      return DateTimeUtil.daysToYears(days);
+    }
+
+    @Override
+    public DataType[] inputTypes() {
+      return new DataType[] {DataTypes.DateType};
+    }
+
+    @Override
+    public String canonicalName() {
+      return "iceberg.years(date)";
+    }
+
+    @Override
+    public Integer produceResult(InternalRow input) {
+      // return null for null input to match what Spark does in codegen
+      return input.isNullAt(0) ? null : invoke(input.getInt(0));
+    }
+  }
+
+  public static class TimestampToYearsFunction extends BaseToYearsFunction {
+
+    // magic method used in codegen
+    public static int invoke(long micros) {
+      return DateTimeUtil.microsToYears(micros);
+    }
+
+    @Override
+    public DataType[] inputTypes() {
+      return new DataType[] {DataTypes.TimestampType};
+    }
+
+    @Override
+    public String canonicalName() {
+      return "iceberg.years(timestamp)";
+    }
+
+    @Override
+    public Integer produceResult(InternalRow input) {

Review Comment:
   ah good call, I missed this tiny detail :)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] aokolnychyi commented on a diff in pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
aokolnychyi commented on code in PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#discussion_r1026001944


##########
spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/functions/YearsFunction.java:
##########
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.functions;
+
+import org.apache.iceberg.util.DateTimeUtil;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.connector.catalog.functions.BoundFunction;
+import org.apache.spark.sql.connector.catalog.functions.ScalarFunction;
+import org.apache.spark.sql.connector.catalog.functions.UnboundFunction;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.DateType;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.types.TimestampType;
+
+/**
+ * A Spark function implementation for the Iceberg year transform.
+ *
+ * <p>Example usage: {@code SELECT system.years('source_col')}.
+ */
+public class YearsFunction implements UnboundFunction {
+
+  @Override
+  public BoundFunction bind(StructType inputType) {
+    if (inputType.size() != 1) {
+      throw new UnsupportedOperationException("Wrong number of inputs (expected value)");
+    }
+
+    StructField valueField = inputType.fields()[0];
+    DataType valueType = valueField.dataType();
+
+    if (valueType instanceof DateType) {
+      return new DateToYearsFunction();
+    } else if (valueType instanceof TimestampType) {
+      return new TimestampToYearsFunction();
+    } else {
+      throw new UnsupportedOperationException("Expected value to be date or timestamp");
+    }
+  }
+
+  @Override
+  public String description() {
+    return name()
+        + "(col) - Call Iceberg's year transform\n"
+        + "  col :: source column (must be date or timestamp)";
+  }
+
+  @Override
+  public String name() {
+    return "years";
+  }
+
+  private abstract static class BaseToYearsFunction implements ScalarFunction<Integer> {
+    @Override
+    public String name() {
+      return "years";
+    }
+
+    @Override
+    public DataType resultType() {
+      return DataTypes.IntegerType;
+    }
+  }
+
+  public static class DateToYearsFunction extends BaseToYearsFunction {
+
+    // magic method used in codegen
+    public static int invoke(int days) {
+      return DateTimeUtil.daysToYears(days);
+    }
+
+    @Override
+    public DataType[] inputTypes() {
+      return new DataType[] {DataTypes.DateType};
+    }
+
+    @Override
+    public String canonicalName() {
+      return "iceberg.years(date)";
+    }
+
+    @Override
+    public Integer produceResult(InternalRow input) {
+      // return null for null input to match what Spark does in codegen

Review Comment:
   Yep, that's what we decided to do in other functions such as `bucket` and `truncate`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] aokolnychyi commented on a diff in pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
aokolnychyi commented on code in PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#discussion_r1030910335


##########
spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/sql/TestSparkYearsFunction.java:
##########
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.sql;
+
+import org.apache.iceberg.AssertHelpers;
+import org.apache.iceberg.spark.SparkTestBaseWithCatalog;
+import org.apache.spark.sql.AnalysisException;
+import org.assertj.core.api.Assertions;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestSparkYearsFunction extends SparkTestBaseWithCatalog {
+
+  @Before
+  public void useCatalog() {
+    sql("USE %s", catalogName);
+  }
+
+  @Test
+  public void testDates() {
+    Assert.assertEquals(

Review Comment:
   After a closer look, I think current tests are easier to read as we provide the expected value explicitly.



##########
spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/sql/TestSparkYearsFunction.java:
##########
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.sql;
+
+import org.apache.iceberg.AssertHelpers;
+import org.apache.iceberg.spark.SparkTestBaseWithCatalog;
+import org.apache.spark.sql.AnalysisException;
+import org.assertj.core.api.Assertions;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestSparkYearsFunction extends SparkTestBaseWithCatalog {
+
+  @Before
+  public void useCatalog() {
+    sql("USE %s", catalogName);
+  }
+
+  @Test
+  public void testDates() {
+    Assert.assertEquals(
+        "Expected to produce 2017 - 1970 = 47",
+        47,
+        scalarSql("SELECT system.years(date('2017-12-01'))"));
+    Assert.assertEquals(
+        "Expected to produce 1970 - 1970 = 0",
+        0,
+        scalarSql("SELECT system.years(date('1970-01-01'))"));
+    Assert.assertEquals(
+        "Expected to produce 19690 - 1970 = -1",
+        -1,
+        scalarSql("SELECT system.years(date('1969-12-31'))"));
+    Assert.assertNull(scalarSql("SELECT system.years(CAST(null AS DATE))"));
+  }
+
+  @Test
+  public void testTimestamps() {
+    Assert.assertEquals(
+        "Expected to produce 2017 - 1970 = 47",
+        47,
+        scalarSql("SELECT system.years(TIMESTAMP '2017-12-01 10:12:55.038194 UTC+00:00')"));
+    Assert.assertEquals(
+        "Expected to produce 1970 - 1970 = 0",
+        0,
+        scalarSql("SELECT system.years(TIMESTAMP '1970-01-01 00:00:01.000001 UTC+00:00')"));
+    Assert.assertEquals(
+        "Expected to produce 19690 - 1970 = -1",
+        -1,
+        scalarSql("SELECT system.years(TIMESTAMP '1969-12-31 23:59:58.999999 UTC+00:00')"));
+    Assert.assertNull(scalarSql("SELECT system.years(CAST(null AS TIMESTAMP))"));
+  }
+
+  @Test
+  public void testWrongNumberOfArguments() {
+    AssertHelpers.assertThrows(
+        "Function resolution should not work with zero arguments",
+        AnalysisException.class,
+        "Function 'years' cannot process input: (): Wrong number of inputs",
+        () -> scalarSql("SELECT system.years()"));
+
+    AssertHelpers.assertThrows(
+        "Function resolution should not work with more than one argument",
+        AnalysisException.class,
+        "Function 'years' cannot process input: (date, date): Wrong number of inputs",
+        () -> scalarSql("SELECT system.years(date('1969-12-31'), date('1969-12-31'))"));
+  }
+
+  @Test
+  public void testInvalidInputTypes() {
+    AssertHelpers.assertThrows(
+        "Int type should not be coercible to date/timestamp",
+        AnalysisException.class,
+        "Function 'years' cannot process input: (int): Expected value to be date or timestamp",
+        () -> scalarSql("SELECT system.years(1)"));
+
+    AssertHelpers.assertThrows(
+        "Long type should not be coercible to date/timestamp",
+        AnalysisException.class,
+        "Function 'years' cannot process input: (bigint): Expected value to be date or timestamp",
+        () -> scalarSql("SELECT system.years(1L)"));
+  }
+
+  @Test
+  public void testThatMagicFunctionsAreInvoked() {
+    String dateValue = "date('2017-12-01')";
+    Assertions.assertThat(scalarSql("EXPLAIN EXTENDED SELECT system.years(%s)", dateValue))
+        .asString()
+        .isNotNull()
+        .contains(
+            "staticinvoke(class org.apache.iceberg.spark.functions.YearsFunction$DateToYearsFunction");

Review Comment:
   Changed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] aokolnychyi commented on pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
aokolnychyi commented on PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#issuecomment-1325788994

   Thanks for reviewing, @RussellSpitzer @nastra!


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] RussellSpitzer commented on a diff in pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
RussellSpitzer commented on code in PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#discussion_r1025458116


##########
spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/sql/TestSparkYearsFunction.java:
##########
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.sql;
+
+import org.apache.iceberg.AssertHelpers;
+import org.apache.iceberg.spark.SparkTestBaseWithCatalog;
+import org.apache.spark.sql.AnalysisException;
+import org.assertj.core.api.Assertions;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestSparkYearsFunction extends SparkTestBaseWithCatalog {
+
+  @Before
+  public void useCatalog() {
+    sql("USE %s", catalogName);
+  }
+
+  @Test
+  public void testDates() {
+    Assert.assertEquals(
+        "Expected to produce 2017 - 1970 = 47",
+        47,
+        scalarSql("SELECT system.years(date('2017-12-01'))"));
+    Assert.assertEquals(
+        "Expected to produce 1970 - 1970 = 0",
+        0,
+        scalarSql("SELECT system.years(date('1970-01-01'))"));
+    Assert.assertEquals(
+        "Expected to produce 19690 - 1970 = -1",
+        -1,
+        scalarSql("SELECT system.years(date('1969-12-31'))"));
+    Assert.assertNull(scalarSql("SELECT system.years(CAST(null AS DATE))"));
+  }
+
+  @Test
+  public void testTimestamps() {
+    Assert.assertEquals(
+        "Expected to produce 2017 - 1970 = 47",
+        47,
+        scalarSql("SELECT system.years(TIMESTAMP '2017-12-01 10:12:55.038194 UTC+00:00')"));
+    Assert.assertEquals(
+        "Expected to produce 1970 - 1970 = 0",
+        0,
+        scalarSql("SELECT system.years(TIMESTAMP '1970-01-01 00:00:01.000001 UTC+00:00')"));
+    Assert.assertEquals(
+        "Expected to produce 19690 - 1970 = -1",
+        -1,
+        scalarSql("SELECT system.years(TIMESTAMP '1969-12-31 23:59:58.999999 UTC+00:00')"));
+    Assert.assertNull(scalarSql("SELECT system.years(CAST(null AS TIMESTAMP))"));
+  }
+
+  @Test
+  public void testWrongNumberOfArguments() {
+    AssertHelpers.assertThrows(
+        "Function resolution should not work with zero arguments",
+        AnalysisException.class,
+        "Function 'years' cannot process input: (): Wrong number of inputs",
+        () -> scalarSql("SELECT system.years()"));
+
+    AssertHelpers.assertThrows(
+        "Function resolution should not work with more than one argument",
+        AnalysisException.class,
+        "Function 'years' cannot process input: (date, date): Wrong number of inputs",
+        () -> scalarSql("SELECT system.years(date('1969-12-31'), date('1969-12-31'))"));
+  }
+
+  @Test
+  public void testInvalidInputTypes() {
+    AssertHelpers.assertThrows(
+        "Int type should not be coercible to date/timestamp",
+        AnalysisException.class,
+        "Function 'years' cannot process input: (int): Expected value to be date or timestamp",
+        () -> scalarSql("SELECT system.years(1)"));
+
+    AssertHelpers.assertThrows(
+        "Long type should not be coercible to date/timestamp",
+        AnalysisException.class,
+        "Function 'years' cannot process input: (bigint): Expected value to be date or timestamp",
+        () -> scalarSql("SELECT system.years(1L)"));
+  }
+
+  @Test
+  public void testThatMagicFunctionsAreInvoked() {
+    String dateValue = "date('2017-12-01')";
+    Assertions.assertThat(scalarSql("EXPLAIN EXTENDED SELECT system.years(%s)", dateValue))
+        .asString()
+        .isNotNull()
+        .contains(
+            "staticinvoke(class org.apache.iceberg.spark.functions.YearsFunction$DateToYearsFunction");

Review Comment:
   nit: should we get the package and function name programmatically? Probably not a big deal as I can't imagine this changing



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] nastra commented on a diff in pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
nastra commented on code in PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#discussion_r1026166812


##########
spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/functions/YearsFunction.java:
##########
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.functions;
+
+import org.apache.iceberg.util.DateTimeUtil;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.connector.catalog.functions.BoundFunction;
+import org.apache.spark.sql.connector.catalog.functions.ScalarFunction;
+import org.apache.spark.sql.connector.catalog.functions.UnboundFunction;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.DateType;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.types.TimestampType;
+
+/**
+ * A Spark function implementation for the Iceberg year transform.
+ *
+ * <p>Example usage: {@code SELECT system.years('source_col')}.
+ */
+public class YearsFunction implements UnboundFunction {
+
+  @Override
+  public BoundFunction bind(StructType inputType) {
+    if (inputType.size() != 1) {
+      throw new UnsupportedOperationException("Wrong number of inputs (expected value)");
+    }
+
+    StructField valueField = inputType.fields()[0];
+    DataType valueType = valueField.dataType();
+
+    if (valueType instanceof DateType) {
+      return new DateToYearsFunction();
+    } else if (valueType instanceof TimestampType) {
+      return new TimestampToYearsFunction();
+    } else {
+      throw new UnsupportedOperationException("Expected value to be date or timestamp");

Review Comment:
   nit: should we print what the underlying type of `inputType` was?



##########
spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/functions/YearsFunction.java:
##########
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.functions;
+
+import org.apache.iceberg.util.DateTimeUtil;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.connector.catalog.functions.BoundFunction;
+import org.apache.spark.sql.connector.catalog.functions.ScalarFunction;
+import org.apache.spark.sql.connector.catalog.functions.UnboundFunction;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.DateType;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.types.TimestampType;
+
+/**
+ * A Spark function implementation for the Iceberg year transform.
+ *
+ * <p>Example usage: {@code SELECT system.years('source_col')}.
+ */
+public class YearsFunction implements UnboundFunction {
+
+  @Override
+  public BoundFunction bind(StructType inputType) {
+    if (inputType.size() != 1) {
+      throw new UnsupportedOperationException("Wrong number of inputs (expected value)");
+    }
+
+    StructField valueField = inputType.fields()[0];
+    DataType valueType = valueField.dataType();
+
+    if (valueType instanceof DateType) {
+      return new DateToYearsFunction();
+    } else if (valueType instanceof TimestampType) {
+      return new TimestampToYearsFunction();
+    } else {
+      throw new UnsupportedOperationException("Expected value to be date or timestamp");
+    }
+  }
+
+  @Override
+  public String description() {
+    return name()
+        + "(col) - Call Iceberg's year transform\n"
+        + "  col :: source column (must be date or timestamp)";
+  }
+
+  @Override
+  public String name() {
+    return "years";
+  }
+
+  private abstract static class BaseToYearsFunction implements ScalarFunction<Integer> {
+    @Override
+    public String name() {
+      return "years";
+    }
+
+    @Override
+    public DataType resultType() {
+      return DataTypes.IntegerType;
+    }
+  }
+
+  public static class DateToYearsFunction extends BaseToYearsFunction {
+
+    // magic method used in codegen
+    public static int invoke(int days) {
+      return DateTimeUtil.daysToYears(days);
+    }
+
+    @Override
+    public DataType[] inputTypes() {
+      return new DataType[] {DataTypes.DateType};
+    }
+
+    @Override
+    public String canonicalName() {
+      return "iceberg.years(date)";
+    }
+
+    @Override
+    public Integer produceResult(InternalRow input) {
+      // return null for null input to match what Spark does in codegen
+      return input.isNullAt(0) ? null : invoke(input.getInt(0));
+    }
+  }
+
+  public static class TimestampToYearsFunction extends BaseToYearsFunction {
+
+    // magic method used in codegen
+    public static int invoke(long micros) {
+      return DateTimeUtil.microsToYears(micros);
+    }
+
+    @Override
+    public DataType[] inputTypes() {
+      return new DataType[] {DataTypes.TimestampType};
+    }
+
+    @Override
+    public String canonicalName() {
+      return "iceberg.years(timestamp)";
+    }
+
+    @Override
+    public Integer produceResult(InternalRow input) {

Review Comment:
   nit: this method appears to be the same for TimestampToYearsFunction + DateToYearsFunction, should we maybe move it to BaseToYearsFunction?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] aokolnychyi commented on pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
aokolnychyi commented on PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#issuecomment-1318219571

   cc @kbendick @rdblue @RussellSpitzer @szehon-ho @flyrain @gaborkaszab @nastra 


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


[GitHub] [iceberg] aokolnychyi commented on a diff in pull request #6207: Spark 3.3: Add years transform function

Posted by GitBox <gi...@apache.org>.
aokolnychyi commented on code in PR #6207:
URL: https://github.com/apache/iceberg/pull/6207#discussion_r1026004169


##########
spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/sql/TestSparkYearsFunction.java:
##########
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.sql;
+
+import org.apache.iceberg.AssertHelpers;
+import org.apache.iceberg.spark.SparkTestBaseWithCatalog;
+import org.apache.spark.sql.AnalysisException;
+import org.assertj.core.api.Assertions;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestSparkYearsFunction extends SparkTestBaseWithCatalog {
+
+  @Before
+  public void useCatalog() {
+    sql("USE %s", catalogName);
+  }
+
+  @Test
+  public void testDates() {
+    Assert.assertEquals(

Review Comment:
   I was inspired by our existing tests in core but let me add one more test for this too.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org