You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2024/01/29 16:27:43 UTC
(arrow) branch main updated: GH-39740: [C++] Fix filter and take kernel for month_day_nano intervals (#39795)
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 800254fb16 GH-39740: [C++] Fix filter and take kernel for month_day_nano intervals (#39795)
800254fb16 is described below
commit 800254fb16f23af57916768124fb90e0050a8335
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Mon Jan 29 17:27:36 2024 +0100
GH-39740: [C++] Fix filter and take kernel for month_day_nano intervals (#39795)
### Rationale for this change
The filter and take functions were not correctly supported on month_day_nano intervals.
### What changes are included in this PR?
* Expand the primitive filter implementation to handle all possible fixed-width primitive types (including fixed-size binary)
* Expand the take filter implementation to handle all well-known fixed-width primitive types (including month_day_nano, decimal128 and decimal256)
* Add benchmarks for taking and filtering fixed-size binary
These changes allow for very significant performance improvements filtering and taking fixed-size binary data:
```
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Non-regressions: (90)
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
benchmark baseline contender change % counters
FilterFixedSizeBinaryFilterNoNulls/524288/0/8 1.716 GiB/sec 33.814 GiB/sec 1870.862 {'family_index': 0, 'per_family_instance_index': 0, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/0/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2462, 'byte_width': 8.0, 'data null%': 0.0, 'mask null%': 0.0, 'select%': 99.9}
TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1/8 380.056M items/sec 7.098G items/sec 1767.491 {'family_index': 3, 'per_family_instance_index': 6, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 505, 'byte_width': 8.0, 'null_percent': 100.0}
FilterFixedSizeBinaryFilterNoNulls/524288/0/9 1.916 GiB/sec 33.721 GiB/sec 1659.766 {'family_index': 0, 'per_family_instance_index': 1, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/0/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2750, 'byte_width': 9.0, 'data null%': 0.0, 'mask null%': 0.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterNoNulls/524288/9/8 917.713 MiB/sec 9.193 GiB/sec 925.719 {'family_index': 0, 'per_family_instance_index': 18, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/9/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1271, 'byte_width': 8.0, 'data null%': 10.0, 'mask null%': 0.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterNoNulls/524288/12/8 1.004 GiB/sec 9.374 GiB/sec 833.673 {'family_index': 0, 'per_family_instance_index': 24, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/12/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1440, 'byte_width': 8.0, 'data null%': 90.0, 'mask null%': 0.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterNoNulls/524288/3/8 1.625 GiB/sec 15.009 GiB/sec 823.442 {'family_index': 0, 'per_family_instance_index': 6, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/3/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2328, 'byte_width': 8.0, 'data null%': 0.1, 'mask null%': 0.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterNoNulls/524288/9/9 1021.638 MiB/sec 9.126 GiB/sec 814.670 {'family_index': 0, 'per_family_instance_index': 19, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/9/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1428, 'byte_width': 9.0, 'data null%': 10.0, 'mask null%': 0.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterNoNulls/524288/6/8 1.235 GiB/sec 10.814 GiB/sec 775.869 {'family_index': 0, 'per_family_instance_index': 12, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/6/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1762, 'byte_width': 8.0, 'data null%': 1.0, 'mask null%': 0.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterNoNulls/524288/12/9 1.123 GiB/sec 9.120 GiB/sec 712.196 {'family_index': 0, 'per_family_instance_index': 25, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/12/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1598, 'byte_width': 9.0, 'data null%': 90.0, 'mask null%': 0.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterNoNulls/524288/6/9 1.370 GiB/sec 10.499 GiB/sec 666.348 {'family_index': 0, 'per_family_instance_index': 13, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/6/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1958, 'byte_width': 9.0, 'data null%': 1.0, 'mask null%': 0.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterNoNulls/524288/3/9 1.814 GiB/sec 13.394 GiB/sec 638.343 {'family_index': 0, 'per_family_instance_index': 7, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/3/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2600, 'byte_width': 9.0, 'data null%': 0.1, 'mask null%': 0.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterNoNulls/524288/2/8 12.155 GiB/sec 77.799 GiB/sec 540.051 {'family_index': 0, 'per_family_instance_index': 4, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/2/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 17222, 'byte_width': 8.0, 'data null%': 0.0, 'mask null%': 0.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterNoNulls/524288/2/9 13.507 GiB/sec 84.361 GiB/sec 524.592 {'family_index': 0, 'per_family_instance_index': 5, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/2/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 19469, 'byte_width': 9.0, 'data null%': 0.0, 'mask null%': 0.0, 'select%': 1.0}
TakeFixedSizeBinaryMonotonicIndices/524288/1/8 194.493M items/sec 732.378M items/sec 276.557 {'family_index': 4, 'per_family_instance_index': 6, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/1/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 259, 'byte_width': 8.0, 'null_percent': 100.0}
TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1/8 200.981M items/sec 747.628M items/sec 271.989 {'family_index': 2, 'per_family_instance_index': 6, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 268, 'byte_width': 8.0, 'null_percent': 100.0}
FilterFixedSizeBinaryFilterWithNulls/524288/0/8 947.631 MiB/sec 3.318 GiB/sec 258.565 {'family_index': 1, 'per_family_instance_index': 0, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/0/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1329, 'byte_width': 8.0, 'data null%': 0.0, 'mask null%': 5.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterWithNulls/524288/3/8 911.406 MiB/sec 3.121 GiB/sec 250.677 {'family_index': 1, 'per_family_instance_index': 6, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/3/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1275, 'byte_width': 8.0, 'data null%': 0.1, 'mask null%': 5.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterNoNulls/524288/1/8 1.045 GiB/sec 3.535 GiB/sec 238.406 {'family_index': 0, 'per_family_instance_index': 2, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/1/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1496, 'byte_width': 8.0, 'data null%': 0.0, 'mask null%': 0.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterWithNulls/524288/6/8 899.161 MiB/sec 2.915 GiB/sec 232.029 {'family_index': 1, 'per_family_instance_index': 12, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/6/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1260, 'byte_width': 8.0, 'data null%': 1.0, 'mask null%': 5.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterWithNulls/524288/9/8 829.852 MiB/sec 2.617 GiB/sec 222.914 {'family_index': 1, 'per_family_instance_index': 18, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/9/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1157, 'byte_width': 8.0, 'data null%': 10.0, 'mask null%': 5.0, 'select%': 99.9}
TakeFixedSizeBinaryRandomIndicesNoNulls/524288/0/8 234.268M items/sec 752.809M items/sec 221.345 {'family_index': 2, 'per_family_instance_index': 8, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/0/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 312, 'byte_width': 8.0, 'null_percent': 0.0}
FilterFixedSizeBinaryFilterNoNulls/524288/1/9 1.171 GiB/sec 3.711 GiB/sec 216.957 {'family_index': 0, 'per_family_instance_index': 3, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/1/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1674, 'byte_width': 9.0, 'data null%': 0.0, 'mask null%': 0.0, 'select%': 50.0}
TakeFixedSizeBinaryMonotonicIndices/524288/0/8 249.393M items/sec 787.274M items/sec 215.676 {'family_index': 4, 'per_family_instance_index': 8, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/0/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 333, 'byte_width': 8.0, 'null_percent': 0.0}
TakeFixedSizeBinaryRandomIndicesWithNulls/524288/0/8 234.268M items/sec 736.727M items/sec 214.481 {'family_index': 3, 'per_family_instance_index': 8, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/0/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 313, 'byte_width': 8.0, 'null_percent': 0.0}
TakeFixedSizeBinaryMonotonicIndices/524288/1000/8 134.852M items/sec 423.748M items/sec 214.231 {'family_index': 4, 'per_family_instance_index': 0, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/1000/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 202, 'byte_width': 8.0, 'null_percent': 0.1}
FilterFixedSizeBinaryFilterWithNulls/524288/12/8 913.734 MiB/sec 2.599 GiB/sec 191.245 {'family_index': 1, 'per_family_instance_index': 24, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/12/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1292, 'byte_width': 8.0, 'data null%': 90.0, 'mask null%': 5.0, 'select%': 99.9}
TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1000/8 138.218M items/sec 309.307M items/sec 123.783 {'family_index': 2, 'per_family_instance_index': 0, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1000/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 184, 'byte_width': 8.0, 'null_percent': 0.1}
TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1000/8 132.755M items/sec 293.027M items/sec 120.727 {'family_index': 3, 'per_family_instance_index': 0, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1000/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 179, 'byte_width': 8.0, 'null_percent': 0.1}
TakeFixedSizeBinaryRandomIndicesNoNulls/524288/10/8 125.492M items/sec 272.996M items/sec 117.540 {'family_index': 2, 'per_family_instance_index': 2, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/10/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 174, 'byte_width': 8.0, 'null_percent': 10.0}
FilterFixedSizeBinaryFilterWithNulls/524288/9/9 926.938 MiB/sec 1.904 GiB/sec 110.379 {'family_index': 1, 'per_family_instance_index': 19, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/9/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1295, 'byte_width': 9.0, 'data null%': 10.0, 'mask null%': 5.0, 'select%': 99.9}
TakeFixedSizeBinaryMonotonicIndices/524288/10/8 158.754M items/sec 331.106M items/sec 108.565 {'family_index': 4, 'per_family_instance_index': 2, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/10/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 167, 'byte_width': 8.0, 'null_percent': 10.0}
FilterFixedSizeBinaryFilterWithNulls/524288/0/9 1.031 GiB/sec 2.129 GiB/sec 106.621 {'family_index': 1, 'per_family_instance_index': 1, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/0/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1477, 'byte_width': 9.0, 'data null%': 0.0, 'mask null%': 5.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterWithNulls/524288/3/9 1020.776 MiB/sec 2.056 GiB/sec 106.293 {'family_index': 1, 'per_family_instance_index': 7, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/3/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1430, 'byte_width': 9.0, 'data null%': 0.1, 'mask null%': 5.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterWithNulls/524288/4/8 890.785 MiB/sec 1.768 GiB/sec 103.293 {'family_index': 1, 'per_family_instance_index': 8, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/4/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1242, 'byte_width': 8.0, 'data null%': 0.1, 'mask null%': 5.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterWithNulls/524288/6/9 1005.839 MiB/sec 1.984 GiB/sec 102.023 {'family_index': 1, 'per_family_instance_index': 13, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/6/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1407, 'byte_width': 9.0, 'data null%': 1.0, 'mask null%': 5.0, 'select%': 99.9}
FilterFixedSizeBinaryFilterWithNulls/524288/1/8 916.810 MiB/sec 1.762 GiB/sec 96.757 {'family_index': 1, 'per_family_instance_index': 2, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/1/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1270, 'byte_width': 8.0, 'data null%': 0.0, 'mask null%': 5.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterWithNulls/524288/7/8 890.211 MiB/sec 1.694 GiB/sec 94.853 {'family_index': 1, 'per_family_instance_index': 14, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/7/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1235, 'byte_width': 8.0, 'data null%': 1.0, 'mask null%': 5.0, 'select%': 50.0}
TakeFixedSizeBinaryRandomIndicesNoNulls/524288/2/8 95.788M items/sec 184.004M items/sec 92.095 {'family_index': 2, 'per_family_instance_index': 4, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/2/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 124, 'byte_width': 8.0, 'null_percent': 50.0}
FilterFixedSizeBinaryFilterWithNulls/524288/10/8 862.497 MiB/sec 1.616 GiB/sec 91.823 {'family_index': 1, 'per_family_instance_index': 20, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/10/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1200, 'byte_width': 8.0, 'data null%': 10.0, 'mask null%': 5.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterWithNulls/524288/12/9 1.005 GiB/sec 1.904 GiB/sec 89.431 {'family_index': 1, 'per_family_instance_index': 25, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/12/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1442, 'byte_width': 9.0, 'data null%': 90.0, 'mask null%': 5.0, 'select%': 99.9}
TakeFixedSizeBinaryMonotonicIndices/524288/2/8 123.065M items/sec 228.755M items/sec 85.881 {'family_index': 4, 'per_family_instance_index': 4, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/2/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 164, 'byte_width': 8.0, 'null_percent': 50.0}
FilterFixedSizeBinaryFilterNoNulls/524288/10/8 930.637 MiB/sec 1.669 GiB/sec 83.659 {'family_index': 0, 'per_family_instance_index': 20, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/10/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1293, 'byte_width': 8.0, 'data null%': 10.0, 'mask null%': 0.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterNoNulls/524288/4/8 1.034 GiB/sec 1.871 GiB/sec 81.019 {'family_index': 0, 'per_family_instance_index': 8, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/4/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1482, 'byte_width': 8.0, 'data null%': 0.1, 'mask null%': 0.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterNoNulls/524288/7/8 1004.789 MiB/sec 1.772 GiB/sec 80.538 {'family_index': 0, 'per_family_instance_index': 14, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/7/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1404, 'byte_width': 8.0, 'data null%': 1.0, 'mask null%': 0.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterWithNulls/524288/13/8 920.819 MiB/sec 1.616 GiB/sec 79.686 {'family_index': 1, 'per_family_instance_index': 26, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/13/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1285, 'byte_width': 8.0, 'data null%': 90.0, 'mask null%': 5.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterNoNulls/524288/13/8 974.713 MiB/sec 1.669 GiB/sec 75.388 {'family_index': 0, 'per_family_instance_index': 26, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/13/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1363, 'byte_width': 8.0, 'data null%': 90.0, 'mask null%': 0.0, 'select%': 50.0}
TakeFixedSizeBinaryRandomIndicesWithNulls/524288/10/8 107.165M items/sec 187.372M items/sec 74.845 {'family_index': 3, 'per_family_instance_index': 2, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/10/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 143, 'byte_width': 8.0, 'null_percent': 10.0}
TakeFixedSizeBinaryRandomIndicesWithNulls/524288/2/8 72.662M items/sec 114.781M items/sec 57.965 {'family_index': 3, 'per_family_instance_index': 4, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/2/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 96, 'byte_width': 8.0, 'null_percent': 50.0}
FilterFixedSizeBinaryFilterWithNulls/524288/10/9 976.180 MiB/sec 1.480 GiB/sec 55.260 {'family_index': 1, 'per_family_instance_index': 21, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/10/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1358, 'byte_width': 9.0, 'data null%': 10.0, 'mask null%': 5.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterNoNulls/524288/10/9 1.023 GiB/sec 1.581 GiB/sec 54.502 {'family_index': 0, 'per_family_instance_index': 21, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/10/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1466, 'byte_width': 9.0, 'data null%': 10.0, 'mask null%': 0.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterWithNulls/524288/4/9 992.477 MiB/sec 1.453 GiB/sec 49.957 {'family_index': 1, 'per_family_instance_index': 9, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/4/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1400, 'byte_width': 9.0, 'data null%': 0.1, 'mask null%': 5.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterWithNulls/524288/7/9 997.679 MiB/sec 1.450 GiB/sec 48.846 {'family_index': 1, 'per_family_instance_index': 15, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/7/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1389, 'byte_width': 9.0, 'data null%': 1.0, 'mask null%': 5.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterNoNulls/524288/13/9 1.071 GiB/sec 1.581 GiB/sec 47.526 {'family_index': 0, 'per_family_instance_index': 27, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/13/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1538, 'byte_width': 9.0, 'data null%': 90.0, 'mask null%': 0.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterWithNulls/524288/13/9 1.008 GiB/sec 1.485 GiB/sec 47.328 {'family_index': 1, 'per_family_instance_index': 27, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/13/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1446, 'byte_width': 9.0, 'data null%': 90.0, 'mask null%': 5.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterWithNulls/524288/1/9 1.003 GiB/sec 1.452 GiB/sec 44.708 {'family_index': 1, 'per_family_instance_index': 3, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/1/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1437, 'byte_width': 9.0, 'data null%': 0.0, 'mask null%': 5.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterNoNulls/524288/7/9 1.105 GiB/sec 1.568 GiB/sec 41.954 {'family_index': 0, 'per_family_instance_index': 15, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/7/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1587, 'byte_width': 9.0, 'data null%': 1.0, 'mask null%': 0.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterNoNulls/524288/4/9 1.163 GiB/sec 1.613 GiB/sec 38.639 {'family_index': 0, 'per_family_instance_index': 9, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/4/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1662, 'byte_width': 9.0, 'data null%': 0.1, 'mask null%': 0.0, 'select%': 50.0}
FilterFixedSizeBinaryFilterWithNulls/524288/14/9 8.884 GiB/sec 12.117 GiB/sec 36.381 {'family_index': 1, 'per_family_instance_index': 29, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/14/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 12508, 'byte_width': 9.0, 'data null%': 90.0, 'mask null%': 5.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterWithNulls/524288/11/9 8.886 GiB/sec 12.075 GiB/sec 35.892 {'family_index': 1, 'per_family_instance_index': 23, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/11/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 12716, 'byte_width': 9.0, 'data null%': 10.0, 'mask null%': 5.0, 'select%': 1.0}
TakeFixedSizeBinaryMonotonicIndices/524288/1000/9 134.765M items/sec 182.868M items/sec 35.694 {'family_index': 4, 'per_family_instance_index': 1, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/1000/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 206, 'byte_width': 9.0, 'null_percent': 0.1}
FilterFixedSizeBinaryFilterNoNulls/524288/5/8 11.393 GiB/sec 15.091 GiB/sec 32.453 {'family_index': 0, 'per_family_instance_index': 10, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/5/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 16510, 'byte_width': 8.0, 'data null%': 0.1, 'mask null%': 0.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterNoNulls/524288/8/8 11.573 GiB/sec 15.102 GiB/sec 30.496 {'family_index': 0, 'per_family_instance_index': 16, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/8/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 16684, 'byte_width': 8.0, 'data null%': 1.0, 'mask null%': 0.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterWithNulls/524288/11/8 7.740 GiB/sec 10.059 GiB/sec 29.956 {'family_index': 1, 'per_family_instance_index': 22, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/11/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10972, 'byte_width': 8.0, 'data null%': 10.0, 'mask null%': 5.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterWithNulls/524288/14/8 7.733 GiB/sec 9.915 GiB/sec 28.213 {'family_index': 1, 'per_family_instance_index': 28, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/14/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10991, 'byte_width': 8.0, 'data null%': 90.0, 'mask null%': 5.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterWithNulls/524288/5/8 7.682 GiB/sec 9.765 GiB/sec 27.109 {'family_index': 1, 'per_family_instance_index': 10, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/5/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10991, 'byte_width': 8.0, 'data null%': 0.1, 'mask null%': 5.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterWithNulls/524288/8/9 8.856 GiB/sec 11.180 GiB/sec 26.241 {'family_index': 1, 'per_family_instance_index': 17, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/8/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 12571, 'byte_width': 9.0, 'data null%': 1.0, 'mask null%': 5.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterWithNulls/524288/8/8 7.735 GiB/sec 9.710 GiB/sec 25.530 {'family_index': 1, 'per_family_instance_index': 16, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/8/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 11069, 'byte_width': 8.0, 'data null%': 1.0, 'mask null%': 5.0, 'select%': 1.0}
TakeFixedSizeBinaryMonotonicIndices/524288/10/9 128.606M items/sec 160.249M items/sec 24.604 {'family_index': 4, 'per_family_instance_index': 3, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/10/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 209, 'byte_width': 9.0, 'null_percent': 10.0}
FilterFixedSizeBinaryFilterNoNulls/524288/11/8 12.033 GiB/sec 14.737 GiB/sec 22.478 {'family_index': 0, 'per_family_instance_index': 22, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/11/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 17220, 'byte_width': 8.0, 'data null%': 10.0, 'mask null%': 0.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterNoNulls/524288/14/8 12.141 GiB/sec 14.761 GiB/sec 21.579 {'family_index': 0, 'per_family_instance_index': 28, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/14/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 17343, 'byte_width': 8.0, 'data null%': 90.0, 'mask null%': 0.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterWithNulls/524288/5/9 8.825 GiB/sec 10.633 GiB/sec 20.489 {'family_index': 1, 'per_family_instance_index': 11, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/5/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 12543, 'byte_width': 9.0, 'data null%': 0.1, 'mask null%': 5.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterWithNulls/524288/2/8 8.300 GiB/sec 9.969 GiB/sec 20.117 {'family_index': 1, 'per_family_instance_index': 4, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/2/8', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 11819, 'byte_width': 8.0, 'data null%': 0.0, 'mask null%': 5.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterNoNulls/524288/5/9 12.954 GiB/sec 15.192 GiB/sec 17.273 {'family_index': 0, 'per_family_instance_index': 11, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/5/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 18572, 'byte_width': 9.0, 'data null%': 0.1, 'mask null%': 0.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterNoNulls/524288/8/9 13.181 GiB/sec 15.222 GiB/sec 15.490 {'family_index': 0, 'per_family_instance_index': 17, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/8/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 18904, 'byte_width': 9.0, 'data null%': 1.0, 'mask null%': 0.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterWithNulls/524288/2/9 9.344 GiB/sec 10.632 GiB/sec 13.784 {'family_index': 1, 'per_family_instance_index': 5, 'run_name': 'FilterFixedSizeBinaryFilterWithNulls/524288/2/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 13291, 'byte_width': 9.0, 'data null%': 0.0, 'mask null%': 5.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterNoNulls/524288/11/9 13.566 GiB/sec 14.894 GiB/sec 9.789 {'family_index': 0, 'per_family_instance_index': 23, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/11/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 19349, 'byte_width': 9.0, 'data null%': 10.0, 'mask null%': 0.0, 'select%': 1.0}
FilterFixedSizeBinaryFilterNoNulls/524288/14/9 13.603 GiB/sec 14.863 GiB/sec 9.265 {'family_index': 0, 'per_family_instance_index': 29, 'run_name': 'FilterFixedSizeBinaryFilterNoNulls/524288/14/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 19490, 'byte_width': 9.0, 'data null%': 90.0, 'mask null%': 0.0, 'select%': 1.0}
TakeFixedSizeBinaryRandomIndicesNoNulls/524288/10/9 124.390M items/sec 133.566M items/sec 7.377 {'family_index': 2, 'per_family_instance_index': 3, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/10/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 164, 'byte_width': 9.0, 'null_percent': 10.0}
TakeFixedSizeBinaryMonotonicIndices/524288/2/9 116.792M items/sec 124.182M items/sec 6.328 {'family_index': 4, 'per_family_instance_index': 5, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/2/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 161, 'byte_width': 9.0, 'null_percent': 50.0}
TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1000/9 135.860M items/sec 142.524M items/sec 4.905 {'family_index': 2, 'per_family_instance_index': 1, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1000/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 180, 'byte_width': 9.0, 'null_percent': 0.1}
TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1000/9 131.123M items/sec 137.400M items/sec 4.788 {'family_index': 3, 'per_family_instance_index': 1, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1000/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 176, 'byte_width': 9.0, 'null_percent': 0.1}
TakeFixedSizeBinaryRandomIndicesNoNulls/524288/0/9 220.634M items/sec 230.872M items/sec 4.640 {'family_index': 2, 'per_family_instance_index': 9, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/0/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 295, 'byte_width': 9.0, 'null_percent': 0.0}
TakeFixedSizeBinaryRandomIndicesNoNulls/524288/2/9 97.425M items/sec 101.477M items/sec 4.159 {'family_index': 2, 'per_family_instance_index': 5, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/2/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 130, 'byte_width': 9.0, 'null_percent': 50.0}
TakeFixedSizeBinaryRandomIndicesWithNulls/524288/10/9 104.830M items/sec 108.346M items/sec 3.354 {'family_index': 3, 'per_family_instance_index': 3, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/10/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 100, 'byte_width': 9.0, 'null_percent': 10.0}
TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1/9 378.858M items/sec 387.322M items/sec 2.234 {'family_index': 3, 'per_family_instance_index': 7, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/1/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 506, 'byte_width': 9.0, 'null_percent': 100.0}
TakeFixedSizeBinaryRandomIndicesWithNulls/524288/0/9 221.900M items/sec 226.450M items/sec 2.050 {'family_index': 3, 'per_family_instance_index': 9, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/0/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 295, 'byte_width': 9.0, 'null_percent': 0.0}
TakeFixedSizeBinaryMonotonicIndices/524288/0/9 248.664M items/sec 253.037M items/sec 1.758 {'family_index': 4, 'per_family_instance_index': 9, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/0/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 332, 'byte_width': 9.0, 'null_percent': 0.0}
TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1/9 197.730M items/sec 201.173M items/sec 1.741 {'family_index': 2, 'per_family_instance_index': 7, 'run_name': 'TakeFixedSizeBinaryRandomIndicesNoNulls/524288/1/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 264, 'byte_width': 9.0, 'null_percent': 100.0}
TakeFixedSizeBinaryRandomIndicesWithNulls/524288/2/9 73.196M items/sec 74.167M items/sec 1.327 {'family_index': 3, 'per_family_instance_index': 5, 'run_name': 'TakeFixedSizeBinaryRandomIndicesWithNulls/524288/2/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 96, 'byte_width': 9.0, 'null_percent': 50.0}
TakeFixedSizeBinaryMonotonicIndices/524288/1/9 192.545M items/sec 188.138M items/sec -2.289 {'family_index': 4, 'per_family_instance_index': 7, 'run_name': 'TakeFixedSizeBinaryMonotonicIndices/524288/1/9', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 257, 'byte_width': 9.0, 'null_percent': 100.0}
```
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No.
* Closes: #39740
Authored-by: Antoine Pitrou <an...@python.org>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
.../compute/kernels/vector_selection_benchmark.cc | 72 ++++++++-
.../kernels/vector_selection_filter_internal.cc | 167 ++++++++++++---------
.../compute/kernels/vector_selection_internal.cc | 22 ++-
.../compute/kernels/vector_selection_internal.h | 2 +-
.../kernels/vector_selection_take_internal.cc | 76 +++++++---
.../arrow/compute/kernels/vector_selection_test.cc | 150 +++++++++++++-----
6 files changed, 348 insertions(+), 141 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc b/cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc
index 25e30e65a3..e65d5dbcab 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc
@@ -128,6 +128,13 @@ struct TakeBenchmark {
Bench(values);
}
+ void FixedSizeBinary() {
+ const int32_t byte_width = static_cast<int32_t>(state.range(2));
+ auto values = rand.FixedSizeBinary(args.size, byte_width, args.null_proportion);
+ Bench(values);
+ state.counters["byte_width"] = byte_width;
+ }
+
void String() {
int32_t string_min_length = 0, string_max_length = 32;
auto values = std::static_pointer_cast<StringArray>(rand.String(
@@ -149,6 +156,7 @@ struct TakeBenchmark {
for (auto _ : state) {
ABORT_NOT_OK(Take(values, indices).status());
}
+ state.SetItemsProcessed(state.iterations() * values->length());
}
};
@@ -166,8 +174,7 @@ struct FilterBenchmark {
void Int64() {
const int64_t array_size = args.size / sizeof(int64_t);
- auto values = std::static_pointer_cast<NumericArray<Int64Type>>(
- rand.Int64(array_size, -100, 100, args.values_null_proportion));
+ auto values = rand.Int64(array_size, -100, 100, args.values_null_proportion);
Bench(values);
}
@@ -181,6 +188,15 @@ struct FilterBenchmark {
Bench(values);
}
+ void FixedSizeBinary() {
+ const int32_t byte_width = static_cast<int32_t>(state.range(2));
+ const int64_t array_size = args.size / byte_width;
+ auto values =
+ rand.FixedSizeBinary(array_size, byte_width, args.values_null_proportion);
+ Bench(values);
+ state.counters["byte_width"] = byte_width;
+ }
+
void String() {
int32_t string_min_length = 0, string_max_length = 32;
int32_t string_mean_length = (string_max_length + string_min_length) / 2;
@@ -202,6 +218,7 @@ struct FilterBenchmark {
for (auto _ : state) {
ABORT_NOT_OK(Filter(values, filter).status());
}
+ state.SetItemsProcessed(state.iterations() * values->length());
}
void BenchRecordBatch() {
@@ -236,6 +253,7 @@ struct FilterBenchmark {
for (auto _ : state) {
ABORT_NOT_OK(Filter(batch, filter).status());
}
+ state.SetItemsProcessed(state.iterations() * num_rows);
}
};
@@ -255,6 +273,14 @@ static void FilterFSLInt64FilterWithNulls(benchmark::State& state) {
FilterBenchmark(state, true).FSLInt64();
}
+static void FilterFixedSizeBinaryFilterNoNulls(benchmark::State& state) {
+ FilterBenchmark(state, false).FixedSizeBinary();
+}
+
+static void FilterFixedSizeBinaryFilterWithNulls(benchmark::State& state) {
+ FilterBenchmark(state, true).FixedSizeBinary();
+}
+
static void FilterStringFilterNoNulls(benchmark::State& state) {
FilterBenchmark(state, false).String();
}
@@ -283,6 +309,19 @@ static void TakeInt64MonotonicIndices(benchmark::State& state) {
TakeBenchmark(state, /*indices_with_nulls=*/false, /*monotonic=*/true).Int64();
}
+static void TakeFixedSizeBinaryRandomIndicesNoNulls(benchmark::State& state) {
+ TakeBenchmark(state, false).FixedSizeBinary();
+}
+
+static void TakeFixedSizeBinaryRandomIndicesWithNulls(benchmark::State& state) {
+ TakeBenchmark(state, true).FixedSizeBinary();
+}
+
+static void TakeFixedSizeBinaryMonotonicIndices(benchmark::State& state) {
+ TakeBenchmark(state, /*indices_with_nulls=*/false, /*monotonic=*/true)
+ .FixedSizeBinary();
+}
+
static void TakeFSLInt64RandomIndicesNoNulls(benchmark::State& state) {
TakeBenchmark(state, false).FSLInt64();
}
@@ -315,8 +354,22 @@ void FilterSetArgs(benchmark::internal::Benchmark* bench) {
}
}
+void FilterFSBSetArgs(benchmark::internal::Benchmark* bench) {
+ for (int64_t size : g_data_sizes) {
+ for (int i = 0; i < static_cast<int>(g_filter_params.size()); ++i) {
+ // FixedSizeBinary of primitive sizes (powers of two up to 32)
+ // have a faster path.
+ for (int32_t byte_width : {8, 9}) {
+ bench->Args({static_cast<ArgsType>(size), i, byte_width});
+ }
+ }
+ }
+}
+
BENCHMARK(FilterInt64FilterNoNulls)->Apply(FilterSetArgs);
BENCHMARK(FilterInt64FilterWithNulls)->Apply(FilterSetArgs);
+BENCHMARK(FilterFixedSizeBinaryFilterNoNulls)->Apply(FilterFSBSetArgs);
+BENCHMARK(FilterFixedSizeBinaryFilterWithNulls)->Apply(FilterFSBSetArgs);
BENCHMARK(FilterFSLInt64FilterNoNulls)->Apply(FilterSetArgs);
BENCHMARK(FilterFSLInt64FilterWithNulls)->Apply(FilterSetArgs);
BENCHMARK(FilterStringFilterNoNulls)->Apply(FilterSetArgs);
@@ -340,9 +393,24 @@ void TakeSetArgs(benchmark::internal::Benchmark* bench) {
}
}
+void TakeFSBSetArgs(benchmark::internal::Benchmark* bench) {
+ for (int64_t size : g_data_sizes) {
+ for (auto nulls : std::vector<ArgsType>({1000, 10, 2, 1, 0})) {
+ // FixedSizeBinary of primitive sizes (powers of two up to 32)
+ // have a faster path.
+ for (int32_t byte_width : {8, 9}) {
+ bench->Args({static_cast<ArgsType>(size), nulls, byte_width});
+ }
+ }
+ }
+}
+
BENCHMARK(TakeInt64RandomIndicesNoNulls)->Apply(TakeSetArgs);
BENCHMARK(TakeInt64RandomIndicesWithNulls)->Apply(TakeSetArgs);
BENCHMARK(TakeInt64MonotonicIndices)->Apply(TakeSetArgs);
+BENCHMARK(TakeFixedSizeBinaryRandomIndicesNoNulls)->Apply(TakeFSBSetArgs);
+BENCHMARK(TakeFixedSizeBinaryRandomIndicesWithNulls)->Apply(TakeFSBSetArgs);
+BENCHMARK(TakeFixedSizeBinaryMonotonicIndices)->Apply(TakeFSBSetArgs);
BENCHMARK(TakeFSLInt64RandomIndicesNoNulls)->Apply(TakeSetArgs);
BENCHMARK(TakeFSLInt64RandomIndicesWithNulls)->Apply(TakeSetArgs);
BENCHMARK(TakeFSLInt64MonotonicIndices)->Apply(TakeSetArgs);
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc
index a25b04ae4f..8825d697fd 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc
@@ -146,36 +146,40 @@ class DropNullCounter {
/// \brief The Filter implementation for primitive (fixed-width) types does not
/// use the logical Arrow type but rather the physical C type. This way we only
-/// generate one take function for each byte width. We use the same
-/// implementation here for boolean and fixed-byte-size inputs with some
-/// template specialization.
-template <typename ArrowType>
+/// generate one take function for each byte width.
+///
+/// We use compile-time specialization for two variations:
+/// - operating on boolean data (using kIsBoolean = true)
+/// - operating on fixed-width data of arbitrary width (using kByteWidth = -1),
+/// with the actual width only known at runtime
+template <int32_t kByteWidth, bool kIsBoolean = false>
class PrimitiveFilterImpl {
public:
- using T = typename std::conditional<std::is_same<ArrowType, BooleanType>::value,
- uint8_t, typename ArrowType::c_type>::type;
-
PrimitiveFilterImpl(const ArraySpan& values, const ArraySpan& filter,
FilterOptions::NullSelectionBehavior null_selection,
ArrayData* out_arr)
- : values_is_valid_(values.buffers[0].data),
- values_data_(reinterpret_cast<const T*>(values.buffers[1].data)),
+ : byte_width_(values.type->byte_width()),
+ values_is_valid_(values.buffers[0].data),
+ values_data_(values.buffers[1].data),
values_null_count_(values.null_count),
values_offset_(values.offset),
values_length_(values.length),
filter_(filter),
null_selection_(null_selection) {
- if (values.type->id() != Type::BOOL) {
+ if constexpr (kByteWidth >= 0 && !kIsBoolean) {
+ DCHECK_EQ(kByteWidth, byte_width_);
+ }
+ if constexpr (!kIsBoolean) {
// No offset applied for boolean because it's a bitmap
- values_data_ += values.offset;
+ values_data_ += values.offset * byte_width();
}
if (out_arr->buffers[0] != nullptr) {
// May be unallocated if neither filter nor values contain nulls
out_is_valid_ = out_arr->buffers[0]->mutable_data();
}
- out_data_ = reinterpret_cast<T*>(out_arr->buffers[1]->mutable_data());
- out_offset_ = out_arr->offset;
+ out_data_ = out_arr->buffers[1]->mutable_data();
+ DCHECK_EQ(out_arr->offset, 0);
out_length_ = out_arr->length;
out_position_ = 0;
}
@@ -201,14 +205,11 @@ class PrimitiveFilterImpl {
[&](int64_t position, int64_t segment_length, bool filter_valid) {
if (filter_valid) {
CopyBitmap(values_is_valid_, values_offset_ + position, segment_length,
- out_is_valid_, out_offset_ + out_position_);
+ out_is_valid_, out_position_);
WriteValueSegment(position, segment_length);
} else {
- bit_util::SetBitsTo(out_is_valid_, out_offset_ + out_position_,
- segment_length, false);
- memset(out_data_ + out_offset_ + out_position_, 0,
- segment_length * sizeof(T));
- out_position_ += segment_length;
+ bit_util::SetBitsTo(out_is_valid_, out_position_, segment_length, false);
+ WriteNullSegment(segment_length);
}
return true;
});
@@ -218,7 +219,7 @@ class PrimitiveFilterImpl {
if (out_is_valid_) {
// Set all to valid, so only if nulls are produced by EMIT_NULL, we need
// to set out_is_valid[i] to false.
- bit_util::SetBitsTo(out_is_valid_, out_offset_, out_length_, true);
+ bit_util::SetBitsTo(out_is_valid_, 0, out_length_, true);
}
return VisitPlainxREEFilterOutputSegments(
filter_, /*filter_may_have_nulls=*/true, null_selection_,
@@ -226,11 +227,8 @@ class PrimitiveFilterImpl {
if (filter_valid) {
WriteValueSegment(position, segment_length);
} else {
- bit_util::SetBitsTo(out_is_valid_, out_offset_ + out_position_,
- segment_length, false);
- memset(out_data_ + out_offset_ + out_position_, 0,
- segment_length * sizeof(T));
- out_position_ += segment_length;
+ bit_util::SetBitsTo(out_is_valid_, out_position_, segment_length, false);
+ WriteNullSegment(segment_length);
}
return true;
});
@@ -260,13 +258,13 @@ class PrimitiveFilterImpl {
values_length_);
auto WriteNotNull = [&](int64_t index) {
- bit_util::SetBit(out_is_valid_, out_offset_ + out_position_);
+ bit_util::SetBit(out_is_valid_, out_position_);
// Increments out_position_
WriteValue(index);
};
auto WriteMaybeNull = [&](int64_t index) {
- bit_util::SetBitTo(out_is_valid_, out_offset_ + out_position_,
+ bit_util::SetBitTo(out_is_valid_, out_position_,
bit_util::GetBit(values_is_valid_, values_offset_ + index));
// Increments out_position_
WriteValue(index);
@@ -279,15 +277,14 @@ class PrimitiveFilterImpl {
BitBlockCount data_block = data_counter.NextWord();
if (filter_block.AllSet() && data_block.AllSet()) {
// Fastest path: all values in block are included and not null
- bit_util::SetBitsTo(out_is_valid_, out_offset_ + out_position_,
- filter_block.length, true);
+ bit_util::SetBitsTo(out_is_valid_, out_position_, filter_block.length, true);
WriteValueSegment(in_position, filter_block.length);
in_position += filter_block.length;
} else if (filter_block.AllSet()) {
// Faster: all values are selected, but some values are null
// Batch copy bits from values validity bitmap to output validity bitmap
CopyBitmap(values_is_valid_, values_offset_ + in_position, filter_block.length,
- out_is_valid_, out_offset_ + out_position_);
+ out_is_valid_, out_position_);
WriteValueSegment(in_position, filter_block.length);
in_position += filter_block.length;
} else if (filter_block.NoneSet() && null_selection_ == FilterOptions::DROP) {
@@ -326,7 +323,7 @@ class PrimitiveFilterImpl {
WriteNotNull(in_position);
} else if (!is_valid) {
// Filter slot is null, so we have a null in the output
- bit_util::ClearBit(out_is_valid_, out_offset_ + out_position_);
+ bit_util::ClearBit(out_is_valid_, out_position_);
WriteNull();
}
++in_position;
@@ -362,7 +359,7 @@ class PrimitiveFilterImpl {
WriteMaybeNull(in_position);
} else if (!is_valid) {
// Filter slot is null, so we have a null in the output
- bit_util::ClearBit(out_is_valid_, out_offset_ + out_position_);
+ bit_util::ClearBit(out_is_valid_, out_position_);
WriteNull();
}
++in_position;
@@ -376,54 +373,72 @@ class PrimitiveFilterImpl {
// Write the next out_position given the selected in_position for the input
// data and advance out_position
void WriteValue(int64_t in_position) {
- out_data_[out_offset_ + out_position_++] = values_data_[in_position];
+ if constexpr (kIsBoolean) {
+ bit_util::SetBitTo(out_data_, out_position_,
+ bit_util::GetBit(values_data_, values_offset_ + in_position));
+ } else {
+ memcpy(out_data_ + out_position_ * byte_width(),
+ values_data_ + in_position * byte_width(), byte_width());
+ }
+ ++out_position_;
}
void WriteValueSegment(int64_t in_start, int64_t length) {
- std::memcpy(out_data_ + out_position_, values_data_ + in_start, length * sizeof(T));
+ if constexpr (kIsBoolean) {
+ CopyBitmap(values_data_, values_offset_ + in_start, length, out_data_,
+ out_position_);
+ } else {
+ memcpy(out_data_ + out_position_ * byte_width(),
+ values_data_ + in_start * byte_width(), length * byte_width());
+ }
out_position_ += length;
}
void WriteNull() {
- // Zero the memory
- out_data_[out_offset_ + out_position_++] = T{};
+ if constexpr (kIsBoolean) {
+ // Zero the bit
+ bit_util::ClearBit(out_data_, out_position_);
+ } else {
+ // Zero the memory
+ memset(out_data_ + out_position_ * byte_width(), 0, byte_width());
+ }
+ ++out_position_;
+ }
+
+ void WriteNullSegment(int64_t length) {
+ if constexpr (kIsBoolean) {
+ // Zero the bits
+ bit_util::SetBitsTo(out_data_, out_position_, length, false);
+ } else {
+ // Zero the memory
+ memset(out_data_ + out_position_ * byte_width(), 0, length * byte_width());
+ }
+ out_position_ += length;
+ }
+
+ constexpr int32_t byte_width() const {
+ if constexpr (kByteWidth >= 0) {
+ return kByteWidth;
+ } else {
+ return byte_width_;
+ }
}
private:
+ int32_t byte_width_;
const uint8_t* values_is_valid_;
- const T* values_data_;
+ const uint8_t* values_data_;
int64_t values_null_count_;
int64_t values_offset_;
int64_t values_length_;
const ArraySpan& filter_;
FilterOptions::NullSelectionBehavior null_selection_;
uint8_t* out_is_valid_ = NULLPTR;
- T* out_data_;
- int64_t out_offset_;
+ uint8_t* out_data_;
int64_t out_length_;
int64_t out_position_;
};
-template <>
-inline void PrimitiveFilterImpl<BooleanType>::WriteValue(int64_t in_position) {
- bit_util::SetBitTo(out_data_, out_offset_ + out_position_++,
- bit_util::GetBit(values_data_, values_offset_ + in_position));
-}
-
-template <>
-inline void PrimitiveFilterImpl<BooleanType>::WriteValueSegment(int64_t in_start,
- int64_t length) {
- CopyBitmap(values_data_, values_offset_ + in_start, length, out_data_,
- out_offset_ + out_position_);
- out_position_ += length;
-}
-
-template <>
-inline void PrimitiveFilterImpl<BooleanType>::WriteNull() {
- // Zero the bit
- bit_util::ClearBit(out_data_, out_offset_ + out_position_++);
-}
-
Status PrimitiveFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
const ArraySpan& values = batch[0].array;
const ArraySpan& filter = batch[1].array;
@@ -459,22 +474,32 @@ Status PrimitiveFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult
switch (bit_width) {
case 1:
- PrimitiveFilterImpl<BooleanType>(values, filter, null_selection, out_arr).Exec();
+ PrimitiveFilterImpl<1, /*kIsBoolean=*/true>(values, filter, null_selection, out_arr)
+ .Exec();
break;
case 8:
- PrimitiveFilterImpl<UInt8Type>(values, filter, null_selection, out_arr).Exec();
+ PrimitiveFilterImpl<1>(values, filter, null_selection, out_arr).Exec();
break;
case 16:
- PrimitiveFilterImpl<UInt16Type>(values, filter, null_selection, out_arr).Exec();
+ PrimitiveFilterImpl<2>(values, filter, null_selection, out_arr).Exec();
break;
case 32:
- PrimitiveFilterImpl<UInt32Type>(values, filter, null_selection, out_arr).Exec();
+ PrimitiveFilterImpl<4>(values, filter, null_selection, out_arr).Exec();
break;
case 64:
- PrimitiveFilterImpl<UInt64Type>(values, filter, null_selection, out_arr).Exec();
+ PrimitiveFilterImpl<8>(values, filter, null_selection, out_arr).Exec();
+ break;
+ case 128:
+ // For INTERVAL_MONTH_DAY_NANO, DECIMAL128
+ PrimitiveFilterImpl<16>(values, filter, null_selection, out_arr).Exec();
+ break;
+ case 256:
+ // For DECIMAL256
+ PrimitiveFilterImpl<32>(values, filter, null_selection, out_arr).Exec();
break;
default:
- DCHECK(false) << "Invalid values bit width";
+ // Non-specializing on byte width
+ PrimitiveFilterImpl<-1>(values, filter, null_selection, out_arr).Exec();
break;
}
return Status::OK();
@@ -1050,10 +1075,10 @@ void PopulateFilterKernels(std::vector<SelectionKernelData>* out) {
{InputType(match::Primitive()), plain_filter, PrimitiveFilterExec},
{InputType(match::BinaryLike()), plain_filter, BinaryFilterExec},
{InputType(match::LargeBinaryLike()), plain_filter, BinaryFilterExec},
- {InputType(Type::FIXED_SIZE_BINARY), plain_filter, FSBFilterExec},
{InputType(null()), plain_filter, NullFilterExec},
- {InputType(Type::DECIMAL128), plain_filter, FSBFilterExec},
- {InputType(Type::DECIMAL256), plain_filter, FSBFilterExec},
+ {InputType(Type::FIXED_SIZE_BINARY), plain_filter, PrimitiveFilterExec},
+ {InputType(Type::DECIMAL128), plain_filter, PrimitiveFilterExec},
+ {InputType(Type::DECIMAL256), plain_filter, PrimitiveFilterExec},
{InputType(Type::DICTIONARY), plain_filter, DictionaryFilterExec},
{InputType(Type::EXTENSION), plain_filter, ExtensionFilterExec},
{InputType(Type::LIST), plain_filter, ListFilterExec},
@@ -1068,10 +1093,10 @@ void PopulateFilterKernels(std::vector<SelectionKernelData>* out) {
{InputType(match::Primitive()), ree_filter, PrimitiveFilterExec},
{InputType(match::BinaryLike()), ree_filter, BinaryFilterExec},
{InputType(match::LargeBinaryLike()), ree_filter, BinaryFilterExec},
- {InputType(Type::FIXED_SIZE_BINARY), ree_filter, FSBFilterExec},
{InputType(null()), ree_filter, NullFilterExec},
- {InputType(Type::DECIMAL128), ree_filter, FSBFilterExec},
- {InputType(Type::DECIMAL256), ree_filter, FSBFilterExec},
+ {InputType(Type::FIXED_SIZE_BINARY), ree_filter, PrimitiveFilterExec},
+ {InputType(Type::DECIMAL128), ree_filter, PrimitiveFilterExec},
+ {InputType(Type::DECIMAL256), ree_filter, PrimitiveFilterExec},
{InputType(Type::DICTIONARY), ree_filter, DictionaryFilterExec},
{InputType(Type::EXTENSION), ree_filter, ExtensionFilterExec},
{InputType(Type::LIST), ree_filter, ListFilterExec},
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc
index 98eb37e9c5..a0fe2808e3 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc
@@ -77,7 +77,8 @@ Status PreallocatePrimitiveArrayData(KernelContext* ctx, int64_t length, int bit
if (bit_width == 1) {
ARROW_ASSIGN_OR_RAISE(out->buffers[1], ctx->AllocateBitmap(length));
} else {
- ARROW_ASSIGN_OR_RAISE(out->buffers[1], ctx->Allocate(length * bit_width / 8));
+ ARROW_ASSIGN_OR_RAISE(out->buffers[1],
+ ctx->Allocate(bit_util::BytesForBits(length * bit_width)));
}
return Status::OK();
}
@@ -899,10 +900,6 @@ Status FilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
} // namespace
-Status FSBFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
- return FilterExec<FSBSelectionImpl>(ctx, batch, out);
-}
-
Status ListFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
return FilterExec<ListSelectionImpl<ListType>>(ctx, batch, out);
}
@@ -946,7 +943,20 @@ Status LargeVarBinaryTakeExec(KernelContext* ctx, const ExecSpan& batch,
}
Status FSBTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
- return TakeExec<FSBSelectionImpl>(ctx, batch, out);
+ const ArraySpan& values = batch[0].array;
+ const auto byte_width = values.type->byte_width();
+ // Use primitive Take implementation (presumably faster) for some byte widths
+ switch (byte_width) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ case 16:
+ case 32:
+ return PrimitiveTakeExec(ctx, batch, out);
+ default:
+ return TakeExec<FSBSelectionImpl>(ctx, batch, out);
+ }
}
Status ListTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_internal.h b/cpp/src/arrow/compute/kernels/vector_selection_internal.h
index b9eba6ea66..95f3e51cd6 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_internal.h
+++ b/cpp/src/arrow/compute/kernels/vector_selection_internal.h
@@ -70,7 +70,6 @@ void VisitPlainxREEFilterOutputSegments(
FilterOptions::NullSelectionBehavior null_selection,
const EmitREEFilterSegment& emit_segment);
-Status FSBFilterExec(KernelContext*, const ExecSpan&, ExecResult*);
Status ListFilterExec(KernelContext*, const ExecSpan&, ExecResult*);
Status LargeListFilterExec(KernelContext*, const ExecSpan&, ExecResult*);
Status FSLFilterExec(KernelContext*, const ExecSpan&, ExecResult*);
@@ -79,6 +78,7 @@ Status MapFilterExec(KernelContext*, const ExecSpan&, ExecResult*);
Status VarBinaryTakeExec(KernelContext*, const ExecSpan&, ExecResult*);
Status LargeVarBinaryTakeExec(KernelContext*, const ExecSpan&, ExecResult*);
+Status PrimitiveTakeExec(KernelContext*, const ExecSpan&, ExecResult*);
Status FSBTakeExec(KernelContext*, const ExecSpan&, ExecResult*);
Status ListTakeExec(KernelContext*, const ExecSpan&, ExecResult*);
Status LargeListTakeExec(KernelContext*, const ExecSpan&, ExecResult*);
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc
index 612de8505d..89b3f7d0d3 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc
@@ -334,11 +334,15 @@ using TakeState = OptionsWrapper<TakeOptions>;
/// only generate one take function for each byte width.
///
/// This function assumes that the indices have been boundschecked.
-template <typename IndexCType, typename ValueCType>
+template <typename IndexCType, typename ValueWidthConstant>
struct PrimitiveTakeImpl {
+ static constexpr int kValueWidth = ValueWidthConstant::value;
+
static void Exec(const ArraySpan& values, const ArraySpan& indices,
ArrayData* out_arr) {
- const auto* values_data = values.GetValues<ValueCType>(1);
+ DCHECK_EQ(values.type->byte_width(), kValueWidth);
+ const auto* values_data =
+ values.GetValues<uint8_t>(1, 0) + kValueWidth * values.offset;
const uint8_t* values_is_valid = values.buffers[0].data;
auto values_offset = values.offset;
@@ -346,9 +350,10 @@ struct PrimitiveTakeImpl {
const uint8_t* indices_is_valid = indices.buffers[0].data;
auto indices_offset = indices.offset;
- auto out = out_arr->GetMutableValues<ValueCType>(1);
+ auto out = out_arr->GetMutableValues<uint8_t>(1, 0) + kValueWidth * out_arr->offset;
auto out_is_valid = out_arr->buffers[0]->mutable_data();
auto out_offset = out_arr->offset;
+ DCHECK_EQ(out_offset, 0);
// If either the values or indices have nulls, we preemptively zero out the
// out validity bitmap so that we don't have to use ClearBit in each
@@ -357,6 +362,19 @@ struct PrimitiveTakeImpl {
bit_util::SetBitsTo(out_is_valid, out_offset, indices.length, false);
}
+ auto WriteValue = [&](int64_t position) {
+ memcpy(out + position * kValueWidth,
+ values_data + indices_data[position] * kValueWidth, kValueWidth);
+ };
+
+ auto WriteZero = [&](int64_t position) {
+ memset(out + position * kValueWidth, 0, kValueWidth);
+ };
+
+ auto WriteZeroSegment = [&](int64_t position, int64_t length) {
+ memset(out + position * kValueWidth, 0, kValueWidth * length);
+ };
+
OptionalBitBlockCounter indices_bit_counter(indices_is_valid, indices_offset,
indices.length);
int64_t position = 0;
@@ -370,7 +388,7 @@ struct PrimitiveTakeImpl {
// Fastest path: neither values nor index nulls
bit_util::SetBitsTo(out_is_valid, out_offset + position, block.length, true);
for (int64_t i = 0; i < block.length; ++i) {
- out[position] = values_data[indices_data[position]];
+ WriteValue(position);
++position;
}
} else if (block.popcount > 0) {
@@ -379,14 +397,14 @@ struct PrimitiveTakeImpl {
if (bit_util::GetBit(indices_is_valid, indices_offset + position)) {
// index is not null
bit_util::SetBit(out_is_valid, out_offset + position);
- out[position] = values_data[indices_data[position]];
+ WriteValue(position);
} else {
- out[position] = ValueCType{};
+ WriteZero(position);
}
++position;
}
} else {
- memset(out + position, 0, sizeof(ValueCType) * block.length);
+ WriteZeroSegment(position, block.length);
position += block.length;
}
} else {
@@ -397,11 +415,11 @@ struct PrimitiveTakeImpl {
if (bit_util::GetBit(values_is_valid,
values_offset + indices_data[position])) {
// value is not null
- out[position] = values_data[indices_data[position]];
+ WriteValue(position);
bit_util::SetBit(out_is_valid, out_offset + position);
++valid_count;
} else {
- out[position] = ValueCType{};
+ WriteZero(position);
}
++position;
}
@@ -414,16 +432,16 @@ struct PrimitiveTakeImpl {
bit_util::GetBit(values_is_valid,
values_offset + indices_data[position])) {
// index is not null && value is not null
- out[position] = values_data[indices_data[position]];
+ WriteValue(position);
bit_util::SetBit(out_is_valid, out_offset + position);
++valid_count;
} else {
- out[position] = ValueCType{};
+ WriteZero(position);
}
++position;
}
} else {
- memset(out + position, 0, sizeof(ValueCType) * block.length);
+ WriteZeroSegment(position, block.length);
position += block.length;
}
}
@@ -554,6 +572,8 @@ void TakeIndexDispatch(const ArraySpan& values, const ArraySpan& indices,
}
}
+} // namespace
+
Status PrimitiveTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
const ArraySpan& values = batch[0].array;
const ArraySpan& indices = batch[1].array;
@@ -577,24 +597,40 @@ Status PrimitiveTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult*
TakeIndexDispatch<BooleanTakeImpl>(values, indices, out_arr);
break;
case 8:
- TakeIndexDispatch<PrimitiveTakeImpl, int8_t>(values, indices, out_arr);
+ TakeIndexDispatch<PrimitiveTakeImpl, std::integral_constant<int, 1>>(
+ values, indices, out_arr);
break;
case 16:
- TakeIndexDispatch<PrimitiveTakeImpl, int16_t>(values, indices, out_arr);
+ TakeIndexDispatch<PrimitiveTakeImpl, std::integral_constant<int, 2>>(
+ values, indices, out_arr);
break;
case 32:
- TakeIndexDispatch<PrimitiveTakeImpl, int32_t>(values, indices, out_arr);
+ TakeIndexDispatch<PrimitiveTakeImpl, std::integral_constant<int, 4>>(
+ values, indices, out_arr);
break;
case 64:
- TakeIndexDispatch<PrimitiveTakeImpl, int64_t>(values, indices, out_arr);
+ TakeIndexDispatch<PrimitiveTakeImpl, std::integral_constant<int, 8>>(
+ values, indices, out_arr);
break;
- default:
- DCHECK(false) << "Invalid values byte width";
+ case 128:
+ // For INTERVAL_MONTH_DAY_NANO, DECIMAL128
+ TakeIndexDispatch<PrimitiveTakeImpl, std::integral_constant<int, 16>>(
+ values, indices, out_arr);
+ break;
+ case 256:
+ // For DECIMAL256
+ TakeIndexDispatch<PrimitiveTakeImpl, std::integral_constant<int, 32>>(
+ values, indices, out_arr);
break;
+ default:
+ return Status::NotImplemented("Unsupported primitive type for take: ",
+ *values.type);
}
return Status::OK();
}
+namespace {
+
// ----------------------------------------------------------------------
// Null take
@@ -836,8 +872,8 @@ void PopulateTakeKernels(std::vector<SelectionKernelData>* out) {
{InputType(match::LargeBinaryLike()), take_indices, LargeVarBinaryTakeExec},
{InputType(Type::FIXED_SIZE_BINARY), take_indices, FSBTakeExec},
{InputType(null()), take_indices, NullTakeExec},
- {InputType(Type::DECIMAL128), take_indices, FSBTakeExec},
- {InputType(Type::DECIMAL256), take_indices, FSBTakeExec},
+ {InputType(Type::DECIMAL128), take_indices, PrimitiveTakeExec},
+ {InputType(Type::DECIMAL256), take_indices, PrimitiveTakeExec},
{InputType(Type::DICTIONARY), take_indices, DictionaryTake},
{InputType(Type::EXTENSION), take_indices, ExtensionTake},
{InputType(Type::LIST), take_indices, ListTakeExec},
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc
index bdf9f5454f..ec94b328ea 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc
@@ -309,6 +309,33 @@ class TestFilterKernel : public ::testing::Test {
AssertFilter(values_array, ree_filter, expected_array);
}
+ void TestNumericBasics(const std::shared_ptr<DataType>& type) {
+ ARROW_SCOPED_TRACE("type = ", *type);
+ AssertFilter(type, "[]", "[]", "[]");
+
+ AssertFilter(type, "[9]", "[0]", "[]");
+ AssertFilter(type, "[9]", "[1]", "[9]");
+ AssertFilter(type, "[9]", "[null]", "[null]");
+ AssertFilter(type, "[null]", "[0]", "[]");
+ AssertFilter(type, "[null]", "[1]", "[null]");
+ AssertFilter(type, "[null]", "[null]", "[null]");
+
+ AssertFilter(type, "[7, 8, 9]", "[0, 1, 0]", "[8]");
+ AssertFilter(type, "[7, 8, 9]", "[1, 0, 1]", "[7, 9]");
+ AssertFilter(type, "[null, 8, 9]", "[0, 1, 0]", "[8]");
+ AssertFilter(type, "[7, 8, 9]", "[null, 1, 0]", "[null, 8]");
+ AssertFilter(type, "[7, 8, 9]", "[1, null, 1]", "[7, null, 9]");
+
+ AssertFilter(ArrayFromJSON(type, "[7, 8, 9]"),
+ ArrayFromJSON(boolean(), "[0, 1, 1, 1, 0, 1]")->Slice(3, 3),
+ ArrayFromJSON(type, "[7, 9]"));
+
+ ASSERT_RAISES(Invalid, Filter(ArrayFromJSON(type, "[7, 8, 9]"),
+ ArrayFromJSON(boolean(), "[]"), emit_null_));
+ ASSERT_RAISES(Invalid, Filter(ArrayFromJSON(type, "[7, 8, 9]"),
+ ArrayFromJSON(boolean(), "[]"), drop_));
+ }
+
const FilterOptions emit_null_, drop_;
};
@@ -342,6 +369,33 @@ void ValidateFilter(const std::shared_ptr<Array>& values,
/*verbose=*/true);
}
+TEST_F(TestFilterKernel, Temporal) {
+ this->TestNumericBasics(time32(TimeUnit::MILLI));
+ this->TestNumericBasics(time64(TimeUnit::MICRO));
+ this->TestNumericBasics(timestamp(TimeUnit::NANO, "Europe/Paris"));
+ this->TestNumericBasics(duration(TimeUnit::SECOND));
+ this->TestNumericBasics(date32());
+ this->AssertFilter(date64(), "[0, 86400000, null]", "[null, 1, 0]", "[null, 86400000]");
+}
+
+TEST_F(TestFilterKernel, Duration) {
+ for (auto type : DurationTypes()) {
+ this->TestNumericBasics(type);
+ }
+}
+
+TEST_F(TestFilterKernel, Interval) {
+ this->TestNumericBasics(month_interval());
+
+ auto type = day_time_interval();
+ this->AssertFilter(type, "[[1, -600], [2, 3000], null]", "[null, 1, 0]",
+ "[null, [2, 3000]]");
+ type = month_day_nano_interval();
+ this->AssertFilter(type,
+ "[[1, -2, 34567890123456789], [2, 3, -34567890123456789], null]",
+ "[null, 1, 0]", "[null, [2, 3, -34567890123456789]]");
+}
+
class TestFilterKernelWithNull : public TestFilterKernel {
protected:
void AssertFilter(const std::string& values, const std::string& filter,
@@ -401,30 +455,7 @@ class TestFilterKernelWithNumeric : public TestFilterKernel {
TYPED_TEST_SUITE(TestFilterKernelWithNumeric, NumericArrowTypes);
TYPED_TEST(TestFilterKernelWithNumeric, FilterNumeric) {
- auto type = this->type_singleton();
- this->AssertFilter(type, "[]", "[]", "[]");
-
- this->AssertFilter(type, "[9]", "[0]", "[]");
- this->AssertFilter(type, "[9]", "[1]", "[9]");
- this->AssertFilter(type, "[9]", "[null]", "[null]");
- this->AssertFilter(type, "[null]", "[0]", "[]");
- this->AssertFilter(type, "[null]", "[1]", "[null]");
- this->AssertFilter(type, "[null]", "[null]", "[null]");
-
- this->AssertFilter(type, "[7, 8, 9]", "[0, 1, 0]", "[8]");
- this->AssertFilter(type, "[7, 8, 9]", "[1, 0, 1]", "[7, 9]");
- this->AssertFilter(type, "[null, 8, 9]", "[0, 1, 0]", "[8]");
- this->AssertFilter(type, "[7, 8, 9]", "[null, 1, 0]", "[null, 8]");
- this->AssertFilter(type, "[7, 8, 9]", "[1, null, 1]", "[7, null, 9]");
-
- this->AssertFilter(ArrayFromJSON(type, "[7, 8, 9]"),
- ArrayFromJSON(boolean(), "[0, 1, 1, 1, 0, 1]")->Slice(3, 3),
- ArrayFromJSON(type, "[7, 9]"));
-
- ASSERT_RAISES(Invalid, Filter(ArrayFromJSON(type, "[7, 8, 9]"),
- ArrayFromJSON(boolean(), "[]"), this->emit_null_));
- ASSERT_RAISES(Invalid, Filter(ArrayFromJSON(type, "[7, 8, 9]"),
- ArrayFromJSON(boolean(), "[]"), this->drop_));
+ this->TestNumericBasics(this->type_singleton());
}
template <typename CType>
@@ -588,7 +619,7 @@ TYPED_TEST(TestFilterKernelWithDecimal, FilterNumeric) {
ArrayFromJSON(boolean(), "[]"), this->drop_));
}
-TEST(TestFilterKernel, NoValidityBitmapButUnknownNullCount) {
+TEST_F(TestFilterKernel, NoValidityBitmapButUnknownNullCount) {
auto values = ArrayFromJSON(int32(), "[1, 2, 3, 4]");
auto filter = ArrayFromJSON(boolean(), "[true, true, false, true]");
@@ -1136,6 +1167,20 @@ class TestTakeKernel : public ::testing::Test {
TestNoValidityBitmapButUnknownNullCount(ArrayFromJSON(type, values),
ArrayFromJSON(int16(), indices));
}
+
+ void TestNumericBasics(const std::shared_ptr<DataType>& type) {
+ ARROW_SCOPED_TRACE("type = ", *type);
+ CheckTake(type, "[7, 8, 9]", "[]", "[]");
+ CheckTake(type, "[7, 8, 9]", "[0, 1, 0]", "[7, 8, 7]");
+ CheckTake(type, "[null, 8, 9]", "[0, 1, 0]", "[null, 8, null]");
+ CheckTake(type, "[7, 8, 9]", "[null, 1, 0]", "[null, 8, 7]");
+ CheckTake(type, "[null, 8, 9]", "[]", "[]");
+ CheckTake(type, "[7, 8, 9]", "[0, 0, 0, 0, 0, 0, 2]", "[7, 7, 7, 7, 7, 7, 9]");
+
+ std::shared_ptr<Array> arr;
+ ASSERT_RAISES(IndexError, TakeJSON(type, "[7, 8, 9]", int8(), "[0, 9, 0]", &arr));
+ ASSERT_RAISES(IndexError, TakeJSON(type, "[7, 8, 9]", int8(), "[0, -1, 0]", &arr));
+ }
};
template <typename ArrowType>
@@ -1201,6 +1246,34 @@ TEST_F(TestTakeKernel, TakeBoolean) {
TakeJSON(boolean(), "[true, false, true]", int8(), "[0, -1, 0]", &arr));
}
+TEST_F(TestTakeKernel, Temporal) {
+ this->TestNumericBasics(time32(TimeUnit::MILLI));
+ this->TestNumericBasics(time64(TimeUnit::MICRO));
+ this->TestNumericBasics(timestamp(TimeUnit::NANO, "Europe/Paris"));
+ this->TestNumericBasics(duration(TimeUnit::SECOND));
+ this->TestNumericBasics(date32());
+ CheckTake(date64(), "[0, 86400000, null]", "[null, 1, 1, 0]",
+ "[null, 86400000, 86400000, 0]");
+}
+
+TEST_F(TestTakeKernel, Duration) {
+ for (auto type : DurationTypes()) {
+ this->TestNumericBasics(type);
+ }
+}
+
+TEST_F(TestTakeKernel, Interval) {
+ this->TestNumericBasics(month_interval());
+
+ auto type = day_time_interval();
+ CheckTake(type, "[[1, -600], [2, 3000], null]", "[0, null, 2, 1]",
+ "[[1, -600], null, null, [2, 3000]]");
+ type = month_day_nano_interval();
+ CheckTake(type, "[[1, -2, 34567890123456789], [2, 3, -34567890123456789], null]",
+ "[0, null, 2, 1]",
+ "[[1, -2, 34567890123456789], null, null, [2, 3, -34567890123456789]]");
+}
+
template <typename ArrowType>
class TestTakeKernelWithNumeric : public TestTakeKernelTyped<ArrowType> {
protected:
@@ -1216,18 +1289,7 @@ class TestTakeKernelWithNumeric : public TestTakeKernelTyped<ArrowType> {
TYPED_TEST_SUITE(TestTakeKernelWithNumeric, NumericArrowTypes);
TYPED_TEST(TestTakeKernelWithNumeric, TakeNumeric) {
- this->AssertTake("[7, 8, 9]", "[]", "[]");
- this->AssertTake("[7, 8, 9]", "[0, 1, 0]", "[7, 8, 7]");
- this->AssertTake("[null, 8, 9]", "[0, 1, 0]", "[null, 8, null]");
- this->AssertTake("[7, 8, 9]", "[null, 1, 0]", "[null, 8, 7]");
- this->AssertTake("[null, 8, 9]", "[]", "[]");
- this->AssertTake("[7, 8, 9]", "[0, 0, 0, 0, 0, 0, 2]", "[7, 7, 7, 7, 7, 7, 9]");
-
- std::shared_ptr<Array> arr;
- ASSERT_RAISES(IndexError,
- TakeJSON(this->type_singleton(), "[7, 8, 9]", int8(), "[0, 9, 0]", &arr));
- ASSERT_RAISES(IndexError, TakeJSON(this->type_singleton(), "[7, 8, 9]", int8(),
- "[0, -1, 0]", &arr));
+ this->TestNumericBasics(this->type_singleton());
}
template <typename TypeClass>
@@ -1816,6 +1878,7 @@ TEST(TestTakeMetaFunction, ArityChecking) {
template <typename Unused = void>
struct FilterRandomTest {
static void Test(const std::shared_ptr<DataType>& type) {
+ ARROW_SCOPED_TRACE("type = ", *type);
auto rand = random::RandomArrayGenerator(kRandomSeed);
const int64_t length = static_cast<int64_t>(1ULL << 10);
for (auto null_probability : {0.0, 0.01, 0.1, 0.999, 1.0}) {
@@ -1856,6 +1919,7 @@ void CheckTakeRandom(const std::shared_ptr<Array>& values, int64_t indices_lengt
template <typename ValuesType>
struct TakeRandomTest {
static void Test(const std::shared_ptr<DataType>& type) {
+ ARROW_SCOPED_TRACE("type = ", *type);
auto rand = random::RandomArrayGenerator(kRandomSeed);
const int64_t values_length = 64 * 16 + 1;
const int64_t indices_length = 64 * 4 + 1;
@@ -1897,8 +1961,10 @@ TEST(TestFilter, RandomString) {
}
TEST(TestFilter, RandomFixedSizeBinary) {
- FilterRandomTest<>::Test(fixed_size_binary(0));
- FilterRandomTest<>::Test(fixed_size_binary(16));
+ // FixedSizeBinary filter is special-cased for some widths
+ for (int32_t width : {0, 1, 16, 32, 35}) {
+ FilterRandomTest<>::Test(fixed_size_binary(width));
+ }
}
TEST(TestTake, PrimitiveRandom) { TestRandomPrimitiveCTypes<TakeRandomTest>(); }
@@ -1911,8 +1977,10 @@ TEST(TestTake, RandomString) {
}
TEST(TestTake, RandomFixedSizeBinary) {
- TakeRandomTest<FixedSizeBinaryType>::Test(fixed_size_binary(0));
- TakeRandomTest<FixedSizeBinaryType>::Test(fixed_size_binary(16));
+ // FixedSizeBinary take is special-cased for some widths
+ for (int32_t width : {0, 1, 16, 32, 35}) {
+ TakeRandomTest<FixedSizeBinaryType>::Test(fixed_size_binary(width));
+ }
}
// ----------------------------------------------------------------------