Commit e6b4660
[SPARK-23736][SQL] Extending the concat function to support array columns
## What changes were proposed in this pull request?
The PR adds a logic for easy concatenation of multiple array columns and covers:
- Concat expression has been extended to support array columns
- A Python wrapper
## How was this patch tested?
New tests added into:
- CollectionExpressionsSuite
- DataFrameFunctionsSuite
- typeCoercion/native/concat.sql
## Codegen examples
### Primitive-type elements
```
val df = Seq(
(Seq(1 ,2), Seq(3, 4)),
(Seq(1, 2, 3), null)
).toDF("a", "b")
df.filter('a.isNotNull).select(concat('a, 'b)).debugCodegen()
```
Result:
```
/* 033 */ boolean inputadapter_isNull = inputadapter_row.isNullAt(0);
/* 034 */ ArrayData inputadapter_value = inputadapter_isNull ?
/* 035 */ null : (inputadapter_row.getArray(0));
/* 036 */
/* 037 */ if (!(!inputadapter_isNull)) continue;
/* 038 */
/* 039 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[0] /* numOutputRows */).add(1);
/* 040 */
/* 041 */ ArrayData[] project_args = new ArrayData[2];
/* 042 */
/* 043 */ if (!false) {
/* 044 */ project_args[0] = inputadapter_value;
/* 045 */ }
/* 046 */
/* 047 */ boolean inputadapter_isNull1 = inputadapter_row.isNullAt(1);
/* 048 */ ArrayData inputadapter_value1 = inputadapter_isNull1 ?
/* 049 */ null : (inputadapter_row.getArray(1));
/* 050 */ if (!inputadapter_isNull1) {
/* 051 */ project_args[1] = inputadapter_value1;
/* 052 */ }
/* 053 */
/* 054 */ ArrayData project_value = new Object() {
/* 055 */ public ArrayData concat(ArrayData[] args) {
/* 056 */ for (int z = 0; z < 2; z++) {
/* 057 */ if (args[z] == null) return null;
/* 058 */ }
/* 059 */
/* 060 */ long project_numElements = 0L;
/* 061 */ for (int z = 0; z < 2; z++) {
/* 062 */ project_numElements += args[z].numElements();
/* 063 */ }
/* 064 */ if (project_numElements > 2147483632) {
/* 065 */ throw new RuntimeException("Unsuccessful try to concat arrays with " + project_numElements +
/* 066 */ " elements due to exceeding the array size limit 2147483632.");
/* 067 */ }
/* 068 */
/* 069 */ long project_size = UnsafeArrayData.calculateSizeOfUnderlyingByteArray(
/* 070 */ project_numElements,
/* 071 */ 4);
/* 072 */ if (project_size > 2147483632) {
/* 073 */ throw new RuntimeException("Unsuccessful try to concat arrays with " + project_size +
/* 074 */ " bytes of data due to exceeding the limit 2147483632 bytes" +
/* 075 */ " for UnsafeArrayData.");
/* 076 */ }
/* 077 */
/* 078 */ byte[] project_array = new byte[(int)project_size];
/* 079 */ UnsafeArrayData project_arrayData = new UnsafeArrayData();
/* 080 */ Platform.putLong(project_array, 16, project_numElements);
/* 081 */ project_arrayData.pointTo(project_array, 16, (int)project_size);
/* 082 */ int project_counter = 0;
/* 083 */ for (int y = 0; y < 2; y++) {
/* 084 */ for (int z = 0; z < args[y].numElements(); z++) {
/* 085 */ if (args[y].isNullAt(z)) {
/* 086 */ project_arrayData.setNullAt(project_counter);
/* 087 */ } else {
/* 088 */ project_arrayData.setInt(
/* 089 */ project_counter,
/* 090 */ args[y].getInt(z)
/* 091 */ );
/* 092 */ }
/* 093 */ project_counter++;
/* 094 */ }
/* 095 */ }
/* 096 */ return project_arrayData;
/* 097 */ }
/* 098 */ }.concat(project_args);
/* 099 */ boolean project_isNull = project_value == null;
```
### Non-primitive-type elements
```
val df = Seq(
(Seq("aa" ,"bb"), Seq("ccc", "ddd")),
(Seq("x", "y"), null)
).toDF("a", "b")
df.filter('a.isNotNull).select(concat('a, 'b)).debugCodegen()
```
Result:
```
/* 033 */ boolean inputadapter_isNull = inputadapter_row.isNullAt(0);
/* 034 */ ArrayData inputadapter_value = inputadapter_isNull ?
/* 035 */ null : (inputadapter_row.getArray(0));
/* 036 */
/* 037 */ if (!(!inputadapter_isNull)) continue;
/* 038 */
/* 039 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[0] /* numOutputRows */).add(1);
/* 040 */
/* 041 */ ArrayData[] project_args = new ArrayData[2];
/* 042 */
/* 043 */ if (!false) {
/* 044 */ project_args[0] = inputadapter_value;
/* 045 */ }
/* 046 */
/* 047 */ boolean inputadapter_isNull1 = inputadapter_row.isNullAt(1);
/* 048 */ ArrayData inputadapter_value1 = inputadapter_isNull1 ?
/* 049 */ null : (inputadapter_row.getArray(1));
/* 050 */ if (!inputadapter_isNull1) {
/* 051 */ project_args[1] = inputadapter_value1;
/* 052 */ }
/* 053 */
/* 054 */ ArrayData project_value = new Object() {
/* 055 */ public ArrayData concat(ArrayData[] args) {
/* 056 */ for (int z = 0; z < 2; z++) {
/* 057 */ if (args[z] == null) return null;
/* 058 */ }
/* 059 */
/* 060 */ long project_numElements = 0L;
/* 061 */ for (int z = 0; z < 2; z++) {
/* 062 */ project_numElements += args[z].numElements();
/* 063 */ }
/* 064 */ if (project_numElements > 2147483632) {
/* 065 */ throw new RuntimeException("Unsuccessful try to concat arrays with " + project_numElements +
/* 066 */ " elements due to exceeding the array size limit 2147483632.");
/* 067 */ }
/* 068 */
/* 069 */ Object[] project_arrayObjects = new Object[(int)project_numElements];
/* 070 */ int project_counter = 0;
/* 071 */ for (int y = 0; y < 2; y++) {
/* 072 */ for (int z = 0; z < args[y].numElements(); z++) {
/* 073 */ project_arrayObjects[project_counter] = args[y].getUTF8String(z);
/* 074 */ project_counter++;
/* 075 */ }
/* 076 */ }
/* 077 */ return new org.apache.spark.sql.catalyst.util.GenericArrayData(project_arrayObjects);
/* 078 */ }
/* 079 */ }.concat(project_args);
/* 080 */ boolean project_isNull = project_value == null;
```
Author: mn-mikke <mrkAha12346github>
Closes apache#20858 from mn-mikke/feature/array-api-concat_arrays-to-master.1 parent b3fde5a commit e6b4660
File tree
13 files changed
+529
-111
lines changed- common/unsafe/src/main/java/org/apache/spark/unsafe/array
- python/pyspark/sql
- sql
- catalyst/src
- main
- java/org/apache/spark/sql/catalyst/expressions
- scala/org/apache/spark/sql/catalyst
- analysis
- expressions
- test/scala/org/apache/spark/sql/catalyst/expressions
- core/src
- main/scala/org/apache/spark/sql
- test
- resources/sql-tests
- inputs/typeCoercion/native
- results/typeCoercion/native
- scala/org/apache/spark/sql
- execution/command
13 files changed
+529
-111
lines changedLines changed: 5 additions & 1 deletion
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
33 | 33 | | |
34 | 34 | | |
35 | 35 | | |
36 | | - | |
| 36 | + | |
| 37 | + | |
| 38 | + | |
| 39 | + | |
| 40 | + | |
37 | 41 | | |
38 | 42 | | |
39 | 43 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
1425 | 1425 | | |
1426 | 1426 | | |
1427 | 1427 | | |
1428 | | - | |
1429 | | - | |
1430 | | - | |
1431 | | - | |
1432 | | - | |
1433 | | - | |
1434 | | - | |
1435 | | - | |
1436 | | - | |
1437 | | - | |
1438 | | - | |
1439 | | - | |
1440 | | - | |
1441 | | - | |
1442 | | - | |
1443 | 1428 | | |
1444 | 1429 | | |
1445 | 1430 | | |
| |||
1845 | 1830 | | |
1846 | 1831 | | |
1847 | 1832 | | |
| 1833 | + | |
| 1834 | + | |
| 1835 | + | |
| 1836 | + | |
| 1837 | + | |
| 1838 | + | |
| 1839 | + | |
| 1840 | + | |
| 1841 | + | |
| 1842 | + | |
| 1843 | + | |
| 1844 | + | |
| 1845 | + | |
| 1846 | + | |
| 1847 | + | |
| 1848 | + | |
| 1849 | + | |
| 1850 | + | |
| 1851 | + | |
1848 | 1852 | | |
1849 | 1853 | | |
1850 | 1854 | | |
| |||
Lines changed: 10 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
56 | 56 | | |
57 | 57 | | |
58 | 58 | | |
| 59 | + | |
| 60 | + | |
| 61 | + | |
| 62 | + | |
59 | 63 | | |
60 | 64 | | |
61 | 65 | | |
| 66 | + | |
| 67 | + | |
| 68 | + | |
| 69 | + | |
| 70 | + | |
| 71 | + | |
62 | 72 | | |
63 | 73 | | |
64 | 74 | | |
| |||
Lines changed: 1 addition & 1 deletion
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
308 | 308 | | |
309 | 309 | | |
310 | 310 | | |
311 | | - | |
312 | 311 | | |
313 | 312 | | |
314 | 313 | | |
| |||
413 | 412 | | |
414 | 413 | | |
415 | 414 | | |
| 415 | + | |
416 | 416 | | |
417 | 417 | | |
418 | 418 | | |
| |||
Lines changed: 8 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
520 | 520 | | |
521 | 521 | | |
522 | 522 | | |
| 523 | + | |
| 524 | + | |
| 525 | + | |
| 526 | + | |
| 527 | + | |
| 528 | + | |
| 529 | + | |
| 530 | + | |
523 | 531 | | |
524 | 532 | | |
525 | 533 | | |
| |||
Lines changed: 219 additions & 1 deletion
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
23 | 23 | | |
24 | 24 | | |
25 | 25 | | |
26 | | - | |
| 26 | + | |
| 27 | + | |
| 28 | + | |
27 | 29 | | |
28 | 30 | | |
29 | 31 | | |
| |||
665 | 667 | | |
666 | 668 | | |
667 | 669 | | |
| 670 | + | |
| 671 | + | |
| 672 | + | |
| 673 | + | |
| 674 | + | |
| 675 | + | |
| 676 | + | |
| 677 | + | |
| 678 | + | |
| 679 | + | |
| 680 | + | |
| 681 | + | |
| 682 | + | |
| 683 | + | |
| 684 | + | |
| 685 | + | |
| 686 | + | |
| 687 | + | |
| 688 | + | |
| 689 | + | |
| 690 | + | |
| 691 | + | |
| 692 | + | |
| 693 | + | |
| 694 | + | |
| 695 | + | |
| 696 | + | |
| 697 | + | |
| 698 | + | |
| 699 | + | |
| 700 | + | |
| 701 | + | |
| 702 | + | |
| 703 | + | |
| 704 | + | |
| 705 | + | |
| 706 | + | |
| 707 | + | |
| 708 | + | |
| 709 | + | |
| 710 | + | |
| 711 | + | |
| 712 | + | |
| 713 | + | |
| 714 | + | |
| 715 | + | |
| 716 | + | |
| 717 | + | |
| 718 | + | |
| 719 | + | |
| 720 | + | |
| 721 | + | |
| 722 | + | |
| 723 | + | |
| 724 | + | |
| 725 | + | |
| 726 | + | |
| 727 | + | |
| 728 | + | |
| 729 | + | |
| 730 | + | |
| 731 | + | |
| 732 | + | |
| 733 | + | |
| 734 | + | |
| 735 | + | |
| 736 | + | |
| 737 | + | |
| 738 | + | |
| 739 | + | |
| 740 | + | |
| 741 | + | |
| 742 | + | |
| 743 | + | |
| 744 | + | |
| 745 | + | |
| 746 | + | |
| 747 | + | |
| 748 | + | |
| 749 | + | |
| 750 | + | |
| 751 | + | |
| 752 | + | |
| 753 | + | |
| 754 | + | |
| 755 | + | |
| 756 | + | |
| 757 | + | |
| 758 | + | |
| 759 | + | |
| 760 | + | |
| 761 | + | |
| 762 | + | |
| 763 | + | |
| 764 | + | |
| 765 | + | |
| 766 | + | |
| 767 | + | |
| 768 | + | |
| 769 | + | |
| 770 | + | |
| 771 | + | |
| 772 | + | |
| 773 | + | |
| 774 | + | |
| 775 | + | |
| 776 | + | |
| 777 | + | |
| 778 | + | |
| 779 | + | |
| 780 | + | |
| 781 | + | |
| 782 | + | |
| 783 | + | |
| 784 | + | |
| 785 | + | |
| 786 | + | |
| 787 | + | |
| 788 | + | |
| 789 | + | |
| 790 | + | |
| 791 | + | |
| 792 | + | |
| 793 | + | |
| 794 | + | |
| 795 | + | |
| 796 | + | |
| 797 | + | |
| 798 | + | |
| 799 | + | |
| 800 | + | |
| 801 | + | |
| 802 | + | |
| 803 | + | |
| 804 | + | |
| 805 | + | |
| 806 | + | |
| 807 | + | |
| 808 | + | |
| 809 | + | |
| 810 | + | |
| 811 | + | |
| 812 | + | |
| 813 | + | |
| 814 | + | |
| 815 | + | |
| 816 | + | |
| 817 | + | |
| 818 | + | |
| 819 | + | |
| 820 | + | |
| 821 | + | |
| 822 | + | |
| 823 | + | |
| 824 | + | |
| 825 | + | |
| 826 | + | |
| 827 | + | |
| 828 | + | |
| 829 | + | |
| 830 | + | |
| 831 | + | |
| 832 | + | |
| 833 | + | |
| 834 | + | |
| 835 | + | |
| 836 | + | |
| 837 | + | |
| 838 | + | |
| 839 | + | |
| 840 | + | |
| 841 | + | |
| 842 | + | |
| 843 | + | |
| 844 | + | |
| 845 | + | |
| 846 | + | |
| 847 | + | |
| 848 | + | |
| 849 | + | |
| 850 | + | |
| 851 | + | |
| 852 | + | |
| 853 | + | |
| 854 | + | |
| 855 | + | |
| 856 | + | |
| 857 | + | |
| 858 | + | |
| 859 | + | |
| 860 | + | |
| 861 | + | |
| 862 | + | |
| 863 | + | |
| 864 | + | |
| 865 | + | |
| 866 | + | |
| 867 | + | |
| 868 | + | |
| 869 | + | |
| 870 | + | |
| 871 | + | |
| 872 | + | |
| 873 | + | |
| 874 | + | |
| 875 | + | |
| 876 | + | |
| 877 | + | |
| 878 | + | |
| 879 | + | |
| 880 | + | |
| 881 | + | |
| 882 | + | |
| 883 | + | |
| 884 | + | |
| 885 | + | |
0 commit comments