|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one or more |
| 3 | + * contributor license agreements. See the NOTICE file distributed with |
| 4 | + * this work for additional information regarding copyright ownership. |
| 5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| 6 | + * (the "License"); you may not use this file except in compliance with |
| 7 | + * the License. You may obtain a copy of the License at |
| 8 | + * |
| 9 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | + * |
| 11 | + * Unless required by applicable law or agreed to in writing, software |
| 12 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | + * See the License for the specific language governing permissions and |
| 15 | + * limitations under the License. |
| 16 | + */ |
| 17 | + |
| 18 | +package org.apache.spark.sql.execution.joins; |
| 19 | + |
| 20 | +import java.util.NoSuchElementException; |
| 21 | +import javax.annotation.Nullable; |
| 22 | + |
| 23 | +import scala.Function1; |
| 24 | +import scala.collection.Iterator; |
| 25 | +import scala.reflect.ClassTag; |
| 26 | +import scala.reflect.ClassTag$; |
| 27 | + |
| 28 | +import org.apache.spark.sql.AbstractScalaRowIterator; |
| 29 | +import org.apache.spark.sql.Row; |
| 30 | +import org.apache.spark.sql.catalyst.expressions.JoinedRow5; |
| 31 | +import org.apache.spark.sql.catalyst.expressions.RowOrdering; |
| 32 | +import org.apache.spark.util.collection.CompactBuffer; |
| 33 | + |
| 34 | +/** |
| 35 | + * Implements the merge step of sort-merge join. |
| 36 | + */ |
| 37 | +class SortMergeJoinIterator extends AbstractScalaRowIterator { |
| 38 | + |
| 39 | + private static final ClassTag<Row> ROW_CLASS_TAG = ClassTag$.MODULE$.apply(Row.class); |
| 40 | + private final Iterator<Row> leftIter; |
| 41 | + private final Iterator<Row> rightIter; |
| 42 | + private final Function1<Row, Row> leftKeyGenerator; |
| 43 | + private final Function1<Row, Row> rightKeyGenerator; |
| 44 | + private final RowOrdering keyOrdering; |
| 45 | + private final JoinedRow5 joinRow = new JoinedRow5(); |
| 46 | + |
| 47 | + @Nullable private Row leftElement; |
| 48 | + @Nullable private Row rightElement; |
| 49 | + private Row leftKey; |
| 50 | + private Row rightKey; |
| 51 | + @Nullable private CompactBuffer<Row> rightMatches; |
| 52 | + private int rightPosition = -1; |
| 53 | + private boolean stop = false; |
| 54 | + private Row matchKey; |
| 55 | + |
| 56 | + public SortMergeJoinIterator( |
| 57 | + Iterator<Row> leftIter, |
| 58 | + Iterator<Row> rightIter, |
| 59 | + Function1<Row, Row> leftKeyGenerator, |
| 60 | + Function1<Row, Row> rightKeyGenerator, |
| 61 | + RowOrdering keyOrdering) { |
| 62 | + this.leftIter = leftIter; |
| 63 | + this.rightIter = rightIter; |
| 64 | + this.leftKeyGenerator = leftKeyGenerator; |
| 65 | + this.rightKeyGenerator = rightKeyGenerator; |
| 66 | + this.keyOrdering = keyOrdering; |
| 67 | + fetchLeft(); |
| 68 | + fetchRight(); |
| 69 | + } |
| 70 | + |
| 71 | + private void fetchLeft() { |
| 72 | + if (leftIter.hasNext()) { |
| 73 | + leftElement = leftIter.next(); |
| 74 | + leftKey = leftKeyGenerator.apply(leftElement); |
| 75 | + } else { |
| 76 | + leftElement = null; |
| 77 | + } |
| 78 | + } |
| 79 | + |
| 80 | + private void fetchRight() { |
| 81 | + if (rightIter.hasNext()) { |
| 82 | + rightElement = rightIter.next(); |
| 83 | + rightKey = rightKeyGenerator.apply(rightElement); |
| 84 | + } else { |
| 85 | + rightElement = null; |
| 86 | + } |
| 87 | + } |
| 88 | + |
| 89 | + /** |
| 90 | + * Searches the right iterator for the next rows that have matches in left side, and store |
| 91 | + * them in a buffer. |
| 92 | + * |
| 93 | + * @return true if the search is successful, and false if the right iterator runs out of |
| 94 | + * tuples. |
| 95 | + */ |
| 96 | + private boolean nextMatchingPair() { |
| 97 | + if (!stop && rightElement != null) { |
| 98 | + // run both side to get the first match pair |
| 99 | + while (!stop && leftElement != null && rightElement != null) { |
| 100 | + final int comparing = keyOrdering.compare(leftKey, rightKey); |
| 101 | + // for inner join, we need to filter those null keys |
| 102 | + stop = comparing == 0 && !leftKey.anyNull(); |
| 103 | + if (comparing > 0 || rightKey.anyNull()) { |
| 104 | + fetchRight(); |
| 105 | + } else if (comparing < 0 || leftKey.anyNull()) { |
| 106 | + fetchLeft(); |
| 107 | + } |
| 108 | + } |
| 109 | + rightMatches = new CompactBuffer<Row>(ROW_CLASS_TAG); |
| 110 | + if (stop) { |
| 111 | + stop = false; |
| 112 | + // Iterate the right side to buffer all rows that match. |
| 113 | + // As the records should be ordered, exit when we meet the first record that not match. |
| 114 | + while (!stop && rightElement != null) { |
| 115 | + rightMatches.$plus$eq(rightElement); |
| 116 | + fetchRight(); |
| 117 | + stop = keyOrdering.compare(leftKey, rightKey) != 0; |
| 118 | + } |
| 119 | + if (rightMatches.size() > 0) { |
| 120 | + rightPosition = 0; |
| 121 | + matchKey = leftKey; |
| 122 | + } |
| 123 | + } |
| 124 | + } |
| 125 | + return rightMatches != null && rightMatches.size() > 0; |
| 126 | + } |
| 127 | + |
| 128 | + @Override |
| 129 | + public boolean hasNext() { |
| 130 | + return nextMatchingPair(); |
| 131 | + } |
| 132 | + |
| 133 | + @Override |
| 134 | + public Row next() { |
| 135 | + if (hasNext()) { |
| 136 | + // We are using the buffered right rows and run down left iterator |
| 137 | + final Row joinedRow = joinRow.apply(leftElement, rightMatches.apply(rightPosition)); |
| 138 | + rightPosition += 1; |
| 139 | + if (rightPosition >= rightMatches.size()) { |
| 140 | + rightPosition = 0; |
| 141 | + fetchLeft(); |
| 142 | + if (leftElement == null || keyOrdering.compare(leftKey, matchKey) != 0) { |
| 143 | + stop = false; |
| 144 | + rightMatches = null; |
| 145 | + } |
| 146 | + } |
| 147 | + return joinedRow; |
| 148 | + } else { |
| 149 | + // No more results |
| 150 | + throw new NoSuchElementException(); |
| 151 | + } |
| 152 | + } |
| 153 | +} |
0 commit comments