You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@metamodel.apache.org by ka...@apache.org on 2017/07/27 01:49:25 UTC
[2/7] metamodel git commit: Simple nested loop join implementation
Simple nested loop join implementation
Project: http://git-wip-us.apache.org/repos/asf/metamodel/repo
Commit: http://git-wip-us.apache.org/repos/asf/metamodel/commit/ee2b9167
Tree: http://git-wip-us.apache.org/repos/asf/metamodel/tree/ee2b9167
Diff: http://git-wip-us.apache.org/repos/asf/metamodel/diff/ee2b9167
Branch: refs/heads/master
Commit: ee2b91671d8cb6b35046aabba8426724831b7205
Parents: ef5ac06
Author: Jörg Unbehauen <jo...@unbehauen.net>
Authored: Tue May 3 12:34:03 2016 +0200
Committer: Jörg Unbehauen <jo...@unbehauen.net>
Committed: Fri Jul 21 23:25:38 2017 +0200
----------------------------------------------------------------------
.../java/org/apache/metamodel/JoinHelper.java | 133 +++++++++++++++++++
.../org/apache/metamodel/MetaModelHelper.java | 79 +++--------
2 files changed, 152 insertions(+), 60 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/metamodel/blob/ee2b9167/core/src/main/java/org/apache/metamodel/JoinHelper.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/metamodel/JoinHelper.java b/core/src/main/java/org/apache/metamodel/JoinHelper.java
new file mode 100644
index 0000000..c8cdfa7
--- /dev/null
+++ b/core/src/main/java/org/apache/metamodel/JoinHelper.java
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel;
+
+import com.google.common.collect.Lists;
+import org.apache.metamodel.data.*;
+import org.apache.metamodel.query.FilterItem;
+import org.apache.metamodel.query.SelectItem;
+
+import java.util.*;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+
+/**
+ * Join Execution and related methods.
+ */
+public abstract class JoinHelper {
+
+
+ /**
+ * Executes a simple nested loop join. innerLoopDs is copied into memory via toRows() and re-scanned per outer row.
+ * Joined columns are outerLoopDs' select items followed by innerLoopDs'. No close() is issued here for outerLoopDs.
+ * @param innerLoopDs the dataset that is materialized and scanned in the inner loop
+ * @param outerLoopDs the dataset that is iterated in the outer loop
+ * @param filters candidate filters; only those whose select items all occur in the two inputs are applied
+ * @return an in-memory dataset holding every joined row accepted by all applicable filters
+ */
+ public static InMemoryDataSet nestedLoopJoin( DataSet innerLoopDs, DataSet outerLoopDs, Collection<FilterItem> filters){
+ // Materialize the inner side once so it can be re-scanned for every outer row.
+ List<Row> innerRows = innerLoopDs.toRows();
+
+ // Gather the select items of both inputs to determine which filters can be evaluated.
+ List<SelectItem> innerSelItems = Lists.newArrayList(innerLoopDs.getSelectItems());
+ List<SelectItem> outerSelItems = Lists.newArrayList(outerLoopDs.getSelectItems());
+ List<SelectItem> allItems = Lists.newArrayList(innerSelItems);
+ allItems.addAll(outerSelItems);
+
+ // Filters referencing select items outside these two inputs are left out.
+ Set<FilterItem> filterAll = applicableFilters(filters, allItems);
+
+ // Header and joined rows are both built outer-first, inner-second.
+ DataSetHeader jointHeader = joinHeader(outerLoopDs, innerLoopDs);
+
+ List<Row> resultRows = Lists.newArrayList();
+ for(Row outerRow: outerLoopDs){
+ for(Row innerRow: innerRows){
+ Row joinedRow = joinRow(outerRow,innerRow,jointHeader);
+ if(filterAll.isEmpty()|| filterAll.stream().allMatch(fi -> fi.accept(joinedRow))){
+ resultRows.add(joinedRow);
+ }
+ }
+ }
+
+
+
+ return new InMemoryDataSet(jointHeader,resultRows);
+ }
+
+ /** Returns the subset of filters whose select item (and select-item operand, if any) are all contained in selectItemList. */
+ public static Set<FilterItem> applicableFilters(Collection<FilterItem> filters, Collection<SelectItem> selectItemList) {
+ // Copy into a HashSet for the containsAll check below.
+ Set<SelectItem> items = new HashSet<>(selectItemList);
+
+ return filters.stream().filter( fi -> {
+ Collection<SelectItem> fiSelectItems = Lists.newArrayList(fi.getSelectItem());
+ Object operand = fi.getOperand();
+ if(operand instanceof SelectItem){
+ fiSelectItems.add((SelectItem) operand);
+ }
+ // Keep the filter only if every select item it references is available.
+ return items.containsAll(fiSelectItems);
+
+ }).collect(Collectors.toSet());
+ }
+
+
+
+
+
+ /**
+ * Builds a single header from the select items of two datasets: ds1's items first, then ds2's.
+ * @param ds1 the dataset whose select items form the left part of the header
+ * @param ds2 the dataset whose select items form the right part of the header
+ * @return a CachingDataSetHeader over the concatenated select items
+ */
+ public static DataSetHeader joinHeader(DataSet ds1, DataSet ds2){
+ List<SelectItem> joinedSelectItems = Lists.newArrayList(ds1.getSelectItems());
+ joinedSelectItems.addAll(Lists.newArrayList(ds2.getSelectItems()));
+ return new CachingDataSetHeader(joinedSelectItems);
+
+
+ }
+
+ /**
+ * Joins two rows into one by concatenating their values.
+ *
+ * Consider the parameter ordering to maintain backwards compatibility: row1's values always precede row2's.
+ *
+ * @param row1 the row whose values are placed on the left
+ * @param row2 the row whose values are placed on the right
+ * @param jointHeader the header attached to the joined row; presumably row1's select items followed by row2's
+ * @return a DefaultRow carrying the concatenated values under jointHeader
+ */
+ public static Row joinRow(Row row1, Row row2, DataSetHeader jointHeader){
+ Object[] joinedRow = new Object[row1.getValues().length + row2.getValues().length];
+ // Copy row1's values first, then row2's directly after them.
+ System.arraycopy(row1.getValues(),0,joinedRow,0,row1.getValues().length);
+ System.arraycopy(row2.getValues(),0,joinedRow,row1.getValues().length,row2.getValues().length);
+
+
+ return new DefaultRow(jointHeader,joinedRow);
+
+
+ }
+
+
+}
http://git-wip-us.apache.org/repos/asf/metamodel/blob/ee2b9167/core/src/main/java/org/apache/metamodel/MetaModelHelper.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/metamodel/MetaModelHelper.java b/core/src/main/java/org/apache/metamodel/MetaModelHelper.java
index 09d47bc..c788633 100644
--- a/core/src/main/java/org/apache/metamodel/MetaModelHelper.java
+++ b/core/src/main/java/org/apache/metamodel/MetaModelHelper.java
@@ -18,17 +18,10 @@
*/
package org.apache.metamodel;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
import java.util.Map.Entry;
+import com.google.common.collect.Lists;
import org.apache.metamodel.data.CachingDataSetHeader;
import org.apache.metamodel.data.DataSet;
import org.apache.metamodel.data.DataSetHeader;
@@ -46,6 +39,7 @@ import org.apache.metamodel.data.SubSelectionDataSet;
import org.apache.metamodel.query.FilterItem;
import org.apache.metamodel.query.FromItem;
import org.apache.metamodel.query.GroupByItem;
+import org.apache.metamodel.query.OperatorType;
import org.apache.metamodel.query.OrderByItem;
import org.apache.metamodel.query.Query;
import org.apache.metamodel.query.ScalarFunction;
@@ -176,71 +170,36 @@ public final class MetaModelHelper {
public static DataSet getCarthesianProduct(DataSet... fromDataSets) {
return getCarthesianProduct(fromDataSets, new FilterItem[0]);
}
+
+
+
public static DataSet getCarthesianProduct(DataSet[] fromDataSets, Iterable<FilterItem> whereItems) {
+ assert(fromDataSets.length>0);
// First check if carthesian product is even nescesary
if (fromDataSets.length == 1) {
return getFiltered(fromDataSets[0], whereItems);
}
+ // do a nested loop join, no matter what
+ Iterator<DataSet> dsIter = Lists.newArrayList(fromDataSets).iterator();
- List<SelectItem> selectItems = new ArrayList<SelectItem>();
- for (DataSet dataSet : fromDataSets) {
- for (int i = 0; i < dataSet.getSelectItems().length; i++) {
- SelectItem item = dataSet.getSelectItems()[i];
- selectItems.add(item);
- }
- }
+ DataSet joined = dsIter.next();
- int selectItemOffset = 0;
- List<Object[]> data = new ArrayList<Object[]>();
- for (int fromDataSetIndex = 0; fromDataSetIndex < fromDataSets.length; fromDataSetIndex++) {
- DataSet fromDataSet = fromDataSets[fromDataSetIndex];
- SelectItem[] fromSelectItems = fromDataSet.getSelectItems();
- if (fromDataSetIndex == 0) {
- while (fromDataSet.next()) {
- Object[] values = fromDataSet.getRow().getValues();
- Object[] row = new Object[selectItems.size()];
- System.arraycopy(values, 0, row, selectItemOffset, values.length);
- data.add(row);
- }
- fromDataSet.close();
- } else {
- List<Object[]> fromDataRows = new ArrayList<Object[]>();
- while (fromDataSet.next()) {
- fromDataRows.add(fromDataSet.getRow().getValues());
- }
- fromDataSet.close();
- for (int i = 0; i < data.size(); i = i + fromDataRows.size()) {
- Object[] originalRow = data.get(i);
- data.remove(i);
- for (int j = 0; j < fromDataRows.size(); j++) {
- Object[] newRow = fromDataRows.get(j);
- System.arraycopy(newRow, 0, originalRow, selectItemOffset, newRow.length);
- data.add(i + j, originalRow.clone());
- }
- }
- }
- selectItemOffset += fromSelectItems.length;
- }
+ while(dsIter.hasNext()){
+ joined = JoinHelper.nestedLoopJoin(
+ dsIter.next(),
+ joined,
+ Lists.newArrayList(whereItems));
- if (data.isEmpty()) {
- return new EmptyDataSet(selectItems);
}
- final DataSetHeader header = new CachingDataSetHeader(selectItems);
- final List<Row> rows = new ArrayList<Row>(data.size());
- for (Object[] objects : data) {
- rows.add(new DefaultRow(header, objects, null));
- }
+ return joined;
+
- DataSet result = new InMemoryDataSet(header, rows);
- if (whereItems != null) {
- DataSet filteredResult = getFiltered(result, whereItems);
- result = filteredResult;
- }
- return result;
}
+
+
public static DataSet getCarthesianProduct(DataSet[] fromDataSets, FilterItem... filterItems) {
return getCarthesianProduct(fromDataSets, Arrays.asList(filterItems));
}