You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@metamodel.apache.org by ka...@apache.org on 2017/07/27 01:49:25 UTC

[2/7] metamodel git commit: Simple nested loop join implementation

Simple nested loop join implementation


Project: http://git-wip-us.apache.org/repos/asf/metamodel/repo
Commit: http://git-wip-us.apache.org/repos/asf/metamodel/commit/ee2b9167
Tree: http://git-wip-us.apache.org/repos/asf/metamodel/tree/ee2b9167
Diff: http://git-wip-us.apache.org/repos/asf/metamodel/diff/ee2b9167

Branch: refs/heads/master
Commit: ee2b91671d8cb6b35046aabba8426724831b7205
Parents: ef5ac06
Author: Jörg Unbehauen <jo...@unbehauen.net>
Authored: Tue May 3 12:34:03 2016 +0200
Committer: Jörg Unbehauen <jo...@unbehauen.net>
Committed: Fri Jul 21 23:25:38 2017 +0200

----------------------------------------------------------------------
 .../java/org/apache/metamodel/JoinHelper.java   | 133 +++++++++++++++++++
 .../org/apache/metamodel/MetaModelHelper.java   |  79 +++--------
 2 files changed, 152 insertions(+), 60 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/metamodel/blob/ee2b9167/core/src/main/java/org/apache/metamodel/JoinHelper.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/metamodel/JoinHelper.java b/core/src/main/java/org/apache/metamodel/JoinHelper.java
new file mode 100644
index 0000000..c8cdfa7
--- /dev/null
+++ b/core/src/main/java/org/apache/metamodel/JoinHelper.java
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel;
+
+import com.google.common.collect.Lists;
+import org.apache.metamodel.data.*;
+import org.apache.metamodel.query.FilterItem;
+import org.apache.metamodel.query.SelectItem;
+
+import java.util.*;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+
+/**
+ * Join Execution and related methods.
+ */
+public abstract class JoinHelper {
+
+
+    /**
+     * Executes a simple nested loop join: innerLoopDs is copied into memory via toRows(), then every row of outerLoopDs is paired with every inner row and kept when all applicable filters accept the joined row.
+     * NOTE(review): neither data set is explicitly closed in this method - confirm whether toRows() or the caller takes care of that.
+     * @param innerLoopDs the data set that is materialised up front; its values end up on the right of each joined row
+     * @param outerLoopDs the data set that drives the outer loop; its values end up on the left of each joined row
+     * @param filters candidate filters; only those returned by applicableFilters for the combined select items are evaluated
+     * @return an in-memory data set whose header lists the outer select items followed by the inner select items
+     */
+    public static InMemoryDataSet nestedLoopJoin( DataSet innerLoopDs,  DataSet outerLoopDs, Collection<FilterItem> filters){
+
+        List<Row> innerRows = innerLoopDs.toRows();
+
+
+        List<SelectItem> innerSelItems = Lists.newArrayList(innerLoopDs.getSelectItems());
+        List<SelectItem> outerSelItems = Lists.newArrayList(outerLoopDs.getSelectItems());
+        List<SelectItem> allItems = Lists.newArrayList(innerSelItems);
+        allItems.addAll(outerSelItems);
+
+        // Filters that reference select items outside of the two data sets are left out and therefore not evaluated here.
+        Set<FilterItem> filterAll = applicableFilters(filters, allItems);
+
+        // Outer items first, then inner: the same order in which joinRow concatenates the row values below.
+        DataSetHeader jointHeader = joinHeader(outerLoopDs, innerLoopDs);
+
+        List<Row> resultRows = Lists.newArrayList();
+        for(Row outerRow: outerLoopDs){
+            for(Row innerRow: innerRows){
+                Row joinedRow =  joinRow(outerRow,innerRow,jointHeader);
+                if(filterAll.isEmpty()|| filterAll.stream().allMatch(fi -> fi.accept(joinedRow))){
+                    resultRows.add(joinedRow);
+                }
+            }
+        }
+
+
+
+        return new InMemoryDataSet(jointHeader,resultRows);
+    }
+
+
+    public static  Set<FilterItem> applicableFilters(Collection<FilterItem> filters, Collection<SelectItem> selectItemList) {
+        // Returns the subset of filters that reference only select items contained in selectItemList.
+        Set<SelectItem> items = new HashSet<>(selectItemList);
+        // A filter references its own select item and, when its operand is a SelectItem, that operand as well.
+        return filters.stream().filter( fi -> {
+            Collection<SelectItem> fiSelectItems = Lists.newArrayList(fi.getSelectItem());
+            Object operand = fi.getOperand();
+            if(operand instanceof SelectItem){
+                fiSelectItems.add((SelectItem) operand);
+            }
+            // Keep the filter only if every select item it references is available.
+            return items.containsAll(fiSelectItems);
+
+        }).collect(Collectors.toSet());
+    }
+
+
+
+
+
+    /**
+     * Builds the header of a joined data set by concatenating the select items of two data sets.
+     * @param ds1 the data set whose select items come first (left side)
+     * @param ds2 the data set whose select items are appended (right side)
+     * @return a CachingDataSetHeader over the select items of ds1 followed by those of ds2
+     */
+    public static DataSetHeader joinHeader(DataSet ds1, DataSet ds2){
+        List<SelectItem> joinedSelectItems = Lists.newArrayList(ds1.getSelectItems());
+        joinedSelectItems.addAll(Lists.newArrayList(ds2.getSelectItems()));
+        return  new CachingDataSetHeader(joinedSelectItems);
+
+
+    }
+
+    /**
+     * Joins two rows into one by concatenating their value arrays.
+     *
+     * Consider parameter ordering to maintain backwards compatibility.
+     *
+     * @param row1 the row whose values are placed on the left
+     * @param row2 the row whose values are placed on the right
+     * @param jointHeader the header that is attached to the resulting row
+     * @return a DefaultRow holding the values of row1 followed by the values of row2
+     */
+    public static Row joinRow(Row row1, Row row2, DataSetHeader jointHeader){
+        Object[] joinedRow = new Object[row1.getValues().length + row2.getValues().length];
+
+        System.arraycopy(row1.getValues(),0,joinedRow,0,row1.getValues().length);
+        System.arraycopy(row2.getValues(),0,joinedRow,row1.getValues().length,row2.getValues().length);
+
+
+        return new DefaultRow(jointHeader,joinedRow);
+
+
+    }
+
+
+}

http://git-wip-us.apache.org/repos/asf/metamodel/blob/ee2b9167/core/src/main/java/org/apache/metamodel/MetaModelHelper.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/metamodel/MetaModelHelper.java b/core/src/main/java/org/apache/metamodel/MetaModelHelper.java
index 09d47bc..c788633 100644
--- a/core/src/main/java/org/apache/metamodel/MetaModelHelper.java
+++ b/core/src/main/java/org/apache/metamodel/MetaModelHelper.java
@@ -18,17 +18,10 @@
  */
 package org.apache.metamodel;
 
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 import java.util.Map.Entry;
 
+import com.google.common.collect.Lists;
 import org.apache.metamodel.data.CachingDataSetHeader;
 import org.apache.metamodel.data.DataSet;
 import org.apache.metamodel.data.DataSetHeader;
@@ -46,6 +39,7 @@ import org.apache.metamodel.data.SubSelectionDataSet;
 import org.apache.metamodel.query.FilterItem;
 import org.apache.metamodel.query.FromItem;
 import org.apache.metamodel.query.GroupByItem;
+import org.apache.metamodel.query.OperatorType;
 import org.apache.metamodel.query.OrderByItem;
 import org.apache.metamodel.query.Query;
 import org.apache.metamodel.query.ScalarFunction;
@@ -176,71 +170,36 @@ public final class MetaModelHelper {
     public static DataSet getCarthesianProduct(DataSet... fromDataSets) {
         return getCarthesianProduct(fromDataSets, new FilterItem[0]);
     }
+    
+    
+    
 
     public static DataSet getCarthesianProduct(DataSet[] fromDataSets, Iterable<FilterItem> whereItems) {
+        assert(fromDataSets.length>0);
         // First check if carthesian product is even nescesary
         if (fromDataSets.length == 1) {
             return getFiltered(fromDataSets[0], whereItems);
         }
+        // do a nested loop join, no matter what
+        Iterator<DataSet> dsIter = Lists.newArrayList(fromDataSets).iterator();
 
-        List<SelectItem> selectItems = new ArrayList<SelectItem>();
-        for (DataSet dataSet : fromDataSets) {
-            for (int i = 0; i < dataSet.getSelectItems().length; i++) {
-                SelectItem item = dataSet.getSelectItems()[i];
-                selectItems.add(item);
-            }
-        }
+        DataSet joined = dsIter.next();
 
-        int selectItemOffset = 0;
-        List<Object[]> data = new ArrayList<Object[]>();
-        for (int fromDataSetIndex = 0; fromDataSetIndex < fromDataSets.length; fromDataSetIndex++) {
-            DataSet fromDataSet = fromDataSets[fromDataSetIndex];
-            SelectItem[] fromSelectItems = fromDataSet.getSelectItems();
-            if (fromDataSetIndex == 0) {
-                while (fromDataSet.next()) {
-                    Object[] values = fromDataSet.getRow().getValues();
-                    Object[] row = new Object[selectItems.size()];
-                    System.arraycopy(values, 0, row, selectItemOffset, values.length);
-                    data.add(row);
-                }
-                fromDataSet.close();
-            } else {
-                List<Object[]> fromDataRows = new ArrayList<Object[]>();
-                while (fromDataSet.next()) {
-                    fromDataRows.add(fromDataSet.getRow().getValues());
-                }
-                fromDataSet.close();
-                for (int i = 0; i < data.size(); i = i + fromDataRows.size()) {
-                    Object[] originalRow = data.get(i);
-                    data.remove(i);
-                    for (int j = 0; j < fromDataRows.size(); j++) {
-                        Object[] newRow = fromDataRows.get(j);
-                        System.arraycopy(newRow, 0, originalRow, selectItemOffset, newRow.length);
-                        data.add(i + j, originalRow.clone());
-                    }
-                }
-            }
-            selectItemOffset += fromSelectItems.length;
-        }
+        while(dsIter.hasNext()){
+            joined = JoinHelper.nestedLoopJoin(
+                    dsIter.next(),
+                    joined,
+                    Lists.newArrayList(whereItems));
 
-        if (data.isEmpty()) {
-            return new EmptyDataSet(selectItems);
         }
 
-        final DataSetHeader header = new CachingDataSetHeader(selectItems);
-        final List<Row> rows = new ArrayList<Row>(data.size());
-        for (Object[] objects : data) {
-            rows.add(new DefaultRow(header, objects, null));
-        }
+        return joined;
+
 
-        DataSet result = new InMemoryDataSet(header, rows);
-        if (whereItems != null) {
-            DataSet filteredResult = getFiltered(result, whereItems);
-            result = filteredResult;
-        }
-        return result;
     }
 
+    
+
     public static DataSet getCarthesianProduct(DataSet[] fromDataSets, FilterItem... filterItems) {
         return getCarthesianProduct(fromDataSets, Arrays.asList(filterItems));
     }