You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jv...@apache.org on 2011/04/25 04:50:16 UTC

svn commit: r1096397 [2/8] - in /hive/trunk: ./ common/src/java/org/apache/hadoop/hive/conf/ conf/ lib/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/index/ ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/ ql/src/...

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFEWAHBitmapBop.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFEWAHBitmapBop.java?rev=1096397&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFEWAHBitmapBop.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFEWAHBitmapBop.java Mon Apr 25 02:50:14 2011
@@ -0,0 +1,159 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import javaewah.EWAHCompressedBitmap;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.index.bitmap.BitmapObjectInput;
+import org.apache.hadoop.hive.ql.index.bitmap.BitmapObjectOutput;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableVoidObjectInspector;
+import org.apache.hadoop.io.LongWritable;
+
+/**
+ * An abstract class for a UDF that performs a binary operation between two EWAH-compressed bitmaps.
+ * For example: Bitmap OR and AND operations between two EWAH-compressed bitmaps.
+ */
+abstract public class AbstractGenericUDFEWAHBitmapBop extends GenericUDF {
+  protected final ArrayList<Object> ret = new ArrayList<Object>();
+  private ObjectInspector b1OI;
+  private ObjectInspector b2OI;
+  private final String name;
+
+  AbstractGenericUDFEWAHBitmapBop(String name) {
+    this.name = name;
+  }
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    if (arguments.length != 2) {
+      throw new UDFArgumentLengthException(
+        "The function " + name + "(b1, b2) takes exactly 2 arguments");
+    }
+
+    if (arguments[0].getCategory().equals(Category.LIST)) {
+      b1OI = (ListObjectInspector) arguments[0];
+    } else {
+        throw new UDFArgumentTypeException(0, "\""
+          + Category.LIST.toString().toLowerCase()
+          + "\" is expected at function " + name + ", but \""
+          + arguments[0].getTypeName() + "\" is found");
+    }
+
+    if (arguments[1].getCategory().equals(Category.LIST)) {
+      b2OI = (ListObjectInspector) arguments[1];
+    } else {
+        throw new UDFArgumentTypeException(1, "\""
+          + Category.LIST.toString().toLowerCase()
+          + "\" is expected at function " + name + ", but \""
+          + arguments[1].getTypeName() + "\" is found");
+    }
+
+    return ObjectInspectorFactory
+        .getStandardListObjectInspector(PrimitiveObjectInspectorFactory
+            .writableLongObjectInspector);
+  }
+
+  protected abstract EWAHCompressedBitmap bitmapBop(
+    EWAHCompressedBitmap bitmap1, EWAHCompressedBitmap bitmap2);
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    assert (arguments.length == 2);
+    Object b1 = arguments[0].get();
+    Object b2 = arguments[1].get();
+
+    EWAHCompressedBitmap bitmap1 = wordArrayToBitmap(b1);
+    EWAHCompressedBitmap bitmap2 = wordArrayToBitmap(b2);
+
+    EWAHCompressedBitmap bitmapAnd = bitmapBop(bitmap1, bitmap2);
+
+    BitmapObjectOutput bitmapObjOut = new BitmapObjectOutput();
+    try {
+      bitmapAnd.writeExternal(bitmapObjOut);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    List<LongWritable> retList = bitmapToWordArray(bitmapAnd);
+    for (LongWritable l : retList) {
+      ret.add(l);
+    }
+    return ret;
+  }
+  
+  protected EWAHCompressedBitmap wordArrayToBitmap(Object b) {
+    ListObjectInspector lloi = (ListObjectInspector) b1OI;
+    int length = lloi.getListLength(b);
+    ArrayList<LongWritable> bitmapArray = new ArrayList<LongWritable>();
+    for (int i = 0; i < length; i++) {
+      long l = PrimitiveObjectInspectorUtils.getLong(
+          lloi.getListElement(b, i), 
+          (PrimitiveObjectInspector) lloi.getListElementObjectInspector());
+      bitmapArray.add(new LongWritable(l));
+    }
+
+    BitmapObjectInput bitmapObjIn = new BitmapObjectInput(bitmapArray);
+    EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap();
+    try {
+      bitmap.readExternal(bitmapObjIn);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    return bitmap;
+  }
+
+  protected List<LongWritable> bitmapToWordArray(EWAHCompressedBitmap bitmap) {
+    BitmapObjectOutput bitmapObjOut = new BitmapObjectOutput();
+    try {
+      bitmap.writeExternal(bitmapObjOut);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    return bitmapObjOut.list();
+  }
+  
+  @Override
+  public String getDisplayString(String[] children) {
+    StringBuilder sb = new StringBuilder();
+    sb.append(name);
+    sb.append("(");
+    for (int i = 0; i < children.length; i++) {
+      sb.append(children[i]);
+      if (i + 1 != children.length) {
+        sb.append(",");
+      }
+    }
+    sb.append(")");
+    return sb.toString();
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java?rev=1096397&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java Mon Apr 25 02:50:14 2011
@@ -0,0 +1,188 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import javaewah.EWAHCompressedBitmap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.index.bitmap.BitmapObjectInput;
+import org.apache.hadoop.hive.ql.index.bitmap.BitmapObjectOutput;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.util.StringUtils;
+
+
+/**
+ * GenericUDAFEWAHBitmap.
+ *
+ * Aggregate function that sets one bit per input value in an {@link EWAHCompressedBitmap} and
+ * returns the bitmap's serialized words as a list of longs.
+ */
+@Description(name = "ewah_bitmap", value = "_FUNC_(expr) - Returns an EWAH-compressed bitmap representation of a column.")
+public class GenericUDAFEWAHBitmap extends AbstractGenericUDAFResolver {
+
+  static final Log LOG = LogFactory.getLog(GenericUDAFEWAHBitmap.class.getName());
+
+  /**
+   * Validates the argument list and creates the evaluator.
+   *
+   * @param parameters type information for the arguments of the call
+   * @return a new {@link GenericUDAFEWAHBitmapEvaluator}
+   * @throws SemanticException if there is not exactly one argument, the argument is not a
+   *         primitive type, or its type does not support comparison
+   */
+  @Override
+  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
+    throws SemanticException {
+    if (parameters.length != 1) {
+      throw new UDFArgumentTypeException(parameters.length - 1,
+          "Exactly one argument is expected.");
+    }
+    // The evaluator reads its input through a PrimitiveObjectInspector, so reject anything
+    // else here rather than failing with a ClassCastException in init().
+    if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+      throw new UDFArgumentTypeException(0,
+          "Only primitive type arguments are accepted but "
+          + parameters[0].getTypeName() + " was passed as parameter 1.");
+    }
+    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
+    if (!ObjectInspectorUtils.compareSupported(oi)) {
+      throw new UDFArgumentTypeException(parameters.length - 1,
+          "Cannot support comparison of map<> type or complex type containing map<>.");
+    }
+    return new GenericUDAFEWAHBitmapEvaluator();
+  }
+
+  //The UDAF evaluator assumes that all rows it's evaluating have
+  //the same (desired) value.
+  public static class GenericUDAFEWAHBitmapEvaluator extends GenericUDAFEvaluator {
+
+    // For PARTIAL1 and COMPLETE: ObjectInspectors for original data
+    private PrimitiveObjectInspector inputOI;
+    private LongObjectInspector bitmapLongOI;
+
+    // For PARTIAL2 and FINAL: ObjectInspectors for partial aggregations
+    // (lists of bitmaps)
+    private StandardListObjectInspector loi;
+    private StandardListObjectInspector internalMergeOI;
+
+    /**
+     * Records the inspectors needed for the given mode.
+     *
+     * @param m aggregation mode
+     * @param parameters in PARTIAL1 and COMPLETE the inspector of the raw input; in PARTIAL2
+     *        and FINAL the inspector of the partial result (a list of longs)
+     * @return a standard list inspector over writable longs, used for both partial and
+     *         final results
+     */
+    @Override
+    public ObjectInspector init(Mode m, ObjectInspector[] parameters)
+        throws HiveException {
+      super.init(m, parameters);
+      bitmapLongOI = PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+      // Every mode emits the bitmap as a list of longs.
+      loi = (StandardListObjectInspector) ObjectInspectorFactory
+          .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+      if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
+        // iterate() is called on original rows: COMPLETE (no map-side aggregation) receives
+        // the raw input inspector just like PARTIAL1, not a list inspector.
+        inputOI = (PrimitiveObjectInspector) parameters[0];
+      } else {
+        // merge() is called on partial results produced by terminatePartial().
+        internalMergeOI = (StandardListObjectInspector) parameters[0];
+        inputOI = PrimitiveObjectInspectorFactory.writableByteObjectInspector;
+      }
+      return loi;
+    }
+
+    /** class for storing the current partial result aggregation */
+    static class BitmapAgg implements AggregationBuffer {
+      EWAHCompressedBitmap bitmap;
+    }
+
+    /** Replaces the buffer's bitmap with a new, empty one. */
+    @Override
+    public void reset(AggregationBuffer agg) throws HiveException {
+      ((BitmapAgg) agg).bitmap = new EWAHCompressedBitmap();
+    }
+
+    /** Creates a buffer holding an empty bitmap. */
+    @Override
+    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+      BitmapAgg result = new BitmapAgg();
+      reset(result);
+      return result;
+    }
+
+    /**
+     * Sets the bit whose index is the int value of the single parameter; null values are
+     * ignored. A NumberFormatException raised while reading the value is logged and the
+     * row is skipped.
+     */
+    @Override
+    public void iterate(AggregationBuffer agg, Object[] parameters)
+        throws HiveException {
+      assert (parameters.length == 1);
+      Object p = parameters[0];
+      if (p != null) {
+        BitmapAgg myagg = (BitmapAgg) agg;
+        try {
+          int row = PrimitiveObjectInspectorUtils.getInt(p, inputOI);
+          addBitmap(row, myagg);
+        } catch (NumberFormatException e) {
+          LOG.warn(getClass().getSimpleName() + " " +
+              StringUtils.stringifyException(e));
+        }
+      }
+    }
+
+    /** Returns the aggregated bitmap serialized as a list of longs. */
+    @Override
+    public Object terminate(AggregationBuffer agg) throws HiveException {
+      return serialize((BitmapAgg) agg);
+    }
+
+    /**
+     * ORs a partial result (a serialized bitmap) into the buffer's bitmap. A null partial
+     * contributes nothing and is ignored.
+     */
+    @Override
+    @SuppressWarnings("unchecked") // partial results are the lists built by terminatePartial()
+    public void merge(AggregationBuffer agg, Object partial)
+        throws HiveException {
+      if (partial == null) {
+        return;
+      }
+      BitmapAgg myagg = (BitmapAgg) agg;
+      ArrayList<LongWritable> partialResult = (ArrayList<LongWritable>) internalMergeOI.getList(partial);
+      BitmapObjectInput bitmapObjIn = new BitmapObjectInput(partialResult);
+      EWAHCompressedBitmap partialBitmap = new EWAHCompressedBitmap();
+      try {
+        partialBitmap.readExternal(bitmapObjIn);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+      myagg.bitmap = myagg.bitmap.or(partialBitmap);
+    }
+
+    /** Returns the bitmap aggregated so far, serialized as a list of longs. */
+    @Override
+    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+      return serialize((BitmapAgg) agg);
+    }
+
+    /**
+     * Writes the buffer's bitmap through {@link BitmapObjectOutput} and returns the
+     * resulting list; an IOException is rethrown wrapped in a RuntimeException.
+     */
+    private Object serialize(BitmapAgg myagg) {
+      BitmapObjectOutput bitmapObjOut = new BitmapObjectOutput();
+      try {
+        myagg.bitmap.writeExternal(bitmapObjOut);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+      return bitmapObjOut.list();
+    }
+
+    /**
+     * Sets bit {@code newRow} in the buffer's bitmap.
+     *
+     * @throws RuntimeException if the bitmap refuses the bit (set() returned false)
+     */
+    private void addBitmap(int newRow, BitmapAgg myagg) {
+      if (!myagg.bitmap.set(newRow)) {
+        throw new RuntimeException("Can't set bits out of order with EWAHCompressedBitmap");
+      }
+    }
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapAnd.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapAnd.java?rev=1096397&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapAnd.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapAnd.java Mon Apr 25 02:50:14 2011
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import javaewah.EWAHCompressedBitmap;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+
+/**
+ * GenericEWAHUDFBitmapAnd.
+ *
+ */
+@Description(name = "ewah_bitmap_and",
+  value = "_FUNC_(b1, b2) - Return an EWAH-compressed bitmap that is the bitwise AND of two bitmaps.")
+public class GenericUDFEWAHBitmapAnd extends AbstractGenericUDFEWAHBitmapBop {
+
+  public GenericUDFEWAHBitmapAnd() {
+    super("EWAH_BITMAP_AND");
+  }
+
+  @Override
+  protected EWAHCompressedBitmap bitmapBop(
+      EWAHCompressedBitmap bitmap1, EWAHCompressedBitmap bitmap2) {
+    return bitmap1.and(bitmap2);
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapEmpty.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapEmpty.java?rev=1096397&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapEmpty.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapEmpty.java Mon Apr 25 02:50:14 2011
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import javaewah.EWAHCompressedBitmap;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.index.bitmap.BitmapObjectInput;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.LongWritable;
+
+/**
+ * GenericUDFEWAHBitmapEmpty.
+ *
+ * Predicate UDF over a single list argument holding the words of a serialized
+ * {@link EWAHCompressedBitmap}; it is true when the decoded bitmap has no set bit.
+ */
+@Description(name = "ewah_bitmap_empty", value = "_FUNC_(bitmap) - "
+    + "Predicate that tests whether an EWAH-compressed bitmap is all zeros ")
+public class GenericUDFEWAHBitmapEmpty extends GenericUDF {
+  // Inspector of the list argument, captured in initialize().
+  ObjectInspector bitmapOI;
+  // Inspector describing the boolean result.
+  BooleanObjectInspector boolOI;
+
+  /**
+   * Checks that exactly one list argument was supplied and remembers its inspector.
+   *
+   * @param arguments inspectors for the arguments of the call
+   * @return the writable boolean inspector describing the result
+   * @throws UDFArgumentException if the argument count is not 1 or the argument is not a list
+   */
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    if (arguments.length != 1) {
+      throw new UDFArgumentLengthException(
+        "The function EWAH_BITMAP_EMPTY(b) takes exactly 1 argument");
+    }
+
+    if (arguments[0].getCategory().equals(Category.LIST)) {
+      bitmapOI = (ListObjectInspector) arguments[0];
+    } else {
+      throw new UDFArgumentTypeException(0, "\""
+        + Category.LIST.toString().toLowerCase()
+        + "\" is expected at function EWAH_BITMAP_EMPTY, but \""
+        + arguments[0].getTypeName() + "\" is found");
+    }
+
+    boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
+    return boolOI;
+  }
+
+  /**
+   * Decodes the argument into a bitmap and tests whether any bit is set.
+   *
+   * @param arguments the single deferred list argument
+   * @return a {@link BooleanWritable} that is true when the bitmap's iterator yields nothing
+   * @throws RuntimeException wrapping any IOException raised while reading the words
+   */
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    // initialize() accepts exactly one argument.
+    assert (arguments.length == 1);
+    Object b = arguments[0].get();
+
+    ListObjectInspector lloi = (ListObjectInspector) bitmapOI;
+    int length = lloi.getListLength(b);
+    ArrayList<LongWritable> bitmapArray = new ArrayList<LongWritable>();
+    for (int i = 0; i < length; i++) {
+      long l = PrimitiveObjectInspectorUtils.getLong(
+          lloi.getListElement(b, i),
+          (PrimitiveObjectInspector) lloi.getListElementObjectInspector());
+      bitmapArray.add(new LongWritable(l));
+    }
+
+    BitmapObjectInput bitmapObjIn = new BitmapObjectInput(bitmapArray);
+    EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap();
+    try {
+      bitmap.readExternal(bitmapObjIn);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+
+    // Return true only if the bitmap is all zeros, i.e. it has no set bit to iterate over.
+    return new BooleanWritable(!bitmap.iterator().hasNext());
+  }
+
+  /**
+   * Renders the call as {@code EWAH_BITMAP_EMPTY(child1,child2,...)}.
+   */
+  @Override
+  public String getDisplayString(String[] children) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("EWAH_BITMAP_EMPTY(");
+    for (int i = 0; i < children.length; i++) {
+      sb.append(children[i]);
+      if (i + 1 != children.length) {
+        sb.append(",");
+      }
+    }
+    sb.append(")");
+    return sb.toString();
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapOr.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapOr.java?rev=1096397&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapOr.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapOr.java Mon Apr 25 02:50:14 2011
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import javaewah.EWAHCompressedBitmap;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+
+/**
+ * GenericUDFEWAHBitmapOr.
+ *
+ */
+@Description(name = "ewah_bitmap_or",
+  value = "_FUNC_(b1, b2) - Return an EWAH-compressed bitmap that is the bitwise OR of two bitmaps.")
+public class GenericUDFEWAHBitmapOr extends AbstractGenericUDFEWAHBitmapBop {
+
+  public GenericUDFEWAHBitmapOr() {
+    super("EWAH_BITMAP_OR");
+  }
+
+  @Override
+  protected EWAHCompressedBitmap bitmapBop(
+      EWAHCompressedBitmap bitmap1, EWAHCompressedBitmap bitmap2) {
+    return bitmap1.or(bitmap2);
+  }
+}

Added: hive/trunk/ql/src/test/queries/clientpositive/index_bitmap.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_bitmap.q?rev=1096397&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_bitmap.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_bitmap.q Mon Apr 25 02:50:14 2011
@@ -0,0 +1,47 @@
+-- Bitmap index test on the partitioned table srcpart, indexed on key.
+-- Drop srcpart_index_proj before creating it below.
+DROP INDEX srcpart_index_proj on srcpart;
+
+-- Show the plan for creating the bitmap index, then create it, rebuild it and list the
+-- index rows of partition ds='2008-04-08', hr=11.
+EXPLAIN
+CREATE INDEX srcpart_index_proj ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD;
+CREATE INDEX srcpart_index_proj ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD;
+ALTER INDEX srcpart_index_proj ON srcpart REBUILD;
+SELECT x.* FROM default__srcpart_srcpart_index_proj__ x WHERE x.ds = '2008-04-08' and x.hr = 11 ORDER BY key;
+
+-- Write bucket names and offsets of non-empty bitmaps for key=100, ds='2008-04-08' to a
+-- directory, use it as the block filter file and query srcpart via HiveIndexedInputFormat.
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+INSERT OVERWRITE DIRECTORY "/tmp/index_test_index_result" SELECT `_bucketname`,
+COLLECT_SET(`_offset`) as `_offsets` FROM default__srcpart_srcpart_index_proj__
+x WHERE NOT EWAH_BITMAP_EMPTY(`_bitmaps`) AND x.key=100 AND x.ds = '2008-04-08' GROUP BY `_bucketname`;
+SET hive.index.blockfilter.file=/tmp/index_test_index_result;
+SET hive.input.format=org.apache.hadoop.hive.ql.index.HiveIndexedInputFormat;
+SELECT key, value FROM srcpart WHERE key=100 AND ds = '2008-04-08' ORDER BY key;
+
+-- Same steps, additionally restricted to hr = 11.
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+INSERT OVERWRITE DIRECTORY "/tmp/index_test_index_result" SELECT `_bucketname` ,
+COLLECT_SET(`_offset`) as `_offsets` FROM default__srcpart_srcpart_index_proj__
+x WHERE NOT EWAH_BITMAP_EMPTY(`_bitmaps`) AND x.key=100 AND x.ds = '2008-04-08' and x.hr = 11 GROUP BY `_bucketname`;
+SET hive.index.blockfilter.file=/tmp/index_test_index_result;
+SET hive.input.format=org.apache.hadoop.hive.ql.index.HiveIndexedInputFormat;
+SELECT key, value FROM srcpart WHERE key=100 AND ds = '2008-04-08' and hr = 11 ORDER BY key;
+
+-- The same query through the plain HiveInputFormat (presumably the baseline to compare with).
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SELECT key, value FROM srcpart WHERE key=100 AND ds = '2008-04-08' and hr = 11 ORDER BY key;
+
+DROP INDEX srcpart_index_proj on srcpart;
+
+-- Second round: recreate the index and repeat without any partition restriction.
+EXPLAIN
+CREATE INDEX srcpart_index_proj ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD;
+CREATE INDEX srcpart_index_proj ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD;
+ALTER  INDEX srcpart_index_proj ON srcpart REBUILD;
+SELECT x.* FROM default__srcpart_srcpart_index_proj__ x;
+
+-- Write bucket names and offsets for key=100 and query srcpart via HiveIndexedInputFormat.
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+INSERT OVERWRITE DIRECTORY "/tmp/index_result" SELECT `_bucketname` ,
+COLLECT_SET(`_offset`) as `_offsets` FROM default__srcpart_srcpart_index_proj__
+WHERE NOT EWAH_BITMAP_EMPTY(`_bitmaps`) AND key=100 GROUP BY `_bucketname`;
+SET hive.index.blockfilter.file=/tmp/index_result;
+SET hive.input.format=org.apache.hadoop.hive.ql.index.HiveIndexedInputFormat;
+SELECT key, value FROM srcpart WHERE key=100 ORDER BY key;
+
+-- The same query through the plain HiveInputFormat (presumably the baseline to compare with).
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SELECT key, value FROM srcpart WHERE key=100 ORDER BY key;
+
+DROP INDEX srcpart_index_proj on srcpart;

Added: hive/trunk/ql/src/test/queries/clientpositive/index_bitmap1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_bitmap1.q?rev=1096397&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_bitmap1.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_bitmap1.q Mon Apr 25 02:50:14 2011
@@ -0,0 +1,18 @@
+-- Bitmap index test on the table src, indexed on key.
+-- Show the plan for creating the index, then create it, rebuild it and list its rows.
+EXPLAIN
+CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
+CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
+ALTER INDEX src_index ON src REBUILD;
+SELECT x.* FROM default__src_src_index__ x ORDER BY key;
+
+-- Write bucket names and offsets of non-empty bitmaps for key=100 to a directory, use it
+-- as the block filter file and query src via HiveIndexedInputFormat.
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+INSERT OVERWRITE DIRECTORY "/tmp/index_result" SELECT `_bucketname`,
+COLLECT_SET(`_offset`) as `_offsets` FROM default__src_src_index__ WHERE NOT
+EWAH_BITMAP_EMPTY(`_bitmaps`) AND key=100 GROUP BY `_bucketname`;
+SET hive.index.blockfilter.file=/tmp/index_result;
+SET hive.input.format=org.apache.hadoop.hive.ql.index.HiveIndexedInputFormat;
+SELECT key, value FROM src WHERE key=100 ORDER BY key;
+
+-- The same query through the plain HiveInputFormat (presumably the baseline to compare with).
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SELECT key, value FROM src WHERE key=100 ORDER BY key;
+
+DROP INDEX src_index ON src;

Added: hive/trunk/ql/src/test/queries/clientpositive/index_bitmap2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_bitmap2.q?rev=1096397&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_bitmap2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_bitmap2.q Mon Apr 25 02:50:14 2011
@@ -0,0 +1,35 @@
+-- Bitmap index test combining two indexes on src (one on key, one on value) with OR.
+-- Show the plans for creating both indexes.
+EXPLAIN
+CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
+EXPLAIN
+CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD;
+
+-- Create and rebuild both indexes, then list their rows.
+CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
+CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD;
+ALTER INDEX src1_index ON src REBUILD;
+ALTER INDEX src2_index ON src REBUILD;
+SELECT * FROM default__src_src1_index__ ORDER BY key;
+SELECT * FROM default__src_src2_index__ ORDER BY value;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+-- Union the offsets found through each index. The value filter must use the same literal
+-- ("val_2") as the verification queries below so that both halves of the OR are covered.
+INSERT OVERWRITE DIRECTORY "/tmp/index_result" 
+SELECT t.bucketname as `_bucketname`, COLLECT_SET(t.offset) AS `_offsets` FROM
+  (SELECT `_bucketname` AS bucketname, `_offset` AS offset
+      FROM default__src_src1_index__ 
+      WHERE key = 0 AND NOT EWAH_BITMAP_EMPTY(`_bitmaps`) UNION ALL
+   SELECT `_bucketname` AS bucketname, `_offset` AS offset
+      FROM default__src_src2_index__
+      WHERE value = "val_2" AND NOT EWAH_BITMAP_EMPTY(`_bitmaps`)) t
+GROUP BY t.bucketname;
+
+SET hive.index.blockfilter.file=/tmp/index_result;
+SET hive.input.format=org.apache.hadoop.hive.ql.index.HiveIndexedInputFormat;
+
+-- Query through the index-filtered input format.
+SELECT key, value FROM src WHERE key=0 OR value = "val_2" ORDER BY key;
+
+-- The same query through the plain HiveInputFormat (presumably the baseline to compare with).
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SELECT key, value FROM src WHERE key=0 OR value = "val_2" ORDER BY key;
+
+DROP INDEX src1_index ON src;
+DROP INDEX src2_index ON src;
+

Added: hive/trunk/ql/src/test/queries/clientpositive/index_bitmap3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_bitmap3.q?rev=1096397&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_bitmap3.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_bitmap3.q Mon Apr 25 02:50:14 2011
@@ -0,0 +1,47 @@
+EXPLAIN
+CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
+EXPLAIN
+CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD;
+-- Create and rebuild bitmap indexes on src(key) and src(value), then dump both index tables.
+CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
+CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD;
+ALTER INDEX src1_index ON src REBUILD;
+ALTER INDEX src2_index ON src REBUILD;
+SELECT * FROM default__src_src1_index__ ORDER BY key;
+SELECT * FROM default__src_src2_index__ ORDER BY value;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+-- Plan for the index lookup: join the two index tables on bucket name and offset, keep pairs whose ANDed bitmaps are non-empty.
+EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
+        WHERE key = 0) a
+  JOIN 
+     (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
+        WHERE value = "val_0") b
+  ON
+    a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
+-- Run the same lookup and write the bucket names and offsets to /tmp/index_result.
+INSERT OVERWRITE DIRECTORY "/tmp/index_result" 
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
+        WHERE key = 0) a
+  JOIN 
+     (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
+        WHERE value = "val_0") b
+  ON
+    a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
+
+SET hive.index.blockfilter.file=/tmp/index_result;
+SET hive.input.format=org.apache.hadoop.hive.ql.index.HiveIndexedInputFormat;
+-- Same AND query twice: first with the indexed input format set above, then with the plain HiveInputFormat.
+SELECT key, value FROM src WHERE key=0 AND value = "val_0" ORDER BY key;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SELECT key, value FROM src WHERE key=0 AND value = "val_0" ORDER BY key;
+-- Clean up both indexes.
+DROP INDEX src1_index ON src;
+DROP INDEX src2_index ON src;
+

Added: hive/trunk/ql/src/test/queries/clientpositive/index_bitmap_rc.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_bitmap_rc.q?rev=1096397&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_bitmap_rc.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_bitmap_rc.q Mon Apr 25 02:50:14 2011
@@ -0,0 +1,53 @@
+CREATE TABLE srcpart_rc (key int, value string) PARTITIONED BY (ds string, hr int) STORED AS RCFILE;
+-- Copy four srcpart partitions (two ds values, two hr values) into the RCFILE table.
+INSERT OVERWRITE TABLE srcpart_rc PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 11;
+INSERT OVERWRITE TABLE srcpart_rc PARTITION (ds='2008-04-08', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 12;
+INSERT OVERWRITE TABLE srcpart_rc PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 11;
+INSERT OVERWRITE TABLE srcpart_rc PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 12;
+-- First pass: bitmap index on key, with lookups restricted by ds and then by ds and hr.
+EXPLAIN
+CREATE INDEX srcpart_rc_index ON TABLE srcpart_rc(key) as 'BITMAP' WITH DEFERRED REBUILD;
+CREATE INDEX srcpart_rc_index ON TABLE srcpart_rc(key) as 'BITMAP' WITH DEFERRED REBUILD;
+ALTER INDEX srcpart_rc_index ON srcpart_rc REBUILD;
+SELECT x.* FROM default__srcpart_rc_srcpart_rc_index__ x WHERE x.ds = '2008-04-08' and x.hr = 11 ORDER BY key;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+INSERT OVERWRITE DIRECTORY "/tmp/index_test_index_result" SELECT `_bucketname`,
+COLLECT_SET(`_offset`) as `_offsets` FROM default__srcpart_rc_srcpart_rc_index__
+x WHERE NOT EWAH_BITMAP_EMPTY(`_bitmaps`) AND x.key=100 AND x.ds = '2008-04-08' GROUP BY `_bucketname`;
+SET hive.index.blockfilter.file=/tmp/index_test_index_result;
+SET hive.input.format=org.apache.hadoop.hive.ql.index.HiveIndexedInputFormat;
+SELECT key, value FROM srcpart_rc WHERE key=100 AND ds = '2008-04-08' ORDER BY key;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+INSERT OVERWRITE DIRECTORY "/tmp/index_test_index_result" SELECT `_bucketname` ,
+COLLECT_SET(`_offset`) as `_offsets` FROM default__srcpart_rc_srcpart_rc_index__
+x WHERE NOT EWAH_BITMAP_EMPTY(`_bitmaps`) AND x.key=100 AND x.ds = '2008-04-08' and x.hr = 11 GROUP BY `_bucketname`;
+SET hive.index.blockfilter.file=/tmp/index_test_index_result;
+SET hive.input.format=org.apache.hadoop.hive.ql.index.HiveIndexedInputFormat;
+SELECT key, value FROM srcpart_rc WHERE key=100 AND ds = '2008-04-08' and hr = 11 ORDER BY key;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SELECT key, value FROM srcpart_rc WHERE key=100 AND ds = '2008-04-08' and hr = 11 ORDER BY key;
+
+DROP INDEX srcpart_rc_index on srcpart_rc;
+-- Second pass: recreate the index and look up key=100 with no partition predicate.
+EXPLAIN
+CREATE INDEX srcpart_rc_index ON TABLE srcpart_rc(key) as 'BITMAP' WITH DEFERRED REBUILD;
+CREATE INDEX srcpart_rc_index ON TABLE srcpart_rc(key) as 'BITMAP' WITH DEFERRED REBUILD;
+ALTER  INDEX srcpart_rc_index ON srcpart_rc REBUILD;
+SELECT x.* FROM default__srcpart_rc_srcpart_rc_index__ x WHERE x.key = 100;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+INSERT OVERWRITE DIRECTORY "/tmp/index_result" SELECT `_bucketname` ,
+COLLECT_SET(`_offset`) as `_offsets` FROM default__srcpart_rc_srcpart_rc_index__
+WHERE NOT EWAH_BITMAP_EMPTY(`_bitmaps`) AND key=100 GROUP BY `_bucketname`;
+SET hive.index.blockfilter.file=/tmp/index_result;
+SET hive.input.format=org.apache.hadoop.hive.ql.index.HiveIndexedInputFormat;
+SELECT key, value FROM srcpart_rc WHERE key=100 ORDER BY key;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SELECT key, value FROM srcpart_rc WHERE key=100 ORDER BY key;
+-- Clean up the index and the table.
+DROP INDEX srcpart_rc_index on srcpart_rc;
+DROP TABLE srcpart_rc;

Added: hive/trunk/ql/src/test/queries/clientpositive/udf_bitmap_and.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udf_bitmap_and.q?rev=1096397&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udf_bitmap_and.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/udf_bitmap_and.q Mon Apr 25 02:50:14 2011
@@ -0,0 +1,3 @@
+select ewah_bitmap_and(array(13,2,4,8589934592,4096,0), array(13,2,4,8589934592,4096,0)) from src limit 1;
+-- The query above ANDs an array with an identical copy. This one ANDs two arrays that differ in their first and fifth elements.
+select ewah_bitmap_and(array(13,2,4,8589934592,4096,0), array(8,2,4,8589934592,128,0)) from src limit 1;

Added: hive/trunk/ql/src/test/queries/clientpositive/udf_bitmap_empty.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udf_bitmap_empty.q?rev=1096397&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udf_bitmap_empty.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/udf_bitmap_empty.q Mon Apr 25 02:50:14 2011
@@ -0,0 +1,3 @@
+select ewah_bitmap_empty(array(13,2,4,8589934592,0,0)) from src limit 1;
+-- Same input as above except the fifth element is 4096 instead of 0.
+select ewah_bitmap_empty(array(13,2,4,8589934592,4096,0)) from src limit 1;

Added: hive/trunk/ql/src/test/queries/clientpositive/udf_bitmap_or.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udf_bitmap_or.q?rev=1096397&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udf_bitmap_or.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/udf_bitmap_or.q Mon Apr 25 02:50:14 2011
@@ -0,0 +1,3 @@
+select ewah_bitmap_or(array(13,2,4,8589934592,4096,0), array(13,2,4,8589934592,4096,0)) from src limit 1;
+-- The query above ORs an array with an identical copy. This one ORs two arrays that differ in their first and fifth elements.
+select ewah_bitmap_or(array(13,2,4,8589934592,4096,0), array(8,2,4,8589934592,128,0)) from src limit 1;