You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by bi...@apache.org on 2012/11/08 07:22:31 UTC

svn commit: r1406941 - in /pig/trunk: CHANGES.txt src/org/apache/pig/ResourceStatistics.java test/org/apache/pig/test/PigStorageWithStatistics.java

Author: billgraham
Date: Thu Nov  8 06:22:31 2012
New Revision: 1406941

URL: http://svn.apache.org/viewvc?rev=1406941&view=rev
Log:
PIG-2582: Store size in bytes (not mbytes) in ResourceStatistics (prkommireddi via billgraham)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/ResourceStatistics.java
    pig/trunk/test/org/apache/pig/test/PigStorageWithStatistics.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1406941&r1=1406940&r2=1406941&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Nov  8 06:22:31 2012
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
 
 IMPROVEMENTS
 
+PIG-2582: Store size in bytes (not mbytes) in ResourceStatistics (prkommireddi via billgraham)
+
 PIG-3006: Modernize a chunk of the tests (jcoveney via cheolsoo)
 
 PIG-2997: Provide a convenience constructor on PigServer that accepts Configuration (prkommireddi via rohini)

Modified: pig/trunk/src/org/apache/pig/ResourceStatistics.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/ResourceStatistics.java?rev=1406941&r1=1406940&r2=1406941&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/ResourceStatistics.java (original)
+++ pig/trunk/src/org/apache/pig/ResourceStatistics.java Thu Nov  8 06:22:31 2012
@@ -44,21 +44,22 @@ public class ResourceStatistics implemen
     // setters disallow setting them to null.
     
     private static final long serialVersionUID = 1L;
-    public Long mBytes; // size in megabytes
-    public Long numRecords;  // number of records
-    public Long avgRecordSize;
-    public ResourceFieldStatistics[] fields = new ResourceFieldStatistics[0];
+    private Long numRecords; // number of records
+    private Long avgRecordSize; // average record size in bytes
+    private ResourceFieldStatistics[] fields = new ResourceFieldStatistics[0];
+    private Long bytes;
 
     /**
      * Statistics for a given field in the data.
      */
     public static class ResourceFieldStatistics implements Serializable {
 
-        public static final long serialVersionUID = 1L;
+        private static final long serialVersionUID = 1L;
 
-        public int version;
+        private int version;
 
-        public Long numDistinctValues;  // number of distinct values represented in this field
+        private Long numDistinctValues; // number of distinct values represented
+                                        // in this field
 
         /**
          * We need some way to represent a histogram of values in the field,
@@ -71,14 +72,14 @@ public class ResourceStatistics implemen
          * an ordered array of the most common values, 
          * in descending order of frequency
          */
-        public Object[] mostCommonValues = new Object[0];
+        private Object[] mostCommonValues = new Object[0];
         
         /**
          * an array that matches the mostCommonValues array, and lists
          * the frequencies of those values as a fraction (0 through 1) of
          * the total number of records
          */
-        public float[] mostCommonValuesFreq = new float[0];
+        private float[] mostCommonValuesFreq = new float[0];
         
         /**
          * an ordered array of values, from min val to max val
@@ -88,7 +89,7 @@ public class ResourceStatistics implemen
          * NOTE: if mostCommonValues is non-empty, the values in that array
          * should not be included in the histogram. Adjust accordingly.
          */
-        public Object[] valueHistogram = new Object[0];
+        private Object[] valueHistogram = new Object[0];
 
         
         public int getVersion() {
@@ -191,47 +192,66 @@ public class ResourceStatistics implemen
             return sb.toString();
         }
     }
-
     
     public Long getmBytes() {
-        return mBytes;
+        return this.bytes / 1024 / 1024;
     }
+
+    /**
+     * Sets the size from a value given in megabytes; it is stored internally in bytes.
+     * @param mBytes size in megabytes
+     * @deprecated Use {@link ResourceStatistics#setSizeInBytes(Long)} instead
+     */
+    @Deprecated
     public ResourceStatistics setmBytes(Long mBytes) {
-        this.mBytes = mBytes;
+        this.bytes = mBytes * 1024 * 1024;
         return this;
     }
+    
+    /**
+     * Sets the size in bytes
+     * 
+     * @param bytes
+     */
+    public void setSizeInBytes(Long bytes) {
+        this.bytes = bytes;
+    }
 
     /**
-     * @return getmBytes as bytes.
+     * @return size in bytes.
      */
     public Long getSizeInBytes() {
-        // Ideally size would be stored in bytes, and getmBytes would convert
-        // that number. However, mBytes is public so we cannot remove it, or
-        // guarantee it stays in sync with size in bytes.
-        return getmBytes() == null ? null : getmBytes() * 1024 * 1024;
+        return this.bytes;
     }
 
     public Long getNumRecords() {
         return numRecords;
     }
+    
     public ResourceStatistics setNumRecords(Long numRecords) {
         this.numRecords = numRecords;
         return this;
     }
-    
-    /* 
-     * returns average record size. This number can be explicitly specified by statistics, or
-     * if absent, computed using totalbytes/totalrecords. Will return null if can't be computed.
+
+    /*
+     * Returns the average record size in bytes. This number can be explicitly
+     * specified by statistics, or if absent, computed using
+     * totalbytes/totalrecords. Returns null if it cannot be computed.
      */
     public Long getAvgRecordSize() {
-        if (avgRecordSize == null && (mBytes != null && numRecords != null))
-            return mBytes / numRecords;
+        if (avgRecordSize == null && (bytes != null && numRecords != null))
+            return bytes / numRecords;
         else 
             return avgRecordSize;
     }
     
-    public void setAvgRecordSize(Long size) {
-        avgRecordSize = size;
+    /**
+     * Set average record size in bytes
+     * 
+     * @param sizeInBytes
+     */
+    public void setAvgRecordSize(Long sizeInBytes) {
+        avgRecordSize = sizeInBytes;
     }
     
     public ResourceFieldStatistics[] getFields() {
@@ -256,8 +276,8 @@ public class ResourceStatistics implemen
             return false;        
         ResourceStatistics other = (ResourceStatistics) anOther;
         return (Arrays.equals(fields, other.fields) &&
-                ((mBytes==null) 
-                        ? (other.mBytes==null) : mBytes.equals(other.mBytes)) &&
+                ((bytes == null) ? (other.bytes == null) : bytes
+                        .equals(other.bytes)) &&
                 ((numRecords == null) 
                         ? (other.numRecords==null) : numRecords.equals(other.numRecords)) 
         );
@@ -267,7 +287,7 @@ public class ResourceStatistics implemen
     public int hashCode() {
         int hash = 1;
         hash = 31*hash + Arrays.hashCode(fields);
-        hash = 31*hash + (mBytes == null ? 0 : mBytes.hashCode());
+        hash = 31 * hash + (bytes == null ? 0 : bytes.hashCode());
         hash = 31*hash + (numRecords == null ? 0 : numRecords.hashCode());
         return hash;
     }
@@ -277,7 +297,7 @@ public class ResourceStatistics implemen
     public String toString() {
         StringBuilder sb = new StringBuilder("Field Stats: \n");
         for (ResourceFieldStatistics f : fields) sb.append(f.toString());
-        sb.append("mBytes: "+mBytes);
+        sb.append("bytes: " + bytes);
         sb.append("numRecords: "+numRecords);
         return sb.toString();
     }

Modified: pig/trunk/test/org/apache/pig/test/PigStorageWithStatistics.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/PigStorageWithStatistics.java?rev=1406941&r1=1406940&r2=1406941&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/PigStorageWithStatistics.java (original)
+++ pig/trunk/test/org/apache/pig/test/PigStorageWithStatistics.java Thu Nov  8 06:22:31 2012
@@ -1,5 +1,7 @@
 package org.apache.pig.test;
 
+import java.io.IOException;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -7,11 +9,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.pig.ResourceStatistics;
 import org.apache.pig.builtin.PigStorage;
-import org.apache.pig.impl.util.UriUtil;
 import org.apache.pig.impl.util.Utils;
 
-import java.io.IOException;
-
 public class PigStorageWithStatistics extends PigStorage {
     private String loc = null;
 
@@ -25,11 +24,11 @@ public class PigStorageWithStatistics ex
     @Override
     public ResourceStatistics getStatistics(String location, Job job) throws IOException {
         ResourceStatistics stats = new ResourceStatistics();
-        stats.setmBytes(getInputmBytes());
+        stats.setSizeInBytes(getInputSizeInBytes());
         return stats;
     }
-
-    private Long getInputmBytes() throws IOException {
+    
+    private Long getInputSizeInBytes() throws IOException {
         if (loc == null) {
             return 0L;
         }
@@ -45,6 +44,6 @@ public class PigStorageWithStatistics ex
                 }
             }
         }
-        return inputBytes / 1024 / 1024;
+        return inputBytes;
     }
 }