You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sentry.apache.org by ak...@apache.org on 2017/08/24 00:11:15 UTC
sentry git commit: SENTRY-1892: Reduce memory consumption of
HMSPath$Entry and TPathEntry (Misha Dmitriev,
reviewed by Alex Kolbasov and Vadim Spector)
Repository: sentry
Updated Branches:
refs/heads/master a41a1447b -> 201808de0
SENTRY-1892: Reduce memory consumption of HMSPath$Entry and TPathEntry (Misha Dmitriev, reviewed by Alex Kolbasov and Vadim Spector)
Project: http://git-wip-us.apache.org/repos/asf/sentry/repo
Commit: http://git-wip-us.apache.org/repos/asf/sentry/commit/201808de
Tree: http://git-wip-us.apache.org/repos/asf/sentry/tree/201808de
Diff: http://git-wip-us.apache.org/repos/asf/sentry/diff/201808de
Branch: refs/heads/master
Commit: 201808de059b9cda38a34c3aefb4253b402b891d
Parents: a41a144
Author: Alexander Kolbasov <ak...@gmail.com>
Authored: Wed Aug 23 17:02:58 2017 -0700
Committer: Alexander Kolbasov <ak...@gmail.com>
Committed: Wed Aug 23 17:02:58 2017 -0700
----------------------------------------------------------------------
.../sentry/hdfs/service/thrift/TPathEntry.java | 76 ++++++-------
.../java/org/apache/sentry/hdfs/HMSPaths.java | 106 ++++++++++++++-----
.../org/apache/sentry/hdfs/HMSPathsDumper.java | 28 +++--
.../main/resources/sentry_hdfs_service.thrift | 4 +-
.../org/apache/sentry/hdfs/TestHMSPaths.java | 13 +++
5 files changed, 157 insertions(+), 70 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java
----------------------------------------------------------------------
diff --git a/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java b/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java
index 2a89368..210d219 100644
--- a/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java
+++ b/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java
@@ -34,14 +34,14 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
-@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)", date = "2017-04-26")
+@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)", date = "2017-08-21")
public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntry._Fields>, java.io.Serializable, Cloneable, Comparable<TPathEntry> {
private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TPathEntry");
private static final org.apache.thrift.protocol.TField TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("type", org.apache.thrift.protocol.TType.BYTE, (short)1);
private static final org.apache.thrift.protocol.TField PATH_ELEMENT_FIELD_DESC = new org.apache.thrift.protocol.TField("pathElement", org.apache.thrift.protocol.TType.STRING, (short)2);
- private static final org.apache.thrift.protocol.TField CHILDREN_FIELD_DESC = new org.apache.thrift.protocol.TField("children", org.apache.thrift.protocol.TType.SET, (short)4);
- private static final org.apache.thrift.protocol.TField AUTHZ_OBJS_FIELD_DESC = new org.apache.thrift.protocol.TField("authzObjs", org.apache.thrift.protocol.TType.SET, (short)5);
+ private static final org.apache.thrift.protocol.TField CHILDREN_FIELD_DESC = new org.apache.thrift.protocol.TField("children", org.apache.thrift.protocol.TType.LIST, (short)4);
+ private static final org.apache.thrift.protocol.TField AUTHZ_OBJS_FIELD_DESC = new org.apache.thrift.protocol.TField("authzObjs", org.apache.thrift.protocol.TType.LIST, (short)5);
private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
static {
@@ -51,8 +51,8 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr
private byte type; // required
private String pathElement; // required
- private Set<Integer> children; // required
- private Set<String> authzObjs; // optional
+ private List<Integer> children; // required
+ private List<String> authzObjs; // optional
/** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
public enum _Fields implements org.apache.thrift.TFieldIdEnum {
@@ -133,10 +133,10 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr
tmpMap.put(_Fields.PATH_ELEMENT, new org.apache.thrift.meta_data.FieldMetaData("pathElement", org.apache.thrift.TFieldRequirementType.REQUIRED,
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
tmpMap.put(_Fields.CHILDREN, new org.apache.thrift.meta_data.FieldMetaData("children", org.apache.thrift.TFieldRequirementType.REQUIRED,
- new org.apache.thrift.meta_data.SetMetaData(org.apache.thrift.protocol.TType.SET,
+ new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST,
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))));
tmpMap.put(_Fields.AUTHZ_OBJS, new org.apache.thrift.meta_data.FieldMetaData("authzObjs", org.apache.thrift.TFieldRequirementType.OPTIONAL,
- new org.apache.thrift.meta_data.SetMetaData(org.apache.thrift.protocol.TType.SET,
+ new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST,
new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))));
metaDataMap = Collections.unmodifiableMap(tmpMap);
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TPathEntry.class, metaDataMap);
@@ -148,7 +148,7 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr
public TPathEntry(
byte type,
String pathElement,
- Set<Integer> children)
+ List<Integer> children)
{
this();
this.type = type;
@@ -167,11 +167,11 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr
this.pathElement = other.pathElement;
}
if (other.isSetChildren()) {
- Set<Integer> __this__children = new HashSet<Integer>(other.children);
+ List<Integer> __this__children = new ArrayList<Integer>(other.children);
this.children = __this__children;
}
if (other.isSetAuthzObjs()) {
- Set<String> __this__authzObjs = new HashSet<String>(other.authzObjs);
+ List<String> __this__authzObjs = new ArrayList<String>(other.authzObjs);
this.authzObjs = __this__authzObjs;
}
}
@@ -244,16 +244,16 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr
public void addToChildren(int elem) {
if (this.children == null) {
- this.children = new HashSet<Integer>();
+ this.children = new ArrayList<Integer>();
}
this.children.add(elem);
}
- public Set<Integer> getChildren() {
+ public List<Integer> getChildren() {
return this.children;
}
- public void setChildren(Set<Integer> children) {
+ public void setChildren(List<Integer> children) {
this.children = children;
}
@@ -282,16 +282,16 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr
public void addToAuthzObjs(String elem) {
if (this.authzObjs == null) {
- this.authzObjs = new HashSet<String>();
+ this.authzObjs = new ArrayList<String>();
}
this.authzObjs.add(elem);
}
- public Set<String> getAuthzObjs() {
+ public List<String> getAuthzObjs() {
return this.authzObjs;
}
- public void setAuthzObjs(Set<String> authzObjs) {
+ public void setAuthzObjs(List<String> authzObjs) {
this.authzObjs = authzObjs;
}
@@ -332,7 +332,7 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr
if (value == null) {
unsetChildren();
} else {
- setChildren((Set<Integer>)value);
+ setChildren((List<Integer>)value);
}
break;
@@ -340,7 +340,7 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr
if (value == null) {
unsetAuthzObjs();
} else {
- setAuthzObjs((Set<String>)value);
+ setAuthzObjs((List<String>)value);
}
break;
@@ -634,17 +634,17 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr
}
break;
case 4: // CHILDREN
- if (schemeField.type == org.apache.thrift.protocol.TType.SET) {
+ if (schemeField.type == org.apache.thrift.protocol.TType.LIST) {
{
- org.apache.thrift.protocol.TSet _set32 = iprot.readSetBegin();
- struct.children = new HashSet<Integer>(2*_set32.size);
+ org.apache.thrift.protocol.TList _list32 = iprot.readListBegin();
+ struct.children = new ArrayList<Integer>(_list32.size);
int _elem33;
- for (int _i34 = 0; _i34 < _set32.size; ++_i34)
+ for (int _i34 = 0; _i34 < _list32.size; ++_i34)
{
_elem33 = iprot.readI32();
struct.children.add(_elem33);
}
- iprot.readSetEnd();
+ iprot.readListEnd();
}
struct.setChildrenIsSet(true);
} else {
@@ -652,17 +652,17 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr
}
break;
case 5: // AUTHZ_OBJS
- if (schemeField.type == org.apache.thrift.protocol.TType.SET) {
+ if (schemeField.type == org.apache.thrift.protocol.TType.LIST) {
{
- org.apache.thrift.protocol.TSet _set35 = iprot.readSetBegin();
- struct.authzObjs = new HashSet<String>(2*_set35.size);
+ org.apache.thrift.protocol.TList _list35 = iprot.readListBegin();
+ struct.authzObjs = new ArrayList<String>(_list35.size);
String _elem36;
- for (int _i37 = 0; _i37 < _set35.size; ++_i37)
+ for (int _i37 = 0; _i37 < _list35.size; ++_i37)
{
_elem36 = iprot.readString();
struct.authzObjs.add(_elem36);
}
- iprot.readSetEnd();
+ iprot.readListEnd();
}
struct.setAuthzObjsIsSet(true);
} else {
@@ -693,12 +693,12 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr
if (struct.children != null) {
oprot.writeFieldBegin(CHILDREN_FIELD_DESC);
{
- oprot.writeSetBegin(new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.I32, struct.children.size()));
+ oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I32, struct.children.size()));
for (int _iter38 : struct.children)
{
oprot.writeI32(_iter38);
}
- oprot.writeSetEnd();
+ oprot.writeListEnd();
}
oprot.writeFieldEnd();
}
@@ -706,12 +706,12 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr
if (struct.isSetAuthzObjs()) {
oprot.writeFieldBegin(AUTHZ_OBJS_FIELD_DESC);
{
- oprot.writeSetBegin(new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.STRING, struct.authzObjs.size()));
+ oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.authzObjs.size()));
for (String _iter39 : struct.authzObjs)
{
oprot.writeString(_iter39);
}
- oprot.writeSetEnd();
+ oprot.writeListEnd();
}
oprot.writeFieldEnd();
}
@@ -766,10 +766,10 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr
struct.pathElement = iprot.readString();
struct.setPathElementIsSet(true);
{
- org.apache.thrift.protocol.TSet _set42 = new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.I32, iprot.readI32());
- struct.children = new HashSet<Integer>(2*_set42.size);
+ org.apache.thrift.protocol.TList _list42 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I32, iprot.readI32());
+ struct.children = new ArrayList<Integer>(_list42.size);
int _elem43;
- for (int _i44 = 0; _i44 < _set42.size; ++_i44)
+ for (int _i44 = 0; _i44 < _list42.size; ++_i44)
{
_elem43 = iprot.readI32();
struct.children.add(_elem43);
@@ -779,10 +779,10 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr
BitSet incoming = iprot.readBitSet(1);
if (incoming.get(0)) {
{
- org.apache.thrift.protocol.TSet _set45 = new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.STRING, iprot.readI32());
- struct.authzObjs = new HashSet<String>(2*_set45.size);
+ org.apache.thrift.protocol.TList _list45 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32());
+ struct.authzObjs = new ArrayList<String>(_list45.size);
String _elem46;
- for (int _i47 = 0; _i47 < _set45.size; ++_i47)
+ for (int _i47 = 0; _i47 < _list45.size; ++_i47)
{
_elem46 = iprot.readString();
struct.authzObjs.add(_elem46);
http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java
----------------------------------------------------------------------
diff --git a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java
index 6e71561..5a98263 100644
--- a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java
+++ b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java
@@ -132,10 +132,10 @@ public class HMSPaths implements AuthzPaths {
private EntryType type;
private String pathElement;
- // A set of authorizable objects associated with this entry. Authorizable
- // object should be case insensitive. The set is allocated lazily to avoid
- // wasting memory due to empty sets.
- private Set<String> authzObjs;
+ // A set (or single object when set size is 1) of authorizable objects associated
+ // with this entry. Authorizable object should be case insensitive. The set is
+ // allocated lazily to avoid wasting memory due to empty sets.
+ private Object authzObjs;
// Path of child element to the path entry mapping, e.g. 'b' -> '/a/b'
// This is allocated lazily to avoid wasting memory due to empty maps.
@@ -161,9 +161,9 @@ public class HMSPaths implements AuthzPaths {
* @param parent the parent node. If not specified, this entry is a root entry.
* @param pathElement the path element of this entry on the tree.
* @param type entry type.
- * @param authzObjs a set of authz objects.
+ * @param authzObjs a collection of authz objects.
*/
- Entry(Entry parent, String pathElement, EntryType type, Set<String> authzObjs) {
+ Entry(Entry parent, String pathElement, EntryType type, Collection<String> authzObjs) {
this.parent = parent;
this.type = type;
this.pathElement = pathElement.intern();
@@ -204,26 +204,42 @@ public class HMSPaths implements AuthzPaths {
void removeAuthzObj(String authzObj) {
if (authzObjs != null) {
- authzObjs.remove(authzObj);
+ if (authzObjs instanceof Set) {
+ Set<String> authzObjsSet = (Set<String>) authzObjs;
+ authzObjsSet.remove(authzObj);
+ if (authzObjsSet.size() == 1) {
+ authzObjs = authzObjsSet.iterator().next();
+ }
+ } else if (authzObjs.equals(authzObj)){
+ authzObjs = null;
+ }
}
}
void addAuthzObj(String authzObj) {
if (authzObj != null) {
if (authzObjs == null) {
- authzObjs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
+ authzObjs = authzObj;
+ } else {
+ Set<String> authzObjsSet;
+ if (authzObjs instanceof String) {
+ if (authzObjs.equals(authzObj)) {
+ return;
+ } else {
+ authzObjs = authzObjsSet = newTreeSetWithElement((String) authzObjs);
+ }
+ } else {
+ authzObjsSet = (Set) authzObjs;
+ }
+ authzObjsSet.add(authzObj.intern());
}
- authzObjs.add(authzObj.intern());
}
}
- void addAuthzObjs(Set<String> authzObjs) {
+ void addAuthzObjs(Collection<String> authzObjs) {
if (authzObjs != null) {
- if (this.authzObjs == null) {
- this.authzObjs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
- }
for (String authzObj : authzObjs) {
- this.authzObjs.add(authzObj.intern());
+ addAuthzObj(authzObj.intern());
}
}
}
@@ -238,7 +254,17 @@ public class HMSPaths implements AuthzPaths {
public String toString() {
return String.format("Entry[fullPath: %s, type: %s, authObject: %s]",
- getFullPath(), type, authzObjs != null ? Joiner.on(",").join(authzObjs) : "");
+ getFullPath(), type, authzObjsToString());
+ }
+
+ private String authzObjsToString() {
+ if (authzObjs == null) {
+ return "";
+ } else if (authzObjs instanceof String) {
+ return (String) authzObjs;
+ } else {
+ return Joiner.on(",").join((Set) authzObjs);
+ }
}
@Override
@@ -420,9 +446,9 @@ public class HMSPaths implements AuthzPaths {
// entry no longer maps to any authzObj, removes the
// entry recursively.
if (authzObjs != null) {
- authzObjs.remove(authzObj);
+ removeAuthzObj(authzObj);
}
- if (authzObjs == null || authzObjs.isEmpty()) {
+ if (authzObjs == null) {
deleteFromParent();
}
} else {
@@ -433,7 +459,7 @@ public class HMSPaths implements AuthzPaths {
if (getType() == EntryType.AUTHZ_OBJECT) {
setType(EntryType.DIR);
if (authzObjs != null) {
- authzObjs.remove(authzObj);
+ removeAuthzObj(authzObj);
}
}
}
@@ -497,10 +523,42 @@ public class HMSPaths implements AuthzPaths {
/**
* @return the set of auth objects. The returned set should be used only
- * for querying, not for any modifications.
+ * for querying, not for any modifications. If you just want to find out
+ * the set size or whether it's empty, use the specialized getAuthzObjsSize()
+ * and isAuthzObjsEmpty() methods that performs better.
*/
- public Set<String> getAuthzObjs() {
- return authzObjs != null ? authzObjs : Collections.<String>emptySet();
+ Set<String> getAuthzObjs() {
+ if (authzObjs != null) {
+ if (authzObjs instanceof Set) {
+ return (Set<String>) authzObjs;
+ } else {
+ return newTreeSetWithElement((String) authzObjs);
+ }
+ } else {
+ return Collections.<String>emptySet();
+ }
+ }
+
+ int getAuthzObjsSize() {
+ if (authzObjs != null) {
+ if (authzObjs instanceof Set) {
+ return ((Set<String>) authzObjs).size();
+ } else {
+ return 1;
+ }
+ } else {
+ return 0;
+ }
+ }
+
+ boolean isAuthzObjsEmpty() {
+ return authzObjs == null;
+ }
+
+ private Set<String> newTreeSetWithElement(String el) {
+ Set<String> result = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
+ result.add(el);
+ return result;
}
public Entry findPrefixEntry(List<String> pathElements) {
@@ -538,17 +596,17 @@ public class HMSPaths implements AuthzPaths {
boolean isPartialMatchOk, Entry lastAuthObj) {
Entry found = null;
if (index == pathElements.length) {
- if (isPartialMatchOk && getAuthzObjs().size() != 0) {
+ if (isPartialMatchOk && !isAuthzObjsEmpty()) {
found = this;
}
} else {
Entry child = getChild(pathElements[index]);
if (child != null) {
if (index == pathElements.length - 1) {
- found = (child.getAuthzObjs().size() != 0) ? child : lastAuthObj;
+ found = (!child.isAuthzObjsEmpty()) ? child : lastAuthObj;
} else {
found = child.find(pathElements, index + 1, isPartialMatchOk,
- (child.getAuthzObjs().size() != 0) ? child : lastAuthObj);
+ (!child.isAuthzObjsEmpty()) ? child : lastAuthObj);
}
} else {
if (isPartialMatchOk) {
http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java
----------------------------------------------------------------------
diff --git a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java
index bf6c3de..1267093 100644
--- a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java
+++ b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java
@@ -17,6 +17,7 @@
*/
package org.apache.sentry.hdfs;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
@@ -30,11 +31,15 @@ import org.apache.sentry.hdfs.HMSPaths.Entry;
import org.apache.sentry.hdfs.HMSPaths.EntryType;
import org.apache.sentry.hdfs.service.thrift.TPathEntry;
import org.apache.sentry.hdfs.service.thrift.TPathsDump;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> {
private final HMSPaths hmsPaths;
+ private static final Logger LOG = LoggerFactory.getLogger(HMSPathsDumper.class);
+
static class Tuple {
private final TPathEntry entry;
private final int id;
@@ -65,9 +70,16 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> {
hmsPaths.getRootEntry(), tRootTuple.entry, counter, idMap, dups);
TPathsDump dump = new TPathsDump(tRootTuple.id, idMap);
+ String stringDupMsg = "";
if (minimizeSize) {
- dump.setDupStringValues(Arrays.asList(dups.getDupStringValues()));
+ String[] dupStringValues = dups.getDupStringValues();
+ dump.setDupStringValues(Arrays.asList(dupStringValues));
+ stringDupMsg = String.format(" %d total path strings, %d duplicate strings found, " +
+ "compacted to %d unique strings.", dups.nTotalStrings, dups.nDupStrings,
+ dupStringValues.length);
}
+ LOG.info("Paths Dump created." + stringDupMsg);
+
return dump;
}
@@ -83,14 +95,14 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> {
private Tuple createTPathEntry(Entry entry, AtomicInteger idCounter,
Map<Integer, TPathEntry> idMap, DupDetector dups) {
int myId = idCounter.incrementAndGet();
- Set<Integer> children = entry.hasChildren() ?
- new HashSet<Integer>(entry.numChildren()) : Collections.<Integer>emptySet();
+ List<Integer> children = entry.hasChildren() ?
+ new ArrayList<Integer>(entry.numChildren()) : Collections.<Integer>emptyList();
String pathElement = entry.getPathElement();
String sameOrReplacementId =
dups != null ? dups.getReplacementString(pathElement) : pathElement;
TPathEntry tEntry = new TPathEntry(entry.getType().getByte(), sameOrReplacementId, children);
- if (!entry.getAuthzObjs().isEmpty()) {
- tEntry.setAuthzObjs(entry.getAuthzObjs());
+ if (!entry.isAuthzObjsEmpty()) {
+ tEntry.setAuthzObjs(new ArrayList<>(entry.getAuthzObjs()));
}
idMap.put(myId, tEntry);
return new Tuple(tEntry, myId);
@@ -142,7 +154,7 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> {
child = new Entry(parent, tChildPathElement,
EntryType.fromByte(tChild.getType()), tChild.getAuthzObjs());
}
- if (child.getAuthzObjs().size() != 0) {
+ if (!child.isAuthzObjsEmpty()) {
for (String authzObj: child.getAuthzObjs()) {
Set<Entry> paths = authzObjToPath.get(authzObj);
if (paths == null) {
@@ -200,6 +212,8 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> {
// Size of the auxiliary string array - essentially the number of duplicate
// strings that we detected and encoded.
private int auxArraySize;
+ // For statistics/debugging
+ int nTotalStrings, nDupStrings;
/**
* Finds duplicate strings in the tree of Entry objects with the given root,
@@ -215,6 +229,7 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> {
if (keys[i] != null) {
if (values[i] >= MIN_NUM_DUPLICATES) {
values[i] = auxArraySize++;
+ nDupStrings += values[i];
} else { // No duplication for this string
keys[i] = null;
values[i] = -1; // Just to mark invalid slots
@@ -258,6 +273,7 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> {
}
private void inspectEntry(Entry entry) {
+ nTotalStrings++;
String pathElement = entry.getPathElement();
if (pathElement.length() > AVG_ID_LENGTH) {
// In the serialized data, it doesn't make sense to replace string origS
http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift
----------------------------------------------------------------------
diff --git a/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift b/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift
index 22dea02..465b421 100644
--- a/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift
+++ b/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift
@@ -49,10 +49,10 @@ struct TPathEntry {
2: required string pathElement;
# The child tuple id of the Path Entry.
-4: required set<i32> children;
+4: required list<i32> children;
# A set of authzObjs associated with the Path Entry.
-5: optional set<string> authzObjs;
+5: optional list<string> authzObjs;
}
struct TPathsDump {
http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java
----------------------------------------------------------------------
diff --git a/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java b/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java
index a0d7bdc..20ed97c 100644
--- a/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java
+++ b/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java
@@ -63,6 +63,9 @@ public class TestHMSPaths {
root.toString();
Assert.assertNull(root.getParent());
Assert.assertEquals(HMSPaths.EntryType.DIR, root.getType());
+ Assert.assertTrue(root.getAuthzObjsSize() == 0);
+ Assert.assertTrue(root.isAuthzObjsEmpty());
+ // getAuthzObjs().size() is not recommended, but let's check that it works correctly
Assert.assertTrue(root.getAuthzObjs().size() == 0);
Assert.assertEquals(Path.SEPARATOR, root.getFullPath());
Assert.assertFalse(root.hasChildren());
@@ -127,6 +130,9 @@ public class TestHMSPaths {
Assert.assertEquals(root, entry.getParent());
Assert.assertEquals(HMSPaths.EntryType.PREFIX, entry.getType());
Assert.assertEquals("a", entry.getPathElement());
+ Assert.assertEquals(0, entry.getAuthzObjsSize());
+ Assert.assertTrue(entry.isAuthzObjsEmpty());
+ // getAuthzObjs().size() is not recommended, but let's check that it works correctly
Assert.assertEquals(0, entry.getAuthzObjs().size());
Assert.assertEquals(Path.SEPARATOR + "a", entry.getFullPath());
Assert.assertFalse(entry.hasChildren());
@@ -171,6 +177,10 @@ public class TestHMSPaths {
entry.getParent().getType());
Assert.assertEquals("b", entry.getPathElement());
Assert.assertEquals("a", entry.getParent().getPathElement());
+ Assert.assertTrue(entry.getAuthzObjsSize() == 0);
+ Assert.assertTrue(entry.isAuthzObjsEmpty());
+ Assert.assertTrue(entry.getParent().getAuthzObjsSize() == 0);
+ // getAuthzObjs().size() is not recommended, but let's check that it works correctly
Assert.assertTrue(entry.getAuthzObjs().size() == 0);
Assert.assertTrue(entry.getParent().getAuthzObjs().size() == 0);
Assert.assertEquals(Path.SEPARATOR + "a" + Path.SEPARATOR + "b",
@@ -296,6 +306,9 @@ public class TestHMSPaths {
Assert.assertNull(root.find(new String[]{"a", "b", "t", "p1"},
true));
Assert.assertEquals(HMSPaths.EntryType.DIR, entry.getType());
+ Assert.assertEquals(entry.getAuthzObjsSize(), 0);
+ Assert.assertTrue(entry.isAuthzObjsEmpty());
+ // getAuthzObjs().size() is not recommended, but let's check that it works correctly
Assert.assertEquals(entry.getAuthzObjs().size(), 0);
Assert.assertNull(root.find(new String[]{"a", "b", "t", "p1"}, false));