You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pc...@apache.org on 2018/05/19 19:10:23 UTC

[arrow] branch master updated: ARROW-2597: [Plasma] remove UniqueIDHasher

This is an automated email from the ASF dual-hosted git repository.

pcmoritz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new f319bca  ARROW-2597: [Plasma] remove UniqueIDHasher
f319bca is described below

commit f319bcad7b3e5ed45b92f2489a6bb2e31ef44005
Author: Zhijun Fu <pi...@antfin.com>
AuthorDate: Sat May 19 12:10:56 2018 -0700

    ARROW-2597: [Plasma] remove UniqueIDHasher
    
    Replace UniqueIDHasher with a std::hash specialization so that STL containers keyed by ObjectID don't need to specify the hash function explicitly. This has already been done for Ray; this change applies it to Plasma.
    
    Author: Zhijun Fu <pi...@antfin.com>
    Author: Zhijun Fu <zh...@outlook.com>
    
    Closes #2059 from zhijunfu/remove-UniqueIDHasher and squashes the following commits:
    
    2498635a <Zhijun Fu> resolve review comments: remove const version of hash()
    d5b51690 <Zhijun Fu>  remove UniqueIDHasher
---
 cpp/src/plasma/client.cc                   |  5 ++---
 cpp/src/plasma/common.cc                   |  6 ++++++
 cpp/src/plasma/common.h                    | 17 ++++++++---------
 cpp/src/plasma/eviction_policy.h           |  2 +-
 cpp/src/plasma/plasma.h                    |  4 ++--
 cpp/src/plasma/protocol.cc                 |  9 ++++-----
 cpp/src/plasma/protocol.h                  |  9 ++++-----
 cpp/src/plasma/store.cc                    |  5 ++---
 cpp/src/plasma/store.h                     |  5 ++---
 cpp/src/plasma/test/serialization_tests.cc |  2 +-
 10 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/cpp/src/plasma/client.cc b/cpp/src/plasma/client.cc
index bfb291c..43e27e0 100644
--- a/cpp/src/plasma/client.cc
+++ b/cpp/src/plasma/client.cc
@@ -93,7 +93,7 @@ struct GpuProcessHandle {
 // This is necessary as IPC handles can only be mapped once per process.
 // Thus if multiple clients in the same process get the same gpu object,
 // they need to access the same mapped CudaBuffer.
-static std::unordered_map<ObjectID, GpuProcessHandle*, UniqueIDHasher> gpu_object_map;
+static std::unordered_map<ObjectID, GpuProcessHandle*> gpu_object_map;
 static std::mutex gpu_mutex;
 #endif
 
@@ -247,8 +247,7 @@ class PlasmaClient::Impl : public std::enable_shared_from_this<PlasmaClient::Imp
   std::unordered_map<int, ClientMmapTableEntry> mmap_table_;
   /// A hash table of the object IDs that are currently being used by this
   /// client.
-  std::unordered_map<ObjectID, std::unique_ptr<ObjectInUseEntry>, UniqueIDHasher>
-      objects_in_use_;
+  std::unordered_map<ObjectID, std::unique_ptr<ObjectInUseEntry>> objects_in_use_;
   /// Object IDs of the last few release calls. This is a deque and
   /// is used to delay releasing objects to see if they can be reused by
   /// subsequent tasks so we do not unneccessarily invalidate cpu caches.
diff --git a/cpp/src/plasma/common.cc b/cpp/src/plasma/common.cc
index be3fc74..7ac5413 100644
--- a/cpp/src/plasma/common.cc
+++ b/cpp/src/plasma/common.cc
@@ -68,6 +68,12 @@ std::string UniqueID::hex() const {
   return result;
 }
 
+size_t UniqueID::hash() const {
+  size_t result;
+  std::memcpy(&result, id_, sizeof(size_t));
+  return result;
+}
+
 bool UniqueID::operator==(const UniqueID& rhs) const {
   return std::memcmp(data(), rhs.data(), kUniqueIDSize) == 0;
 }
diff --git a/cpp/src/plasma/common.h b/cpp/src/plasma/common.h
index cc67ffe..7dbcf80 100644
--- a/cpp/src/plasma/common.h
+++ b/cpp/src/plasma/common.h
@@ -44,6 +44,7 @@ class ARROW_EXPORT UniqueID {
   uint8_t* mutable_data();
   std::string binary() const;
   std::string hex() const;
+  size_t hash() const;
 
  private:
   uint8_t id_[kUniqueIDSize];
@@ -51,15 +52,6 @@ class ARROW_EXPORT UniqueID {
 
 static_assert(std::is_pod<UniqueID>::value, "UniqueID must be plain old data");
 
-struct UniqueIDHasher {
-  // ObjectID hashing function.
-  size_t operator()(const UniqueID& id) const {
-    size_t result;
-    std::memcpy(&result, id.data(), sizeof(size_t));
-    return result;
-  }
-};
-
 typedef UniqueID ObjectID;
 
 arrow::Status plasma_error_status(int plasma_error);
@@ -104,4 +96,11 @@ struct PlasmaStoreInfo;
 extern const PlasmaStoreInfo* plasma_config;
 }  // namespace plasma
 
+namespace std {
+template <>
+struct hash<::plasma::UniqueID> {
+  size_t operator()(const ::plasma::UniqueID& id) const { return id.hash(); }
+};
+}  // namespace std
+
 #endif  // PLASMA_COMMON_H
diff --git a/cpp/src/plasma/eviction_policy.h b/cpp/src/plasma/eviction_policy.h
index b076309..d13933e 100644
--- a/cpp/src/plasma/eviction_policy.h
+++ b/cpp/src/plasma/eviction_policy.h
@@ -52,7 +52,7 @@ class LRUCache {
   ItemList item_list_;
   /// A hash table mapping the object ID of an object in the cache to its
   /// location in the doubly linked list item_list_.
-  std::unordered_map<ObjectID, ItemList::iterator, UniqueIDHasher> item_map_;
+  std::unordered_map<ObjectID, ItemList::iterator> item_map_;
 };
 
 /// The eviction policy.
diff --git a/cpp/src/plasma/plasma.h b/cpp/src/plasma/plasma.h
index 8673036..43a7496 100644
--- a/cpp/src/plasma/plasma.h
+++ b/cpp/src/plasma/plasma.h
@@ -71,7 +71,7 @@ constexpr int64_t kBlockSize = 64;
 struct Client;
 
 /// Mapping from object IDs to type and status of the request.
-typedef std::unordered_map<ObjectID, ObjectRequest, UniqueIDHasher> ObjectRequestMap;
+typedef std::unordered_map<ObjectID, ObjectRequest> ObjectRequestMap;
 
 // TODO(pcm): Replace this by the flatbuffers message PlasmaObjectSpec.
 struct PlasmaObject {
@@ -146,7 +146,7 @@ struct ObjectTableEntry {
 /// The plasma store information that is exposed to the eviction policy.
 struct PlasmaStoreInfo {
   /// Objects that are in the Plasma store.
-  std::unordered_map<ObjectID, std::unique_ptr<ObjectTableEntry>, UniqueIDHasher> objects;
+  std::unordered_map<ObjectID, std::unique_ptr<ObjectTableEntry>> objects;
   /// The amount of memory (in bytes) that we allow to be allocated in the
   /// store.
   int64_t memory_capacity;
diff --git a/cpp/src/plasma/protocol.cc b/cpp/src/plasma/protocol.cc
index 9443762..e5cce75 100644
--- a/cpp/src/plasma/protocol.cc
+++ b/cpp/src/plasma/protocol.cc
@@ -421,11 +421,10 @@ Status ReadGetRequest(uint8_t* data, size_t size, std::vector<ObjectID>& object_
   return Status::OK();
 }
 
-Status SendGetReply(
-    int sock, ObjectID object_ids[],
-    std::unordered_map<ObjectID, PlasmaObject, UniqueIDHasher>& plasma_objects,
-    int64_t num_objects, const std::vector<int>& store_fds,
-    const std::vector<int64_t>& mmap_sizes) {
+Status SendGetReply(int sock, ObjectID object_ids[],
+                    std::unordered_map<ObjectID, PlasmaObject>& plasma_objects,
+                    int64_t num_objects, const std::vector<int>& store_fds,
+                    const std::vector<int64_t>& mmap_sizes) {
   flatbuffers::FlatBufferBuilder fbb;
   std::vector<PlasmaObjectSpec> objects;
 
diff --git a/cpp/src/plasma/protocol.h b/cpp/src/plasma/protocol.h
index 86b3577..7028132 100644
--- a/cpp/src/plasma/protocol.h
+++ b/cpp/src/plasma/protocol.h
@@ -81,11 +81,10 @@ Status SendGetRequest(int sock, const ObjectID* object_ids, int64_t num_objects,
 Status ReadGetRequest(uint8_t* data, size_t size, std::vector<ObjectID>& object_ids,
                       int64_t* timeout_ms);
 
-Status SendGetReply(
-    int sock, ObjectID object_ids[],
-    std::unordered_map<ObjectID, PlasmaObject, UniqueIDHasher>& plasma_objects,
-    int64_t num_objects, const std::vector<int>& store_fds,
-    const std::vector<int64_t>& mmap_sizes);
+Status SendGetReply(int sock, ObjectID object_ids[],
+                    std::unordered_map<ObjectID, PlasmaObject>& plasma_objects,
+                    int64_t num_objects, const std::vector<int>& store_fds,
+                    const std::vector<int64_t>& mmap_sizes);
 
 Status ReadGetReply(uint8_t* data, size_t size, ObjectID object_ids[],
                     PlasmaObject plasma_objects[], int64_t num_objects,
diff --git a/cpp/src/plasma/store.cc b/cpp/src/plasma/store.cc
index 310f0cb..061b7ad 100644
--- a/cpp/src/plasma/store.cc
+++ b/cpp/src/plasma/store.cc
@@ -85,7 +85,7 @@ struct GetRequest {
   std::vector<ObjectID> object_ids;
   /// The object information for the objects in this request. This is used in
   /// the reply.
-  std::unordered_map<ObjectID, PlasmaObject, UniqueIDHasher> objects;
+  std::unordered_map<ObjectID, PlasmaObject> objects;
   /// The minimum number of objects to wait for in this request.
   int64_t num_objects_to_wait_for;
   /// The number of object requests in this wait request that are already
@@ -99,8 +99,7 @@ GetRequest::GetRequest(Client* client, const std::vector<ObjectID>& object_ids)
       object_ids(object_ids.begin(), object_ids.end()),
       objects(object_ids.size()),
       num_satisfied(0) {
-  std::unordered_set<ObjectID, UniqueIDHasher> unique_ids(object_ids.begin(),
-                                                          object_ids.end());
+  std::unordered_set<ObjectID> unique_ids(object_ids.begin(), object_ids.end());
   num_objects_to_wait_for = unique_ids.size();
 }
 
diff --git a/cpp/src/plasma/store.h b/cpp/src/plasma/store.h
index fd077f9..64c5249 100644
--- a/cpp/src/plasma/store.h
+++ b/cpp/src/plasma/store.h
@@ -49,7 +49,7 @@ struct Client {
   int fd;
 
   /// Object ids that are used by this client.
-  std::unordered_set<ObjectID, UniqueIDHasher> object_ids;
+  std::unordered_set<ObjectID> object_ids;
 };
 
 class PlasmaStore {
@@ -188,8 +188,7 @@ class PlasmaStore {
   std::vector<uint8_t> input_buffer_;
   /// A hash table mapping object IDs to a vector of the get requests that are
   /// waiting for the object to arrive.
-  std::unordered_map<ObjectID, std::vector<GetRequest*>, UniqueIDHasher>
-      object_get_requests_;
+  std::unordered_map<ObjectID, std::vector<GetRequest*>> object_get_requests_;
   /// The pending notifications that have not been sent to subscribers because
   /// the socket send buffers were full. This is a hash table from client file
   /// descriptor to an array of object_ids to send to that client.
diff --git a/cpp/src/plasma/test/serialization_tests.cc b/cpp/src/plasma/test/serialization_tests.cc
index 03b1428..006fa74 100644
--- a/cpp/src/plasma/test/serialization_tests.cc
+++ b/cpp/src/plasma/test/serialization_tests.cc
@@ -165,7 +165,7 @@ TEST(PlasmaSerialization, GetReply) {
   ObjectID object_ids[2];
   object_ids[0] = ObjectID::from_random();
   object_ids[1] = ObjectID::from_random();
-  std::unordered_map<ObjectID, PlasmaObject, UniqueIDHasher> plasma_objects;
+  std::unordered_map<ObjectID, PlasmaObject> plasma_objects;
   plasma_objects[object_ids[0]] = random_plasma_object();
   plasma_objects[object_ids[1]] = random_plasma_object();
   std::vector<int> store_fds = {1, 2, 3};

-- 
To stop receiving notification emails like this one, please contact
pcmoritz@apache.org.