You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/09/08 15:04:08 UTC
arrow git commit: ARROW-1494: [C++] Improve doxygen comments in
arrow/table.h, note that RecordBatch::column returns new object
Repository: arrow
Updated Branches:
refs/heads/master de2edc8d5 -> 11ebe9387
ARROW-1494: [C++] Improve doxygen comments in arrow/table.h, note that RecordBatch::column returns new object
Author: Wes McKinney <we...@twosigma.com>
Closes #1065 from wesm/ARROW-1494 and squashes the following commits:
ff87fa85 [Wes McKinney] Improve doxygen comments in arrow/table.h, note that RecordBatch::column returns a new object
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/11ebe938
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/11ebe938
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/11ebe938
Branch: refs/heads/master
Commit: 11ebe938744f14c335d8cf5ecfacd64afd492977
Parents: de2edc8
Author: Wes McKinney <we...@twosigma.com>
Authored: Fri Sep 8 11:04:05 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Fri Sep 8 11:04:05 2017 -0400
----------------------------------------------------------------------
cpp/src/arrow/table.h | 102 ++++++++++++++++++++++++++++-----------------
1 file changed, 64 insertions(+), 38 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/11ebe938/cpp/src/arrow/table.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index ceb564d..1145d11 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -34,19 +34,21 @@ class Status;
using ArrayVector = std::vector<std::shared_ptr<Array>>;
+/// \class ChunkedArray
/// \brief A data structure managing a list of primitive Arrow arrays logically
/// as one large array
class ARROW_EXPORT ChunkedArray {
public:
explicit ChunkedArray(const ArrayVector& chunks);
- // \return the total length of the chunked array; computed on construction
+ /// \return the total length of the chunked array; computed on construction
int64_t length() const { return length_; }
int64_t null_count() const { return null_count_; }
int num_chunks() const { return static_cast<int>(chunks_.size()); }
+ /// \return a particular chunk from the chunked array
std::shared_ptr<Array> chunk(int i) const { return chunks_[i]; }
const ArrayVector& chunks() const { return chunks_; }
@@ -71,7 +73,7 @@ class ARROW_EXPORT Column {
Column(const std::shared_ptr<Field>& field, const std::shared_ptr<Array>& data);
- /// Construct from name and array
+ /// \brief Construct from name and array
Column(const std::string& name, const std::shared_ptr<Array>& data);
int64_t length() const { return data_->length(); }
@@ -80,20 +82,23 @@ class ARROW_EXPORT Column {
std::shared_ptr<Field> field() const { return field_; }
- // \return the column's name in the passed metadata
+ /// \brief The column name
+ /// \return the column's name in the passed metadata
const std::string& name() const { return field_->name(); }
- // \return the column's type according to the metadata
+ /// \brief The column type
+ /// \return the column's type according to the metadata
std::shared_ptr<DataType> type() const { return field_->type(); }
- // \return the column's data as a chunked logical array
+ /// \brief The column data as a chunked array
+ /// \return the column's data as a chunked logical array
std::shared_ptr<ChunkedArray> data() const { return data_; }
bool Equals(const Column& other) const;
bool Equals(const std::shared_ptr<Column>& other) const;
- // Verify that the column's array data is consistent with the passed field's
- // metadata
+ /// \brief Verify that the column's array data is consistent with the passed
+ /// field's metadata
Status ValidateData();
protected:
@@ -108,14 +113,14 @@ class ARROW_EXPORT Column {
/// sequence of fields, each a contiguous Arrow array
class ARROW_EXPORT RecordBatch {
public:
- /// num_rows is a parameter to allow for record batches of a particular size not
- /// having any materialized columns. Each array should have the same length as
- /// num_rows
-
+ /// \param[in] schema
+ /// \param[in] num_rows length of fields in the record batch. Each array
+ /// should have the same length as num_rows
+ /// \param[in] columns the record batch fields as vector of arrays
RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows,
const std::vector<std::shared_ptr<Array>>& columns);
- /// \brief Deprecated move constructor for a vector of Array instances
+ /// \brief Move-based constructor for a vector of Array instances
RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows,
std::vector<std::shared_ptr<Array>>&& columns);
@@ -137,25 +142,35 @@ class ARROW_EXPORT RecordBatch {
RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows,
const std::vector<std::shared_ptr<internal::ArrayData>>& columns);
+ /// \brief Determine if two record batches are exactly equal
+ /// \return true if batches are equal
bool Equals(const RecordBatch& other) const;
+ /// \brief Determine if two record batches are approximately equal
bool ApproxEquals(const RecordBatch& other) const;
// \return the table's schema
+ /// \return the record batch's schema
std::shared_ptr<Schema> schema() const { return schema_; }
- // \return the i-th column
- // Note: Does not boundscheck
+ /// \brief Retrieve an array from the record batch (new object)
+ /// \param[in] i field index, does not boundscheck
+ /// \return a new Array object
+ ///
+ /// \note This function returns a new object. If you intend to dereference
+ /// the pointer or access the internals, retain a reference to the
+ /// std::shared_ptr returned.
std::shared_ptr<Array> column(int i) const;
std::shared_ptr<internal::ArrayData> column_data(int i) const { return columns_[i]; }
+ /// \brief Name of the i-th column
const std::string& column_name(int i) const;
- // \return the number of columns in the table
+ /// \return the number of columns in the table
int num_columns() const { return static_cast<int>(columns_.size()); }
- // \return the number of rows (the corresponding length of each column)
+ /// \return the number of rows (the corresponding length of each column)
int64_t num_rows() const { return num_rows_; }
/// \brief Replace schema key-value metadata with new metadata (EXPERIMENTAL)
@@ -166,12 +181,18 @@ class ARROW_EXPORT RecordBatch {
std::shared_ptr<RecordBatch> ReplaceSchemaMetadata(
const std::shared_ptr<const KeyValueMetadata>& metadata) const;
- /// Slice each of the arrays in the record batch and construct a new RecordBatch object
+ /// \brief Slice each of the arrays in the record batch
+ /// \param[in] offset the starting offset to slice, through end of batch
+ /// \return new record batch
std::shared_ptr<RecordBatch> Slice(int64_t offset) const;
+
+ /// \brief Slice each of the arrays in the record batch
+ /// \param[in] offset the starting offset to slice
+ /// \param[in] length the number of elements to slice from offset
+ /// \return new record batch
std::shared_ptr<RecordBatch> Slice(int64_t offset, int64_t length) const;
/// \brief Check for schema or length inconsistencies
- ///
/// \return Status
Status Validate() const;
@@ -181,21 +202,22 @@ class ARROW_EXPORT RecordBatch {
std::vector<std::shared_ptr<internal::ArrayData>> columns_;
};
-// Immutable container of fixed-length columns conforming to a particular schema
+/// \class Table
+/// \brief Logical table as sequence of chunked arrays
class ARROW_EXPORT Table {
public:
/// \brief Construct Table from schema and columns
/// If columns is zero-length, the table's number of rows is zero
/// \param schema
/// \param columns
- /// \param num_rows number of rows in table, -1 (default) to infer from columns
+ /// \param[in] num_rows number of rows in table, -1 (default) to infer from columns
Table(const std::shared_ptr<Schema>& schema,
const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows = -1);
/// \brief Construct Table from schema and arrays
/// \param schema
/// \param arrays
- /// \param num_rows number of rows in table, -1 (default) to infer from columns
+ /// \param[in] num_rows number of rows in table, -1 (default) to infer from columns
Table(const std::shared_ptr<Schema>& schema,
const std::vector<std::shared_ptr<Array>>& arrays, int64_t num_rows = -1);
@@ -205,18 +227,17 @@ class ARROW_EXPORT Table {
const std::vector<std::shared_ptr<RecordBatch>>& batches,
std::shared_ptr<Table>* table);
- // \return the table's schema
+ /// \return the table's schema
std::shared_ptr<Schema> schema() const { return schema_; }
- // Note: Does not boundscheck
- // \return the i-th column
+ /// \param[in] i column index, does not boundscheck
+ /// \return the i-th column
std::shared_ptr<Column> column(int i) const { return columns_[i]; }
- /// Remove column from the table, producing a new Table (because tables and
- /// schemas are immutable)
+ /// \brief Remove column from the table, producing a new Table
Status RemoveColumn(int i, std::shared_ptr<Table>* out) const;
- /// Add column to the table, producing a new Table
+ /// \brief Add column to the table, producing a new Table
Status AddColumn(int i, const std::shared_ptr<Column>& column,
std::shared_ptr<Table>* out) const;
@@ -228,15 +249,16 @@ class ARROW_EXPORT Table {
std::shared_ptr<Table> ReplaceSchemaMetadata(
const std::shared_ptr<const KeyValueMetadata>& metadata) const;
- // \return the number of columns in the table
+ /// \return the number of columns in the table
int num_columns() const { return static_cast<int>(columns_.size()); }
- // \return the number of rows (the corresponding length of each column)
+ /// \return the number of rows (the corresponding length of each column)
int64_t num_rows() const { return num_rows_; }
+ /// \brief Determine if semantic contents of tables are exactly equal
bool Equals(const Table& other) const;
- // After construction, perform any checks to validate the input arguments
+ /// \brief Perform any checks to validate the input arguments
Status ValidateColumns() const;
private:
@@ -246,14 +268,18 @@ class ARROW_EXPORT Table {
int64_t num_rows_;
};
-// Construct table from multiple input tables. Return Status::Invalid if
-// schemas are not equal
-Status ARROW_EXPORT ConcatenateTables(const std::vector<std::shared_ptr<Table>>& tables,
- std::shared_ptr<Table>* table);
-
-Status ARROW_EXPORT MakeTable(const std::shared_ptr<Schema>& schema,
- const std::vector<std::shared_ptr<Array>>& arrays,
- std::shared_ptr<Table>* table);
+/// \brief Construct table from multiple input tables.
+/// \return Status, fails if any schemas are different
+ARROW_EXPORT
+Status ConcatenateTables(const std::vector<std::shared_ptr<Table>>& tables,
+ std::shared_ptr<Table>* table);
+
+/// \brief Construct table from a schema and a vector of arrays.
+/// \return Status
+ARROW_EXPORT
+Status MakeTable(const std::shared_ptr<Schema>& schema,
+ const std::vector<std::shared_ptr<Array>>& arrays,
+ std::shared_ptr<Table>* table);
} // namespace arrow