You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2019/09/06 19:19:10 UTC
[incubator-datasketches-cpp] 01/01: formatting,
use const where appropriate
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch cpc_formatting_and_const
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit 3e54d7934307c387b9d55bfe8d6bd6a8f63901b0
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri Sep 6 12:18:55 2019 -0700
formatting, use const where appropriate
---
cpc/include/common.h | 2 -
cpc/include/cpc_sketch.hpp | 21 +-
cpc/include/fm85.h | 68 ++---
cpc/include/fm85Compression.h | 64 ++--
cpc/include/fm85Confidence.h | 8 +-
cpc/include/fm85Merging.h | 28 +-
cpc/include/fm85Util.h | 16 +-
cpc/include/iconEstimator.h | 2 +-
cpc/include/u32Table.h | 50 +---
cpc/src/fm85.cpp | 368 ++++++++++-------------
cpc/src/fm85Compression.cpp | 680 ++++++++++++++++--------------------------
cpc/src/fm85Confidence.cpp | 95 +++---
cpc/src/fm85Merging.cpp | 208 ++++++-------
cpc/src/fm85Util.cpp | 139 ++++-----
cpc/src/iconEstimator.cpp | 95 +++---
cpc/src/u32Table.cpp | 223 ++++++--------
16 files changed, 813 insertions(+), 1254 deletions(-)
diff --git a/cpc/include/common.h b/cpc/include/common.h
index 5d020e2..4aa98bd 100644
--- a/cpc/include/common.h
+++ b/cpc/include/common.h
@@ -29,8 +29,6 @@
#include <time.h>
#include <math.h>
-typedef int Boolean;
-
typedef uint8_t U8;
typedef uint16_t U16;
typedef uint32_t U32;
diff --git a/cpc/include/cpc_sketch.hpp b/cpc/include/cpc_sketch.hpp
index 78ed8d5..06a2ded 100644
--- a/cpc/include/cpc_sketch.hpp
+++ b/cpc/include/cpc_sketch.hpp
@@ -325,12 +325,12 @@ class cpc_sketch {
is.read((char*)&flags_byte, sizeof(flags_byte));
uint16_t seed_hash;
is.read((char*)&seed_hash, sizeof(seed_hash));
- const bool has_hip(flags_byte & (1 << flags::HAS_HIP));
- const bool has_table(flags_byte & (1 << flags::HAS_TABLE));
- const bool has_window(flags_byte & (1 << flags::HAS_WINDOW));
+ const bool has_hip = flags_byte & (1 << flags::HAS_HIP);
+ const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
+ const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
FM85 compressed;
- compressed.isCompressed = 1;
- compressed.mergeFlag = has_hip ? 0 : 1;
+ compressed.isCompressed = true;
+ compressed.mergeFlag = !has_hip;
compressed.lgK = lg_k;
compressed.firstInterestingColumn = first_interesting_column;
compressed.numCoupons = 0;
@@ -422,12 +422,12 @@ class cpc_sketch {
ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
uint16_t seed_hash;
ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
- const bool has_hip(flags_byte & (1 << flags::HAS_HIP));
- const bool has_table(flags_byte & (1 << flags::HAS_TABLE));
- const bool has_window(flags_byte & (1 << flags::HAS_WINDOW));
+ const bool has_hip = flags_byte & (1 << flags::HAS_HIP);
+ const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
+ const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
FM85 compressed;
- compressed.isCompressed = 1;
- compressed.mergeFlag = has_hip ? 0 : 1;
+ compressed.isCompressed = true;
+ compressed.mergeFlag = !has_hip;
compressed.lgK = lg_k;
compressed.firstInterestingColumn = first_interesting_column;
compressed.numCoupons = 0;
@@ -517,7 +517,6 @@ class cpc_sketch {
}
friend std::ostream& operator<<(std::ostream& os, cpc_sketch const& sketch);
-
friend class cpc_union;
private:
diff --git a/cpc/include/fm85.h b/cpc/include/fm85.h
index 9bf03f2..c123596 100644
--- a/cpc/include/fm85.h
+++ b/cpc/include/fm85.h
@@ -36,28 +36,25 @@ enum flavorType {
SLIDING // 27K/8 <= C
};
-/*******************************************************/
-
-typedef struct fm85_sketch_type
-{
+typedef struct fm85_sketch_type {
// The following variables occur in all sketch types.
Short lgK;
- Boolean isCompressed;
- Boolean mergeFlag; // Is the sketch the result of merging?
+ bool isCompressed;
+ bool mergeFlag; // Is the sketch the result of merging?
Long numCoupons; // The number of coupons collected so far.
// The following variables occur in the updateable semi-compressed type.
- U8 * slidingWindow;
+ U8* slidingWindow;
Short windowOffset; // Derivable from numCoupons, but made explicit for speed.
- u32Table * surprisingValueTable;
+ u32Table* surprisingValueTable;
// The following variables occur in the non-updateable fully-compressed type.
- U32 * compressedWindow; // A bitstream.
- Long cwLength; // The number of 32-bit words in this bitstream. (Not needed in Java).
- Long numCompressedSurprisingValues;
- U32 * compressedSurprisingValues; // A bitstream.
- Long csvLength; // The number of 32-bit words in this bitstream. (Not needed in Java).
+ U32* compressedWindow; // A bitstream.
+ Long cwLength; // The number of 32-bit words in this bitstream. (Not needed in Java).
+ Long numCompressedSurprisingValues;
+ U32* compressedSurprisingValues; // A bitstream.
+ Long csvLength; // The number of 32-bit words in this bitstream. (Not needed in Java).
// Note that (as an optimization) the two bitstreams could be concatenated.
@@ -72,53 +69,36 @@ typedef struct fm85_sketch_type
extern void* (*fm85alloc)(size_t);
extern void (*fm85free)(void*);
-/*******************************************************/
// These routines are exported.
-void fm85Init (void); // Call this before anything else.
-void fm85InitAD (void* (*alloc)(size_t), void (*dealloc)(void*)); // or this to use custom allocator and deallocator
-
-void fm85Clean (void); // Call this at the end to clean up (optional)
+void fm85Init(void); // Call this before anything else.
+void fm85InitAD(void* (*alloc)(size_t), void (*dealloc)(void*)); // or this to use custom allocator and deallocator
-FM85 * fm85Make (Short lgK);
+void fm85Clean(void); // Call this at the end to clean up (optional)
-FM85 * fm85Copy (FM85 * self);
+FM85* fm85Make(Short lgK);
-void fm85Free (FM85 * sketch);
+FM85* fm85Copy(const FM85* self);
-void fm85Update (FM85 * sketch, U64 hash0, U64 hash1);
+void fm85Free(FM85* sketch);
-double getHIPEstimate (FM85 * sketch);
+void fm85Update(FM85* sketch, U64 hash0, U64 hash1);
-// getIconEstimate() is defined in a separate file.
-
-/*******************************************************/
+double getHIPEstimate(const FM85* sketch);
// The following is used during testing, and is basically package private.
-U32 rowColFromTwoHashes (U64 hash0, U64 hash1, Short lgK);
+U32 rowColFromTwoHashes(U64 hash0, U64 hash1, Short lgK);
-/*******************************************************/
// These routines are internal.
-void fm85RowColUpdate (FM85 * sketch, U32 rowCol);
-
-enum flavorType determineFlavor (Short lgK, Long c);
-enum flavorType determineSketchFlavor (FM85 * self);
+void fm85RowColUpdate(FM85* sketch, U32 rowCol);
-Short determineCorrectOffset (Short lgK, Long c);
+enum flavorType determineFlavor(Short lgK, Long c);
+enum flavorType determineSketchFlavor(const FM85* self);
-U64 * bitMatrixOfSketch (FM85 * self);
+Short determineCorrectOffset(Short lgK, Long c);
-// these are only used internally
-// void promoteEmptyToSparse (FM85 * self);
-// void promoteSparseToWindowed (FM85 * self);
-// void modifyOffset (FM85 * self, Short newOffset);
-// void updateSparse (FM85 * self, U32 rowCol);
-// void updateWindowed (FM85 * self, U32 rowCol);
-
-
-/*******************************************************/
+U64* bitMatrixOfSketch(const FM85* self);
#define GOT_FM85_H
#endif
-
diff --git a/cpc/include/fm85Compression.h b/cpc/include/fm85Compression.h
index 334363b..c13c10c 100644
--- a/cpc/include/fm85Compression.h
+++ b/cpc/include/fm85Compression.h
@@ -24,70 +24,54 @@
#include "fm85.h"
#include "fm85Util.h"
-/****************************************/
-
#define MAYBE_FLUSH_BITBUF(wordarr,wordindex) \
if (bufbits >= 32) { \
- (wordarr)[(wordindex)++] = (U32) (bitbuf & 0xffffffff); \
- bitbuf = bitbuf >> 32; bufbits -= 32;}
+ (wordarr)[(wordindex)++] = (U32) (bitbuf & 0xffffffff); \
+ bitbuf = bitbuf >> 32; bufbits -= 32; \
+ }
#define MAYBE_FILL_BITBUF(wordarr,wordindex,minbits) \
if (bufbits < (minbits)) { \
- bitbuf |= (((U64) (wordarr)[(wordindex)++]) << bufbits); \
- bufbits += 32; \
- }
-
-/****************************************/
+ bitbuf |= (((U64) (wordarr)[(wordindex)++]) << bufbits); \
+ bufbits += 32; \
+ }
-void makeTheDecodingTables (void); // call this at startup
-void freeTheDecodingTables (void); // call this at the end
+void makeTheDecodingTables(void); // call this at startup
+void freeTheDecodingTables(void); // call this at the end
-/****************************************/
// Here "pairs" refers to row/column pairs that specify
// the positions of surprising values in the bit matrix.
// returns the number of compressedWords actually used
-Long lowLevelCompressPairs (U32 * pairArray, // input
+Long lowLevelCompressPairs(const U32* pairArray, // input
Long numPairs, // input
- Long numBaseBits, // input
- U32 * compressedWords); // output
+ Long numBaseBits, // input
+ U32* compressedWords); // output
-void lowLevelUncompressPairs (U32 * pairArray, // output
+void lowLevelUncompressPairs(U32* pairArray, // output
Long numPairs, // input
- Long numBaseBits, // input
- U32 * compressedWords, // input
+ Long numBaseBits, // input
+ const U32* compressedWords, // input
Long numCompressedWords); // input
-/****************************************/
-
// This returns the number of compressedWords that were actually used. It is the caller's
// responsibility to ensure that the compressedWords array is long enough to prevent over-run.
-Long lowLevelCompressBytes (U8 * byteArray, // input
- Long numBytesToEncode, // input
- U16 * encodingTable, // input
- U32 * compressedWords); // output
+Long lowLevelCompressBytes(const U8* byteArray, // input
+ Long numBytesToEncode, // input
+ const U16* encodingTable, // input
+ U32* compressedWords); // output
-/****************************************/
-
-void lowLevelUncompressBytes (U8 * byteArray, // output
- Long numBytesToDecode, // input (but refers to the output)
- U16 * decodingTable, // input
- U32 * compressedWords, // input
+void lowLevelUncompressBytes(U8* byteArray, // output
+ Long numBytesToDecode, // input (but refers to the output)
+ const U16* decodingTable, // input
+ const U32* compressedWords, // input
Long numCompressedWords); // input
-/****************************************/
-
-FM85 * fm85Compress (FM85 * uncompressedSketch); // returns a compressed copy of its input
+FM85* fm85Compress(FM85* uncompressedSketch); // returns a compressed copy of its input
-FM85 * fm85Uncompress (FM85 * compressedSketch); // returns an updateable copy of its input
-
-// Note: in the final system, compressed and uncompressed sketches will have different types
-
-/****************************************/
+FM85* fm85Uncompress(FM85* compressedSketch); // returns an updateable copy of its input
#define GOT_FM85_COMPRESSION_H
#endif
-
-
diff --git a/cpc/include/fm85Confidence.h b/cpc/include/fm85Confidence.h
index 18741cd..67f9db9 100644
--- a/cpc/include/fm85Confidence.h
+++ b/cpc/include/fm85Confidence.h
@@ -24,10 +24,10 @@
#include "common.h"
#include "fm85.h"
-double getIconConfidenceLB (FM85 * sketch, int kappa);
-double getIconConfidenceUB (FM85 * sketch, int kappa);
-double getHIPConfidenceLB (FM85 * sketch, int kappa);
-double getHIPConfidenceUB (FM85 * sketch, int kappa);
+double getIconConfidenceLB(const FM85* sketch, int kappa);
+double getIconConfidenceUB(const FM85* sketch, int kappa);
+double getHIPConfidenceLB(const FM85* sketch, int kappa);
+double getHIPConfidenceUB(const FM85* sketch, int kappa);
#define GOT_FM85_CONFIDENCE_H
#endif
diff --git a/cpc/include/fm85Merging.h b/cpc/include/fm85Merging.h
index c6d84dd..0114e06 100644
--- a/cpc/include/fm85Merging.h
+++ b/cpc/include/fm85Merging.h
@@ -27,13 +27,10 @@
#include "fm85Util.h"
#include "u32Table.h"
-/****************************************/
-
-typedef struct fm85_unioning_gadget
-{
+typedef struct fm85_unioning_gadget {
Short lgK; // Note: in some cases this will be reduced.
- FM85 * accumulator; // this is a sketch object
- U64 * bitMatrix;
+ FM85* accumulator; // this is a sketch object
+ U64* bitMatrix;
// Note: at most one of the previous two fields will be non-NULL at any given moment.
// accumulator is a sketch object that is employed until it graduates out of Sparse mode.
// At that point, it is converted into a full-sized bitMatrix, which is mathematically a sketch,
@@ -41,21 +38,15 @@ typedef struct fm85_unioning_gadget
// is required when getResult is called at the end.
} UG85;
-/****************************************/
-
-UG85 * ug85Make (Short lgK);
-
-void ug85Free (UG85 * unioner);
+UG85* ug85Make(Short lgK);
-void ug85MergeInto (UG85 * unioner, FM85 * sourceSketch);
+void ug85Free(UG85* unioner);
-FM85 * ug85GetResult (UG85 * unioner);
+void ug85MergeInto(UG85* unioner, const FM85* sourceSketch);
-/****************************************/
+FM85* ug85GetResult(const UG85* unioner);
-U64 * bitMatrixOfUG85 (UG85 * self, Boolean * needToFreePtr); // used for testing
-
-/****************************************/
+U64* bitMatrixOfUG85(const UG85* self, bool* needToFreePtr); // used for testing
#define GOT_FM85_MERGING_H
#endif
@@ -99,7 +90,6 @@ U64 * bitMatrixOfUG85 (UG85 * self, Boolean * needToFreePtr); // used for testin
// wouldn't work because of the partially inverted Logic in the Sliding flavor, where the presence of
// coupons is sometimes indicated by the ABSENCE of rowCol pairs in the surprises table.]
-/****************************************/
// How does getResult work?
@@ -108,5 +98,3 @@ U64 * bitMatrixOfUG85 (UG85 * self, Boolean * needToFreePtr); // used for testin
// If the unioner is using its bitMatrix field, then we have to convert the
// bitMatrix back into a sketch, which requires doing some extra work to
// figure out the values of numCoupons, offset, firstInterestingColumn, and KxQ.
-
-/*******************************************************************************************/
diff --git a/cpc/include/fm85Util.h b/cpc/include/fm85Util.h
index c2da967..90acdce 100644
--- a/cpc/include/fm85Util.h
+++ b/cpc/include/fm85Util.h
@@ -22,11 +22,11 @@
#ifndef GOT_FM85_UTIL_H
#include "common.h"
-void * shallowCopy (void * oldObject, size_t numBytes);
+void* shallowCopy(const void* oldObject, size_t numBytes);
extern double invPow2Tab[];
-void fillInvPow2Tab (void);
+void fillInvPow2Tab(void);
extern double kxpByteLookup[];
@@ -37,17 +37,15 @@ extern U8 byteTrailingZerosTable[];
void fillByteLeadingZerosTable(void);
void fillByteTrailingZerosTable(void);
-Short countLeadingZerosInUnsignedLong (U64 theInput);
-Short countTrailingZerosInUnsignedLong (U64 theInput);
+Short countLeadingZerosInUnsignedLong(U64 theInput);
+Short countTrailingZerosInUnsignedLong(U64 theInput);
-Long divideLongsRoundingUp (Long x, Long y);
+Long divideLongsRoundingUp(Long x, Long y);
// for delta-encoding an instance of (n choose m)
-Long golombChooseNumberOfBaseBits (Long n, Long m);
+Long golombChooseNumberOfBaseBits(Long n, Long m);
-Long countBitsSetInMatrix (U64 * array, Long length);
-
-/******************************************/
+Long countBitsSetInMatrix(const U64* array, Long length);
#define GOT_FM85_UTIL_H
#endif
diff --git a/cpc/include/iconEstimator.h b/cpc/include/iconEstimator.h
index 3bb5303..ecdeeff 100644
--- a/cpc/include/iconEstimator.h
+++ b/cpc/include/iconEstimator.h
@@ -23,7 +23,7 @@
#include "common.h"
-double getIconEstimate (Short lgK, Long c);
+double getIconEstimate(Short lgK, Long c);
#define GOT_ICON_H
#endif
diff --git a/cpc/include/u32Table.h b/cpc/include/u32Table.h
index aa83fa2..515e982 100644
--- a/cpc/include/u32Table.h
+++ b/cpc/include/u32Table.h
@@ -31,60 +31,42 @@
#define u32TableDownsizeNumer 1LL
#define u32TableDownsizeDenom 4LL
-typedef struct u32_table_type
-{
+typedef struct u32_table_type {
Short validBits;
Short lgSize; // log2 of number of slots
Long numItems;
- U32 * slots;
+ U32* slots;
} u32Table;
-/*******************************************************/
+u32Table* u32TableMake(Short initialLgSize, Short numValidBits);
-u32Table * u32TableMake (Short initialLgSize, Short numValidBits);
+u32Table* u32TableCopy(const u32Table* self);
-u32Table * u32TableCopy (u32Table * self);
+void u32TableClear(u32Table* self);
-void u32TableClear (u32Table * self);
+void u32TableFree(u32Table* self);
-void u32TableFree (u32Table * self);
+void u32TableShow(const u32Table* self); // for debugging
-void u32TableShow (u32Table * self); // for debugging
+bool u32TableMaybeInsert(u32Table* self, U32 item);
-/*******************************************************/
-
-Boolean u32TableMaybeInsert (u32Table * self, U32 item);
-
-Boolean u32TableMaybeDelete (u32Table * self, U32 item);
-
-/*******************************************************/
+bool u32TableMaybeDelete(u32Table* self, U32 item);
// this one slightly breaks the abstraction boundary
+u32Table* makeU32TableFromPairsArray(const U32* pairs, Long numPairs, Short sketchLgK);
-u32Table * makeU32TableFromPairsArray (U32 * pairs, Long numPairs, Short sketchLgK);
-
-/*******************************************************/
-
-U32 * u32TableUnwrappingGetItems (u32Table * self, Long * returnNumItems);
+U32* u32TableUnwrappingGetItems(const u32Table* self, Long* returnNumItems);
-void printU32Array (U32 * array, Long arrayLength);
-
-/*******************************************************/
-
-// void u32TokudaShellSort(U32 a[], Long l, Long r);
+void printU32Array(const U32* array, Long arrayLength);
void u32KnuthShellSort3(U32 a[], Long l, Long r);
void introspectiveInsertionSort(U32 a[], Long l, Long r);
-
-/*******************************************************/
-
-void u32Merge (U32 * arrA, Long startA, Long lengthA, // input
- U32 * arrB, Long startB, Long lengthB, // input
- U32 * arrC, Long startC); // output
-
-/*******************************************************/
+void u32Merge(
+ const U32* arrA, Long startA, Long lengthA, // input
+ const U32* arrB, Long startB, Long lengthB, // input
+ U32* arrC, Long startC); // output
#define GOT_U32_TABLE_H
#endif
diff --git a/cpc/src/fm85.cpp b/cpc/src/fm85.cpp
index bf98dff..e498951 100644
--- a/cpc/src/fm85.cpp
+++ b/cpc/src/fm85.cpp
@@ -28,24 +28,20 @@
#include <stdexcept>
#include <new>
-/*******************************************************/
-
-U32 rowColFromTwoHashes (U64 hash0, U64 hash1, Short lgK) {
+U32 rowColFromTwoHashes(U64 hash0, U64 hash1, Short lgK) {
if (lgK > 26) throw std::logic_error("lgK > 26");
- Long k = (1LL << lgK);
- Short col = countLeadingZerosInUnsignedLong (hash1); // 0 <= col <= 64
- if (col > 63) col = 63; // clip so that 0 <= col <= 63
- Long row = hash0 & (k - 1);
+ const Long k = 1LL << lgK;
+ Short col = countLeadingZerosInUnsignedLong(hash1); // 0 <= col <= 64
+ if (col > 63) col = 63; // clip so that 0 <= col <= 63
+ const Long row = hash0 & (k - 1);
U32 rowCol = (U32) ((row << 6) | col);
// To avoid the hash table's "empty" value, we change the row of the following pair.
// This case is extremely unlikely, but we might as well handle it.
if (rowCol == ALL32BITS) { rowCol ^= (1 << 6); }
- return (rowCol);
+ return rowCol;
}
-/*******************************************************/
-
-Boolean fm85Initialized = 0;
+bool fm85Initialized = false;
// This is to support custom allocator and deallocator
void* (*fm85alloc)(size_t);
@@ -54,12 +50,12 @@ void (*fm85free)(void*);
// This stuff would be handled by Java's mechanism
// for initializing class variables.
-void fm85Init (void) {
+void fm85Init(void) {
fm85InitAD(&malloc, &free);
}
// This is to support custom allocator and deallocator
-void fm85InitAD (void* (*alloc)(size_t), void (*dealloc)(void*)) {
+void fm85InitAD(void* (*alloc)(size_t), void (*dealloc)(void*)) {
if (!fm85Initialized) {
fm85alloc = alloc;
fm85free = dealloc;
@@ -68,25 +64,23 @@ void fm85InitAD (void* (*alloc)(size_t), void (*dealloc)(void*)) {
makeTheDecodingTables();
fillInvPow2Tab();
fillKxpByteLookup();
- fm85Initialized = 1;
+ fm85Initialized = true;
}
}
-void fm85Clean (void) {
+void fm85Clean(void) {
if (fm85Initialized) {
freeTheDecodingTables();
- fm85Initialized = 0;
+ fm85Initialized = false;
}
}
-/*******************************************************/
// The flavor is a function of K and C (the number of collected coupons).
-
-enum flavorType determineFlavor (Short lgK, Long c) {
- Long k = (1LL << lgK);
- Long c2 = c << 1;
- Long c8 = c << 3;
- Long c32 = c << 5;
+enum flavorType determineFlavor(Short lgK, Long c) {
+ const Long k = 1LL << lgK;
+ const Long c2 = c << 1;
+ const Long c8 = c << 3;
+ const Long c32 = c << 5;
if (c == 0) return (EMPTY); // 0 == C < 1
if (c32 < 3*k) return (SPARSE); // 1 <= C < 3K/32
if (c2 < k) return (HYBRID); // 3K/32 <= C < K/2
@@ -96,92 +90,82 @@ enum flavorType determineFlavor (Short lgK, Long c) {
// Note: the <= occurs with equality except SPARSE-vs-HYBRID for K = 2^4
-enum flavorType determineSketchFlavor (FM85 * self) {
+enum flavorType determineSketchFlavor(const FM85* self) {
return (determineFlavor(self->lgK, self->numCoupons));
}
-/*******************************************************/
-
-Short determineCorrectOffset (Short lgK, Long c) {
- Long k = (1LL << lgK);
- Long tmp = (c << 3) - 19 * k; // 8C - 19K
+Short determineCorrectOffset(Short lgK, Long c) {
+ const Long k = 1LL << lgK;
+ const Long tmp = (c << 3) - 19 * k; // 8C - 19K
if (tmp < 0) return ((Short) 0);
return ((Short) (tmp >> (lgK + 3))); // tmp / 8K
}
-/*******************************************************/
-
-FM85 * fm85Make (Short lgK) {
+FM85* fm85Make(Short lgK) {
if (lgK < 4 || lgK > 26) throw std::invalid_argument("lgK must be between 4 and 26");
- FM85 * self = (FM85 *) fm85alloc (sizeof(FM85));
+ FM85* self = (FM85*) fm85alloc(sizeof(FM85));
if (self == NULL) throw std::bad_alloc();
self->lgK = lgK;
- self->isCompressed = 0;
- self->mergeFlag = 0;
+ self->isCompressed = false;
+ self->mergeFlag = false;
self->numCoupons = 0LL;
self->windowOffset = 0;
- self->slidingWindow = (U8 *) NULL;
+ self->slidingWindow = (U8*) NULL;
self->surprisingValueTable = (u32Table *) NULL;
self->numCompressedSurprisingValues = 0;
- self->compressedSurprisingValues = (U32 *) NULL;
+ self->compressedSurprisingValues = (U32*) NULL;
self->csvLength = 0;
- self->compressedWindow = (U32 *) NULL;
+ self->compressedWindow = (U32*) NULL;
self->cwLength = 0;
self->firstInterestingColumn = 0;
- Long k = (1LL << self->lgK);
+ const Long k = 1LL << self->lgK;
self->kxp = ((double) k) * 1.0;
self->hipEstAccum = 0.0;
self->hipErrAccum = 0.0;
- return (self);
+ return self;
}
-/*******************************************************/
-
-FM85 * fm85Copy (FM85 * self) {
+FM85* fm85Copy(const FM85* self) {
if (self == NULL) throw std::invalid_argument("self is null");
- FM85 * newObj = (FM85 *) shallowCopy ((void *) self, sizeof(FM85));
+ FM85* newObj = (FM85*) shallowCopy((void*) self, sizeof(FM85));
if (self->surprisingValueTable != NULL) {
- newObj->surprisingValueTable = u32TableCopy (self->surprisingValueTable);
+ newObj->surprisingValueTable = u32TableCopy(self->surprisingValueTable);
}
if (self->slidingWindow != NULL) {
- Long k = (1LL << self->lgK);
- size_t theSize = k * sizeof(U8);
- newObj->slidingWindow = (U8 *) shallowCopy ((void *) self->slidingWindow, theSize);
+ const Long k = 1LL << self->lgK;
+ const size_t theSize = k * sizeof(U8);
+ newObj->slidingWindow = (U8*) shallowCopy((void*) self->slidingWindow, theSize);
}
if (self->compressedSurprisingValues != NULL) {
- size_t theSize = self->csvLength * sizeof(U32);
- newObj->compressedSurprisingValues = (U32 *) shallowCopy ((void *) self->compressedSurprisingValues, theSize);
+ const size_t theSize = self->csvLength * sizeof(U32);
+ newObj->compressedSurprisingValues = (U32 *) shallowCopy((void*) self->compressedSurprisingValues, theSize);
}
if (self->compressedWindow != NULL) {
- size_t theSize = self->cwLength * sizeof(U32);
- newObj->compressedWindow = (U32 *) shallowCopy ((void *) self->compressedWindow, theSize);
+ const size_t theSize = self->cwLength * sizeof(U32);
+ newObj->compressedWindow = (U32 *) shallowCopy((void*) self->compressedWindow, theSize);
}
- return (newObj);
+ return newObj;
}
-/*******************************************************/
-
-void fm85Free (FM85 * self) {
+void fm85Free(FM85* self) {
if (self != NULL) {
- if (self->surprisingValueTable != NULL) u32TableFree (self->surprisingValueTable);
- if (self->slidingWindow != NULL) fm85free (self->slidingWindow);
- if (self->compressedSurprisingValues != NULL) fm85free (self->compressedSurprisingValues);
- if (self->compressedWindow != NULL) fm85free (self->compressedWindow);
- fm85free (self);
+ if (self->surprisingValueTable != NULL) u32TableFree(self->surprisingValueTable);
+ if (self->slidingWindow != NULL) fm85free(self->slidingWindow);
+ if (self->compressedSurprisingValues != NULL) fm85free(self->compressedSurprisingValues);
+ if (self->compressedWindow != NULL) fm85free(self->compressedWindow);
+ fm85free(self);
}
}
-/*******************************************************/
-
// Warning: this is called in several places, including during the
// transitional moments during which sketch invariants involving
// flavor and offset are out of whack and in fact we are re-imposing
@@ -191,40 +175,39 @@ void fm85Free (FM85 * self) {
// This produces a full-size k-by-64 bit matrix from any Live sketch.
-U64 * bitMatrixOfSketch (FM85 * self) {
- if (self->isCompressed != 0) throw std::logic_error("isCompressed != 0");
- Long k = (1LL << self->lgK);
- Short offset = self->windowOffset;
+U64* bitMatrixOfSketch(const FM85* self) {
+ if (self->isCompressed) throw std::logic_error("must not be compressed");
+ const Long k = 1LL << self->lgK;
+ const Short offset = self->windowOffset;
if (offset < 0 || offset > 56) throw std::logic_error("offset < 0 || offset > 56");
- Long i = 0;
- U64 * matrix = (U64 *) fm85alloc ((size_t) (k * sizeof(U64)));
+ U64* matrix = (U64*) fm85alloc((size_t) (k * sizeof(U64)));
if (matrix == NULL) throw std::bad_alloc();
-// Fill the matrix with default rows in which the "early zone" is filled with ones.
-// This is essential for the routine's O(k) time cost (as opposed to O(C)).
- U64 defaultRow = (1ULL << offset) - 1;
- for (i = 0; i < k; i++) { matrix[i] = defaultRow; }
+ // Fill the matrix with default rows in which the "early zone" is filled with ones.
+ // This is essential for the routine's O(k) time cost (as opposed to O(C)).
+ const U64 defaultRow = (1ULL << offset) - 1;
+ for (Long i = 0; i < k; i++) { matrix[i] = defaultRow; }
if (self->numCoupons == 0) {
return (matrix); // Returning a matrix of zeros rather than NULL.
}
- U8 * window = self->slidingWindow;
+ U8* window = self->slidingWindow;
if (window != NULL) { // In other words, we are in window mode, not sparse mode.
- for (i = 0; i < k; i++) { // set the window bits, trusting the sketch's current offset.
+ for (Long i = 0; i < k; i++) { // set the window bits, trusting the sketch's current offset.
matrix[i] |= (((U64) window[i]) << offset);
}
}
- u32Table * table = self->surprisingValueTable;
+ u32Table* table = self->surprisingValueTable;
if (table == NULL) throw std::logic_error("table == NULL");
- U32 * slots = table->slots;
- Long numSlots = (1LL << table->lgSize);
- for (i = 0; i < numSlots; i++) {
- U32 rowCol = slots[i];
+ U32* slots = table->slots;
+ const Long numSlots = 1LL << table->lgSize;
+ for (Long i = 0; i < numSlots; i++) {
+ const U32 rowCol = slots[i];
if (rowCol != ALL32BITS) {
- Short col = (Short) (rowCol & 63);
- Long row = (Long) (rowCol >> 6);
+ const Short col = (Short) (rowCol & 63);
+ const Long row = (Long) (rowCol >> 6);
// Flip the specified matrix bit from its default value.
// In the "early" zone the bit changes from 1 to 0.
// In the "late" zone the bit changes from 0 to 1.
@@ -232,132 +215,111 @@ U64 * bitMatrixOfSketch (FM85 * self) {
}
}
- return(matrix);
+ return matrix;
}
-/*******************************************************/
-
-void promoteEmptyToSparse (FM85 * self) {
+void promoteEmptyToSparse(FM85* self) {
if (self->numCoupons != 0) throw std::logic_error("numCoupons != 0");
if (self->surprisingValueTable != NULL) throw std::logic_error("surprisingValueTable != NULL");
- self->surprisingValueTable = u32TableMake (2, 6 + self->lgK);
+ self->surprisingValueTable = u32TableMake(2, 6 + self->lgK);
}
-/*******************************************************/
// In terms of flavor, this promotes SPARSE to HYBRID.
-
-void promoteSparseToWindowed (FM85 * self) {
- Long k = (1LL << self->lgK);
- Long c32 = self->numCoupons << 5;
+void promoteSparseToWindowed(FM85* self) {
+ const Long k = 1LL << self->lgK;
+ const Long c32 = self->numCoupons << 5;
if (!(c32 == 3 * k || (self->lgK == 4 && c32 > 3 * k))) throw std::logic_error("wrong c32");
- Long i;
- U8 * window = (U8 *) fm85alloc ((size_t) (k * sizeof(U8)));
+ U8* window = (U8*) fm85alloc((size_t) (k * sizeof(U8)));
if (window == NULL) throw std::bad_alloc();
- memset ((void *) window,0, (size_t) k); // zero the memory (because we will be OR'ing into it)
+ memset((void*) window, 0, (size_t) k); // zero the memory (because we will be OR'ing into it)
- u32Table * newTable = u32TableMake (2, 6 + self->lgK);
+ u32Table* newTable = u32TableMake(2, 6 + self->lgK);
- u32Table * oldTable = self->surprisingValueTable;
- U32 * oldSlots = oldTable->slots;
- Long oldNumSlots = (1LL << oldTable->lgSize);
+ u32Table* oldTable = self->surprisingValueTable;
+ U32* oldSlots = oldTable->slots;
+ const Long oldNumSlots = 1LL << oldTable->lgSize;
if (self->windowOffset != 0) throw std::logic_error("windowOffset != 0");
- for (i = 0; i < oldNumSlots; i++) {
- U32 rowCol = oldSlots[i];
+ for (Long i = 0; i < oldNumSlots; i++) {
+ const U32 rowCol = oldSlots[i];
if (rowCol != ALL32BITS) {
- Short col = (Short) (rowCol & 63);
+ const Short col = (Short) (rowCol & 63);
if (col < 8) {
- Long row = (Long) (rowCol >> 6);
+ const Long row = (Long) (rowCol >> 6);
window[row] |= (1 << col);
- }
- else {
+ } else {
// cannot use u32TableMustInsert(), because it doesn't provide for growth
- Boolean isNovel = u32TableMaybeInsert (newTable, rowCol);
- if (isNovel != 1) throw std::logic_error("isNovel != 1");
+ const bool isNovel = u32TableMaybeInsert(newTable, rowCol);
+ if (!isNovel) throw std::logic_error("isNovel != true");
}
}
}
- // fprintf (stderr, "Number of surprising values dropped from %lld to %lld\n", oldTable->numItems, newTable->numItems);
if (self->slidingWindow != NULL) throw std::logic_error("slidingWindow != NULL");
self->slidingWindow = window;
-
+
self->surprisingValueTable = newTable;
- u32TableFree (oldTable);
+ u32TableFree(oldTable);
}
-/*******************************************************/
// The KXP register is a double with roughly 50 bits of precision, but
// it might need roughly 90 bits to track the value with perfect accuracy.
// Therefore we recalculate KXP occasionally from the sketch's full bitmatrix
// so that it will reflect changes that were previously outside the mantissa.
+void refreshKXP(FM85* self, U64* bitMatrix) {
+ const Long k = 1LL << self->lgK;
-void refreshKXP (FM85 * self, U64 * bitMatrix) {
- Long k = (1LL << self->lgK);
- Long i;
- Short j;
-
- // for improved numerical accuracy, we separately sum the bytes of the U64's
- double byteSums [8]; // allocating on the stack
+ // for improved numerical accuracy, we separately sum the bytes of the U64's
+ double byteSums[8]; // allocating on the stack
- for (j = 0; j < 8; j++) { byteSums[j] = 0.0; }
+ for (Short j = 0; j < 8; j++) { byteSums[j] = 0.0; }
- for (i = 0; i < k; i++) {
+ for (Long i = 0; i < k; i++) {
U64 word = bitMatrix[i];
- for (j = 0; j < 8; j++) {
- U8 byte = word & 0xff;
+ for (Short j = 0; j < 8; j++) {
+ const U8 byte = word & 0xff;
byteSums[j] += kxpByteLookup[byte];
word >>= 8;
}
}
double total = 0.0;
- for (j = 7; j >= 0; j--) { // the reverse order is important
- double factor = invPow2Tab[8*j]; // pow (256.0, (-1.0 * ((double) j)));
+ for (Short j = 7; j >= 0; j--) { // the reverse order is important
+ double factor = invPow2Tab[8 * j]; // pow (256.0, (-1.0 * ((double) j)));
total += factor * byteSums[j];
}
- // fprintf (stderr, "%.3f\n", ((double) self->numCoupons) / k);
- // fprintf (stderr, "%.19g\told value of KXP\n", self->kxp);
- // fprintf (stderr, "%.19g\tnew value of KXP\n", total);
- // fflush (stderr);
-
self->kxp = total;
-
}
-
-/*******************************************************/
// this moves the sliding window
-
-void modifyOffset (FM85 * self, Short newOffset) {
+void modifyOffset(FM85* self, Short newOffset) {
if (newOffset < 0 || newOffset > 56) throw std::logic_error("newOffset < 0 || newOffset > 56");
if (newOffset != self->windowOffset + 1) throw std::logic_error("newOffset != windowOffset + 1");
- if (newOffset != determineCorrectOffset (self->lgK, self->numCoupons)) throw std::logic_error("newOffset is wrong");
+ if (newOffset != determineCorrectOffset(self->lgK, self->numCoupons)) throw std::logic_error("newOffset is wrong");
if (self->slidingWindow == NULL) throw std::logic_error("slidingWindow == NULL");
if (self->surprisingValueTable == NULL) throw std::logic_error("surprisingValueTable == NULL");
- Long k = (1LL << self->lgK);
+ const Long k = 1LL << self->lgK;
// Construct the full-sized bit matrix that corresponds to the sketch
- U64 * bitMatrix = bitMatrixOfSketch (self);
+ U64* bitMatrix = bitMatrixOfSketch(self);
if (bitMatrix == NULL) throw std::logic_error("bitMatrix == NULL");
// refresh the KXP register on every 8th window shift.
- if ((newOffset & 0x7) == 0) { refreshKXP (self, bitMatrix); }
+ if ((newOffset & 0x7) == 0) { refreshKXP(self, bitMatrix); }
- u32TableClear (self->surprisingValueTable); // the new number of surprises will be about the same
+ u32TableClear(self->surprisingValueTable); // the new number of surprises will be about the same
- u32Table * table = self->surprisingValueTable;
- U8 * window = self->slidingWindow;
- U64 maskForClearingWindow = (0xffULL << newOffset) ^ ALL64BITS;
- U64 maskForFlippingEarlyZone = (1ULL << newOffset) - 1;
+ u32Table* table = self->surprisingValueTable;
+ U8* window = self->slidingWindow;
+ const U64 maskForClearingWindow = (0xffULL << newOffset) ^ ALL64BITS;
+ const U64 maskForFlippingEarlyZone = (1ULL << newOffset) - 1;
U64 allSurprisesORed = 0;
- Long i = 0;
- for (i = 0; i < k; i++) {
+ for (Long i = 0; i < k; i++) {
U64 pattern = bitMatrix[i];
window[i] = (U8) ((pattern >> newOffset) & 0xff);
pattern &= maskForClearingWindow;
@@ -366,125 +328,103 @@ void modifyOffset (FM85 * self, Short newOffset) {
pattern ^= maskForFlippingEarlyZone;
allSurprisesORed |= pattern; // a cheap way to recalculate firstInterestingColumn
while (pattern != 0) {
- Short col = countTrailingZerosInUnsignedLong (pattern);
+ const Short col = countTrailingZerosInUnsignedLong(pattern);
pattern = pattern ^ (1ULL << col); // erase the 1.
- U32 rowCol = (i << 6) | col;
- Boolean isNovel = u32TableMaybeInsert (table, rowCol);
- if (isNovel != 1) throw std::logic_error("isNovel != 1");
+ const U32 rowCol = (i << 6) | col;
+ const bool isNovel = u32TableMaybeInsert(table, rowCol);
+ if (!isNovel) throw std::logic_error("isNovel != true");
}
}
- fm85free (bitMatrix);
+ fm85free(bitMatrix);
self->windowOffset = newOffset;
- self->firstInterestingColumn = countTrailingZerosInUnsignedLong (allSurprisesORed);
+ self->firstInterestingColumn = countTrailingZerosInUnsignedLong(allSurprisesORed);
if (self->firstInterestingColumn > newOffset) self->firstInterestingColumn = newOffset; // corner case
}
-
- // fprintf (stderr, "Changed the offset from %d to %d because C = %lld;",
- // (int) self->windowOffset, (int) newOffset, self->numCoupons);
- // fprintf (stderr, "\t[%d]\n", (int) self->firstInterestingColumn);
- // fflush (stderr);
-
-
-/*******************************************************/
// Call this whenever a new coupon has been collected.
-
-void updateHIP (FM85 * self, Short rowCol) {
- Long k = (1LL << self->lgK);
- Short col = (Short) (rowCol & 63);
- double oneOverP = ((double) k) / self->kxp;
+void updateHIP(FM85* self, Short rowCol) {
+ const Long k = 1LL << self->lgK;
+ const Short col = (Short) (rowCol & 63);
+ const double oneOverP = ((double) k) / self->kxp;
self->hipEstAccum += oneOverP;
self->hipErrAccum += ((oneOverP * oneOverP) - oneOverP);
self->kxp -= invPow2Tab[col+1]; // notice the "+1"
}
-/*******************************************************/
-
-double getHIPEstimate (FM85 * self) {
+double getHIPEstimate(const FM85* self) {
if (self->mergeFlag != 0) throw std::logic_error("tried to get HIP estimate of merged sketch");
return (self->hipEstAccum);
}
-/*******************************************************/
-
-void updateSparse (FM85 * self, U32 rowCol) {
- Long k = (1LL << self->lgK);
- Long c32pre = self->numCoupons << 5;
- if (c32pre >= 3*k) throw std::logic_error("c32pre >= 3*k"); // C < 3K/32, in other words flavor == SPARSE
+void updateSparse(FM85* self, U32 rowCol) {
+ const Long k = 1LL << self->lgK;
+ const Long c32pre = self->numCoupons << 5;
+ if (c32pre >= 3 * k) throw std::logic_error("c32pre >= 3 * k"); // C < 3K/32, in other words flavor == SPARSE
if (self->surprisingValueTable == NULL) throw std::logic_error("surprisingValueTable == NULL");
- Boolean isNovel = u32TableMaybeInsert (self->surprisingValueTable, rowCol);
+ bool isNovel = u32TableMaybeInsert(self->surprisingValueTable, rowCol);
if (isNovel) {
self->numCoupons += 1;
- updateHIP (self, rowCol);
- Long c32post = self->numCoupons << 5;
- if (c32post >= 3*k) { promoteSparseToWindowed (self); } // C >= 3K/32
+ updateHIP(self, rowCol);
+ const Long c32post = self->numCoupons << 5;
+ if (c32post >= 3 * k) { promoteSparseToWindowed(self); } // C >= 3K/32
}
}
-/*******************************************************/
-
// the flavor is HYBRID, PINNED, or SLIDING.
-void updateWindowed (FM85 * self, U32 rowCol) {
+void updateWindowed(FM85* self, U32 rowCol) {
if (self->windowOffset < 0 || self->windowOffset > 56) throw std::logic_error("windowOffset < 0 || windowOffset > 56");
- Long k = (1LL << self->lgK);
- Long c32pre = self->numCoupons << 5;
- if (c32pre < 3*k) throw std::logic_error("c32pre < 3*k"); // C < 3K/32, in other words flavor >= HYBRID
- Long c8pre = self->numCoupons << 3;
- Long w8pre = ((Long) self->windowOffset) << 3;
+ const Long k = 1LL << self->lgK;
+ const Long c32pre = self->numCoupons << 5;
+ if (c32pre < 3 * k) throw std::logic_error("c32pre < 3 * k"); // C < 3K/32, in other words flavor >= HYBRID
+ const Long c8pre = self->numCoupons << 3;
+ const Long w8pre = ((Long) self->windowOffset) << 3;
if (c8pre >= (27 + w8pre) * k) throw std::logic_error("c8pre is wrong"); // C < (K * 27/8) + (K * windowOffset)
- Boolean isNovel = 0;
+ bool isNovel = false;
Short col = (Short) (rowCol & 63);
if (col < self->windowOffset) { // track the surprising 0's "before" the window
- isNovel = u32TableMaybeDelete (self->surprisingValueTable, rowCol); // inverted logic
- }
- else if (col < self->windowOffset + 8) { // track the 8 bits inside the window
+ isNovel = u32TableMaybeDelete(self->surprisingValueTable, rowCol); // inverted logic
+ } else if (col < self->windowOffset + 8) { // track the 8 bits inside the window
if (col < self->windowOffset) throw std::logic_error("col < windowOffset");
- Long row = (Long) (rowCol >> 6);
- U8 oldBits = self->slidingWindow[row];
- U8 newBits = oldBits | (1 << (col - self->windowOffset));
+ const Long row = (Long) (rowCol >> 6);
+ const U8 oldBits = self->slidingWindow[row];
+ const U8 newBits = oldBits | (1 << (col - self->windowOffset));
if (newBits != oldBits) {
self->slidingWindow[row] = newBits;
- isNovel = 1;
+ isNovel = true;
}
- }
- else { // track the surprising 1's "after" the window
+ } else { // track the surprising 1's "after" the window
if (col < self->windowOffset + 8) throw std::logic_error("col < windowOffset + 8");
- isNovel = u32TableMaybeInsert (self->surprisingValueTable, rowCol); // normal logic
+ isNovel = u32TableMaybeInsert(self->surprisingValueTable, rowCol); // normal logic
}
if (isNovel) {
self->numCoupons += 1;
- updateHIP (self, rowCol);
- Long c8post = self->numCoupons << 3;
+ updateHIP(self, rowCol);
+ const Long c8post = self->numCoupons << 3;
if (c8post >= (27 + w8pre) * k) {
- modifyOffset (self, self->windowOffset + 1);
+ modifyOffset(self, self->windowOffset + 1);
if (self->windowOffset < 1 || self->windowOffset > 56) throw std::logic_error("windowOffset < 1 || windowOffset > 56");
- Long w8post = ((Long) self->windowOffset) << 3;
+ const Long w8post = ((Long) self->windowOffset) << 3;
if (c8post >= (27 + w8post) * k) throw std::logic_error("c8pre is wrong"); // C < (K * 27/8) + (K * windowOffset)
}
}
}
-/*******************************************************/
-
-void fm85RowColUpdate (FM85 * self, U32 rowCol) {
- Short col = (Short) (rowCol & 63);
+void fm85RowColUpdate(FM85* self, U32 rowCol) {
+ const Short col = (Short) (rowCol & 63);
if (col < self->firstInterestingColumn) { return; } // important speed optimization
if (self->isCompressed) std::logic_error("Cannot update a compressed sketch");
- Long c = self->numCoupons;
- if (c == 0) { promoteEmptyToSparse (self); }
- Long k = (1LL << self->lgK);
- if ((c << 5) < 3*k) { updateSparse (self, rowCol); }
- else { updateWindowed (self, rowCol); }
+ const Long c = self->numCoupons;
+ if (c == 0) { promoteEmptyToSparse(self); }
+ const Long k = 1LL << self->lgK;
+ if ((c << 5) < 3 * k) { updateSparse(self, rowCol); }
+ else { updateWindowed(self, rowCol); }
}
-
-void fm85Update (FM85 * self, U64 hash0, U64 hash1) {
- U32 rowCol = rowColFromTwoHashes (hash0, hash1, self->lgK);
- fm85RowColUpdate (self, rowCol);
+void fm85Update(FM85* self, U64 hash0, U64 hash1) {
+ fm85RowColUpdate(self, rowColFromTwoHashes(hash0, hash1, self->lgK));
}
-
diff --git a/cpc/src/fm85Compression.cpp b/cpc/src/fm85Compression.cpp
index 50e8640..dd98274 100644
--- a/cpc/src/fm85Compression.cpp
+++ b/cpc/src/fm85Compression.cpp
@@ -25,132 +25,101 @@
#include <stdexcept>
#include <new>
-/*********************************/
// The following material is in a separate file because it is so big.
-
#include "compressionData.data"
-/***************************************************************/
-/***************************************************************/
// Intentionally uses malloc instead of the custom allocator
// since it is for global initialization, not for allocating instances.
-U8 * makeInversePermutation (U8 * permu, int length) {
- U8 * inverse = (U8 *) malloc (((size_t) length) * sizeof(U8));
+U8* makeInversePermutation(const U8* permu, int length) {
+ U8* inverse = (U8*) malloc (((size_t) length) * sizeof(U8));
if (inverse == NULL) throw std::bad_alloc();
- int i;
- for (i = 0; i < length; i++) {
+ for (int i = 0; i < length; i++) {
inverse[permu[i]] = i;
}
- for (i = 0; i < length; i++) {
+ for (int i = 0; i < length; i++) {
if (permu[inverse[i]] != i) throw std::logic_error("inverse permutation error");
}
return inverse;
}
-/***************************************************************/
-/***************************************************************/
-
/* Given an encoding table that maps unsigned bytes to codewords
of length at most 12, this builds a size-4096 decoding table */
// The second argument is typically 256, but can be other values such as 65.
// Intentionally uses malloc instead of the custom allocator
// since it is for global initialization, not for allocating instances.
-U16 * makeDecodingTable (U16 * encodingTable, int numByteValues) {
- int byteValue;
- U16 * decodingTable = (U16 *) malloc (((size_t) 4096) * sizeof(U16));
+U16* makeDecodingTable(const U16* encodingTable, int numByteValues) {
+ U16* decodingTable = (U16*) malloc(((size_t) 4096) * sizeof(U16));
if (decodingTable == NULL) throw std::bad_alloc();
- for (byteValue=0; byteValue < numByteValues; byteValue++) {
- int encodingEntry = encodingTable [byteValue];
- int codeValue = encodingEntry & 0xfff;
- int codeLength = encodingEntry >> 12;
- int decodingEntry = (codeLength << 8) | byteValue;
- int garbageLength = 12 - codeLength;
- int numCopies = 1 << garbageLength;
- int garbageBits;
- for (garbageBits = 0; garbageBits < numCopies; garbageBits++) {
- int extendedCodeValue = codeValue | (garbageBits << codeLength);
+ for (int byteValue = 0; byteValue < numByteValues; byteValue++) {
+ const int encodingEntry = encodingTable [byteValue];
+ const int codeValue = encodingEntry & 0xfff;
+ const int codeLength = encodingEntry >> 12;
+ const int decodingEntry = (codeLength << 8) | byteValue;
+ const int garbageLength = 12 - codeLength;
+ const int numCopies = 1 << garbageLength;
+ for (int garbageBits = 0; garbageBits < numCopies; garbageBits++) {
+ const int extendedCodeValue = codeValue | (garbageBits << codeLength);
decodingTable[extendedCodeValue & 0xfff] = decodingEntry;
}
}
- return (decodingTable);
+ return decodingTable;
}
-/***************************************************************/
-/***************************************************************/
-
-void validateDecodingTable (U16 * decodingTable, U16 * encodingTable) {
- int decodeThis;
-
- for (decodeThis = 0; decodeThis < 4096; decodeThis++) {
-
- int tmpD = decodingTable[decodeThis];
- int decodedByte = tmpD & 0xff;
- int decodedLength = tmpD >> 8;
+void validateDecodingTable(const U16* decodingTable, const U16* encodingTable) {
+ for (int decodeThis = 0; decodeThis < 4096; decodeThis++) {
+ const int tmpD = decodingTable[decodeThis];
+ const int decodedByte = tmpD & 0xff;
+ const int decodedLength = tmpD >> 8;
- int tmpE = encodingTable[decodedByte];
- int encodedBitpattern = tmpE & 0xfff;
- int encodedLength = tmpE >> 12;
-
- // encodedBitpattern++; // uncomment this line to test the test
- // encodedLength++; // uncomment this line to test the test
+ const int tmpE = encodingTable[decodedByte];
+ const int encodedBitpattern = tmpE & 0xfff;
+ const int encodedLength = tmpE >> 12;
if (decodedLength != encodedLength) throw std::logic_error("decoded length error");
if (encodedBitpattern != (decodeThis & ((1 << decodedLength) - 1))) throw std::logic_error("bit pattern error");
}
-
}
-/***************************************************************/
-/***************************************************************/
+void makeTheDecodingTables() {
+ lengthLimitedUnaryDecodingTable65 = makeDecodingTable(lengthLimitedUnaryEncodingTable65, 65);
+ validateDecodingTable(lengthLimitedUnaryDecodingTable65, lengthLimitedUnaryEncodingTable65);
-void makeTheDecodingTables (void) {
- int i;
- lengthLimitedUnaryDecodingTable65 = makeDecodingTable (lengthLimitedUnaryEncodingTable65, 65);
- validateDecodingTable (lengthLimitedUnaryDecodingTable65, lengthLimitedUnaryEncodingTable65);
-
- for (i = 0; i < (16 + 6); i++) {
+ for (int i = 0; i < (16 + 6); i++) {
decodingTablesForHighEntropyByte[i] = makeDecodingTable(encodingTablesForHighEntropyByte[i], 256);
- validateDecodingTable (decodingTablesForHighEntropyByte[i], encodingTablesForHighEntropyByte[i]);
+ validateDecodingTable(decodingTablesForHighEntropyByte[i], encodingTablesForHighEntropyByte[i]);
}
- for (i = 0; i < 16; i++) {
- columnPermutationsForDecoding[i] = makeInversePermutation(columnPermutationsForEncoding[i],56);
+ for (int i = 0; i < 16; i++) {
+ columnPermutationsForDecoding[i] = makeInversePermutation(columnPermutationsForEncoding[i], 56);
}
-
- // fprintf (stderr, "tables okay\n"); fflush (stderr);
}
-void freeTheDecodingTables (void) {
- int i;
+void freeTheDecodingTables() {
free(lengthLimitedUnaryDecodingTable65);
- for (i = 0; i < (16 + 6); i++) {
+ for (int i = 0; i < (16 + 6); i++) {
free(decodingTablesForHighEntropyByte[i]);
}
- for (i = 0; i < 16; i++) {
+ for (int i = 0; i < 16; i++) {
free(columnPermutationsForDecoding[i]);
}
}
-/***************************************************************/
-/***************************************************************/
-
-static inline void writeUnary (U32 * compressedWords,
- Long * nextWordIndexPtr,
- U64 * bitbufPtr,
- int * bufbitsPtr,
- Long the_value)
+static inline void writeUnary(
+ U32* compressedWords,
+ Long* nextWordIndexPtr,
+ U64* bitbufPtr,
+ int* bufbitsPtr,
+ Long the_value)
{
- // printf("%ld writing\n", the_value);
-
if (compressedWords == NULL) throw std::logic_error("compressedWords == NULL");
if (nextWordIndexPtr == NULL) throw std::logic_error("nextWordIndexPtr == NULL");
if (bitbufPtr == NULL) throw std::logic_error("bitbufPtr == NULL");
if (bufbitsPtr == NULL) throw std::logic_error("bufbitsPtr == NULL");
Long nextWordIndex = *nextWordIndexPtr;
- U64 bitbuf = *bitbufPtr;
- int bufbits = *bufbitsPtr;
+ U64 bitbuf = *bitbufPtr;
+ int bufbits = *bufbitsPtr;
if (bufbits < 0 || bufbits > 31) throw std::out_of_range("bufbits out of range");
@@ -161,29 +130,25 @@ static inline void writeUnary (U32 * compressedWords,
// Here we output 16 zeros, but we don't need to physically write them into bitbuf
// because it already contains zeros in that region.
bufbits += 16; // Record the fact that 16 bits of output have occurred.
- MAYBE_FLUSH_BITBUF(compressedWords,nextWordIndex);
+ MAYBE_FLUSH_BITBUF(compressedWords, nextWordIndex);
}
if (remaining < 0 || remaining > 15) throw std::out_of_range("remaining out of range");
- U64 theUnaryCode = 1ULL << remaining;
+ const U64 theUnaryCode = 1ULL << remaining;
bitbuf |= theUnaryCode << bufbits;
bufbits += (1 + remaining);
- MAYBE_FLUSH_BITBUF(compressedWords,nextWordIndex);
+ MAYBE_FLUSH_BITBUF(compressedWords, nextWordIndex);
*nextWordIndexPtr = nextWordIndex;
*bitbufPtr = bitbuf;
*bufbitsPtr = bufbits;
-
}
-/***************************************************************/
-/***************************************************************/
-
-static inline Long readUnary (U32 * compressedWords,
- Long * nextWordIndexPtr,
- U64 * bitbufPtr,
- int * bufbitsPtr)
+static inline Long readUnary(const U32* compressedWords,
+ Long* nextWordIndexPtr,
+ U64* bitbufPtr,
+ int* bufbitsPtr)
{
if (compressedWords == NULL) throw std::logic_error("compressedWords == NULL");
if (nextWordIndexPtr == NULL) throw std::logic_error("nextWordIndexPtr == NULL");
@@ -195,17 +160,12 @@ static inline Long readUnary (U32 * compressedWords,
int bufbits = *bufbitsPtr;
Long subTotal = 0;
- readUnaryLoop:
-
- MAYBE_FILL_BITBUF(compressedWords,nextWordIndex,8); // ensure 8 bits in bit buffer
+ readUnaryLoop:
- // if (bufbits < 8) { // Prepare for an 8-bit peek into the bitstream.
- // bitbuf |= (((U64) compressedWords[nextWordIndex++]) << bufbits);
- // bufbits += 32;
- // }
+ MAYBE_FILL_BITBUF(compressedWords, nextWordIndex, 8); // ensure 8 bits in bit buffer
- int peek8 = bitbuf & 0xffULL; // These 8 bits include either all or part of the Unary codeword.
- int trailingZeros = byteTrailingZerosTable[peek8];
+ const int peek8 = bitbuf & 0xffULL; // These 8 bits include either all or part of the Unary codeword.
+ const int trailingZeros = byteTrailingZerosTable[peek8];
if (trailingZeros < 0 || trailingZeros > 8) throw std::out_of_range("trailingZeros out of range");
@@ -216,27 +176,21 @@ static inline Long readUnary (U32 * compressedWords,
goto readUnaryLoop;
}
- bufbits -= (1+trailingZeros);
- bitbuf >>= (1+trailingZeros);
+ bufbits -= (1 + trailingZeros);
+ bitbuf >>= (1 + trailingZeros);
*nextWordIndexPtr = nextWordIndex;
*bitbufPtr = bitbuf;
*bufbitsPtr = bufbits;
- // printf("%ld READING\n", subTotal+trailingZeros); fflush (stdout);
-
- return (subTotal+trailingZeros);
-
+ return subTotal + trailingZeros;
}
-/***************************************************************/
-/***************************************************************/
// This returns the number of compressedWords that were actually used.
// It is the caller's responsibility to ensure that the compressedWords array is long enough.
-
-Long lowLevelCompressBytes (U8 * byteArray, // input
- Long numBytesToEncode, // input
- U16 * encodingTable, // input
- U32 * compressedWords) { // output
+Long lowLevelCompressBytes(const U8* byteArray, // input
+ Long numBytesToEncode, // input
+ const U16* encodingTable, // input
+ U32* compressedWords) { // output
Long byteIndex = 0;
Long nextWordIndex = 0;
@@ -245,17 +199,17 @@ Long lowLevelCompressBytes (U8 * byteArray, // input
int bufbits = 0; /* number of bits currently in bitbuf; must be between 0 and 31 */
for (byteIndex = 0; byteIndex < numBytesToEncode; byteIndex++) {
- U64 codeInfo = (U64) encodingTable[byteArray[byteIndex]];
- U64 codeVal = codeInfo & 0xfff;
- int codeLen = codeInfo >> 12;
+ const U64 codeInfo = (U64) encodingTable[byteArray[byteIndex]];
+ const U64 codeVal = codeInfo & 0xfff;
+ const int codeLen = codeInfo >> 12;
bitbuf |= (codeVal << bufbits);
bufbits += codeLen;
- MAYBE_FLUSH_BITBUF(compressedWords,nextWordIndex);
+ MAYBE_FLUSH_BITBUF(compressedWords, nextWordIndex);
}
-// Pad the bitstream with 11 zero-bits so that the decompressor's 12-bit peek can't overrun its input.
+ // Pad the bitstream with 11 zero-bits so that the decompressor's 12-bit peek can't overrun its input.
bufbits += 11;
- MAYBE_FLUSH_BITBUF(compressedWords,nextWordIndex);
+ MAYBE_FLUSH_BITBUF(compressedWords, nextWordIndex);
if (bufbits > 0) { // We are done encoding now, so we flush the bit buffer.
if (bufbits >= 32) throw std::logic_error("bufbits >= 32");
@@ -265,39 +219,28 @@ Long lowLevelCompressBytes (U8 * byteArray, // input
return nextWordIndex;
}
-/***************************************************************/
-/***************************************************************/
-
-void lowLevelUncompressBytes (U8 * byteArray, // output
- Long numBytesToDecode, // input (but refers to the output)
- U16 * decodingTable, // input
- U32 * compressedWords, // input
- Long numCompressedWords) { // input
+void lowLevelUncompressBytes(U8* byteArray, // output
+ Long numBytesToDecode, // input (but refers to the output)
+ const U16* decodingTable, // input
+ const U32* compressedWords, // input
+ Long numCompressedWords) { // input
Long byteIndex = 0;
Long wordIndex = 0;
U64 bitbuf = 0;
int bufbits = 0;
- // printf ("Y\n"); fflush (stdout);
-
if (byteArray == NULL) throw std::logic_error("byteArray == NULL");
if (decodingTable == NULL) throw std::logic_error("decodingTable == NULL");
if (compressedWords == NULL) throw std::logic_error("compressedWords == NULL");
for (byteIndex = 0; byteIndex < numBytesToDecode; byteIndex++) {
+ MAYBE_FILL_BITBUF(compressedWords, wordIndex, 12); // ensure 12 bits in bit buffer
- // if (bufbits < 12) { // Prepare for a 12-bit peek into the bitstream.
- // bitbuf |= (((U64) compressedWords[wordIndex++]) << bufbits);
- // bufbits += 32;
- // }
-
- MAYBE_FILL_BITBUF(compressedWords,wordIndex,12); // ensure 12 bits in bit buffer
-
- int peek12 = bitbuf & 0xfffULL; // These 12 bits will include an entire Huffman codeword.
- int lookup = decodingTable[peek12];
- int codeWordLength = lookup >> 8;
- U8 decodedByte = lookup & 0xff;
+ const int peek12 = bitbuf & 0xfffULL; // These 12 bits will include an entire Huffman codeword.
+ const int lookup = decodingTable[peek12];
+ const int codeWordLength = lookup >> 8;
+ const U8 decodedByte = lookup & 0xff;
byteArray[byteIndex] = decodedByte;
bitbuf >>= codeWordLength;
bufbits -= codeWordLength;
@@ -305,71 +248,64 @@ void lowLevelUncompressBytes (U8 * byteArray, // output
// Buffer over-run should be impossible unless there is a bug.
// However, we might as well check here.
if (wordIndex > numCompressedWords) throw std::logic_error("wordIndex > numCompressedWords");
-
- // printf ("X\n"); fflush (stdout);
-
- return;
}
-/***************************************************************/
-/***************************************************************/
-
// Here "pairs" refers to row/column pairs that specify
// the positions of surprising values in the bit matrix.
// returns the number of compressedWords actually used
-Long lowLevelCompressPairs (U32 * pairArray, // input
- Long numPairsToEncode, // input
- Long numBaseBits, // input
- U32 * compressedWords) { // output
+Long lowLevelCompressPairs(const U32* pairArray, // input
+ Long numPairsToEncode, // input
+ Long numBaseBits, // input
+ U32* compressedWords) { // output
Long pairIndex = 0;
Long nextWordIndex = 0;
U64 bitbuf = 0;
int bufbits = 0;
- Long golombLoMask = (1LL << numBaseBits) - 1;
+ const Long golombLoMask = (1LL << numBaseBits) - 1;
Long predictedRowIndex = 0;
Short predictedColIndex = 0;
for (pairIndex = 0; pairIndex < numPairsToEncode; pairIndex++) {
- U32 rowCol = pairArray[pairIndex];
- Long rowIndex = (Long) (rowCol >> 6);
- Short colIndex = (Short) (rowCol & 63);
+ const U32 rowCol = pairArray[pairIndex];
+ const Long rowIndex = (Long) (rowCol >> 6);
+ const Short colIndex = (Short) (rowCol & 63);
if (rowIndex != predictedRowIndex) { predictedColIndex = 0; }
if (rowIndex < predictedRowIndex) throw std::logic_error("rowIndex < predictedRowIndex");
if (colIndex < predictedColIndex) throw std::logic_error("colIndex < predictedColIndex");
- Long yDelta = rowIndex - predictedRowIndex;
- Short xDelta = colIndex - predictedColIndex;
+ const Long yDelta = rowIndex - predictedRowIndex;
+ const Short xDelta = colIndex - predictedColIndex;
predictedRowIndex = rowIndex;
predictedColIndex = colIndex + 1;
- U64 codeInfo = (U64) lengthLimitedUnaryEncodingTable65[xDelta];
- U64 codeVal = codeInfo & 0xfff;
- int codeLen = codeInfo >> 12;
+ const U64 codeInfo = (U64) lengthLimitedUnaryEncodingTable65[xDelta];
+ const U64 codeVal = codeInfo & 0xfff;
+ const int codeLen = codeInfo >> 12;
bitbuf |= (codeVal << bufbits);
bufbits += codeLen;
- MAYBE_FLUSH_BITBUF(compressedWords,nextWordIndex);
+ MAYBE_FLUSH_BITBUF(compressedWords, nextWordIndex);
- Long golombLo = yDelta & golombLoMask;
- Long golombHi = yDelta >> numBaseBits;
+ const Long golombLo = yDelta & golombLoMask;
+ const Long golombHi = yDelta >> numBaseBits;
- writeUnary (compressedWords, &nextWordIndex, &bitbuf, &bufbits, golombHi);
+ writeUnary(compressedWords, &nextWordIndex, &bitbuf, &bufbits, golombHi);
bitbuf |= golombLo << bufbits;
bufbits += numBaseBits;
- MAYBE_FLUSH_BITBUF(compressedWords,nextWordIndex);
+ MAYBE_FLUSH_BITBUF(compressedWords, nextWordIndex);
}
// Pad the bitstream so that the decompressor's 12-bit peek can't overrun its input.
Long padding = 10LL - numBaseBits; // should be 10LL
if (padding < 0) padding = 0;
bufbits += padding;
- MAYBE_FLUSH_BITBUF(compressedWords,nextWordIndex);
+ MAYBE_FLUSH_BITBUF(compressedWords, nextWordIndex);
if (bufbits > 0) { // We are done encoding now, so we flush the bit buffer.
if (bufbits >= 32) throw std::logic_error("bufbits >= 32");
@@ -377,23 +313,19 @@ Long lowLevelCompressPairs (U32 * pairArray, // input
bitbuf = 0; bufbits = 0; // not really necessary
}
return nextWordIndex;
-
}
-/***************************************************************/
-/***************************************************************/
-
-void lowLevelUncompressPairs (U32 * pairArray, // output
- Long numPairsToDecode, // input (but refers to the output)
- Long numBaseBits, // input
- U32 * compressedWords, // input
- Long numCompressedWords) { // input
+void lowLevelUncompressPairs(U32* pairArray, // output
+ Long numPairsToDecode, // input (but refers to the output)
+ Long numBaseBits, // input
+ const U32* compressedWords, // input
+ Long numCompressedWords) { // input
Long pairIndex = 0;
Long wordIndex = 0;
U64 bitbuf = 0;
int bufbits = 0;
- Long golombLoMask = (1LL << numBaseBits) - 1;
+ const Long golombLoMask = (1LL << numBaseBits) - 1;
Long predictedRowIndex = 0;
Short predictedColIndex = 0;
@@ -404,52 +336,45 @@ void lowLevelUncompressPairs (U32 * pairArray, // output
// yDeltaLo (basebits)
for (pairIndex = 0; pairIndex < numPairsToDecode; pairIndex++) {
-
- MAYBE_FILL_BITBUF(compressedWords,wordIndex,12); // ensure 12 bits in bit buffer
- int peek12 = bitbuf & 0xfffULL;
- int lookup = lengthLimitedUnaryDecodingTable65[peek12];
- int codeWordLength = lookup >> 8;
- Short xDelta = lookup & 0xff;
+ MAYBE_FILL_BITBUF(compressedWords, wordIndex, 12); // ensure 12 bits in bit buffer
+ const int peek12 = bitbuf & 0xfffULL;
+ const int lookup = lengthLimitedUnaryDecodingTable65[peek12];
+ const int codeWordLength = lookup >> 8;
+ const Short xDelta = lookup & 0xff;
bitbuf >>= codeWordLength;
bufbits -= codeWordLength;
- Long golombHi = readUnary (compressedWords, &wordIndex, &bitbuf, &bufbits);
+ const Long golombHi = readUnary(compressedWords, &wordIndex, &bitbuf, &bufbits);
- MAYBE_FILL_BITBUF(compressedWords,wordIndex,numBaseBits); // ensure numBaseBits in bit buffer
- Long golombLo = bitbuf & golombLoMask;
+ MAYBE_FILL_BITBUF(compressedWords, wordIndex, numBaseBits); // ensure numBaseBits in bit buffer
+ const Long golombLo = bitbuf & golombLoMask;
bitbuf >>= numBaseBits;
bufbits -= numBaseBits;
- Long yDelta = (golombHi << numBaseBits) | golombLo;
+ const Long yDelta = (golombHi << numBaseBits) | golombLo;
// Now that we have yDelta and xDelta, we can compute the pair's row and column.
if (yDelta > 0) { predictedColIndex = 0; }
- Long rowIndex = predictedRowIndex + yDelta;
- Short colIndex = predictedColIndex + xDelta;
- U32 rowCol = (rowIndex << 6) | colIndex;
+ const Long rowIndex = predictedRowIndex + yDelta;
+ const Short colIndex = predictedColIndex + xDelta;
+ const U32 rowCol = (rowIndex << 6) | colIndex;
pairArray[pairIndex] = rowCol;
predictedRowIndex = rowIndex;
predictedColIndex = colIndex + 1;
-
}
if (wordIndex > numCompressedWords) throw std::logic_error("wordIndex > numCompressedWords"); // check for buffer over-run
}
-
-/***************************************************************/
-/***************************************************************/
-
-Long safeLengthForCompressedPairBuf (Long k, Long numPairs, Long numBaseBits) {
+Long safeLengthForCompressedPairBuf(Long k, Long numPairs, Long numBaseBits) {
if (numPairs <= 0) throw std::invalid_argument("numPairs <= 0");
// Long ybits = k + numPairs; // simpler and safer UB
// The following tighter UB on ybits is based on page 198
// of the textbook "Managing Gigabytes" by Witten, Moffat, and Bell.
// Notice that if numBaseBits == 0 it coincides with (k + numPairs).
- Long ybits = numPairs * (1LL + numBaseBits) + (k >> numBaseBits);
- Long xbits = 12 * numPairs;
+ const Long ybits = numPairs * (1LL + numBaseBits) + (k >> numBaseBits);
+ const Long xbits = 12 * numPairs;
Long padding = 10LL - numBaseBits;
if (padding < 0) padding = 0;
- Long bits = xbits + ybits + padding;
- return (divideLongsRoundingUp(bits, 32));
+ return divideLongsRoundingUp(xbits + ybits + padding, 32);
}
// Explanation of padding: we write
@@ -459,172 +384,136 @@ Long safeLengthForCompressedPairBuf (Long k, Long numPairs, Long numBaseBits) {
// So the 12-bit lookahead is the tight constraint, but there are at least (2 + B) bits emitted,
// so we would be safe with max (0, 10 - B) bits of padding at the end of the bitstream.
-/***************************************************************/
-
-Long safeLengthForCompressedWindowBuf (Long k) { // measured in 32-bit words
+Long safeLengthForCompressedWindowBuf(Long k) { // measured in 32-bit words
Long bits = 12 * k + 11; // 11 bits of padding, due to 12-bit lookahead, with 1 bit certainly present.
- return (divideLongsRoundingUp(bits, 32));
+ return divideLongsRoundingUp(bits, 32);
}
-/***************************************************************/
-/***************************************************************/
-
-Short determinePseudoPhase (Short lgK, Long c) {
- Long k = (1LL << lgK);
- // This midrange logic produces pseudo-phases. They are used to select encoding tables.
- // The thresholds were chosen by hand after looking at plots of measured compression.
+Short determinePseudoPhase(Short lgK, Long c) {
+ const Long k = 1LL << lgK;
+ // This mid-range logic produces pseudo-phases. They are used to select encoding tables.
+ // The thresholds were chosen by hand after looking at plots of measured compression.
if (1000 * c < 2375 * k) {
- if ( 4 * c < 3 * k) return ( 16 + 0 ); // midrange table
- else if ( 10 * c < 11 * k) return ( 16 + 1 ); // midrange table
- else if ( 100 * c < 132 * k) return ( 16 + 2 ); // midrange table
- else if ( 3 * c < 5 * k) return ( 16 + 3 ); // midrange table
- else if (1000 * c < 1965 * k) return ( 16 + 4 ); // midrange table
- else if (1000 * c < 2275 * k) return ( 16 + 5 ); // midrange table
- else return ( 6 ); // steady-state table employed before its actual phase
- }
- else { // This steady-state logic produces true phases. They are used to select
+ if ( 4 * c < 3 * k) return 16 + 0; // mid-range table
+ else if ( 10 * c < 11 * k) return 16 + 1; // mid-range table
+ else if ( 100 * c < 132 * k) return 16 + 2; // mid-range table
+ else if ( 3 * c < 5 * k) return 16 + 3; // mid-range table
+ else if (1000 * c < 1965 * k) return 16 + 4; // mid-range table
+ else if (1000 * c < 2275 * k) return 16 + 5; // mid-range table
+ else return 6; // steady-state table employed before its actual phase
+ } else { // This steady-state logic produces true phases. They are used to select
// encoding tables, and also column permutations for the "Sliding" flavor.
if (lgK < 4) throw std::logic_error("lgK < 4");
- Long tmp = c >> (lgK - 4);
- Long phase = tmp & 15;
+ const Long tmp = c >> (lgK - 4);
+ const Long phase = tmp & 15;
if (phase < 0 || phase >= 16) throw std::out_of_range("wrong phase");
- return ((Short) phase);
+ return (Short) phase;
}
}
-/***************************************************************/
-/***************************************************************/
-
-void compressTheWindow (FM85 * target, FM85 * source) {
- Long k = (1LL << source->lgK);
- Long windowBufLen = safeLengthForCompressedWindowBuf (k);
- U32 * windowBuf = (U32 *) fm85alloc ((size_t) (windowBufLen * sizeof(U32)));
+void compressTheWindow(FM85* target, FM85* source) {
+ const Long k = 1LL << source->lgK;
+ const Long windowBufLen = safeLengthForCompressedWindowBuf (k);
+ U32* windowBuf = (U32 *) fm85alloc ((size_t) (windowBufLen * sizeof(U32)));
if (windowBuf == NULL) throw std::logic_error("windowBuf == NULL");
- Short pseudoPhase = determinePseudoPhase (source->lgK, source->numCoupons);
- target->cwLength = lowLevelCompressBytes (source->slidingWindow, k,
+ const Short pseudoPhase = determinePseudoPhase(source->lgK, source->numCoupons);
+ target->cwLength = lowLevelCompressBytes(source->slidingWindow, k,
encodingTablesForHighEntropyByte[pseudoPhase],
windowBuf);
// At this point we free the unused portion of the compression output buffer.
// Note: realloc caused strange timing spikes for lgK = 11 and 12.
- U32 * shorterBuf = (U32 *) fm85alloc (((size_t) target->cwLength) * sizeof(U32));
+ U32* shorterBuf = (U32*) fm85alloc(((size_t) target->cwLength) * sizeof(U32));
if (shorterBuf == NULL) throw std::bad_alloc();
- memcpy ((void *) shorterBuf, (void *) windowBuf, ((size_t) target->cwLength) * sizeof(U32));
- fm85free (windowBuf);
+ memcpy((void*) shorterBuf, (void*) windowBuf, ((size_t) target->cwLength) * sizeof(U32));
+ fm85free(windowBuf);
target->compressedWindow = shorterBuf;
-
- return;
}
-/***************************************************************/
-/***************************************************************/
-
-void uncompressTheWindow (FM85 * target, FM85 * source) {
- Long k = (1LL << source->lgK);
- U8 * window = (U8 *) fm85alloc ((size_t) (k * sizeof(U8)));
+void uncompressTheWindow(FM85* target, FM85* source) {
+ const Long k = 1LL << source->lgK;
+ U8* window = (U8*) fm85alloc((size_t) (k * sizeof(U8)));
if (window == NULL) throw std::bad_alloc();
// zeroing not needed here (unlike the Hybrid Flavor)
if (target->slidingWindow != NULL) throw std::logic_error("target->slidingWindow != NULL");
target->slidingWindow = window;
- Short pseudoPhase = determinePseudoPhase (source->lgK, source->numCoupons);
+ const Short pseudoPhase = determinePseudoPhase(source->lgK, source->numCoupons);
if (source->compressedWindow == NULL) throw std::logic_error("source->compressedWindow == NULL");
- lowLevelUncompressBytes (target->slidingWindow, k,
+ lowLevelUncompressBytes(target->slidingWindow, k,
decodingTablesForHighEntropyByte[pseudoPhase],
source->compressedWindow,
source->cwLength);
- return;
}
-/***************************************************************/
-/***************************************************************/
-
-void compressTheSurprisingValues (FM85 * target, FM85 * source, U32 * pairs, Long numPairs) {
+void compressTheSurprisingValues(FM85* target, FM85* source, U32* pairs, Long numPairs) {
if (numPairs <= 0) throw std::invalid_argument("numPairs <= 0");
target->numCompressedSurprisingValues = numPairs;
- Long k = (1LL << source->lgK);
- Long numBaseBits = golombChooseNumberOfBaseBits (k + numPairs, numPairs);
- Long pairBufLen = safeLengthForCompressedPairBuf (k, numPairs, numBaseBits);
- U32 * pairBuf = (U32 *) fm85alloc ((size_t) (pairBufLen * sizeof(U32)));
+ const Long k = 1LL << source->lgK;
+ const Long numBaseBits = golombChooseNumberOfBaseBits(k + numPairs, numPairs);
+ const Long pairBufLen = safeLengthForCompressedPairBuf(k, numPairs, numBaseBits);
+ U32* pairBuf = (U32*) fm85alloc((size_t) (pairBufLen * sizeof(U32)));
if (pairBuf == NULL) throw std::bad_alloc();
- target->csvLength = lowLevelCompressPairs (pairs, numPairs, numBaseBits, pairBuf);
+ target->csvLength = lowLevelCompressPairs(pairs, numPairs, numBaseBits, pairBuf);
// At this point we free the unused portion of the compression output buffer.
// Note: realloc caused strange timing spikes for lgK = 11 and 12.
- U32 * shorterBuf = (U32 *) fm85alloc (((size_t) target->csvLength) * sizeof(U32));
+ U32* shorterBuf = (U32*) fm85alloc(((size_t) target->csvLength) * sizeof(U32));
if (shorterBuf == NULL) throw std::bad_alloc();
- memcpy ((void *) shorterBuf, (void *) pairBuf, ((size_t) target->csvLength) * sizeof(U32));
- fm85free (pairBuf);
+ memcpy((void*) shorterBuf, (void*) pairBuf, ((size_t) target->csvLength) * sizeof(U32));
+ fm85free(pairBuf);
target->compressedSurprisingValues = shorterBuf;
}
-/***************************************************************/
-/***************************************************************/
// allocates and returns an array of uncompressed pairs.
// the length of this array is known to the source sketch.
-
-U32 * uncompressTheSurprisingValues (FM85 * source) {
- if (source->isCompressed != 1) throw std::logic_error("not compressed");
- Long k = (1LL << source->lgK);
- Long numPairs = source->numCompressedSurprisingValues;
+U32* uncompressTheSurprisingValues(FM85* source) {
+ if (!source->isCompressed) throw std::logic_error("not compressed");
+ const Long k = 1LL << source->lgK;
+ const Long numPairs = source->numCompressedSurprisingValues;
if (numPairs <= 0) throw std::logic_error("numPairs <= 0");
- U32 * pairs = (U32 *) fm85alloc ((size_t) numPairs * sizeof(U32));
+ U32 * pairs = (U32 *) fm85alloc((size_t) numPairs * sizeof(U32));
if (pairs == NULL) throw std::bad_alloc();
- Long numBaseBits = golombChooseNumberOfBaseBits (k + numPairs, numPairs);
+ Long numBaseBits = golombChooseNumberOfBaseBits(k + numPairs, numPairs);
lowLevelUncompressPairs(pairs, numPairs, numBaseBits,
source->compressedSurprisingValues, source->csvLength);
- return (pairs);
+ return pairs;
}
-/***************************************************************/
-/***************************************************************/
-
-void compressEmptyFlavor (FM85 * target, FM85 * source) {
+void compressEmptyFlavor(FM85* target, FM85* source) {
return; // nothing to do, so just return
}
-/***************************************************************/
-
-void uncompressEmptyFlavor (FM85 * target, FM85 * source) {
+void uncompressEmptyFlavor(FM85* target, FM85* source) {
return; // nothing to do, so just return
}
-/***************************************************************/
-/***************************************************************/
-
-void compressSparseFlavor (FM85 * target, FM85 * source) {
+void compressSparseFlavor(FM85* target, FM85* source) {
if (source->slidingWindow != NULL) throw std::logic_error("source->slidingWindow != NULL"); // there is no window to compress
Long numPairs = 0;
- U32 * pairs = u32TableUnwrappingGetItems (source->surprisingValueTable, &numPairs);
- introspectiveInsertionSort(pairs, 0, numPairs-1);
- compressTheSurprisingValues (target, source, pairs, numPairs);
- if (pairs) fm85free (pairs);
- return;
+ U32* pairs = u32TableUnwrappingGetItems(source->surprisingValueTable, &numPairs);
+ introspectiveInsertionSort(pairs, 0, numPairs - 1);
+ compressTheSurprisingValues(target, source, pairs, numPairs);
+ if (pairs) fm85free(pairs);
}
-/***************************************************************/
-
-void uncompressSparseFlavor (FM85 * target, FM85 * source) {
+void uncompressSparseFlavor(FM85* target, FM85* source) {
if (source->compressedWindow != NULL) throw std::logic_error("source->compressedWindow != NULL");
if (source->compressedSurprisingValues == NULL) throw std::logic_error("source->compressedSurprisingValues == NULL");
- U32 * pairs = uncompressTheSurprisingValues (source);
- Long numPairs = source->numCompressedSurprisingValues;
- u32Table * table = makeU32TableFromPairsArray (pairs, numPairs, source->lgK);
+ U32* pairs = uncompressTheSurprisingValues (source);
+ const Long numPairs = source->numCompressedSurprisingValues;
+ u32Table* table = makeU32TableFromPairsArray(pairs, numPairs, source->lgK);
target->surprisingValueTable = table;
- fm85free (pairs);
- return;
+ fm85free(pairs);
}
-/***************************************************************/
-/***************************************************************/
// The empty space that this leaves at the beginning of the output array
// will be filled in later by the caller.
-
-U32 * trickyGetPairsFromWindow (U8 * window, Long k, Long numPairsToGet, Long emptySpace) {
- Long outputLength = emptySpace + numPairsToGet;
- U32 * pairs = (U32 *) fm85alloc ((size_t) (outputLength * sizeof(U32)));
+U32* trickyGetPairsFromWindow(U8* window, Long k, Long numPairsToGet, Long emptySpace) {
+ const Long outputLength = emptySpace + numPairsToGet;
+ U32* pairs = (U32*) fm85alloc((size_t) (outputLength * sizeof(U32)));
if (pairs == NULL) throw std::bad_alloc();
Long rowIndex = 0;
Long pairIndex = emptySpace;
@@ -638,66 +527,56 @@ U32 * trickyGetPairsFromWindow (U8 * window, Long k, Long numPairsToGet, Long em
}
}
if (pairIndex != outputLength) throw std::logic_error("pairIndex != outputLength");
- return (pairs);
+ return pairs;
}
-/***************************************************************/
// This is complicated because it effectively builds a Sparse version
// of a Pinned sketch before compressing it. Hence the name Hybrid.
-
-void compressHybridFlavor (FM85 * target, FM85 * source) {
- // Long i;
- Long k = (1LL << source->lgK);
+void compressHybridFlavor(FM85* target, FM85* source) {
+ const Long k = 1LL << source->lgK;
Long numPairsFromTable = 0;
- U32 * pairsFromTable = u32TableUnwrappingGetItems (source->surprisingValueTable, &numPairsFromTable);
+ U32* pairsFromTable = u32TableUnwrappingGetItems(source->surprisingValueTable, &numPairsFromTable);
introspectiveInsertionSort(pairsFromTable, 0, numPairsFromTable-1);
if (source->slidingWindow == NULL) throw std::logic_error("source->slidingWindow == NULL");
if (source->windowOffset != 0) throw std::logic_error("source->windowOffset != 0");
- Long numPairsFromArray = source->numCoupons - numPairsFromTable; // because the window offset is zero
+ const Long numPairsFromArray = source->numCoupons - numPairsFromTable; // because the window offset is zero
- U32 * allPairs = trickyGetPairsFromWindow (source->slidingWindow, k, numPairsFromArray, numPairsFromTable);
+ U32* allPairs = trickyGetPairsFromWindow(source->slidingWindow, k, numPairsFromArray, numPairsFromTable);
- u32Merge (pairsFromTable, 0, numPairsFromTable,
+ u32Merge(pairsFromTable, 0, numPairsFromTable,
allPairs, numPairsFromTable, numPairsFromArray,
allPairs, 0); // note the overlapping subarray trick
- // for (i = 0; i < source->numCoupons-1; i++) { assert (allPairs[i] < allPairs[i+1]); }
-
- compressTheSurprisingValues (target, source, allPairs, source->numCoupons);
- if (pairsFromTable) fm85free (pairsFromTable);
- fm85free (allPairs);
- return;
+ compressTheSurprisingValues(target, source, allPairs, source->numCoupons);
+ if (pairsFromTable) fm85free(pairsFromTable);
+ fm85free(allPairs);
}
-/***************************************************************/
-
-void uncompressHybridFlavor (FM85 * target, FM85 * source) {
+void uncompressHybridFlavor(FM85* target, FM85* source) {
if (source->compressedWindow != NULL) throw std::logic_error("source->compressedWindow != NULL");
if (source->compressedSurprisingValues == NULL) throw std::logic_error("source->compressedSurprisingValues == NULL");
- U32 * pairs = uncompressTheSurprisingValues (source);
- Long numPairs = source->numCompressedSurprisingValues;
+ U32* pairs = uncompressTheSurprisingValues(source);
+ const Long numPairs = source->numCompressedSurprisingValues;
// In the hybrid flavor, some of these pairs actually
// belong in the window, so we will separate them out,
// moving the "true" pairs to the bottom of the array.
- Long k = (1LL << source->lgK);
+ const Long k = 1LL << source->lgK;
- U8 * window = (U8 *) fm85alloc ((size_t) (k * sizeof(U8)));
+ U8* window = (U8*) fm85alloc((size_t) (k * sizeof(U8)));
if (window == NULL) throw std::bad_alloc();
- memset ((void *) window, 0, (size_t) k); // important: zero the memory
+ memset((void*) window, 0, (size_t) k); // important: zero the memory
Long nextTruePair = 0;
- Long i;
- for (i = 0; i < numPairs; i++) {
- U32 rowCol = pairs[i];
+ for (Long i = 0; i < numPairs; i++) {
+ const U32 rowCol = pairs[i];
if (rowCol == ALL32BITS) throw std::logic_error("rowCol == ALL32BITS");
- Short col = (Short) (rowCol & 63);
+ const Short col = (Short) (rowCol & 63);
if (col < 8) {
- Long row = (Long) (rowCol >> 6);
+ const Long row = (Long) (rowCol >> 6);
window[row] |= (1 << col); // set the window bit
- }
- else {
+ } else {
pairs[nextTruePair++] = rowCol; // move true pair down
}
}
@@ -705,101 +584,80 @@ void uncompressHybridFlavor (FM85 * target, FM85 * source) {
if (source->windowOffset != 0) throw std::logic_error("source->windowOffset != 0");
target->windowOffset = 0;
- u32Table * table = makeU32TableFromPairsArray (pairs,
- nextTruePair,
- source->lgK);
+ u32Table* table = makeU32TableFromPairsArray(pairs, nextTruePair, source->lgK);
target->surprisingValueTable = table;
target->slidingWindow = window;
- fm85free (pairs);
-
- return;
+ fm85free(pairs);
}
-/***************************************************************/
-/***************************************************************/
-
-void compressPinnedFlavor (FM85 * target, FM85 * source) {
-
- compressTheWindow (target, source);
-
- Long numPairs = source->surprisingValueTable->numItems;
+void compressPinnedFlavor(FM85* target, FM85* source) {
+ compressTheWindow(target, source);
+ const Long numPairs = source->surprisingValueTable->numItems;
if (numPairs > 0) {
Long chkNumPairs;
- U32 * pairs = u32TableUnwrappingGetItems (source->surprisingValueTable, &chkNumPairs);
+ U32* pairs = u32TableUnwrappingGetItems(source->surprisingValueTable, &chkNumPairs);
if (chkNumPairs != numPairs) throw std::logic_error("chkNumPairs != numPairs");
// Here we subtract 8 from the column indices. Because they are stored in the low 6 bits
// of each rowCol pair, and because no column index is less than 8 for a "Pinned" sketch,
// I believe we can simply subtract 8 from the pairs themselves.
- Long i; // shift the columns over by 8 positions before compressing (because of the window)
- for (i = 0; i < numPairs; i++) {
+ // shift the columns over by 8 positions before compressing (because of the window)
+ for (Long i = 0; i < numPairs; i++) {
if ((pairs[i] & 63) < 8) throw std::logic_error("(pairs[i] & 63) < 8");
pairs[i] -= 8;
}
introspectiveInsertionSort(pairs, 0, numPairs-1);
- compressTheSurprisingValues (target, source, pairs, numPairs);
- if (pairs) fm85free (pairs);
+ compressTheSurprisingValues(target, source, pairs, numPairs);
+ if (pairs) fm85free(pairs);
}
- return;
}
-/***************************************************************/
-
-void uncompressPinnedFlavor (FM85 * target, FM85 * source) {
+void uncompressPinnedFlavor(FM85* target, FM85* source) {
if (source->compressedWindow == NULL) throw std::logic_error("source->compressedWindow == NULL");
- uncompressTheWindow (target, source);
- Long numPairs = source->numCompressedSurprisingValues;
+ uncompressTheWindow(target, source);
+ const Long numPairs = source->numCompressedSurprisingValues;
if (numPairs == 0) {
- target->surprisingValueTable = u32TableMake (2, 6 + source->lgK);
- }
- else {
+ target->surprisingValueTable = u32TableMake(2, 6 + source->lgK);
+ } else {
if (numPairs <= 0) throw std::logic_error("numPairs <= 0");
if (source->compressedSurprisingValues == NULL) throw std::logic_error("source->compressedSurprisingValues == NULL");
- U32 * pairs = uncompressTheSurprisingValues (source);
- Long i; // undo the compressor's 8-column shift
- for (i = 0; i < numPairs; i++) {
+ U32* pairs = uncompressTheSurprisingValues(source);
+ // undo the compressor's 8-column shift
+ for (Long i = 0; i < numPairs; i++) {
if ((pairs[i] & 63) >= 56) throw std::logic_error("(pairs[i] & 63) >= 56");
pairs[i] += 8;
}
- u32Table * table = makeU32TableFromPairsArray (pairs, numPairs, source->lgK);
+ u32Table* table = makeU32TableFromPairsArray(pairs, numPairs, source->lgK);
target->surprisingValueTable = table;
- fm85free (pairs);
+ fm85free(pairs);
}
- return;
}
-/***************************************************************/
-/***************************************************************/
// Complicated by the existence of both a left fringe and a right fringe.
-
-void compressSlidingFlavor (FM85 * target, FM85 * source) {
-
- compressTheWindow (target, source);
-
- Long numPairs = source->surprisingValueTable->numItems;
-
+void compressSlidingFlavor(FM85* target, FM85* source) {
+ compressTheWindow(target, source);
+ const Long numPairs = source->surprisingValueTable->numItems;
if (numPairs > 0) {
Long chkNumPairs;
- U32 * pairs = u32TableUnwrappingGetItems (source->surprisingValueTable, &chkNumPairs);
+ U32* pairs = u32TableUnwrappingGetItems(source->surprisingValueTable, &chkNumPairs);
if (chkNumPairs != numPairs) throw std::logic_error("chkNumPairs != numPairs");
// Here we apply a complicated transformation to the column indices, which
// changes the implied ordering of the pairs, so we must do it before sorting.
- Short pseudoPhase = determinePseudoPhase (source->lgK, source->numCoupons); // NB
+ const Short pseudoPhase = determinePseudoPhase(source->lgK, source->numCoupons); // NB
if (pseudoPhase >= 16) throw std::logic_error("pseudoPhase >= 16");
- U8 * permutation = columnPermutationsForEncoding[pseudoPhase];
+ const U8* permutation = columnPermutationsForEncoding[pseudoPhase];
Short offset = source->windowOffset;
if (offset <= 0 || offset > 56) throw std::out_of_range("offset out of range");
- Long i;
- for (i = 0; i < numPairs; i++) {
- U32 rowCol = pairs[i];
- Long row = (Long) (rowCol >> 6);
+ for (Long i = 0; i < numPairs; i++) {
+ const U32 rowCol = pairs[i];
+ const Long row = (Long) (rowCol >> 6);
Short col = (Short) (rowCol & 63);
// first rotate the columns into a canonical configuration: new = ((old - (offset+8)) + 64) mod 64
col = (col + 56 - offset) & 63;
@@ -809,39 +667,33 @@ void compressSlidingFlavor (FM85 * target, FM85 * source) {
pairs[i] = (U32) ((row << 6) | col);
}
- introspectiveInsertionSort(pairs, 0, numPairs-1);
- compressTheSurprisingValues (target, source, pairs, numPairs);
- if (pairs) fm85free (pairs);
+ introspectiveInsertionSort(pairs, 0, numPairs - 1);
+ compressTheSurprisingValues(target, source, pairs, numPairs);
+ if (pairs) fm85free(pairs);
}
- return;
}
-/***************************************************************/
-
-void uncompressSlidingFlavor (FM85 * target, FM85 * source) {
+void uncompressSlidingFlavor(FM85* target, FM85* source) {
if (source->compressedWindow == NULL) throw std::logic_error("source->compressedWindow == NULL");
- uncompressTheWindow (target, source);
-
- Long numPairs = source->numCompressedSurprisingValues;
+ uncompressTheWindow(target, source);
+ const Long numPairs = source->numCompressedSurprisingValues;
if (numPairs == 0) {
- target->surprisingValueTable = u32TableMake (2, 6 + source->lgK);
- }
- else {
+ target->surprisingValueTable = u32TableMake(2, 6 + source->lgK);
+ } else {
if (numPairs <= 0) throw std::logic_error("numPairs <= 0");
if (source->compressedSurprisingValues == NULL) throw std::logic_error("source->compressedSurprisingValues == NULL");
- U32 * pairs = uncompressTheSurprisingValues (source);
+ U32* pairs = uncompressTheSurprisingValues(source);
Short pseudoPhase = determinePseudoPhase (source->lgK, source->numCoupons); // NB
if (pseudoPhase >= 16) throw std::logic_error("pseudoPhase >= 16");
- U8 * permutation = columnPermutationsForDecoding[pseudoPhase];
+ const U8* permutation = columnPermutationsForDecoding[pseudoPhase];
- Short offset = source->windowOffset;
+ const Short offset = source->windowOffset;
if (offset <= 0 || offset > 56) throw std::out_of_range("offset out of range");
- Long i;
- for (i = 0; i < numPairs; i++) {
- U32 rowCol = pairs[i];
- Long row = (Long) (rowCol >> 6);
+ for (Long i = 0; i < numPairs; i++) {
+ const U32 rowCol = pairs[i];
+ const Long row = (Long) (rowCol >> 6);
Short col = (Short) (rowCol & 63);
// first undo the permutation
col = permutation[col];
@@ -850,23 +702,17 @@ void uncompressSlidingFlavor (FM85 * target, FM85 * source) {
pairs[i] = (U32) ((row << 6) | col);
}
- u32Table * table = makeU32TableFromPairsArray (pairs, numPairs, source->lgK);
+ u32Table* table = makeU32TableFromPairsArray(pairs, numPairs, source->lgK);
target->surprisingValueTable = table;
fm85free (pairs);
}
- return;
}
-/***************************************************************/
-/***************************************************************/
-
-// Note: in the final system, compressed and uncompressed sketches will have different types
+FM85* fm85Compress(FM85* source) {
+ if (source->isCompressed) throw std::invalid_argument("already compressed");
-FM85 * fm85Compress (FM85 * source) {
- if (source->isCompressed != 0) throw std::invalid_argument("already compressed");
-
- FM85 * target = (FM85 *) fm85alloc (sizeof(FM85));
+ FM85* target = (FM85*) fm85alloc(sizeof(FM85));
if (target == NULL) throw std::bad_alloc();
target->lgK = source->lgK;
@@ -878,7 +724,7 @@ FM85 * fm85Compress (FM85 * source) {
target->hipEstAccum = source->hipEstAccum;
target->hipErrAccum = source->hipErrAccum;
- target->isCompressed = 1;
+ target->isCompressed = true;
// initialize the variables that belong in a compressed sketch
target->numCompressedSurprisingValues = 0;
@@ -893,26 +739,24 @@ FM85 * fm85Compress (FM85 * source) {
enum flavorType flavor = determineSketchFlavor(source);
switch (flavor) {
- case EMPTY: compressEmptyFlavor (target, source); break;
+ case EMPTY: compressEmptyFlavor(target, source); break;
case SPARSE:
- compressSparseFlavor (target, source);
+ compressSparseFlavor(target, source);
if (target->compressedWindow != NULL) throw std::logic_error("target->compressedWindow != NULL");
if (target->compressedSurprisingValues == NULL) throw std::logic_error("target->compressedSurprisingValues == NULL");
break;
case HYBRID:
- compressHybridFlavor (target, source);
+ compressHybridFlavor(target, source);
if (target->compressedWindow != NULL) throw std::logic_error("target->compressedWindow != NULL");
if (target->compressedSurprisingValues == NULL) throw std::logic_error("target->compressedSurprisingValues == NULL");
break;
case PINNED:
- compressPinnedFlavor (target, source);
+ compressPinnedFlavor(target, source);
if (target->compressedWindow == NULL) throw std::logic_error("target->compressedWindow != NULL");
- // assert (target->compressedSurprisingValues != NULL);
break;
case SLIDING:
compressSlidingFlavor(target, source);
if (target->compressedWindow == NULL) throw std::logic_error("target->compressedWindow == NULL");
- // assert (target->compressedSurprisingValues != NULL);
break;
default: throw std::logic_error("Unknown sketch flavor");
}
@@ -920,14 +764,10 @@ FM85 * fm85Compress (FM85 * source) {
return target;
}
-/***************************************************************/
-/***************************************************************/
-// Note: in the final system, compressed and uncompressed sketches will have different types
-
-FM85 * fm85Uncompress (FM85 * source) {
- if (source->isCompressed != 1) throw std::invalid_argument("not compressed");
+FM85* fm85Uncompress(FM85* source) {
+ if (!source->isCompressed) throw std::invalid_argument("not compressed");
- FM85 * target = (FM85 *) fm85alloc (sizeof(FM85));
+ FM85* target = (FM85*) fm85alloc(sizeof(FM85));
if (target == NULL) throw std::bad_alloc();
target->lgK = source->lgK;
@@ -939,17 +779,17 @@ FM85 * fm85Uncompress (FM85 * source) {
target->hipEstAccum = source->hipEstAccum;
target->hipErrAccum = source->hipErrAccum;
- target->isCompressed = 0;
+ target->isCompressed = false;
// initialize the variables that belong in an updateable sketch
- target->slidingWindow = (U8 *) NULL;
- target->surprisingValueTable = (u32Table *) NULL;
+ target->slidingWindow = (U8*) NULL;
+ target->surprisingValueTable = (u32Table*) NULL;
// clear the variables that don't belong in an updateable sketch
target->numCompressedSurprisingValues = 0;
- target->compressedSurprisingValues = (U32 *) NULL;
+ target->compressedSurprisingValues = (U32*) NULL;
target->csvLength = 0;
- target->compressedWindow = (U32 *) NULL;
+ target->compressedWindow = (U32*) NULL;
target->cwLength = 0;
enum flavorType flavor = determineSketchFlavor(source);
diff --git a/cpc/src/fm85Confidence.cpp b/cpc/src/fm85Confidence.cpp
index 0117e4e..5c37042 100644
--- a/cpc/src/fm85Confidence.cpp
+++ b/cpc/src/fm85Confidence.cpp
@@ -19,20 +19,17 @@
// author Kevin Lang, Oath Research
-/*******************************************************/
-
#include "common.h"
#include "fm85.h"
#include "iconEstimator.h"
#include <stdexcept>
-/*******************************************************/
// ln 2.0
-double iconErrorConstant = 0.693147180559945286;
+const double iconErrorConstant = 0.693147180559945286;
// 1, 2, 3, // kappa
-Short iconLowSideData [33] = { // Empirically measured at N = 1000 * K.
+const Short iconLowSideData [33] = { // Empirically measured at N = 1000 * K.
6037, 5720, 5328, // 4 1000000
6411, 6262, 5682, // 5 1000000
6724, 6403, 6127, // 6 1000000
@@ -47,7 +44,7 @@ Short iconLowSideData [33] = { // Empirically measured at N = 1000 * K.
}; // lgK numtrials
// 1, 2, 3, // kappa
-Short iconHighSideData [33] = { // Empirically measured at N = 1000 * K.
+const Short iconHighSideData [33] = { // Empirically measured at N = 1000 * K.
8031, 8559, 9309, // 4 1000000
7084, 7959, 8660, // 5 1000000
7141, 7514, 7876, // 6 1000000
@@ -61,12 +58,11 @@ Short iconHighSideData [33] = { // Empirically measured at N = 1000 * K.
6944, 6966, 7004, // 14 1000297
}; // lgK numtrials
-/*******************************************************/
// sqrt((ln 2.0) / 2.0)
-double hipErrorConstant = 0.588705011257737332;
+const double hipErrorConstant = 0.588705011257737332;
// 1, 2, 3, // kappa
-Short hipLowSideData [33] = { // Empirically measured at N = 1000 * K.
+const Short hipLowSideData [33] = { // Empirically measured at N = 1000 * K.
5871, 5247, 4826, // 4 1000000
5877, 5403, 5070, // 5 1000000
5873, 5533, 5304, // 6 1000000
@@ -81,7 +77,7 @@ Short hipLowSideData [33] = { // Empirically measured at N = 1000 * K.
}; // lgK numtrials
// 1, 2, 3, // kappa
-Short hipHighSideData [33] = { // Empirically measured at N = 1000 * K.
+const Short hipHighSideData [33] = { // Empirically measured at N = 1000 * K.
5855, 6688, 7391, // 4 1000000
5886, 6444, 6923, // 5 1000000
5885, 6254, 6594, // 6 1000000
@@ -95,73 +91,66 @@ Short hipHighSideData [33] = { // Empirically measured at N = 1000 * K.
5880, 5914, 5953, // 14 1000297
}; // lgK numtrials
-/*******************************************************/
-
-double getIconConfidenceLB (FM85 * sketch, int kappa) {
+double getIconConfidenceLB(const FM85* sketch, int kappa) {
if (sketch->numCoupons == 0) return 0.0;
- int lgK = sketch->lgK;
- long k = (1LL << lgK);
+ const int lgK = sketch->lgK;
+ const long k = 1LL << lgK;
if (lgK < 4) throw std::logic_error("lgk < 4");
if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
double x = iconErrorConstant;
- if (lgK <= 14) x = ((double) iconHighSideData[3*(lgK-4) + (kappa-1)]) / 10000.0;
- double rel = x / (sqrt((double) k));
- double eps = ((double) kappa) * rel;
- double est = getIconEstimate (lgK, sketch->numCoupons);
+ if (lgK <= 14) x = ((double) iconHighSideData[3 * (lgK - 4) + (kappa - 1)]) / 10000.0;
+ const double rel = x / (sqrt((double) k));
+ const double eps = ((double) kappa) * rel;
+ const double est = getIconEstimate (lgK, sketch->numCoupons);
double result = est / (1.0 + eps);
- double check = (double) sketch->numCoupons;
+ const double check = (double) sketch->numCoupons;
if (result < check) result = check;
- return (result);
+ return result;
}
-double getIconConfidenceUB (FM85 * sketch, int kappa) {
+double getIconConfidenceUB(const FM85* sketch, int kappa) {
if (sketch->numCoupons == 0) return 0.0;
- int lgK = sketch->lgK;
- long k = (1LL << lgK);
+ const int lgK = sketch->lgK;
+ const long k = 1LL << lgK;
if (lgK < 4) throw std::logic_error("lgk < 4");
if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
double x = iconErrorConstant;
- if (lgK <= 14) x = ((double) iconLowSideData[3*(lgK-4) + (kappa-1)]) / 10000.0;
- double rel = x / (sqrt((double) k));
- double eps = ((double) kappa) * rel;
- double est = getIconEstimate (lgK, sketch->numCoupons);
- double result = est / (1.0 - eps);
- return (ceil(result)); // widening for coverage
+ if (lgK <= 14) x = ((double) iconLowSideData[3 * (lgK - 4) + (kappa - 1)]) / 10000.0;
+ const double rel = x / (sqrt((double) k));
+ const double eps = ((double) kappa) * rel;
+ const double est = getIconEstimate (lgK, sketch->numCoupons);
+ const double result = est / (1.0 - eps);
+ return ceil(result); // widening for coverage
}
-/*******************************************************/
-
-double getHIPConfidenceLB (FM85 * sketch, int kappa) {
+double getHIPConfidenceLB(const FM85 * sketch, int kappa) {
if (sketch->numCoupons == 0) return 0.0;
- int lgK = sketch->lgK;
- long k = (1LL << lgK);
+ const int lgK = sketch->lgK;
+ const long k = 1LL << lgK;
if (lgK < 4) throw std::logic_error("lgk < 4");
if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
double x = hipErrorConstant;
- if (lgK <= 14) x = ((double) hipHighSideData[3*(lgK-4) + (kappa-1)]) / 10000.0;
- double rel = x / (sqrt((double) k));
- double eps = ((double) kappa) * rel;
- double est = getHIPEstimate (sketch);
+ if (lgK <= 14) x = ((double) hipHighSideData[3 * (lgK - 4) + (kappa - 1)]) / 10000.0;
+ const double rel = x / (sqrt((double) k));
+ const double eps = ((double) kappa) * rel;
+ const double est = getHIPEstimate(sketch);
double result = est / (1.0 + eps);
- double check = (double) sketch->numCoupons;
+ const double check = (double) sketch->numCoupons;
if (result < check) result = check;
- return (result);
+ return result;
}
-double getHIPConfidenceUB (FM85 * sketch, int kappa) {
+double getHIPConfidenceUB(const FM85* sketch, int kappa) {
if (sketch->numCoupons == 0) return 0.0;
- int lgK = sketch->lgK;
- long k = (1LL << lgK);
+ const int lgK = sketch->lgK;
+ const long k = 1LL << lgK;
if (lgK < 4) throw std::logic_error("lgk < 4");
if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
double x = hipErrorConstant;
- if (lgK <= 14) x = ((double) hipLowSideData[3*(lgK-4) + (kappa-1)]) / 10000.0;
- double rel = x / (sqrt((double) k));
- double eps = ((double) kappa) * rel;
- double est = getHIPEstimate (sketch);
- double result = est / (1.0 - eps);
- return (ceil(result)); // widening for coverage
+ if (lgK <= 14) x = ((double) hipLowSideData[3 * (lgK - 4) + (kappa - 1)]) / 10000.0;
+ const double rel = x / (sqrt((double) k));
+ const double eps = ((double) kappa) * rel;
+ const double est = getHIPEstimate(sketch);
+ const double result = est / (1.0 - eps);
+ return ceil(result); // widening for coverage
}
-
-/*******************************************************/
-
diff --git a/cpc/src/fm85Merging.cpp b/cpc/src/fm85Merging.cpp
index eecdd6d..c7b7e44 100644
--- a/cpc/src/fm85Merging.cpp
+++ b/cpc/src/fm85Merging.cpp
@@ -24,55 +24,47 @@
#include <stdexcept>
#include <new>
-UG85 * ug85Make (Short lgK) {
+UG85* ug85Make(Short lgK) {
if (lgK < 4) throw std::invalid_argument("lgK < 4");
- UG85 * self = (UG85 *) fm85alloc (sizeof(UG85));
+ UG85* self = (UG85*) fm85alloc(sizeof(UG85));
if (self == NULL) throw std::bad_alloc();
self->lgK = lgK;
// We begin with the accumulator holding an EMPTY sketch object.
// As an optimization the accumulator could start as NULL, but that would require changes elsewhere.
- self->accumulator = fm85Make (lgK);
+ self->accumulator = fm85Make(lgK);
self->bitMatrix = NULL;
- return (self);
+ return self;
}
-/*******************************************************************************************/
-
-void ug85Free (UG85 * self) {
+void ug85Free(UG85* self) {
if (self != NULL) {
- if (self->accumulator != NULL) { fm85Free (self->accumulator); }
- if (self->bitMatrix != NULL) { fm85free (self->bitMatrix); }
- fm85free (self);
+ if (self->accumulator != NULL) { fm85Free(self->accumulator); }
+ if (self->bitMatrix != NULL) { fm85free(self->bitMatrix); }
+ fm85free(self);
}
}
-/*******************************************************************************************/
-
// This is used for testing purposes only.
-U64 * bitMatrixOfUG85 (UG85 * self, Boolean * needToFreePtr) {
+U64* bitMatrixOfUG85(const UG85* self, bool* needToFreePtr) {
if (self->bitMatrix != NULL) { // return the matrix
if (self->accumulator != NULL) throw std::logic_error("accumulator is not null");
- *needToFreePtr = 0; // or we could make a copy of the bitmatrix
+ *needToFreePtr = false; // or we could make a copy of the bitmatrix
return (self->bitMatrix);
- }
- else { // construct a matrix
+ } else { // construct a matrix
if (self->accumulator == NULL) throw std::logic_error("accumulator is null");
- *needToFreePtr = 1;
- return (bitMatrixOfSketch (self->accumulator));
+ *needToFreePtr = true;
+ return bitMatrixOfSketch(self->accumulator);
}
}
-/*******************************************************************************************/
-/*******************************************************************************************/
-
-void walkTableUpdatingSketch (FM85 * dest, u32Table * table) {
- U32 * slots = table->slots;
- Long numSlots = (1LL << table->lgSize);
+void walkTableUpdatingSketch(FM85* dest, const u32Table* table) {
+ const U32* slots = table->slots;
+ const Long numSlots = 1LL << table->lgSize;
if (dest->lgK > 26) throw std::logic_error("dest->lgK > 26");
- U32 destMask = (((1 << dest->lgK) - 1) << 6) | 63; // downsamples when destlgK < srcLgK
+ const U32 destMask = (((1 << dest->lgK) - 1) << 6) | 63; // downsamples when destlgK < srcLgK
// Using a golden ratio stride fixes the snowplow effect.
- double golden = 0.6180339887498949025;
+ const double golden = 0.6180339887498949025;
Long stride = (Long) (golden * ((double) numSlots));
if (stride < 2) throw std::logic_error("stride < 2");
if (stride == ((stride >> 1) << 1)) { stride += 1; }; // force the stride to be odd
@@ -80,70 +72,58 @@ void walkTableUpdatingSketch (FM85 * dest, u32Table * table) {
Long i,j;
for (i = 0, j = 0; i < numSlots; i++, j += stride) {
- j &= (numSlots - 1LL);
- U32 rowCol = slots[j];
+ j &= numSlots - 1LL;
+ const U32 rowCol = slots[j];
if (rowCol != ALL32BITS) {
- fm85RowColUpdate (dest, rowCol & destMask);
+ fm85RowColUpdate(dest, rowCol & destMask);
}
}
}
-/*******************************************************************************************/
-
-void orTableIntoMatrix (U64 * bitMatrix, Short destLgK, u32Table * table) {
- U32 * slots = table->slots;
- Long numSlots = (1LL << table->lgSize);
- Long destMask = (1LL << destLgK) - 1LL; // downsamples when destlgK < srcLgK
- Long i = 0;
- for (i = 0; i < numSlots; i++) {
- U32 rowCol = slots[i];
+void orTableIntoMatrix(U64* bitMatrix, Short destLgK, const u32Table* table) {
+ const U32* slots = table->slots;
+ const Long numSlots = 1LL << table->lgSize;
+ const Long destMask = (1LL << destLgK) - 1LL; // downsamples when destlgK < srcLgK
+ for (Long i = 0; i < numSlots; i++) {
+ const U32 rowCol = slots[i];
if (rowCol != ALL32BITS) {
- Short col = (Short) (rowCol & 63);
- Long row = (Long) (rowCol >> 6);
+ const Short col = (Short) (rowCol & 63);
+ const Long row = (Long) (rowCol >> 6);
bitMatrix[row & destMask] |= (1ULL << col); // Set the bit.
}
}
}
-/*******************************************************************************************/
-
-void orWindowIntoMatrix (U64 * destMatrix, Short destLgK, U8 * srcWindow, Short srcOffset, Short srcLgK) {
+void orWindowIntoMatrix(U64* destMatrix, Short destLgK, const U8* srcWindow, Short srcOffset, Short srcLgK) {
if (destLgK > srcLgK) throw std::logic_error("destLgK > srcLgK");
- Long destMask = (1LL << destLgK) - 1LL; // downsamples when destlgK < srcLgK
- Long srcK = (1LL << srcLgK);
- Long srcRow = 0;
- for (srcRow = 0; srcRow < srcK; srcRow++) {
+ const Long destMask = (1LL << destLgK) - 1LL; // downsamples when destlgK < srcLgK
+ const Long srcK = 1LL << srcLgK;
+ for (Long srcRow = 0; srcRow < srcK; srcRow++) {
destMatrix[srcRow & destMask] |= (((U64) srcWindow[srcRow]) << srcOffset);
}
}
-/*******************************************************************************************/
-
-void orMatrixIntoMatrix (U64 * destMatrix, Short destLgK, U64 * srcMatrix, Short srcLgK) {
+void orMatrixIntoMatrix(U64* destMatrix, Short destLgK, const U64* srcMatrix, Short srcLgK) {
if (destLgK > srcLgK) throw std::logic_error("destLgK > srcLgK");
- Long destMask = (1LL << destLgK) - 1LL; // downsamples when destlgK < srcLgK
- Long srcK = (1LL << srcLgK);
- Long srcRow = 0;
- for (srcRow = 0; srcRow < srcK; srcRow++) {
+ const Long destMask = (1LL << destLgK) - 1LL; // downsamples when destlgK < srcLgK
+ const Long srcK = 1LL << srcLgK;
+ for (Long srcRow = 0; srcRow < srcK; srcRow++) {
destMatrix[srcRow & destMask] |= srcMatrix[srcRow];
}
}
-/*******************************************************************************************/
-
-void ug85ReduceK (UG85 * unioner, Short newLgK) {
+void ug85ReduceK(UG85* unioner, Short newLgK) {
if (newLgK >= unioner->lgK) throw std::logic_error("newLgK >= unioner->lgK");
if (unioner->accumulator == NULL && unioner->bitMatrix == NULL) throw std::logic_error("both accumulator and bitMatrix are null");
if (unioner->bitMatrix != NULL) { // downsample the unioner's bit matrix
if (unioner->accumulator != NULL) throw std::logic_error("accumulator is not null");
- Long newK = (1LL << newLgK);
- U64 * newMatrix = (U64 *) fm85alloc ((size_t) (newK * sizeof(U64)));
+ const Long newK = 1LL << newLgK;
+ U64* newMatrix = (U64*) fm85alloc((size_t) (newK * sizeof(U64)));
if (newMatrix == NULL) throw std::bad_alloc();
- Long i = 0;
- for (i = 0; i < newK; i++) { newMatrix[i] = 0LL; } // clear the bit matrix
- orMatrixIntoMatrix (newMatrix, newLgK, unioner->bitMatrix, unioner->lgK);
- fm85free (unioner->bitMatrix);
+ for (Long i = 0; i < newK; i++) { newMatrix[i] = 0LL; } // clear the bit matrix
+ orMatrixIntoMatrix(newMatrix, newLgK, unioner->bitMatrix, unioner->lgK);
+ fm85free(unioner->bitMatrix);
unioner->bitMatrix = newMatrix;
unioner->lgK = newLgK;
return;
@@ -151,7 +131,7 @@ void ug85ReduceK (UG85 * unioner, Short newLgK) {
if (unioner->accumulator != NULL) { // downsample the unioner's sketch
if (unioner->bitMatrix != NULL) throw std::logic_error("bitMatrix is not null");
- FM85 * oldSketch = unioner->accumulator;
+ FM85* oldSketch = unioner->accumulator;
if (oldSketch->numCoupons == 0) { // if the accumulator is EMPTY, simply change its K.
if (oldSketch->surprisingValueTable != NULL) throw std::logic_error("oldSketch->surprisingValueTable is not null");
@@ -160,24 +140,23 @@ void ug85ReduceK (UG85 * unioner, Short newLgK) {
return;
}
- FM85 * newSketch = fm85Make (newLgK);
+ FM85* newSketch = fm85Make(newLgK);
if (oldSketch->slidingWindow != NULL || oldSketch->surprisingValueTable == NULL) throw std::logic_error("invalid state");
- walkTableUpdatingSketch (newSketch, oldSketch->surprisingValueTable);
+ walkTableUpdatingSketch(newSketch, oldSketch->surprisingValueTable);
enum flavorType finalNewFlavor = determineSketchFlavor(newSketch);
if (finalNewFlavor == EMPTY) throw std::logic_error("finalNewFlavor == EMPTY");
if (finalNewFlavor == SPARSE) {
unioner->accumulator = newSketch;
unioner->lgK = newLgK;
- fm85Free (oldSketch);
+ fm85Free(oldSketch);
return;
- }
- else { // the new sketch has graduated beyond sparse, so convert to bitMatrix
+ } else { // the new sketch has graduated beyond sparse, so convert to bitMatrix
unioner->accumulator = NULL;
- unioner->bitMatrix = bitMatrixOfSketch (newSketch);
+ unioner->bitMatrix = bitMatrixOfSketch(newSketch);
unioner->lgK = newLgK;
- fm85Free (oldSketch);
- fm85Free (newSketch);
+ fm85Free(oldSketch);
+ fm85Free(newSketch);
return;
}
}
@@ -185,16 +164,14 @@ void ug85ReduceK (UG85 * unioner, Short newLgK) {
throw std::logic_error("invalid state");
}
-/*******************************************************************************************/
-
-void ug85MergeInto (UG85 * unioner, FM85 * source) {
+void ug85MergeInto(UG85* unioner, const FM85* source) {
if (NULL == unioner) throw std::invalid_argument("unioner is null");
if (NULL == source) return;
enum flavorType sourceFlavor = determineSketchFlavor(source);
if (EMPTY == sourceFlavor) return;
- if (source->lgK < unioner->lgK) { ug85ReduceK (unioner, source->lgK); }
+ if (source->lgK < unioner->lgK) { ug85ReduceK(unioner, source->lgK); }
if (source->lgK < unioner->lgK) throw std::logic_error("source->lgK < unioner->lgK");
@@ -202,22 +179,22 @@ void ug85MergeInto (UG85 * unioner, FM85 * source) {
if (SPARSE == sourceFlavor && unioner->accumulator != NULL) { // Case A
if (unioner->bitMatrix != NULL) throw std::logic_error("unioner->bitMatrix != NULL");
- enum flavorType initialDestFlavor = determineSketchFlavor (unioner->accumulator);
+ enum flavorType initialDestFlavor = determineSketchFlavor(unioner->accumulator);
if (EMPTY != initialDestFlavor && SPARSE != initialDestFlavor) throw std::logic_error("wrong flavor");
// The following partially fixes the snowplow problem provided that the K's are equal.
// A complete fix is coming soon.
if (EMPTY == initialDestFlavor && unioner->lgK == source->lgK) {
- fm85Free (unioner->accumulator);
+ fm85Free(unioner->accumulator);
unioner->accumulator = fm85Copy(source);
}
- walkTableUpdatingSketch (unioner->accumulator, source->surprisingValueTable);
+ walkTableUpdatingSketch(unioner->accumulator, source->surprisingValueTable);
enum flavorType finalDestFlavor = determineSketchFlavor(unioner->accumulator);
// if the accumulator has graduated beyond sparse, switch to a bitMatrix representation
if (finalDestFlavor != EMPTY && finalDestFlavor != SPARSE) {
- unioner->bitMatrix = bitMatrixOfSketch (unioner->accumulator);
- fm85Free (unioner->accumulator);
+ unioner->bitMatrix = bitMatrixOfSketch(unioner->accumulator);
+ fm85Free(unioner->accumulator);
unioner->accumulator = NULL;
}
return;
@@ -225,7 +202,7 @@ void ug85MergeInto (UG85 * unioner, FM85 * source) {
if (SPARSE == sourceFlavor && unioner->bitMatrix != NULL) { // Case B
if (unioner->accumulator != NULL) throw std::logic_error("unioner->accumulator != NULL");
- orTableIntoMatrix (unioner->bitMatrix, unioner->lgK, source->surprisingValueTable);
+ orTableIntoMatrix(unioner->bitMatrix, unioner->lgK, source->surprisingValueTable);
return;
}
@@ -234,33 +211,29 @@ void ug85MergeInto (UG85 * unioner, FM85 * source) {
// source is past SPARSE mode, so make sure that dest is a bitMatrix.
if (unioner->accumulator != NULL) {
if (unioner->bitMatrix != NULL) throw std::logic_error("unioner->bitMatrix != NULL");
- enum flavorType destFlavor = determineSketchFlavor (unioner->accumulator);
+ enum flavorType destFlavor = determineSketchFlavor(unioner->accumulator);
if (EMPTY != destFlavor && SPARSE != destFlavor) throw std::logic_error("wrong flavor");
- unioner->bitMatrix = bitMatrixOfSketch (unioner->accumulator);
- fm85Free (unioner->accumulator);
+ unioner->bitMatrix = bitMatrixOfSketch(unioner->accumulator);
+ fm85Free(unioner->accumulator);
unioner->accumulator = NULL;
}
if (unioner->bitMatrix == NULL) throw std::logic_error("unioner->bitMatrix == NULL");
if (HYBRID == sourceFlavor || PINNED == sourceFlavor) { // Case C
- orWindowIntoMatrix (unioner->bitMatrix, unioner->lgK, source->slidingWindow, source->windowOffset, source->lgK);
- orTableIntoMatrix (unioner->bitMatrix, unioner->lgK, source->surprisingValueTable);
+ orWindowIntoMatrix(unioner->bitMatrix, unioner->lgK, source->slidingWindow, source->windowOffset, source->lgK);
+ orTableIntoMatrix(unioner->bitMatrix, unioner->lgK, source->surprisingValueTable);
return;
}
// SLIDING mode involves inverted logic, so we can't just walk the source sketch.
// Instead, we convert it to a bitMatrix that can be OR'ed into the destination.
if (SLIDING != sourceFlavor) throw std::logic_error("wrong flavor"); // Case D
- U64 * sourceMatrix = bitMatrixOfSketch (source);
- orMatrixIntoMatrix (unioner->bitMatrix, unioner->lgK, sourceMatrix, source->lgK);
- fm85free (sourceMatrix);
-
- return;
+ U64* sourceMatrix = bitMatrixOfSketch(source);
+ orMatrixIntoMatrix(unioner->bitMatrix, unioner->lgK, sourceMatrix, source->lgK);
+ fm85free(sourceMatrix);
}
-/*******************************************************************************************/
-
-FM85 * ug85GetResult (UG85 * unioner) {
+FM85* ug85GetResult(const UG85* unioner) {
if (unioner == NULL) throw std::invalid_argument("unioner == NULL");
if (unioner->accumulator == NULL && unioner->bitMatrix == NULL) throw std::logic_error("both accumulator and bitMatrix are null");
@@ -268,34 +241,34 @@ FM85 * ug85GetResult (UG85 * unioner) {
if (unioner->bitMatrix != NULL) throw std::logic_error("unioner->bitMatrix != NULL");
if (unioner->lgK != unioner->accumulator->lgK) throw std::logic_error("unioner->lgK != unioner->accumulator->lgK");
if (unioner->accumulator->numCoupons == 0) {
- FM85 * result = fm85Make (unioner->lgK);
+ FM85* result = fm85Make(unioner->lgK);
result->mergeFlag = 1;
- return (result);
+ return result;
}
if (SPARSE != determineSketchFlavor(unioner->accumulator)) throw std::logic_error("wrong flavor");
- FM85 * result = fm85Copy (unioner->accumulator);
+ FM85* result = fm85Copy(unioner->accumulator);
result->mergeFlag = 1;
- return (result);
+ return result;
} // end of case where unioner contains a sketch
// start of case where unioner contains a bitMatrix
if (unioner->bitMatrix == NULL) throw std::logic_error("unioner->bitMatrix == NULL");
if (unioner->accumulator != NULL) throw std::logic_error("unioner->accumulator != NULL");
- U64 * matrix = unioner->bitMatrix;
- Short lgK = unioner->lgK;
- FM85 * result = fm85Make (unioner->lgK);
+ const U64* matrix = unioner->bitMatrix;
+ const Short lgK = unioner->lgK;
+ FM85* result = fm85Make(unioner->lgK);
- Long k = (1LL << lgK);
- Long numCoupons = countBitsSetInMatrix (matrix, k);
+ const Long k = 1LL << lgK;
+ const Long numCoupons = countBitsSetInMatrix(matrix, k);
result->numCoupons = numCoupons;
- enum flavorType flavor = determineFlavor (lgK, numCoupons);
+ const enum flavorType flavor = determineFlavor(lgK, numCoupons);
if (flavor != HYBRID && flavor != PINNED && flavor != SLIDING) throw std::logic_error("wrong flavor");
- Short offset = determineCorrectOffset (lgK, numCoupons);
+ const Short offset = determineCorrectOffset(lgK, numCoupons);
result->windowOffset = offset;
- U8 * window = (U8 *) fm85alloc ((size_t) (k * sizeof(U8)));
+ U8* window = (U8*) fm85alloc((size_t) (k * sizeof(U8)));
if (window == NULL) throw std::bad_alloc();
// don't need to zero the window's memory
if (result->slidingWindow != NULL) throw std::logic_error("result->slidingWindow != NULL");
@@ -304,41 +277,40 @@ FM85 * ug85GetResult (UG85 * unioner) {
// u32Table * table = u32TableMake (2, 6 + lgK); // dynamically growing caused snowplow effect
Short newTableSize = lgK - 4; // K/16; in some cases this will end up being oversized
if (newTableSize < 2) newTableSize = 2;
- u32Table * table = u32TableMake (newTableSize, 6 + lgK);
+ u32Table* table = u32TableMake(newTableSize, 6 + lgK);
if (result->surprisingValueTable != NULL) throw std::logic_error("result->surprisingValueTable != NULL");
result->surprisingValueTable = table;
// I believe that the following works even when the offset is zero.
- U64 maskForClearingWindow = (0xffULL << offset) ^ ALL64BITS;
- U64 maskForFlippingEarlyZone = (1ULL << offset) - 1;
+ const U64 maskForClearingWindow = (0xffULL << offset) ^ ALL64BITS;
+ const U64 maskForFlippingEarlyZone = (1ULL << offset) - 1;
U64 allSurprisesORed = 0;
- Long i = 0;
// The snowplow effect was caused by processing the rows in order,
// but we have fixed it by using a sufficiently large hash table.
- for (i = 0; i < k; i++) {
+ for (Long i = 0; i < k; i++) {
U64 pattern = matrix[i];
window[i] = (U8) ((pattern >> offset) & 0xff);
pattern &= maskForClearingWindow;
pattern ^= maskForFlippingEarlyZone; // This flipping converts surprising 0's to 1's.
allSurprisesORed |= pattern;
while (pattern != 0) {
- Short col = countTrailingZerosInUnsignedLong (pattern);
+ Short col = countTrailingZerosInUnsignedLong(pattern);
pattern = pattern ^ (1ULL << col); // erase the 1.
- U32 rowCol = (i << 6) | col;
- Boolean isNovel = u32TableMaybeInsert (table, rowCol);
- if (isNovel != 1) throw std::logic_error("isNovel != 1");
+ const U32 rowCol = (i << 6) | col;
+ bool isNovel = u32TableMaybeInsert(table, rowCol);
+ if (!isNovel) throw std::logic_error("isNovel != true");
}
}
// At this point we could shrink an oversized hash table, but the relative waste isn't very big.
- result->firstInterestingColumn = countTrailingZerosInUnsignedLong (allSurprisesORed);
+ result->firstInterestingColumn = countTrailingZerosInUnsignedLong(allSurprisesORed);
if (result->firstInterestingColumn > offset) result->firstInterestingColumn = offset; // corner case
// NB: the HIP-related fields will contain bogus values, but that is okay.
- result->mergeFlag = 1;
+ result->mergeFlag = true;
return result;
// end of case where unioner contains a bitMatrix
}
diff --git a/cpc/src/fm85Util.cpp b/cpc/src/fm85Util.cpp
index 1f6e8a2..c08f604 100644
--- a/cpc/src/fm85Util.cpp
+++ b/cpc/src/fm85Util.cpp
@@ -26,43 +26,33 @@
extern void* (*fm85alloc)(size_t);
-/******************************************/
-
-void * shallowCopy (void * oldObject, size_t numBytes) {
+void* shallowCopy(const void* oldObject, size_t numBytes) {
if (oldObject == NULL || numBytes == 0) throw std::invalid_argument("shallowCopyObject: bad argument");
- void * newObject = fm85alloc (numBytes);
+ void* newObject = fm85alloc(numBytes);
if (newObject == NULL) throw std::bad_alloc();
- memcpy (newObject, oldObject, numBytes);
- return (newObject);
+ memcpy(newObject, oldObject, numBytes);
+ return newObject;
}
-/******************************************/
-
U8 byteLeadingZerosTable[256];
-U8 slow_count_leading_zeros_in_byte (U8 the_byte)
-{
- int shift;
+U8 slow_count_leading_zeros_in_byte(U8 the_byte) {
int count = 0;
- for (shift = 7; shift >= 0; shift--) {
- int is_zero = !((the_byte >> shift) & 1);
- if (is_zero)
+ for (int shift = 7; shift >= 0; shift--) {
+ if (((the_byte >> shift) & 1) == 0) {
count++;
- else
+ } else {
break;
+ }
}
return count;
}
-void fillByteLeadingZerosTable(void)
-{
- int j;
- for (j = 0; j < 256; j++)
+void fillByteLeadingZerosTable(void) {
+ for (int j = 0; j < 256; j++)
byteLeadingZerosTable[j] = slow_count_leading_zeros_in_byte ((U8) j);
}
-/******************************************/
-
#define FCLZ_MASK_56 ((U64) 0x00ffffffffffffff)
#define FCLZ_MASK_48 ((U64) 0x0000ffffffffffff)
#define FCLZ_MASK_40 ((U64) 0x000000ffffffffff)
@@ -71,7 +61,7 @@ void fillByteLeadingZerosTable(void)
#define FCLZ_MASK_16 ((U64) 0x000000000000ffff)
#define FCLZ_MASK_08 ((U64) 0x00000000000000ff)
-Short countLeadingZerosInUnsignedLong (U64 theInput) {
+Short countLeadingZerosInUnsignedLong(U64 theInput) {
if (theInput > FCLZ_MASK_56)
return ((Short) ( 0 + byteLeadingZerosTable[(theInput >> 56) & FCLZ_MASK_08]));
if (theInput > FCLZ_MASK_48)
@@ -90,125 +80,101 @@ Short countLeadingZerosInUnsignedLong (U64 theInput) {
return ((Short) (56 + byteLeadingZerosTable[(theInput >> 0) & FCLZ_MASK_08]));
}
-/******************************************/
-
U8 byteTrailingZerosTable[256];
-// U8 lookupByteTrailingZeros (int x) { return (byteTrailingZerosTable[x]); }
-
-U8 slow_count_trailing_zeros_in_byte (U8 the_byte)
-{
- int shift;
+U8 slow_count_trailing_zeros_in_byte(U8 the_byte) {
int count = 0;
- for (shift = 0; shift <= 7; shift++) {
- int is_zero = !((the_byte >> shift) & 1);
- if (is_zero)
+ for (int shift = 0; shift <= 7; shift++) {
+ if (((the_byte >> shift) & 1) == 0) {
count++;
- else
+ } else {
break;
+ }
}
return count;
}
-void fillByteTrailingZerosTable(void)
-{
- int j;
- for (j = 0; j < 256; j++)
- byteTrailingZerosTable[j] = slow_count_trailing_zeros_in_byte ((U8) j);
+void fillByteTrailingZerosTable(void) {
+ for (int j = 0; j < 256; j++)
+ byteTrailingZerosTable[j] = slow_count_trailing_zeros_in_byte((U8) j);
}
-Short countTrailingZerosInUnsignedLong (U64 theInput) {
+Short countTrailingZerosInUnsignedLong(U64 theInput) {
U64 tmp = theInput;
- int byte;
- int j = 0;
- for (j = 0; j < 8; j++) {
- byte = (tmp & 0xffULL);
+ for (int j = 0; j < 8; j++) {
+ const int byte = (tmp & 0xffULL);
if (byte != 0) return ((Short) ((j << 3) + byteTrailingZerosTable[byte]));
tmp >>= 8;
}
return ((Short) (64));
}
-/******************************************/
-
double invPow2Tab[256];
-void fillInvPow2Tab (void)
-{
- int j;
- for (j = 0; j < 256; j++) {
+void fillInvPow2Tab(void) {
+ for (int j = 0; j < 256; j++) {
invPow2Tab[j] = pow (2.0, (-1.0 * ((double) j)));
}
}
-/******************************************/
-
double kxpByteLookup[256];
-void fillKxpByteLookup (void) // must call fillInvPow2Tab() first
-{
- int byte, col;
- for (byte = 0; byte < 256; byte++) {
+void fillKxpByteLookup(void) { // must call fillInvPow2Tab() first
+ for (int byte = 0; byte < 256; byte++) {
double sum = 0.0;
- for (col = 0; col < 8; col++) {
+ for (int col = 0; col < 8; col++) {
int bit = (byte >> col) & 1;
if (bit == 0) { // note the inverted logic
- sum += invPow2Tab[col+1]; // note the "+1"
+ sum += invPow2Tab[col+1]; // note the "+1"
}
- }
+ }
kxpByteLookup[byte] = sum;
}
}
-
-/******************************************/
-
-Long divideLongsRoundingUp (Long x, Long y) {
+Long divideLongsRoundingUp(Long x, Long y) {
if (x < 0 || y <= 0) throw std::invalid_argument("divideLongsRoundingUp: bad argument");
- Long quotient = x / y;
+ const Long quotient = x / y;
if (quotient * y == x) return (quotient);
else return (quotient + 1);
}
-Long longFloorLog2OfLong (Long x) {
+Long longFloorLog2OfLong(Long x) {
if (x < 1L) throw std::invalid_argument("longFloorLog2OfLong: bad argument");
Long p = 0;
Long y = 1;
- log2Loop:
- if (y == x) return (p);
- if (y > x) return (p-1);
- p += 1;
- y <<= 1;
- goto log2Loop;
+ while (true) {
+ if (y == x) return p;
+ if (y > x) return p - 1;
+ p += 1;
+ y <<= 1;
+ }
}
-/***********************************/
-
// returns an integer that is between
// zero and ceiling(log_2(k))-1, inclusive
-Long golombChooseNumberOfBaseBits (Long k, Long count) {
+Long golombChooseNumberOfBaseBits(Long k, Long count) {
if (k < 1L) throw std::invalid_argument("golombChooseNumberOfBaseBits: k < 1");
if (count < 1L) throw std::invalid_argument("golombChooseNumberOfBaseBits: count < 1");
- Long quotient = (k - count) / count; // integer division
+ const Long quotient = (k - count) / count; // integer division
if (quotient == 0) return (0);
else return (longFloorLog2OfLong(quotient));
}
-/*******************************************************/
// This place-holder code was inadequate because it caused
// the cost of the post-merge getResult() operation to be O(C)
// instead of O(K). It did have the advantage of being
// very simple and trustworthy during initial testing.
-Long wegnerCountBitsSetInMatrix (U64 * array, Long length) {
+Long wegnerCountBitsSetInMatrix(const U64* array, Long length) {
Long i = 0;
U64 pattern = 0;
Long count = 0;
// clock_t t0, t1;
// t0 = clock();
-// Wegner's Bit-Counting Algorithm, CACM 3 (1960), p. 322.
+ // Wegner's Bit-Counting Algorithm, CACM 3 (1960), p. 322.
for (i = 0; i < length; i++) {
pattern = array[i];
while (pattern != 0) {
@@ -223,8 +189,7 @@ Long wegnerCountBitsSetInMatrix (U64 * array, Long length) {
return count;
}
-/*******************************************************/
-// Note: this is an adaptation of the Java code that Lee sent me,
+// Note: this is an adaptation of the Java code,
// which is apparently a variation of Figure 5-2 in "Hacker's Delight"
// by Henry S. Warren.
@@ -238,28 +203,25 @@ static inline Long warrenBitCount(U64 i) {
return (Long)i & 0x7f;
}
-Long warrenCountBitsSetInMatrix (U64 * array, Long length) {
- Long i = 0;
+Long warrenCountBitsSetInMatrix(const U64* array, Long length) {
Long count = 0;
- for (i = 0; i < length; i++) {
+ for (Long i = 0; i < length; i++) {
count += warrenBitCount(array[i]);
}
return count;
}
-/*******************************************************/
// This code is Figure 5-9 in "Hacker's Delight" by Henry S. Warren.
#define CSA(h,l,a,b,c) {U64 u = a^b; U64 v = c; h = (a&b) | (u&v); l = u^v;}
-Long countBitsSetInMatrix (U64 * A, Long length) {
+Long countBitsSetInMatrix(const U64* A, Long length) {
if ((length & 0x7) != 0) throw std::invalid_argument("the length of the array must be a multiple of 8");
- Long tot, i;
+ Long tot = 0;
U64 ones, twos, twosA, twosB, fours, foursA, foursB, eights;
- tot = 0;
fours = twos = ones = 0;
- for (i = 0; i <= length - 8; i = i + 8) {
+ for (Long i = 0; i <= length - 8; i = i + 8) {
CSA(twosA, ones, ones, A[i+0], A[i+1]);
CSA(twosB, ones, ones, A[i+2], A[i+3]);
CSA(foursA, twos, twos, twosA, twosB);
@@ -272,7 +234,7 @@ Long countBitsSetInMatrix (U64 * A, Long length) {
tot += warrenBitCount(eights);
}
- tot = 8*tot + 4*warrenBitCount(fours) + 2*warrenBitCount(twos) + warrenBitCount(ones);
+ tot = 8 * tot + 4 * warrenBitCount(fours) + 2 * warrenBitCount(twos) + warrenBitCount(ones);
// Because I still don't fully trust this fancy version
// assert(tot == wegnerCountBitsSetInMatrix(A, length));
@@ -280,7 +242,6 @@ Long countBitsSetInMatrix (U64 * A, Long length) {
return (tot);
}
-/*********************************************/
// Here are some timings made with quickTestMerge.c
// for the "5 5" case:
diff --git a/cpc/src/iconEstimator.cpp b/cpc/src/iconEstimator.cpp
index d1d1e48..3b12437 100644
--- a/cpc/src/iconEstimator.cpp
+++ b/cpc/src/iconEstimator.cpp
@@ -28,8 +28,6 @@
#include <stdexcept>
-/******************************************************************************************/
-
// The ICON estimator for FM85 sketches is defined by the arXiv paper.
// The current file provides exact and approximate implementations of this estimator.
@@ -47,8 +45,6 @@
// This file also provides a validation procedure that compares its approximate
// and exact implementations of the FM85 ICON estimator.
-/******************************************************************************************/
-
#define iconMinLogK 4
#define iconMaxLogK 26
@@ -59,9 +55,8 @@
#define iconTableSize (iconPolynomialNumCoefficients * (1 + (iconMaxLogK - iconMinLogK)))
-/******************************************************************************************/
-double iconPolynomialCoefficents [iconTableSize] = {
+const double iconPolynomialCoefficents[iconTableSize] = {
// log K = 4
0.9895027971889700513, 0.3319496644645180128, 0.1242818722715769986, -0.03324149686026930256, -0.2985637298081619817,
@@ -241,67 +236,57 @@ double iconPolynomialCoefficents [iconTableSize] = {
#endif
};
-/******************************************************************************************/
-
-double evaluatePolynomial (double * coefficients, int start, int num, double x) {
- int final = start + num - 1;
+double evaluatePolynomial(const double* coefficients, int start, int num, double x) {
+ const int final = start + num - 1;
double total = coefficients[final];
- int j;
- for (j = final-1; j >= start; j--) {
+ for (int j = final - 1; j >= start; j--) {
total *= x;
total += coefficients[j];
}
return total;
}
-/******************************************************************************************/
-
-double iconExponentialApproximation (double k, double c) {
+double iconExponentialApproximation(double k, double c) {
return (0.7940236163830469 * k * pow(2.0, c / k));
}
-/******************************************************************************************/
-double getIconEstimate (Short lgK, Long c) {
+double getIconEstimate(Short lgK, Long c) {
if (lgK < iconMinLogK || lgK > iconMaxLogK) throw std::out_of_range("lgK out of range");
if (c < 2L) return ((c == 0L) ? 0.0 : 1.0);
- Long k = 1L << lgK;
- double doubleK = (double) k;
- double doubleC = (double) c;
+ const Long k = 1L << lgK;
+ const double doubleK = (double) k;
+ const double doubleC = (double) c;
// Differing thresholds ensure that the approximated estimator is monotonically increasing.
- double thresholdFactor = ((lgK < 14) ? 5.7 : 5.6);
- if (doubleC > (thresholdFactor * doubleK)) return (iconExponentialApproximation (doubleK, doubleC));
- double factor = evaluatePolynomial (iconPolynomialCoefficents,
+ const double thresholdFactor = ((lgK < 14) ? 5.7 : 5.6);
+ if (doubleC > (thresholdFactor * doubleK)) return iconExponentialApproximation(doubleK, doubleC);
+ const double factor = evaluatePolynomial(iconPolynomialCoefficents,
iconPolynomialNumCoefficients * (lgK - iconMinLogK),
iconPolynomialNumCoefficients,
// The somewhat arbitrary constant 2.0 is baked into the table iconPolynomialCoefficents[].
doubleC / (2.0 * doubleK));
- double ratio = doubleC / doubleK;
+ const double ratio = doubleC / doubleK;
// The somewhat arbitrary constant 66.774757 is baked into the table iconPolynomialCoefficents[].
- double term = 1.0 + (ratio * ratio * ratio / 66.774757);
- double result = doubleC * factor * term;
+ const double term = 1.0 + (ratio * ratio * ratio / 66.774757);
+ const double result = doubleC * factor * term;
if (result >= doubleC) return result;
else return doubleC;
}
-/******************************************************************************************/
-/******************************************************************************************/
-
#ifdef SLOW_EXACT_VERSION
// Important note: do not change anything in the following function.
// It has been carefully designed and tested for numerical accuracy.
// In particular, the use of log1p and expm1 is critically important.
-static inline double qnj (double kf, double nf, int col) {
- double tmp1 = -1.0 / (kf * (pow (2.0, ((double) col))));
- double tmp2 = log1p (tmp1);
- return (-1.0 * (expm1 (nf * tmp2)));
+static inline double qnj(double kf, double nf, int col) {
+ const double tmp1 = -1.0 / (kf * (pow(2.0, ((double) col))));
+ const double tmp2 = log1p(tmp1);
+ return (-1.0 * (expm1(nf * tmp2)));
}
-double exactCofN (double kf, double nf) {
+double exactCofN(double kf, double nf) {
double total = 0.0;
- int col;
- for (col=128; col >= 1; col--) {
+ for (int col = 128; col >= 1; col--) {
total += qnj (kf, nf, col);
};
return (kf * total);
@@ -309,7 +294,7 @@ double exactCofN (double kf, double nf) {
#define iconInversionTolerance 1.0e-15
-double exactIconEstimatorBinarySearch (double kf, double targetC, double nLoIn, double nHiIn) {
+double exactIconEstimatorBinarySearch(double kf, double targetC, double nLoIn, double nHiIn) {
int depth = 0;
double nLo = nLoIn;
double nHi = nHiIn;
@@ -319,14 +304,14 @@ double exactIconEstimatorBinarySearch (double kf, double targetC, double nLoIn,
double nMid = nLo + 0.5 * (nHi - nLo);
if (nMid <= nLo || nMid >= nHi) throw std::logic_error("binary search error");
if (((nHi - nLo) / nMid) < iconInversionTolerance) return (nMid);
- double midC = exactCofN (kf, nMid);
+ double midC = exactCofN(kf, nMid);
if (midC == targetC) return (nMid);
if (midC < targetC) {nLo = nMid; depth++; goto tailRecurse;}
if (midC > targetC) {nHi = nMid; depth++; goto tailRecurse;}
throw std::logic_error("bad value in binary search");
}
-double exactIconEstimatorBracketHi (double kf, double targetC, double nLo) {
+double exactIconEstimatorBracketHi(double kf, double targetC, double nLo) {
int depth = 0;
double curN = 2.0 * nLo;
double curC = exactCofN(kf, curN);
@@ -337,43 +322,37 @@ double exactIconEstimatorBracketHi (double kf, double targetC, double nLo) {
curC = exactCofN(kf, curN);
}
if (curC <= targetC) throw std::logic_error("error in exactIconEstimatorBracketHi");
- return (curN);
+ return curN;
}
-double exactIconEstimator (int lgK, long c) {
+double exactIconEstimator(int lgK, long c) {
double targetC = (double) c;
if (c == 0L || c == 1L) return (targetC);
double kf = (double) (1L << lgK);
double nLo = targetC; if (exactCofN(kf, nLo) >= targetC) throw std::logic_error("bracket lo error"); // bracket lo
- double nHi = exactIconEstimatorBracketHi (kf, targetC, nLo); // bracket hi
- return (exactIconEstimatorBinarySearch (kf, targetC, nLo, nHi));
+ double nHi = exactIconEstimatorBracketHi(kf, targetC, nLo); // bracket hi
+ return exactIconEstimatorBinarySearch(kf, targetC, nLo, nHi);
}
-/*
- while (exactCofN(kf, nHi) <= targetC) {nHi *= 2.0;} // bracket
-*/
-
#endif
-/*******************************************************/
-
#ifdef TESTING_DRIVER
-int main (int argc, char ** argv) {
+int main(int argc, char ** argv) {
if (argc != 2) {
- fprintf (stderr, "Usage: %s lg_k > output_file\n", argv[0]); fflush (stderr);
+ fprintf(stderr, "Usage: %s lg_k > output_file\n", argv[0]); fflush(stderr);
exit(EXIT_FAILURE);
}
int lgK = atoi(argv[1]);
- long k = (1L << lgK);
+ const long k = 1L << lgK;
long c = 1;
while (c < k * 64) { // was k * 15
- double exact = exactIconEstimator (lgK, c);
- double approx = approximateIconEstimator (lgK, c);
- double relDiff = (approx - exact) / exact;
- printf ("%ld\t%.19g\t%.19g\t%.19g\n", c, relDiff, exact, approx);
- long a = c+1;
- long b = 1001 * c / 1000;
+ const double exact = exactIconEstimator(lgK, c);
+ const double approx = approximateIconEstimator(lgK, c);
+ const double relDiff = (approx - exact) / exact;
+ printf("%ld\t%.19g\t%.19g\t%.19g\n", c, relDiff, exact, approx);
+ const long a = c + 1;
+ const long b = 1001 * c / 1000;
c = ((a > b) ? a : b);
}
}
diff --git a/cpc/src/u32Table.cpp b/cpc/src/u32Table.cpp
index 8b19d5c..ef4a0c1 100644
--- a/cpc/src/u32Table.cpp
+++ b/cpc/src/u32Table.cpp
@@ -29,52 +29,39 @@
extern void* (*fm85alloc)(size_t);
extern void (*fm85free)(void*);
-/*******************************************************/
-
-u32Table * u32TableMake (Short lgSize, Short numValidBits) {
+u32Table* u32TableMake(Short lgSize, Short numValidBits) {
if (lgSize < 2) throw std::invalid_argument("lgSize must be >= 2");
- Long numSlots = (1LL << lgSize);
- u32Table * self = (u32Table *) fm85alloc (sizeof(u32Table));
+ const Long numSlots = 1LL << lgSize;
+ u32Table* self = (u32Table*) fm85alloc(sizeof(u32Table));
if (self == NULL) throw std::bad_alloc();
- U32 * arr = (U32 *) fm85alloc ((size_t) (numSlots * sizeof(U32)));
+ U32* arr = (U32*) fm85alloc((size_t) (numSlots * sizeof(U32)));
if (arr == NULL) throw std::bad_alloc();
- Long i = 0;
- for (i = 0; i < numSlots; i++) { arr[i] = ALL32BITS; }
+ for (Long i = 0; i < numSlots; i++) { arr[i] = ALL32BITS; }
if (numValidBits < 1 || numValidBits > 32) throw std::invalid_argument("numValidBits must be between 1 and 32");
self->validBits = numValidBits;
self->lgSize = lgSize;
self->numItems = 0;
self->slots = arr;
- return (self);
+ return self;
}
-/*******************************************************/
-
-u32Table * u32TableCopy (u32Table * self) {
+u32Table* u32TableCopy(const u32Table* self) {
if (self == NULL) throw std::invalid_argument("self is null");
if (self->slots == NULL) throw std::invalid_argument("no slots");
- Long numSlots = (1LL << self->lgSize);
- u32Table * newObj = (u32Table *) shallowCopy ((void *) self, sizeof(u32Table));
- newObj->slots = (U32 *) shallowCopy ((void *) self->slots, ((size_t) numSlots) * sizeof(U32));
- return (newObj);
+ const Long numSlots = 1LL << self->lgSize;
+ u32Table* newObj = (u32Table*) shallowCopy((void*) self, sizeof(u32Table));
+ newObj->slots = (U32*) shallowCopy((void*) self->slots, ((size_t) numSlots) * sizeof(U32));
+ return newObj;
}
-// newObj->validBits = self->validBits;
-// newObj->lgSize = self->lgSize;
-// newObj->numItems = self->numItems;
-
-/*******************************************************/
-
-void u32TableFree (u32Table * self) {
+void u32TableFree(u32Table* self) {
if (self != NULL) {
- if (self->slots != NULL) fm85free (self->slots);
- fm85free (self);
+ if (self->slots != NULL) fm85free(self->slots);
+ fm85free(self);
}
}
-/*******************************************************/
-
-void u32TableShow (u32Table * self) {
+void u32TableShow(const u32Table* self) {
Long tableSize = 1LL << self->lgSize;
printf ("\nu32Table (%d valid bits; %lld of %lld slots occupied)\n",
self->validBits, (long long int) self->numItems, (long long int) tableSize);
@@ -87,45 +74,35 @@ void u32TableShow (u32Table * self) {
fflush (stdout);
}
-/*******************************************************/
-
-void u32TableClear (u32Table * self) { // clear the table without resizing it
- Long tableSize = 1LL << self->lgSize;
- U32 * arr = self->slots;
- Long i;
- for (i = 0; i < tableSize; i++) { arr[i] = ALL32BITS; }
+void u32TableClear(u32Table* self) { // clear the table without resizing it
+ const Long tableSize = 1LL << self->lgSize;
+ U32* arr = self->slots;
+ for (Long i = 0; i < tableSize; i++) { arr[i] = ALL32BITS; }
self->numItems = 0;
}
-/*******************************************************/
-
-void printU32Array (U32 * array, Long arrayLength) {
- Long i = 0;
+void printU32Array(const U32* array, Long arrayLength) {
printf ("\nu32Array [%lld]\n", (long long int) arrayLength);
- for (i = 0; i < arrayLength; i++) {
+ for (Long i = 0; i < arrayLength; i++) {
printf ("%d:\t%8X\n", (int) i, array[i]);
}
- fflush (stdout);
+ fflush(stdout);
}
-/*******************************************************/
-
#define U32_TABLE_LOOKUP_SHARED_CODE_SECTION \
- Long tableSize = 1LL << self->lgSize; \
- Long mask = tableSize - 1LL; \
- Short shift = self->validBits - self->lgSize; \
+ const Long tableSize = 1LL << self->lgSize; \
+ const Long mask = tableSize - 1LL; \
+ const Short shift = self->validBits - self->lgSize; \
Long probe = ((Long) item) >> shift; \
if (probe < 0 || probe > mask) throw std::out_of_range("probe out of range"); \
- U32 * arr = self->slots; \
+ U32* arr = self->slots; \
U32 fetched = arr[probe]; \
while (fetched != item && fetched != ALL32BITS) { \
probe = (probe + 1) & mask; \
fetched = arr[probe]; \
}
-/*******************************************************/
-
-void u32TableMustInsert (u32Table * self, U32 item) {
+void u32TableMustInsert(u32Table* self, U32 item) {
U32_TABLE_LOOKUP_SHARED_CODE_SECTION;
if (fetched == item) { throw std::logic_error("item exists"); }
else {
@@ -135,58 +112,48 @@ void u32TableMustInsert (u32Table * self, U32 item) {
}
}
-/*******************************************************/
-
// This one is specifically tailored to be part of our fm85 decompression scheme.
-u32Table * makeU32TableFromPairsArray (U32 * pairs, Long numPairs, Short sketchLgK) {
+u32Table* makeU32TableFromPairsArray(const U32* pairs, Long numPairs, Short sketchLgK) {
Short lgNumSlots = 2;
while (u32TableUpsizeDenom * numPairs > u32TableUpsizeNumer * (1LL << lgNumSlots)) { lgNumSlots++; }
- u32Table * table = u32TableMake (lgNumSlots, 6 + sketchLgK); // Already filled with the "Empty" value which is ALL32BITS.
- Long i = 0;
+ u32Table* table = u32TableMake(lgNumSlots, 6 + sketchLgK); // Already filled with the "Empty" value which is ALL32BITS.
// Note: there is a possible "snowplow effect" here because the caller is passing in a sorted pairs array.
// However, we are starting out with the correct final table size, so the problem might not occur.
- for (i = 0; i < numPairs; i++) {
- u32TableMustInsert (table, pairs[i]);
+ for (Long i = 0; i < numPairs; i++) {
+ u32TableMustInsert(table, pairs[i]);
}
table->numItems = numPairs;
- return (table);
+ return table;
}
-/*******************************************************/
-
-void privateU32TableRebuild (u32Table * self, Short newLgSize) {
+void privateU32TableRebuild(u32Table* self, Short newLgSize) {
if (newLgSize < 2) throw std::logic_error("newLgSize < 2");
- Long newSize = (1LL << newLgSize);
- Long oldSize = (1LL << self->lgSize);
- // printf ("rebuilding: %lld -> %lld; %lld items in table\n", oldSize, newSize, self->numItems); fflush (stdout);
+ const Long newSize = 1LL << newLgSize;
+ const Long oldSize = 1LL << self->lgSize;
if (newSize <= self->numItems) throw std::logic_error("newSize <= numItems");
- U32 * oldSlots = self->slots;
- U32 * newSlots = (U32 *) fm85alloc ((size_t) (newSize * sizeof(U32)));
+ U32* oldSlots = self->slots;
+ U32* newSlots = (U32*) fm85alloc((size_t) (newSize * sizeof(U32)));
if (newSlots == NULL) throw std::bad_alloc();
- Long i;
- for (i = 0; i < newSize; i++) {
+ for (Long i = 0; i < newSize; i++) {
newSlots[i] = ALL32BITS;
}
self->slots = newSlots;
self->lgSize = newLgSize;
- for (i = 0; i < oldSize; i++) {
+ for (Long i = 0; i < oldSize; i++) {
U32 item = oldSlots[i];
if (item != ALL32BITS) {
- u32TableMustInsert (self, item);
+ u32TableMustInsert(self, item);
}
}
- fm85free (oldSlots);
- return;
+ fm85free(oldSlots);
}
-/*******************************************************/
-
// Returns true iff the item was new and was therefore added to the table.
-Boolean u32TableMaybeInsert (u32Table * self, U32 item) {
+bool u32TableMaybeInsert(u32Table* self, U32 item) {
U32_TABLE_LOOKUP_SHARED_CODE_SECTION;
- if (fetched == item) { return 0; }
+ if (fetched == item) { return false; }
else {
if (fetched != ALL32BITS) throw std::logic_error("could not insert");
arr[probe] = item;
@@ -194,17 +161,15 @@ Boolean u32TableMaybeInsert (u32Table * self, U32 item) {
while (u32TableUpsizeDenom * self->numItems > u32TableUpsizeNumer * (1LL << self->lgSize)) {
privateU32TableRebuild(self, self->lgSize + 1);
}
- return 1;
+ return true;
}
}
-/*******************************************************/
-
// Returns true iff the item was present and was therefore removed from the table.
-Boolean u32TableMaybeDelete (u32Table * self, U32 item) {
+bool u32TableMaybeDelete(u32Table* self, U32 item) {
U32_TABLE_LOOKUP_SHARED_CODE_SECTION;
- if (fetched == ALL32BITS) { return 0; }
+ if (fetched == ALL32BITS) { return false; }
else {
if (fetched != item) throw std::logic_error("item does not exist");
// delete the item
@@ -224,24 +189,22 @@ Boolean u32TableMaybeDelete (u32Table * self, U32 item) {
while (u32TableDownsizeDenom * self->numItems < u32TableDownsizeNumer * (1LL << self->lgSize) && self->lgSize > 2) {
privateU32TableRebuild(self, self->lgSize - 1);
}
- return 1;
+ return true;
}
}
-/*******************************************************/
-
// While extracting the items from a linear probing hashtable,
// this will usually undo the wrap-around provided that the table
// isn't too full. Experiments suggest that for sufficiently large tables
// the load factor would have to be over 90 percent before this would fail frequently,
// and even then the subsequent sort would fix things up.
-U32 * u32TableUnwrappingGetItems (u32Table * self, Long * returnNumItems) {
+U32* u32TableUnwrappingGetItems(const u32Table* self, Long* returnNumItems) {
*returnNumItems = self->numItems;
if (self->numItems < 1) { return (NULL); }
- U32 * slots = self->slots;
- Long tableSize = (1LL << self->lgSize);
- U32 * result = (U32 *) fm85alloc ((size_t) (self->numItems * sizeof(U32)));
+ U32* slots = self->slots;
+ const Long tableSize = 1LL << self->lgSize;
+ U32* result = (U32*) fm85alloc((size_t) (self->numItems * sizeof(U32)));
if (result == NULL) throw std::bad_alloc();
Long i = 0;
Long l = 0;
@@ -261,32 +224,31 @@ U32 * u32TableUnwrappingGetItems (u32Table * self, Long * returnNumItems) {
if (look != ALL32BITS) { result[l++] = look; }
}
if (l != r + 1) throw std::logic_error("unwrapping error");
- return (result);
+ return result;
}
-/*******************************************************/
// The Java version won't need this, because it provides a good array sort.
-void u32KnuthShellSort3(U32 a[], Long l, Long r)
-{ Long i, h;
- for (h = 1; h <= (r-l)/9; h = 3*h+1) ;
+void u32KnuthShellSort3(U32 a[], Long l, Long r) {
+ Long h;
+ for (h = 1; h <= (r-l)/9; h = 3*h+1);
for ( ; h > 0; h /= 3) {
- for (i = l+h; i <= r; i++) {
- Long j = i; U32 v = a[i];
- while (j >= l+h && v < a[j-h])
- { a[j] = a[j-h]; j -= h; }
+ for (Long i = l+h; i <= r; i++) {
+ Long j = i; U32 v = a[i];
+ while (j >= l + h && v < a[j - h]) {
+ a[j] = a[j - h]; j -= h;
+ }
a[j] = v;
}
}
Long bad = 0;
- for (i = l; i < r-1; i++) {
- if (a[i] > a[i+1]) bad++;
+ for (Long i = l; i < r - 1; i++) {
+ if (a[i] > a[i + 1]) bad++;
};
if (bad != 0) throw std::logic_error("sorting error");
}
-/*******************************************************/
// In applications where the input array is already nearly sorted,
// insertion sort runs in linear time with a very small constant.
// This introspective version of insertion sort protects against
@@ -294,49 +256,38 @@ void u32KnuthShellSort3(U32 a[], Long l, Long r)
// It keeps track of how much work has been done, and if that exceeds a
// constant times the array length, it switches to a different sorting algorithm.
-void introspectiveInsertionSort(U32 a[], Long l, Long r) // r points AT the rightmost element
-{ Long i;
- Long length = r - l + 1;
+void introspectiveInsertionSort(U32 a[], Long l, Long r) { // r points AT the rightmost element
+ const Long length = r - l + 1;
Long cost = 0;
Long costLimit = 8 * length;
- for (i = l+1; i <= r; i++) {
+ for (Long i = l + 1; i <= r; i++) {
Long j = i;
U32 v = a[i];
- while (j >= l+1 && v < a[j-1]) {
- a[j] = a[j-1];
+ while (j >= l + 1 && v < a[j - 1]) {
+ a[j] = a[j - 1];
j -= 1;
}
a[j] = v;
- cost += (i - j); // distance moved is a measure of work
+ cost += i - j; // distance moved is a measure of work
if (cost > costLimit) {
- //fprintf (stderr, "switching to the other sorting algorithm\n"); fflush (stderr);
u32KnuthShellSort3(a, l, r); // In the Java version, this should be the system's array sort.
return;
}
}
- // The following sanity check can eventually go away, but it seems like a
- // good idea to perform it while the code is under development.
- // Long bad = 0;
- // for (i = l; i < r-1; i++) {
- // if (a[i] > a[i+1]) bad++;
- // };
- // assert (bad == 0);
}
-// printf ("cost was %lld (arrlen=%lld)\n", cost, length); fflush (stdout);
-
-
-/******************************************************/
// This merge is safe to use in carefully designed overlapping scenarios.
-void u32Merge (U32 * arrA, Long startA, Long lengthA, // input
- U32 * arrB, Long startB, Long lengthB, // input
- U32 * arrC, Long startC) { // output
- Long lengthC = lengthA + lengthB;
- Long limA = startA + lengthA;
- Long limB = startB + lengthB;
- Long limC = startC + lengthC;
+void u32Merge(
+ const U32* arrA, Long startA, Long lengthA, // input
+ const U32* arrB, Long startB, Long lengthB, // input
+ U32* arrC, Long startC) // output
+{
+ const Long lengthC = lengthA + lengthB;
+ const Long limA = startA + lengthA;
+ const Long limB = startB + lengthB;
+ const Long limC = startC + lengthC;
Long a = startA;
Long b = startB;
Long c = startC;
@@ -350,12 +301,9 @@ void u32Merge (U32 * arrA, Long startA, Long lengthA, // input
}
-
-/*******************************************************/
-
#ifdef TOKUDA
-Long tokudaIncrements [48] =
+const Long tokudaIncrements [48] =
{ 1LL, 4LL, 9LL, 20LL, 46LL, 103LL, 233LL, 525LL, 1182LL, 2660LL, 5985LL, 13467LL, 30301LL, 68178LL,
153401LL, 345152LL, 776591LL, 1747331LL, 3931496LL, 8845866LL, 19903198LL, 44782196LL,
100759940LL, 226709866LL, 510097200LL, 1147718700LL, 2582367076LL, 5810325920LL,
@@ -367,15 +315,16 @@ Long tokudaIncrements [48] =
// Call with r pointing AT the rightmost element of the subarray.
-void u32TokudaShellSort(U32 a[], Long l, Long r)
-{ Long i, h, k;
- for (k = 0; tokudaIncrements[k] <= (r-l)/5; k++) ;
+void u32TokudaShellSort(U32 a[], Long l, Long r) {
+ Long k;
+ for (k = 0; tokudaIncrements[k] <= (r - l) / 5; k++);
for (; k >= 0; k--) {
- h = tokudaIncrements[k];
- for (i = l+h; i <= r; i++) {
+ Long h = tokudaIncrements[k];
+ for (Long i = l + h; i <= r; i++) {
Long j = i; U32 v = a[i];
- while (j >= l+h && v < a[j-h])
- { a[j] = a[j-h]; j -= h; }
+ while (j >= l+h && v < a[j - h]) {
+ a[j] = a[j - h]; j -= h;
+ }
a[j] = v;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org