You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/09/07 08:39:52 UTC
[GitHub] [arrow] AndrewTsao commented on pull request #8104: fast exit loop if length equals zero
AndrewTsao commented on pull request #8104:
URL: https://github.com/apache/arrow/pull/8104#issuecomment-688166550
On my windows machine, test result. BM_ParseUnsignedBreak is use break statment to break unroll loop. (/O2 /Oi)
```
2020-09-07T13:28:06+08:00
Running release\Release\test_strtoll.exe
Run on (16 X 2095 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 1024 KiB (x8)
L3 Unified 11264 KiB (x1)
----------------------------------------------------------------
Benchmark Time CPU Iterations
----------------------------------------------------------------
BM_ParseUnsignedBreak 292 ns 292 ns 2243575
BM_ParseUnsigned 262 ns 260 ns 2639500
```
On my linux machines, (/O3)
```
Running ./centos8/test_strtoll
Run on (4 X 2200 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x4)
L1 Instruction 32 KiB (x4)
L2 Unified 256 KiB (x4)
L3 Unified 30720 KiB (x4)
Load Average: 0.61, 0.16, 0.05
----------------------------------------------------------------
Benchmark Time CPU Iterations
----------------------------------------------------------------
BM_ParseUnsignedBreak 236 ns 236 ns 2955433
BM_ParseUnsigned 237 ns 236 ns 2963977
```
Bellow is my benchmark code.
```cpp
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <string>
#include <vector>
#include "benchmark/benchmark.h"
#if defined(__GNUC__)
#define ARROW_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
#define ARROW_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#define ARROW_NORETURN __attribute__((noreturn))
#define ARROW_NOINLINE __attribute__((noinline))
#define ARROW_PREFETCH(addr) __builtin_prefetch(addr)
#elif defined(_MSC_VER)
#define ARROW_NORETURN __declspec(noreturn)
#define ARROW_NOINLINE __declspec(noinline)
#define ARROW_PREDICT_FALSE(x) (x)
#define ARROW_PREDICT_TRUE(x) (x)
#define ARROW_PREFETCH(addr)
#else
#define ARROW_NORETURN
#define ARROW_PREDICT_FALSE(x) (x)
#define ARROW_PREDICT_TRUE(x) (x)
#define ARROW_PREFETCH(addr)
#endif
#define PARSE_UNSIGNED_ITERATION_BREAK(C_TYPE) \
if (length > 0) { \
uint8_t digit = ParseDecimalDigit(*s++); \
result = static_cast<C_TYPE>(result * 10U); \
length--; \
if (ARROW_PREDICT_FALSE(digit > 9U)) { \
/* Non-digit */ \
return false; \
} \
result = static_cast<C_TYPE>(result + digit); \
} else { \
break; \
}
#define PARSE_UNSIGNED_ITERATION(C_TYPE) \
if (length > 0) { \
uint8_t digit = ParseDecimalDigit(*s++); \
result = static_cast<C_TYPE>(result * 10U); \
length--; \
if (ARROW_PREDICT_FALSE(digit > 9U)) { \
/* Non-digit */ \
return false; \
} \
result = static_cast<C_TYPE>(result + digit); \
}
#define PARSE_UNSIGNED_ITERATION_LAST(C_TYPE) \
if (length > 0) { \
if (ARROW_PREDICT_FALSE(result > \
std::numeric_limits<C_TYPE>::max() / 10U)) { \
/* Overflow */ \
return false; \
} \
uint8_t digit = ParseDecimalDigit(*s++); \
result = static_cast<C_TYPE>(result * 10U); \
C_TYPE new_result = static_cast<C_TYPE>(result + digit); \
if (ARROW_PREDICT_FALSE(--length > 0)) { \
/* Too many digits */ \
return false; \
} \
if (ARROW_PREDICT_FALSE(digit > 9U)) { \
/* Non-digit */ \
return false; \
} \
if (ARROW_PREDICT_FALSE(new_result < result)) { \
/* Overflow */ \
return false; \
} \
result = new_result; \
}
inline uint8_t ParseDecimalDigit(char c) {
return static_cast<uint8_t>(c - '0');
}
inline bool ParseUnsignedBreak(const char* s, size_t length, uint64_t* out) {
uint64_t result = 0;
do {
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_LAST(uint64_t);
} while (false);
*out = result;
return true;
}
inline bool ParseUnsigned(const char* s, size_t length, uint64_t* out) {
uint64_t result = 0;
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION_LAST(uint64_t);
*out = result;
return true;
}
inline bool ParseUnsignedBreak(const char* s, size_t length, uint32_t* out) {
uint32_t result = 0;
do {
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_LAST(uint32_t);
} while (false);
*out = result;
return true;
}
static uint64_t x = 0;
static void BM_ParseUnsigned(benchmark::State& state) {
// Perform setup here
std::vector<std::string> samples;
for (size_t i = 0; i < 22; i++) {
samples.emplace_back(std::string(i, '1'));
}
for (auto _ : state) {
// This code gets timed
for (auto& sample : samples) {
uint64_t val;
ParseUnsigned(sample.c_str(), sample.size(), &val);
x += val;
}
}
}
static void BM_ParseUnsignedBreak(benchmark::State& state) {
// Perform setup here
std::vector<std::string> samples;
for (size_t i = 0; i < 22; i++) {
samples.emplace_back(std::string(i, '1'));
}
for (auto _ : state) {
// This code gets timed
for (auto& sample : samples) {
uint64_t val;
ParseUnsignedBreak(sample.c_str(), sample.size(), &val);
x += val;
}
}
// std::cout << x << std::endl;
}
// Register the function as a benchmark
BENCHMARK(BM_ParseUnsignedBreak);
BENCHMARK(BM_ParseUnsigned);
// Run the benchmark
BENCHMARK_MAIN();
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org