You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/01/10 15:12:37 UTC
[doris-thirdparty] branch clucene updated: [Fix](clucene) fix clucene makefile and file name (#27)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push:
new 641247f [Fix](clucene) fix clucene makefile and file name (#27)
641247f is described below
commit 641247fe4989c9de67693139f08d2a92af913a0d
Author: airborne12 <ai...@gmail.com>
AuthorDate: Tue Jan 10 23:12:32 2023 +0800
[Fix](clucene) fix clucene makefile and file name (#27)
---
.clang-format | 16 ++++
.../CLucene/analysis/LanguageBasedAnalyzer.cpp | 5 ++
.../CLucene/analysis/LanguageBasedAnalyzer.h | 9 ++-
.../CLucene/analysis/jieba/ChineseTokenizer.cpp | 28 ++++---
.../CLucene/analysis/jieba/ChineseTokenizer.h | 8 +-
.../CLucene/analysis/jieba/Logging.hpp | 3 +-
src/core/CLucene/analysis/Analyzers.cpp | 4 +-
src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp | 4 +-
src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h | 4 +-
.../bkd/{docIds_writer.cpp => docids_writer.cpp} | 0
.../util/bkd/{docIds_writer.h => docids_writer.h} | 0
src/core/CLucene/util/bkd/point_reader.cpp | 8 +-
src/core/CMakeLists.txt | 88 ++++++++--------------
src/test/analysis/TestAnalysis.cpp | 2 +-
src/test/analysis/TestAnalyzers.cpp | 2 +-
src/test/contribs-lib/analysis/testChinese.cpp | 10 ++-
src/test/document/TestDocument.cpp | 4 +-
src/test/tests.cpp | 13 ++--
src/test/util/TestBKD.cpp | 32 ++++----
src/test/util/TestBKD.h | 10 +--
20 files changed, 128 insertions(+), 122 deletions(-)
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..3b8b570
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,16 @@
+---
+Language: Cpp
+BasedOnStyle: Google
+AccessModifierOffset: -4
+AllowShortFunctionsOnASingleLine: Inline
+ColumnLimit: 100
+ConstructorInitializerIndentWidth: 8 # double of IndentWidth
+ContinuationIndentWidth: 8 # double of IndentWidth
+DerivePointerAlignment: false # always use PointerAlignment
+IndentCaseLabels: false
+IndentWidth: 4
+PointerAlignment: Left
+ReflowComments: false
+SortUsingDeclarations: false
+SpacesBeforeTrailingComments: 1
+SpaceBeforeCpp11BracedList: true
diff --git a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp
index 13294de..1af3a9c 100644
--- a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp
+++ b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp
@@ -37,6 +37,11 @@ void LanguageBasedAnalyzer::setLanguage(const TCHAR *language) {
void LanguageBasedAnalyzer::setStem(bool stem) {
this->stem = stem;
}
+void LanguageBasedAnalyzer::initDict(const std::string& dictPath) {
+ if (_tcscmp(lang, _T("chinese")) == 0) {
+ CL_NS2(analysis, jieba)::ChineseTokenizer::init(dictPath);
+ }
+}
TokenStream *LanguageBasedAnalyzer::tokenStream(const TCHAR *fieldName, Reader *reader) {
TokenStream *ret = NULL;
if (_tcscmp(lang, _T("cjk")) == 0) {
diff --git a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h
index 596c86b..147dc1e 100644
--- a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h
+++ b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h
@@ -15,12 +15,13 @@ class CLUCENE_CONTRIBS_EXPORT LanguageBasedAnalyzer: public CL_NS(analysis)::Ana
TCHAR lang[100];
bool stem;
public:
- LanguageBasedAnalyzer(const TCHAR* language=NULL, bool stem=true);
- ~LanguageBasedAnalyzer();
+ explicit LanguageBasedAnalyzer(const TCHAR* language=nullptr, bool stem=true);
+ ~LanguageBasedAnalyzer() override;
void setLanguage(const TCHAR* language);
void setStem(bool stem);
- TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
- };
+ void initDict(const std::string& dictPath);
+ TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) override;
+};
CL_NS_END
#endif
diff --git a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp
index 02c50aa..410c514 100644
--- a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp
+++ b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp
@@ -2,27 +2,30 @@
#include "ChineseTokenizer.h"
#include "CLucene/util/CLStreams.h"
#include <filesystem>
+#include <memory>
namespace fs = std::filesystem;
CL_NS_DEF2(analysis,jieba)
CL_NS_USE(analysis)
CL_NS_USE(util)
-std::string get_dict_path() {
- if(const char* env_p = std::getenv("DICT_PATH")) {
- return env_p;
- }
- return "";
+std::unique_ptr<cppjieba::Jieba> ChineseTokenizer::cppjieba = nullptr;
+ChineseTokenizer::ChineseTokenizer(lucene::util::Reader *reader) : Tokenizer(reader) {
+ buffer[0] = 0;
}
-static unique_ptr<cppjieba::Jieba> cppjieba = std::make_unique<cppjieba::Jieba>(
- get_dict_path() + "dict/jieba.dict.utf8",
- get_dict_path() + "dict/hmm_model.utf8",
- get_dict_path() + "dict/user.dict.utf8",
- get_dict_path() + "dict/idf.utf8",
- get_dict_path() + "dict/stop_words.utf8");
+void ChineseTokenizer::init(const std::string &dictPath) {
+ if(cppjieba == nullptr) {
+ cppjieba = std::make_unique<cppjieba::Jieba>(
+ dictPath + "/" + "dict/jieba.dict.utf8",
+ dictPath + "/" + "dict/hmm_model.utf8",
+ dictPath + "/" + "dict/user.dict.utf8",
+ dictPath + "/" + "dict/idf.utf8",
+ dictPath + "/" + "dict/stop_words.utf8");
+ }
+}
-CL_NS(analysis)::Token* ChineseTokenizer::next(lucene::analysis::Token* token) {
+CL_NS(analysis)::Token *ChineseTokenizer::next(lucene::analysis::Token *token) {
// try to read all words
if (dataLen == 0) {
auto bufferLen = input->read((const void **) &ioBuffer, 1, 0);
@@ -32,6 +35,7 @@ CL_NS(analysis)::Token* ChineseTokenizer::next(lucene::analysis::Token* token) {
}
char tmp_buffer[4 * bufferLen];
lucene_wcsntoutf8(tmp_buffer, ioBuffer, bufferLen, 4 * bufferLen);
+ init();
cppjieba->Cut(tmp_buffer, tokens_text, true);
dataLen = tokens_text.size();
}
diff --git a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h
index cecdd17..61ab100 100644
--- a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h
+++ b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h
@@ -32,16 +32,14 @@ private:
* members of Tokenizer)
*/
const TCHAR* ioBuffer{};
- //std::unique_ptr<cppjieba::Jieba> cppjieba;
std::vector<std::string> tokens_text;
std::vector<std::unique_ptr<Token>> tokens;
public:
+ static std::unique_ptr<cppjieba::Jieba> cppjieba;
// Constructor
- explicit ChineseTokenizer(lucene::util::Reader *reader) : Tokenizer(reader) {
-
- buffer[0]=0;
- }
+ explicit ChineseTokenizer(lucene::util::Reader *reader);
+ static void init(const std::string& dictPath="");
// Destructor
~ChineseTokenizer() override {}
diff --git a/src/contribs-lib/CLucene/analysis/jieba/Logging.hpp b/src/contribs-lib/CLucene/analysis/jieba/Logging.hpp
index 77540ce..b0ec473 100644
--- a/src/contribs-lib/CLucene/analysis/jieba/Logging.hpp
+++ b/src/contribs-lib/CLucene/analysis/jieba/Logging.hpp
@@ -72,7 +72,8 @@ namespace limonp {
#endif
std::cerr << stream_.str() << std::endl;
if (level_ == LL_FATAL) {
- abort();
+ _CLTHROWA (CL_ERR_UNKNOWN, "failed in chinese tokenizer");
+ //abort();
}
}
diff --git a/src/core/CLucene/analysis/Analyzers.cpp b/src/core/CLucene/analysis/Analyzers.cpp
index 2783a10..6ca4183 100644
--- a/src/core/CLucene/analysis/Analyzers.cpp
+++ b/src/core/CLucene/analysis/Analyzers.cpp
@@ -507,13 +507,13 @@ Token* KeywordTokenizer::next(Token* token){
if (rd == -1)
break;
if ( upto == token->bufferLength() ){
- termBuffer = (TCHAR*)token->resizeTermBuffer<TCHAR>(token->bufferLength() + 8);
+ termBuffer = (TCHAR*)token->resizeTermBuffer<TCHAR>(token->bufferLength() + rd);
}
_tcsncpy(termBuffer + upto, readBuffer, rd);
upto += rd;
}
if ( token->bufferLength() < upto + 1 ){
- termBuffer=(TCHAR *)token->resizeTermBuffer<TCHAR>(token->bufferLength() + 8);
+ termBuffer=(TCHAR *)token->resizeTermBuffer<TCHAR>(token->bufferLength() + upto);
}
termBuffer[upto]=0;
token->setTermLength<TCHAR>(upto);
diff --git a/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp b/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp
index 5e94659..dc77f6b 100644
--- a/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp
+++ b/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp
@@ -9,7 +9,7 @@ bkd_msb_radix_sorter::bkd_msb_radix_sorter(
int dim, int32_t bytes) : MSBRadixSorter(bytes), dim(dim), writer(writer), heap_writer(heap_writer) {
}
-int bkd_msb_radix_sorter::byte_at(int i, int k) {
+int bkd_msb_radix_sorter::byteAt(int i, int k) {
assert(k >= 0);
if (k < writer->bytes_per_dim_) {
// dim bytes
@@ -76,4 +76,4 @@ void bkd_msb_radix_sorter::swap(int i, int j) {
}
}
-CL_NS_END2
\ No newline at end of file
+CL_NS_END2
diff --git a/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h b/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h
index 81ace9e..84db6d1 100644
--- a/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h
+++ b/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h
@@ -23,7 +23,7 @@ public:
int dim, int32_t bytes);
protected:
- int byte_at(int i, int k) override;
+ int byteAt(int i, int k) override;
void swap(int i, int j) override;
};
-CL_NS_END2
\ No newline at end of file
+CL_NS_END2
diff --git a/src/core/CLucene/util/bkd/docIds_writer.cpp b/src/core/CLucene/util/bkd/docids_writer.cpp
similarity index 100%
rename from src/core/CLucene/util/bkd/docIds_writer.cpp
rename to src/core/CLucene/util/bkd/docids_writer.cpp
diff --git a/src/core/CLucene/util/bkd/docIds_writer.h b/src/core/CLucene/util/bkd/docids_writer.h
similarity index 100%
rename from src/core/CLucene/util/bkd/docIds_writer.h
rename to src/core/CLucene/util/bkd/docids_writer.h
diff --git a/src/core/CLucene/util/bkd/point_reader.cpp b/src/core/CLucene/util/bkd/point_reader.cpp
index 8fc52ea..08fd24c 100644
--- a/src/core/CLucene/util/bkd/point_reader.cpp
+++ b/src/core/CLucene/util/bkd/point_reader.cpp
@@ -27,19 +27,19 @@ int64_t point_reader::split(int64_t count,
assert(result);
const std::vector<uint8_t> &packedValue = packed_value();
int64_t ordinal = ord();
- int32_t docid = docid();
+ int32_t doc_id = docid();
if (rightTree->Get(ordinal)) {
- right->append(packedValue, ordinal, docid);
+ right->append(packedValue, ordinal, doc_id);
rightCount++;
if (doClearBits) {
rightTree->Clear(ordinal);
}
} else {
- left->append(packedValue, ordinal, docid);
+ left->append(packedValue, ordinal, doc_id);
}
}
return rightCount;
}
-CL_NS_END2
\ No newline at end of file
+CL_NS_END2
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index c526e70..066d453 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -33,32 +33,32 @@ SET(clucene_core_Files
./CLucene/util/MD5Digester.cpp
./CLucene/util/StringIntern.cpp
./CLucene/util/BitSet.cpp
- CLucene/util/bkd/bkd_writer.cpp
- CLucene/util/bkd/bkd_reader.cpp
- CLucene/util/bkd/index_tree.cpp
- CLucene/util/bkd/packed_index_tree.cpp
- CLucene/util/bkd/legacy_index_tree.cpp
- CLucene/util/bkd/heap_point_writer.cpp
- CLucene/util/bkd/heap_point_reader.cpp
- CLucene/util/bkd/point_reader.cpp
- CLucene/util/bkd/docIds_writer.cpp
- CLucene/util/bkd/bkd_msb_radix_sorter.cpp
- CLucene/util/croaring/roaring.c
- CLucene/util/croaring/roaring.h
- CLucene/util/croaring/roaring.hh
- ./CLucene/util/BitUtil.cpp
- ./CLucene/util/BytesRef.cpp
- ./CLucene/util/BytesRefBuilder.cpp
- ./CLucene/util/CodecUtil.cpp
- ./CLucene/util/LongBitSet.cpp
- ./CLucene/util/IntroSorter.cpp
- ./CLucene/util/Sorter.cpp
- ./CLucene/util/MSBRadixSorter.cpp
- ./CLucene/util/FixedBitSet.cpp
- ./CLucene/util/FutureArrays.cpp
- ./CLucene/util/NumericUtils.cpp
- ./CLucene/util/stringUtil.cpp
- ./CLucene/queryParser/FastCharStream.cpp
+ ./CLucene/util/bkd/bkd_writer.cpp
+ ./CLucene/util/bkd/bkd_reader.cpp
+ ./CLucene/util/bkd/index_tree.cpp
+ ./CLucene/util/bkd/packed_index_tree.cpp
+ ./CLucene/util/bkd/legacy_index_tree.cpp
+ ./CLucene/util/bkd/heap_point_writer.cpp
+ ./CLucene/util/bkd/heap_point_reader.cpp
+ ./CLucene/util/bkd/point_reader.cpp
+ ./CLucene/util/bkd/docids_writer.cpp
+ ./CLucene/util/bkd/bkd_msb_radix_sorter.cpp
+ ./CLucene/util/croaring/roaring.c
+ ./CLucene/util/croaring/roaring.h
+ ./CLucene/util/croaring/roaring.hh
+ ./CLucene/util/BitUtil.cpp
+ ./CLucene/util/BytesRef.cpp
+ ./CLucene/util/BytesRefBuilder.cpp
+ ./CLucene/util/CodecUtil.cpp
+ ./CLucene/util/LongBitSet.cpp
+ ./CLucene/util/IntroSorter.cpp
+ ./CLucene/util/Sorter.cpp
+ ./CLucene/util/MSBRadixSorter.cpp
+ ./CLucene/util/FixedBitSet.cpp
+ ./CLucene/util/FutureArrays.cpp
+ ./CLucene/util/NumericUtils.cpp
+ ./CLucene/util/stringUtil.cpp
+ ./CLucene/queryParser/FastCharStream.cpp
./CLucene/queryParser/MultiFieldQueryParser.cpp
./CLucene/queryParser/QueryParser.cpp
./CLucene/queryParser/QueryParserTokenManager.cpp
@@ -69,25 +69,14 @@ SET(clucene_core_Files
./CLucene/queryParser/legacy/QueryParserBase.cpp
./CLucene/queryParser/legacy/QueryToken.cpp
./CLucene/queryParser/legacy/TokenList.cpp
-
- #./CLucene/queryParser/QueryBuilder.cpp
- #./CLucene/queryParser/classic/QueryParserBase.cpp
- #./CLucene/queryParser/classic/QueryParser.cpp
- #./CLucene/queryParser/classic/Token.cpp
- #./CLucene/queryParser/classic/TokenMgrError.cpp
- #./CLucene/queryParser/classic/CharStream.cpp
- #./CLucene/queryParser/classic/ParseException.cpp
- #./CLucene/queryParser/classic/QueryParserTokenManager.cpp
./CLucene/analysis/standard/StandardAnalyzer.cpp
./CLucene/analysis/standard/StandardFilter.cpp
./CLucene/analysis/standard/StandardTokenizer.cpp
- #./CLucene/analysis/mmseg/MMsegAnalyzer.cpp
- #./CLucene/analysis/mmseg/MmsegTokenizer.cpp
./CLucene/analysis/Analyzers.cpp
./CLucene/analysis/AnalysisHeader.cpp
./CLucene/store/MMapInput.cpp
./CLucene/store/IndexInput.cpp
- ./CLucene/store/ByteArrayDataInput.cpp
+ ./CLucene/store/ByteArrayDataInput.cpp
./CLucene/store/Lock.cpp
./CLucene/store/LockFactory.cpp
./CLucene/store/IndexOutput.cpp
@@ -113,10 +102,10 @@ SET(clucene_core_Files
./CLucene/index/Terms.cpp
./CLucene/index/MergePolicy.cpp
./CLucene/index/DocumentsWriter.cpp
- ./CLucene/index/SDocumentWriter.cpp
- ./CLucene/index/SDocumentWriter.h
- ./CLucene/index/DocumentsWriterThreadState.cpp
- ./CLucene/index/SegmentTermVector.cpp
+ ./CLucene/index/SDocumentWriter.cpp
+ ./CLucene/index/SDocumentWriter.h
+ ./CLucene/index/DocumentsWriterThreadState.cpp
+ ./CLucene/index/SegmentTermVector.cpp
./CLucene/index/TermVectorReader.cpp
./CLucene/index/FieldInfos.cpp
./CLucene/index/CompoundFile.cpp
@@ -213,21 +202,6 @@ SET(clucene_core_Files
./CLucene/search/spans/TermSpans.cpp
)
-#if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64")
-# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=haswell -DAVX2_ON -fstrict-aliasing")
- #ADD_DEFINITIONS(-DAVX2_ON)
- #ADD_DEFINITIONS(-DSSE2_ON)
-#endif()
-
-#SET(pfor_Files_SSE
-# ${clucene-ext_SOURCE_DIR}/for/transpose.c
-# ${clucene-ext_SOURCE_DIR}/for/bitunpack.c
-# ${clucene-ext_SOURCE_DIR}/for/bitpack.c
-# ${clucene-ext_SOURCE_DIR}/for/bitutil.c
-# ${clucene-ext_SOURCE_DIR}/for/vp4d.c
-# ${clucene-ext_SOURCE_DIR}/for/vp4c.c
-# )
-
#if USE_SHARED_OBJECT_FILES then we link directly to the object files (means rebuilding them for the core)
IF ( USE_SHARED_OBJECT_FILES )
GET_SHARED_FILES(clucene_shared_Files)
diff --git a/src/test/analysis/TestAnalysis.cpp b/src/test/analysis/TestAnalysis.cpp
index f741939..73e4337 100644
--- a/src/test/analysis/TestAnalysis.cpp
+++ b/src/test/analysis/TestAnalysis.cpp
@@ -87,7 +87,7 @@ void testChar(CuTest *tc) {
while (stream->next(&t) != NULL) {
count++;
}
- printf("count = %d\n", count);
+ //printf("count = %d\n", count);
_CLDELETE(stream);
}
diff --git a/src/test/analysis/TestAnalyzers.cpp b/src/test/analysis/TestAnalyzers.cpp
index f9a2d09..449d5e2 100644
--- a/src/test/analysis/TestAnalyzers.cpp
+++ b/src/test/analysis/TestAnalyzers.cpp
@@ -486,7 +486,7 @@
_CLLDELETE(reader);
}
-CuSuite *testanalyzers(void)
+CuSuite *testanalyzers()
{
CuSuite *suite = CuSuiteNew(_T("CLucene Analyzers Test"));
diff --git a/src/test/contribs-lib/analysis/testChinese.cpp b/src/test/contribs-lib/analysis/testChinese.cpp
index c74ba1f..d75a000 100644
--- a/src/test/contribs-lib/analysis/testChinese.cpp
+++ b/src/test/contribs-lib/analysis/testChinese.cpp
@@ -142,6 +142,13 @@ void testCJK(CuTest *tc) {
_testCJK(tc, "a\xe5\x95\xa4\xe9\x85\x92\xe5\x95\xa4x", exp2);
}
+std::string get_dict_path() {
+ if(const char* env_p = std::getenv("DICT_PATH")) {
+ return env_p;
+ }
+ return "";
+}
+
void testSimpleJiebaTokenizer(CuTest* tc) {
LanguageBasedAnalyzer a;
CL_NS(util)::StringReader reader(_T("我爱你中国"));
@@ -152,6 +159,7 @@ void testSimpleJiebaTokenizer(CuTest* tc) {
//test with chinese
a.setLanguage(_T("chinese"));
a.setStem(false);
+ a.initDict(get_dict_path());
ts = a.tokenStream(_T("contents"), &reader);
CLUCENE_ASSERT(ts->next(&t) != NULL);
@@ -574,7 +582,7 @@ void testLanguageBasedAnalyzer(CuTest* tc) {
}
CuSuite *testchinese(void) {
- CuSuite *suite = CuSuiteNew(_T("CLucene Analysis Test"));
+ CuSuite *suite = CuSuiteNew(_T("CLucene chinese tokenizer Test"));
SUITE_ADD_TEST(suite, testFile);
SUITE_ADD_TEST(suite, testCJK);
diff --git a/src/test/document/TestDocument.cpp b/src/test/document/TestDocument.cpp
index f6f392c..b90f67d 100644
--- a/src/test/document/TestDocument.cpp
+++ b/src/test/document/TestDocument.cpp
@@ -471,8 +471,8 @@ CuSuite *testdocument(void) {
SUITE_ADD_TEST(suite, TestFields);
SUITE_ADD_TEST(suite, TestMultiSetValueField);
SUITE_ADD_TEST(suite, TestMultiAddValueField);
- SUITE_ADD_TEST(suite, TestSetFieldBench);
- SUITE_ADD_TEST(suite, TestNewFieldBench);
+ //SUITE_ADD_TEST(suite, TestSetFieldBench);
+ //SUITE_ADD_TEST(suite, TestNewFieldBench);
SUITE_ADD_TEST(suite, TestReaderValueField);
SUITE_ADD_TEST(suite, TestAddDocument);
//SUITE_ADD_TEST(suite, TestDateTools);
diff --git a/src/test/tests.cpp b/src/test/tests.cpp
index 32aedad..f1aee90 100644
--- a/src/test/tests.cpp
+++ b/src/test/tests.cpp
@@ -11,10 +11,10 @@ unittest tests[] = {
// {"indexreader", testindexreader},
// {"indexsearcher", testIndexSearcher},
// {"reuters", testreuters},
-// {"analysis", testanalysis},
-// {"analyzers", testanalyzers},
-// {"document", testdocument},
-// {"field", testField},
+ {"analysis", testanalysis},
+ {"analyzers", testanalyzers},
+ {"document", testdocument},
+ {"field", testField},
// {"numbertools", testNumberTools},
// {"debug", testdebug},
// {"ramdirectory", testRAMDirectory},
@@ -39,14 +39,13 @@ unittest tests[] = {
// {"store", teststore},
// {"utf8", testutf8},
// {"bitset", testBitSet},
-// {"bkd", testBKD},
-// {"MSBRadixSorter",testMSBRadixSorter},
+ {"bkd", testBKD},
+ {"MSBRadixSorter",testMSBRadixSorter},
// {"extractterms", testExtractTerms},
// {"spanqueries", testSpanQueries},
// {"stringbuffer", testStringBuffer},
// {"termvectorsreader", testTermVectorsReader},
#ifdef TEST_CONTRIB_LIBS
{"chinese", testchinese},
- //{"germananalyzer", testGermanAnalyzer},
#endif
{"LastTest", NULL}};
diff --git a/src/test/util/TestBKD.cpp b/src/test/util/TestBKD.cpp
index 4c43216..2dcc22c 100644
--- a/src/test/util/TestBKD.cpp
+++ b/src/test/util/TestBKD.cpp
@@ -53,14 +53,14 @@ void TestVisitor1::visit(Roaring *docID, std::vector<uint8_t> &packedValue) {
visit(*docID);
}
-void TestVisitor1::visit(bkd::bkd_docID_set_iterator *iter, std::vector<uint8_t> &packedValue) {
+void TestVisitor1::visit(bkd::bkd_docid_set_iterator *iter, std::vector<uint8_t> &packedValue) {
if (!matches(packedValue.data())) {
return;
}
- int32_t docID = iter->docID_set->nextDoc();
- while (docID != lucene::util::bkd::bkd_docID_set::NO_MORE_DOCS) {
+ int32_t docID = iter->docid_set->nextDoc();
+ while (docID != lucene::util::bkd::bkd_docid_set::NO_MORE_DOCS) {
hits->set(docID);
- docID = iter->docID_set->nextDoc();
+ docID = iter->docid_set->nextDoc();
}
}
@@ -376,9 +376,9 @@ void testBug1Read(CuTest *tc) {
//printf("something wrong in read\n");
printf("clucene error: %s\n", r.what());
}
- printf("hits count=%d\n", result->count());
+ //printf("hits count=%d\n", result->count());
CuAssertEquals(tc, result->count(), 6);
- printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
+ //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
}
}
@@ -444,9 +444,9 @@ void testLowCardinalInts1DRead2(CuTest *tc) {
//printf("something wrong in read\n");
printf("clucene error: %s\n", r.what());
}
- printf("hits count=%d\n", hits->count());
+ //printf("hits count=%d\n", hits->count());
CuAssertEquals(tc, hits->count(), 12928);
- printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
+ //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
}
}
@@ -475,9 +475,9 @@ void testLowCardinalInts1DRead(CuTest *tc) {
//printf("something wrong in read\n");
printf("clucene error: %s\n", r.what());
}
- printf("hits count=%d\n", hits->count());
+ //printf("hits count=%d\n", hits->count());
CuAssertEquals(tc, hits->count(), 256);
- printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
+ //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
}
}
@@ -551,7 +551,7 @@ void testBasicsInts1DRead(CuTest *tc) {
//assertEquals(L"docID=" + to_wstring(docID), expected, actual);
}
- printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
+ //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
auto hits1 = std::make_shared<BitSet>(N);
auto v1 = std::make_unique<TestVisitor1>(queryMin, queryMax, hits1);
str = Misc::currentTimeMillis();
@@ -566,7 +566,7 @@ void testBasicsInts1DRead(CuTest *tc) {
CuAssertEquals(tc, expected, actual);
//assertEquals(L"docID=" + to_wstring(docID), expected, actual);
}
- printf("\nSecond search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
+ //printf("\nSecond search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
}
dir->close();
_CLDECDELETE(dir);
@@ -604,15 +604,15 @@ void testHttplogsRead(CuTest *tc) {
//CuAssertEquals(tc, 0, type);
r->read_index(index_in_);
r->intersect(v.get());
- printf("\ntry query result:%ld\n", r->estimate_point_count(v.get()));
- printf("\nsearch time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
+ //printf("\ntry query result:%ld\n", r->estimate_point_count(v.get()));
+ //printf("\nsearch time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
} catch (CLuceneError &r) {
//printf("something wrong in read\n");
printf("clucene error: %s\n", r.what());
}
- printf("result size = %d\n", result->count());
+ //printf("result size = %d\n", result->count());
CuAssertEquals(tc, result->count(), 8445);
- printf("stats=%s\n", r->stats.to_string().c_str());
+ //printf("stats=%s\n", r->stats.to_string().c_str());
}
dir->close();
_CLDECDELETE(dir);
diff --git a/src/test/util/TestBKD.h b/src/test/util/TestBKD.h
index 8a79552..3264313 100644
--- a/src/test/util/TestBKD.h
+++ b/src/test/util/TestBKD.h
@@ -33,7 +33,7 @@ public:
}
void visit(Roaring *docID, std::vector<uint8_t> &packedValue) override;
void visit(int docID, std::vector<uint8_t> &packedValue) override;
- void visit(lucene::util::bkd::bkd_docID_set_iterator *iter, std::vector<uint8_t> &packedValue) override;
+ void visit(lucene::util::bkd::bkd_docid_set_iterator *iter, std::vector<uint8_t> &packedValue) override;
bool matches(uint8_t *packedValue);
@@ -91,14 +91,14 @@ public:
}
visit(*docID);
};
- void visit(lucene::util::bkd::bkd_docID_set_iterator *iter, std::vector<uint8_t> &packedValue) override {
+ void visit(lucene::util::bkd::bkd_docid_set_iterator *iter, std::vector<uint8_t> &packedValue) override {
if (!matches(packedValue.data())) {
return;
}
- int32_t docID = iter->docID_set->nextDoc();
- while (docID != lucene::util::bkd::bkd_docID_set::NO_MORE_DOCS) {
+ int32_t docID = iter->docid_set->nextDoc();
+ while (docID != lucene::util::bkd::bkd_docid_set::NO_MORE_DOCS) {
hits->set(docID);
- docID = iter->docID_set->nextDoc();
+ docID = iter->docid_set->nextDoc();
}
};
bool matches(uint8_t *packedValue);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org