You are viewing a plain-text version of this content; the canonical link was not preserved in this copy.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/06/20 15:07:52 UTC
[doris-thirdparty] branch clucene updated: [fix](chinese) fix chinese word cut memory leak (#95)
This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push:
new 0a06d9f9 [fix](chinese) fix chinese word cut memory leak (#95)
0a06d9f9 is described below
commit 0a06d9f9da49574b9e03a70a28ab9f58fe6e7174
Author: zzzxl <33...@users.noreply.github.com>
AuthorDate: Tue Jun 20 23:07:47 2023 +0800
[fix](chinese) fix chinese word cut memory leak (#95)
---
src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp | 11 +++++++----
src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h | 3 +++
2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp
index 8d7d8674..6874555a 100644
--- a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp
+++ b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp
@@ -33,7 +33,13 @@ LanguageBasedAnalyzer::LanguageBasedAnalyzer(const TCHAR *language, bool stem, A
this->mode = mode;
}
-LanguageBasedAnalyzer::~LanguageBasedAnalyzer() = default;
+LanguageBasedAnalyzer::~LanguageBasedAnalyzer() {
+ if (streams) {
+ _CLDELETE(streams->filteredTokenStream);
+ _CLDELETE(streams);
+ }
+ _CLLDELETE(stopSet);
+}
void LanguageBasedAnalyzer::setStopWords(const TCHAR** stopwords) {
StopFilter::fillStopTable(stopSet, stopwords);
@@ -58,8 +64,6 @@ void LanguageBasedAnalyzer::initDict(const std::string &dictPath) {
}
TokenStream *LanguageBasedAnalyzer::reusableTokenStream(const TCHAR * /*fieldName*/, CL_NS(util)::Reader *reader) {
- SavedStreams* streams = reinterpret_cast<SavedStreams*>(getPreviousTokenStream());
-
if (streams == nullptr) {
streams = _CLNEW SavedStreams();
if (_tcscmp(lang, _T("cjk")) == 0) {
@@ -89,7 +93,6 @@ TokenStream *LanguageBasedAnalyzer::reusableTokenStream(const TCHAR * /*fieldNam
streams->filteredTokenStream =
_CLNEW StopFilter(streams->filteredTokenStream, true, stopSet);
}
- setPreviousTokenStream(streams);
} else {
streams->tokenStream->reset(reader);
}
diff --git a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h
index 7c07a882..a7f0c7cf 100644
--- a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h
+++ b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h
@@ -48,6 +48,9 @@ public:
void initDict(const std::string &dictPath);
TokenStream *tokenStream(const TCHAR *fieldName, CL_NS(util)::Reader *reader) override;
TokenStream *reusableTokenStream(const TCHAR * /*fieldName*/, CL_NS(util)::Reader *reader) override;
+
+private:
+ SavedStreams* streams = nullptr;
};
CL_NS_END
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org