commit 1cd2509ed74ae47965006d16de3c09db029b4efe Author: Gianfranco Costamagna <costamagnagianfranco@yahoo.it> Date: Mon Jan 4 23:45:13 2021 +0100 Fix various cmake issues: "CMAKE_INSTALL_FULL_LIBDIR" not being correctly evaluated and used pkgconfig directory wrongly set to include instead of lib cmake directory wrongly set to include instead of lib core_libname contrib_libname PACKAGE_CMAKE_INSTALL_INCLUDEDIR PACKAGE_CMAKE_INSTALL_LIBDIR variables not being substituted to cmake.in files cmake helpers not being correctly set diff --git a/CMakeLists.txt b/CMakeLists.txt index 41de688..71dbbd5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE) endif() set(LIB_DESTINATION - "${CMAKE_INSTALL_FULL_LIBDIR}" CACHE STRING "Define lib output directory name") + "${CMAKE_INSTALL_LIBDIR}" CACHE STRING "Define lib output directory name") #################################### diff --git a/src/config/CMakeLists.txt b/src/config/CMakeLists.txt index e5e6624..fe8e8b8 100644 --- a/src/config/CMakeLists.txt +++ b/src/config/CMakeLists.txt @@ -1,16 +1,16 @@ #################################### # Set config vars #################################### -set(core_libname, "lucene++") -set(contrib_libname, "lucene++-contrib") +set(core_libname "lucene++") +set(contrib_libname "lucene++-contrib") set( - PACKAGE_CMAKE_INSTALL_INCLUDEDIR, - "${lucene++_INCLUDE_DIR}/lucene++/") + PACKAGE_CMAKE_INSTALL_INCLUDEDIR + "${CMAKE_INSTALL_INCLUDEDIR}/lucene++/") set( - PACKAGE_CMAKE_INSTALL_LIBDIR, - "${LIB_INSTALL_DIR}/cmake") + PACKAGE_CMAKE_INSTALL_LIBDIR + "${LIB_DESTINATION}") #################################### diff --git a/src/config/contrib/CMakeLists.txt b/src/config/contrib/CMakeLists.txt index c0dd86f..b4a4391 100644 --- a/src/config/contrib/CMakeLists.txt +++ b/src/config/contrib/CMakeLists.txt @@ -9,7 +9,7 @@ if(NOT WIN32) install( FILES "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contrib.pc" - DESTINATION "include/pkgconfig") + DESTINATION "${LIB_DESTINATION}/pkgconfig") endif() @@ -19,7 +19,8 @@ endif() configure_package_config_file( "${CMAKE_CURRENT_SOURCE_DIR}/liblucene++-contribConfig.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfig.cmake" - INSTALL_DESTINATION "${LIB_DESTINATION}/cmake") + INSTALL_DESTINATION "${LIB_DESTINATION}/cmake/liblucene++-contrib" + PATH_VARS contrib_libname PACKAGE_CMAKE_INSTALL_INCLUDEDIR PACKAGE_CMAKE_INSTALL_LIBDIR) write_basic_package_version_file( "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfigVersion.cmake" @@ -30,4 +31,4 @@ install( FILES "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfig.cmake" "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfigVersion.cmake" - DESTINATION "include/cmake") + DESTINATION "${LIB_DESTINATION}/cmake/liblucene++-contrib") diff --git a/src/config/contrib/liblucene++-contribConfig.cmake.in b/src/config/contrib/liblucene++-contribConfig.cmake.in index f92f683..85fdfd2 100644 --- a/src/config/contrib/liblucene++-contribConfig.cmake.in +++ b/src/config/contrib/liblucene++-contribConfig.cmake.in @@ -20,6 +20,6 @@ if (NOT DEFINED set_and_check) endif() -set_and_check(liblucene++-contrib_INCLUDE_DIRS "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@/@contrib_libname@") -set_and_check(liblucene++-contrib_LIBRARY_DIRS "@PACKAGE_CMAKE_INSTALL_LIBDIR@") -set(liblucene++-contrib_LIBRARIES "@PACKAGE_CMAKE_INSTALL_LIBDIR@/@contrib_libname@") +set_and_check(liblucene++-contrib_INCLUDE_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@") +set_and_check(liblucene++-contrib_LIBRARY_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_LIBDIR@") +set(liblucene++-contrib_LIBRARIES "@contrib_libname@") diff --git a/src/config/core/CMakeLists.txt b/src/config/core/CMakeLists.txt index a3eb17a..65376f5 100644 --- a/src/config/core/CMakeLists.txt +++ b/src/config/core/CMakeLists.txt @@ -9,7 +9,7 @@ if(NOT WIN32) install( FILES "${CMAKE_CURRENT_BINARY_DIR}/liblucene++.pc" - DESTINATION "include/pkgconfig") + DESTINATION "${LIB_DESTINATION}/pkgconfig") endif() @@ -19,7 +19,8 @@ endif() configure_package_config_file( "${CMAKE_CURRENT_SOURCE_DIR}/liblucene++Config.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/liblucene++Config.cmake" - INSTALL_DESTINATION "${LIB_DESTINATION}/cmake") + INSTALL_DESTINATION "${LIB_DESTINATION}/cmake/liblucene++" + PATH_VARS core_libname PACKAGE_CMAKE_INSTALL_INCLUDEDIR PACKAGE_CMAKE_INSTALL_LIBDIR) write_basic_package_version_file( ${CMAKE_CURRENT_BINARY_DIR}/liblucene++ConfigVersion.cmake @@ -30,4 +31,4 @@ install( FILES "${CMAKE_CURRENT_BINARY_DIR}/liblucene++Config.cmake" "${CMAKE_CURRENT_BINARY_DIR}/liblucene++ConfigVersion.cmake" - DESTINATION "include/cmake") + DESTINATION "${LIB_DESTINATION}/cmake/liblucene++") diff --git a/src/config/core/liblucene++Config.cmake.in b/src/config/core/liblucene++Config.cmake.in index 89b48a3..574f812 100644 --- a/src/config/core/liblucene++Config.cmake.in +++ b/src/config/core/liblucene++Config.cmake.in @@ -20,8 +20,8 @@ if (NOT DEFINED set_and_check) endif() -set_and_check(liblucene++_INCLUDE_DIRS "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@/@core_libname@") -set_and_check(liblucene++_LIBRARY_DIRS "@PACKAGE_CMAKE_INSTALL_LIBDIR@") -set(liblucene++_LIBRARIES "@PACKAGE_CMAKE_INSTALL_LIBDIR@/@core_libname@") +set_and_check(liblucene++_INCLUDE_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@") +set_and_check(liblucene++_LIBRARY_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_LIBDIR@") +set(liblucene++_LIBRARIES "@core_libname@") commit 127492f1ab85fd74ab9f9dff0be5b0a63bf7c271 Merge: df65bf5 fd9eaf1 Author: Alan Wright <85800+alanw@users.noreply.github.com> Date: Tue Dec 29 17:46:39 2020 +0000 Merge pull request #160 from Kakueeen/master fix a bug of ChineseTokenizer commit fd9eaf10c49239d700af848062acc1d5efd54aa8 Author: liuzhangjian <liuzhangjian@uniontech.com> Date: Fri Dec 4 15:41:31 2020 +0800 Title:fix a bug of ChineseTokenizer Description:When I use ChineseAnalyzer for Chinese word segmentation, I find that English and numbers are treated as one word and I think they should be separated. RootCause:Null Solution: diff --git a/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp b/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp index d2a19f3..8313445 100644 --- a/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp +++ b/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp @@ -38,7 +38,7 @@ bool ChineseFilter::incrementToken() { if (text.length() > 1) { return true; } - } else if (UnicodeUtil::isOther(text[0])) { + } else if (UnicodeUtil::isOther(text[0]) || UnicodeUtil::isDigit(text[0])) { // One Chinese character as one Chinese word. // Chinese word extraction to be added later here. return true; diff --git a/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp b/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp index 38bf987..3b4de74 100644 --- a/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp +++ b/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp @@ -65,6 +65,7 @@ bool ChineseTokenizer::incrementToken() { length = 0; start = offset; + bool last_is_en = false, last_is_num = false; while (true) { wchar_t c; @@ -82,11 +83,30 @@ bool ChineseTokenizer::incrementToken() { c = ioBuffer[bufferIndex++]; } - if (UnicodeUtil::isDigit(c) || UnicodeUtil::isLower(c) || UnicodeUtil::isUpper(c)) { + if (UnicodeUtil::isLower(c) || UnicodeUtil::isUpper(c)) { + if (last_is_num) { + --bufferIndex; + --offset; + return flush(); + } + + push(c); + if (length == MAX_WORD_LEN) { + return flush(); + } + last_is_en = true; + } else if (UnicodeUtil::isDigit(c)) { + if (last_is_en) { + --bufferIndex; + --offset; + return flush(); + } + push(c); if (length == MAX_WORD_LEN) { return flush(); } + last_is_num = true; } else if (UnicodeUtil::isOther(c)) { if (length > 0) { --bufferIndex;