commit 1cd2509ed74ae47965006d16de3c09db029b4efe
Author: Gianfranco Costamagna <costamagnagianfranco@yahoo.it>
Date:   Mon Jan 4 23:45:13 2021 +0100

    Fix various cmake issues:
    "CMAKE_INSTALL_FULL_LIBDIR" not being correctly evaluated and used
    pkgconfig directory wrongly set to include instead of lib
    cmake directory wrongly set to include instead of lib
    core_libname contrib_libname PACKAGE_CMAKE_INSTALL_INCLUDEDIR PACKAGE_CMAKE_INSTALL_LIBDIR variables not being substituted to cmake.in files
    cmake helpers not being correctly set

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 41de688..71dbbd5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,7 +21,7 @@ if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
 endif()
 
 set(LIB_DESTINATION
-  "${CMAKE_INSTALL_FULL_LIBDIR}" CACHE STRING "Define lib output directory name")
+  "${CMAKE_INSTALL_LIBDIR}" CACHE STRING "Define lib output directory name")
 
 
 ####################################
diff --git a/src/config/CMakeLists.txt b/src/config/CMakeLists.txt
index e5e6624..fe8e8b8 100644
--- a/src/config/CMakeLists.txt
+++ b/src/config/CMakeLists.txt
@@ -1,16 +1,16 @@
 ####################################
 # Set config vars
 ####################################
-set(core_libname, "lucene++")
-set(contrib_libname, "lucene++-contrib")
+set(core_libname "lucene++")
+set(contrib_libname "lucene++-contrib")
 
 set(
-    PACKAGE_CMAKE_INSTALL_INCLUDEDIR,
-    "${lucene++_INCLUDE_DIR}/lucene++/")
+    PACKAGE_CMAKE_INSTALL_INCLUDEDIR
+    "${CMAKE_INSTALL_INCLUDEDIR}/lucene++/")
 
 set(
-    PACKAGE_CMAKE_INSTALL_LIBDIR,
-    "${LIB_INSTALL_DIR}/cmake")
+    PACKAGE_CMAKE_INSTALL_LIBDIR
+    "${LIB_DESTINATION}")
 
 
 ####################################
diff --git a/src/config/contrib/CMakeLists.txt b/src/config/contrib/CMakeLists.txt
index c0dd86f..b4a4391 100644
--- a/src/config/contrib/CMakeLists.txt
+++ b/src/config/contrib/CMakeLists.txt
@@ -9,7 +9,7 @@ if(NOT WIN32)
   install(
     FILES
       "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contrib.pc"
-    DESTINATION "include/pkgconfig")
+    DESTINATION "${LIB_DESTINATION}/pkgconfig")
 endif()
 
 
@@ -19,7 +19,8 @@ endif()
 configure_package_config_file(
   "${CMAKE_CURRENT_SOURCE_DIR}/liblucene++-contribConfig.cmake.in"
   "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfig.cmake"
-  INSTALL_DESTINATION "${LIB_DESTINATION}/cmake")
+  INSTALL_DESTINATION "${LIB_DESTINATION}/cmake/liblucene++-contrib"
+  PATH_VARS contrib_libname PACKAGE_CMAKE_INSTALL_INCLUDEDIR PACKAGE_CMAKE_INSTALL_LIBDIR)
 
 write_basic_package_version_file(
   "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfigVersion.cmake"
@@ -30,4 +31,4 @@ install(
   FILES
     "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfig.cmake"
     "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfigVersion.cmake"
-  DESTINATION "include/cmake")
+  DESTINATION "${LIB_DESTINATION}/cmake/liblucene++-contrib")
diff --git a/src/config/contrib/liblucene++-contribConfig.cmake.in b/src/config/contrib/liblucene++-contribConfig.cmake.in
index f92f683..85fdfd2 100644
--- a/src/config/contrib/liblucene++-contribConfig.cmake.in
+++ b/src/config/contrib/liblucene++-contribConfig.cmake.in
@@ -20,6 +20,6 @@ if (NOT DEFINED set_and_check)
 endif()
 
 
-set_and_check(liblucene++-contrib_INCLUDE_DIRS "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@/@contrib_libname@")
-set_and_check(liblucene++-contrib_LIBRARY_DIRS "@PACKAGE_CMAKE_INSTALL_LIBDIR@")
-set(liblucene++-contrib_LIBRARIES "@PACKAGE_CMAKE_INSTALL_LIBDIR@/@contrib_libname@")
+set_and_check(liblucene++-contrib_INCLUDE_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@")
+set_and_check(liblucene++-contrib_LIBRARY_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_LIBDIR@")
+set(liblucene++-contrib_LIBRARIES "@contrib_libname@")
diff --git a/src/config/core/CMakeLists.txt b/src/config/core/CMakeLists.txt
index a3eb17a..65376f5 100644
--- a/src/config/core/CMakeLists.txt
+++ b/src/config/core/CMakeLists.txt
@@ -9,7 +9,7 @@ if(NOT WIN32)
   install(
     FILES
       "${CMAKE_CURRENT_BINARY_DIR}/liblucene++.pc"
-    DESTINATION "include/pkgconfig")
+    DESTINATION "${LIB_DESTINATION}/pkgconfig")
 endif()
 
 
@@ -19,7 +19,8 @@ endif()
 configure_package_config_file(
   "${CMAKE_CURRENT_SOURCE_DIR}/liblucene++Config.cmake.in"
   "${CMAKE_CURRENT_BINARY_DIR}/liblucene++Config.cmake"
-  INSTALL_DESTINATION "${LIB_DESTINATION}/cmake")
+  INSTALL_DESTINATION "${LIB_DESTINATION}/cmake/liblucene++"
+  PATH_VARS core_libname PACKAGE_CMAKE_INSTALL_INCLUDEDIR PACKAGE_CMAKE_INSTALL_LIBDIR)
 
 write_basic_package_version_file(
   ${CMAKE_CURRENT_BINARY_DIR}/liblucene++ConfigVersion.cmake
@@ -30,4 +31,4 @@ install(
   FILES
     "${CMAKE_CURRENT_BINARY_DIR}/liblucene++Config.cmake"
     "${CMAKE_CURRENT_BINARY_DIR}/liblucene++ConfigVersion.cmake"
-  DESTINATION "include/cmake")
+  DESTINATION "${LIB_DESTINATION}/cmake/liblucene++")
diff --git a/src/config/core/liblucene++Config.cmake.in b/src/config/core/liblucene++Config.cmake.in
index 89b48a3..574f812 100644
--- a/src/config/core/liblucene++Config.cmake.in
+++ b/src/config/core/liblucene++Config.cmake.in
@@ -20,8 +20,8 @@ if (NOT DEFINED set_and_check)
 endif()
 
 
-set_and_check(liblucene++_INCLUDE_DIRS "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@/@core_libname@")
-set_and_check(liblucene++_LIBRARY_DIRS "@PACKAGE_CMAKE_INSTALL_LIBDIR@")
-set(liblucene++_LIBRARIES "@PACKAGE_CMAKE_INSTALL_LIBDIR@/@core_libname@")
+set_and_check(liblucene++_INCLUDE_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@")
+set_and_check(liblucene++_LIBRARY_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_LIBDIR@")
+set(liblucene++_LIBRARIES "@core_libname@")
 
 

commit 127492f1ab85fd74ab9f9dff0be5b0a63bf7c271
Merge: df65bf5 fd9eaf1
Author: Alan Wright <85800+alanw@users.noreply.github.com>
Date:   Tue Dec 29 17:46:39 2020 +0000

    Merge pull request #160 from Kakueeen/master
    
    fix a bug of ChineseTokenizer


commit fd9eaf10c49239d700af848062acc1d5efd54aa8
Author: liuzhangjian <liuzhangjian@uniontech.com>
Date:   Fri Dec 4 15:41:31 2020 +0800

    Title:fix a bug of ChineseTokenizer
    
    Description:When I use ChineseAnalyzer for Chinese word segmentation, I find that English and numbers are treated as one word and I think they should be separated.
    
    RootCause:Null
    
    Solution:

diff --git a/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp b/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp
index d2a19f3..8313445 100644
--- a/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp
+++ b/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp
@@ -38,7 +38,7 @@ bool ChineseFilter::incrementToken() {
                 if (text.length() > 1) {
                     return true;
                 }
-            } else if (UnicodeUtil::isOther(text[0])) {
+            } else if (UnicodeUtil::isOther(text[0]) || UnicodeUtil::isDigit(text[0])) {
                 // One Chinese character as one Chinese word.
                 // Chinese word extraction to be added later here.
                 return true;
diff --git a/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp b/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp
index 38bf987..3b4de74 100644
--- a/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp
+++ b/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp
@@ -65,6 +65,7 @@ bool ChineseTokenizer::incrementToken() {
 
     length = 0;
     start = offset;
+    bool last_is_en = false, last_is_num = false;
 
     while (true) {
         wchar_t c;
@@ -82,11 +83,30 @@ bool ChineseTokenizer::incrementToken() {
             c = ioBuffer[bufferIndex++];
         }
 
-        if (UnicodeUtil::isDigit(c) || UnicodeUtil::isLower(c) || UnicodeUtil::isUpper(c)) {
+        if (UnicodeUtil::isLower(c) || UnicodeUtil::isUpper(c)) {
+            if (last_is_num) {
+                --bufferIndex;
+                --offset;
+                return flush();
+            }
+
+            push(c);
+            if (length == MAX_WORD_LEN) {
+                return flush();
+            }
+            last_is_en = true;
+        } else if (UnicodeUtil::isDigit(c)) {
+            if (last_is_en) {
+                --bufferIndex;
+                --offset;
+                return flush();
+            }
+
             push(c);
             if (length == MAX_WORD_LEN) {
                 return flush();
             }
+            last_is_num = true;
         } else if (UnicodeUtil::isOther(c)) {
             if (length > 0) {
                 --bufferIndex;