Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace icu with utf8proc #228

Merged
merged 22 commits into from
Jun 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/build_arm_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ jobs:
- name: Restore submodules cache
run: |
cp -a /builder_cache/contrib ./
- name: remove old clang and link clang-17 to clang
- name: remove old clang and link clang-18 to clang
if: matrix.os == 'ubuntu-22.04'
run: |
sudo rm -f /usr/bin/clang || true
sudo ln -s /usr/bin/clang-17 /usr/bin/clang
sudo ln -s /usr/bin/clang-18 /usr/bin/clang
sudo rm -f /usr/bin/clang++ || true
sudo ln -s /usr/bin/clang++-17 /usr/bin/clang++
sudo ln -s /usr/bin/clang++-18 /usr/bin/clang++
which clang++
clang++ --version
- name: Make linux-arm64
Expand All @@ -65,11 +65,11 @@ jobs:
eval "$(pyenv init -)"
pyenv local "${{ matrix.python-version }}"
python3 -m pip install auditwheel
auditwheel -v repair -w dist/ --plat manylinux_2_17_aarch64 dist/*.whl
auditwheel -v repair -w dist/ --plat manylinux_2_18_aarch64 dist/*.whl
continue-on-error: false
- name: Show files
run: |
# e.g: remove chdb-0.11.4-cp310-cp310-linux_aarch64.whl, keep chdb-0.11.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
# e.g: remove chdb-0.11.4-cp310-cp310-linux_aarch64.whl, keep chdb-0.11.4-cp310-cp310-manylinux_2_18_aarch64.manylinux2014_aarch64.whl
sudo rm -f dist/*linux_aarch64.whl
ls -lh dist
shell: bash
Expand Down
14 changes: 7 additions & 7 deletions .github/workflows/build_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ jobs:
uname -a
wget https://apt.llvm.org/llvm.sh
chmod +x llvm.sh
sudo ./llvm.sh 17
which clang++-17
clang++-17 --version
sudo ./llvm.sh 18
which clang++-18
clang++-18 --version
sudo apt-get install -y make cmake ccache ninja-build yasm gawk wget
ccache -s
- name: Update git
Expand Down Expand Up @@ -85,13 +85,13 @@ jobs:
key: ${{ matrix.os }}
max-size: 5G
append-timestamp: true
- name: remove old clang and link clang-17 to clang
- name: remove old clang and link clang-18 to clang
if: matrix.os == 'ubuntu-20.04'
run: |
sudo rm -f /usr/bin/clang || true
sudo ln -s /usr/bin/clang-17 /usr/bin/clang
sudo ln -s /usr/bin/clang-18 /usr/bin/clang
sudo rm -f /usr/bin/clang++ || true
sudo ln -s /usr/bin/clang++-17 /usr/bin/clang++
sudo ln -s /usr/bin/clang++-18 /usr/bin/clang++
which clang++
clang++ --version
- name: Run chdb/build.sh
Expand Down Expand Up @@ -120,7 +120,7 @@ jobs:
make wheel
- name: Install patchelf from github
run: |
wget https://github.com/NixOS/patchelf/releases/download/0.17.2/patchelf-0.17.2-x86_64.tar.gz -O patchelf.tar.gz
wget https://github.com/NixOS/patchelf/releases/download/0.18.2/patchelf-0.18.2-x86_64.tar.gz -O patchelf.tar.gz
tar -xvf patchelf.tar.gz
sudo cp bin/patchelf /usr/bin/
sudo chmod +x /usr/bin/patchelf
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -372,3 +372,6 @@
[submodule "contrib/arrow"]
path = contrib/arrow
url = https://github.com/auxten/arrow
[submodule "contrib/utf8proc"]
path = contrib/utf8proc
url = https://github.com/JuliaStrings/utf8proc.git
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ endif()

if (ENABLE_PYTHON)
set(USE_PYTHON 1)
set(USE_UTF8PROC 1)
endif()

# Global libraries
Expand Down
68 changes: 48 additions & 20 deletions chdb/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} -DENABLE_THINLTO=0 -DENABLE_TESTS=0
-DENABLE_LIBRARIES=0 -DENABLE_RUST=0 \
${GLIBC_COMPATIBILITY} \
-DENABLE_UTILS=0 ${LLVM} ${UNWIND} \
${ICU} ${JEMALLOC} \
${ICU} -DENABLE_UTF8PROC=1 ${JEMALLOC} \
-DENABLE_PARQUET=1 -DENABLE_ROCKSDB=1 -DENABLE_SQLITE=1 -DENABLE_VECTORSCAN=1 \
-DENABLE_PROTOBUF=1 -DENABLE_THRIFT=1 \
-DENABLE_RAPIDJSON=1 \
Expand Down Expand Up @@ -161,12 +161,7 @@ LIBCHDB_SO="libchdb.so"
# Build libchdb.so
cmake ${CMAKE_ARGS} -DENABLE_PYTHON=0 ..
ninja -d keeprsp
if [ ! -f CMakeFiles/clickhouse.rsp ]; then
echo "CMakeFiles/clickhouse.rsp not found"
exit 1
fi

cp -a CMakeFiles/clickhouse.rsp CMakeFiles/libchdb.rsp

BINARY=${BUILD_DIR}/programs/clickhouse
echo -e "\nBINARY: ${BINARY}"
Expand All @@ -175,6 +170,18 @@ echo -e "\nldd ${BINARY}"
${LDD} ${BINARY}
rm -f ${BINARY}

cd ${BUILD_DIR}
# Run ninja verbosely and capture its output in build.log; a failing run is
# tolerated here because only the logged clickhouse link command is needed.
ninja -d keeprsp -v > build.log || true
# Non-empty only when the logged link command references @CMakeFiles/clickhouse.rsp.
USING_RESPONSE_FILE=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log | grep '@CMakeFiles/clickhouse.rsp' || true)

if [ -n "${USING_RESPONSE_FILE}" ]; then
    # The link command points at the response file, so it has to exist.
    if [ ! -f CMakeFiles/clickhouse.rsp ]; then
        echo "CMakeFiles/clickhouse.rsp not found"
        exit 1
    fi
    cp -a CMakeFiles/clickhouse.rsp CMakeFiles/libchdb.rsp
fi

LIBCHDB_CMD=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log \
| sed "s/-o programs\/clickhouse/-fPIC -shared -o ${LIBCHDB_SO}/" \
Expand All @@ -186,16 +193,21 @@ LIBCHDB_CMD=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log \

# generate the command to generate libchdb.so
LIBCHDB_CMD=$(echo ${LIBCHDB_CMD} | sed 's/ '${CHDB_PY_MODULE}'/ '${LIBCHDB_SO}'/g')
${SED_INPLACE} 's/ '${CHDB_PY_MODULE}'/ '${LIBCHDB_SO}'/g' CMakeFiles/libchdb.rsp

if [ ! "${USING_RESPONSE_FILE}" == "" ]; then
${SED_INPLACE} 's/ '${CHDB_PY_MODULE}'/ '${LIBCHDB_SO}'/g' CMakeFiles/libchdb.rsp
fi

if [ "$(uname)" == "Linux" ]; then
LIBCHDB_CMD=$(echo ${LIBCHDB_CMD} | sed 's/ '${PYINIT_ENTRY}'/ /g')
${SED_INPLACE} 's/ '${PYINIT_ENTRY}'/ /g' CMakeFiles/libchdb.rsp
if [ ! "${USING_RESPONSE_FILE}" == "" ]; then
${SED_INPLACE} 's/ '${PYINIT_ENTRY}'/ /g' CMakeFiles/libchdb.rsp
fi
fi

if [ "$(uname)" == "Darwin" ]; then
LIBCHDB_CMD=$(echo ${LIBCHDB_CMD} | sed 's/ '${PYINIT_ENTRY}'/ -Wl,-exported_symbol,_query_stable -Wl,-exported_symbol,_free_result -Wl,-exported_symbol,_query_stable_v2 -Wl,-exported_symbol,_free_result_v2/g')
${SED_INPLACE} 's/ '${PYINIT_ENTRY}'/ -Wl,-exported_symbol,_query_stable -Wl,-exported_symbol,_free_result -Wl,-exported_symbol,_query_stable_v2 -Wl,-exported_symbol,_free_result_v2/g' CMakeFiles/libchdb.rsp
# ${SED_INPLACE} 's/ '${PYINIT_ENTRY}'/ -Wl,-exported_symbol,_query_stable -Wl,-exported_symbol,_free_result -Wl,-exported_symbol,_query_stable_v2 -Wl,-exported_symbol,_free_result_v2/g' CMakeFiles/libchdb.rsp
fi

LIBCHDB_CMD=$(echo ${LIBCHDB_CMD} | sed 's/@CMakeFiles\/clickhouse.rsp/@CMakeFiles\/libchdb.rsp/g')
Expand All @@ -220,12 +232,16 @@ ninja -d keeprsp || true
cd ${BUILD_DIR}
ninja -d keeprsp -v > build.log || true

if [ ! -f CMakeFiles/clickhouse.rsp ]; then
echo "CMakeFiles/clickhouse.rsp not found"
exit 1
fi
USING_RESPONSE_FILE=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log | grep '@CMakeFiles/clickhouse.rsp' || true)

cp -a CMakeFiles/clickhouse.rsp CMakeFiles/pychdb.rsp
# Only when the link command uses a response file: duplicate it as pychdb.rsp,
# aborting if the file the command refers to is missing.
if [ -n "${USING_RESPONSE_FILE}" ]; then
    if [ ! -f CMakeFiles/clickhouse.rsp ]; then
        echo "CMakeFiles/clickhouse.rsp not found"
        exit 1
    fi
    cp -a CMakeFiles/clickhouse.rsp CMakeFiles/pychdb.rsp
fi

# extract the command to generate CHDB_PY_MODULE
PYCHDB_CMD=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log \
Expand All @@ -237,19 +253,21 @@ PYCHDB_CMD=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log \
)


# inplace modify the CMakeFiles/pychdb.rsp
${SED_INPLACE} 's/-o programs\/clickhouse/-fPIC -Wl,-undefined,dynamic_lookup -shared ${PYINIT_ENTRY} -o ${CHDB_PY_MODULE}/' CMakeFiles/pychdb.rsp
${SED_INPLACE} 's/ -Wl,-undefined,error/ -Wl,-undefined,dynamic_lookup/g' CMakeFiles/pychdb.rsp
${SED_INPLACE} 's/ -Xlinker --no-undefined//g' CMakeFiles/pychdb.rsp
# # inplace modify the CMakeFiles/pychdb.rsp
# ${SED_INPLACE} 's/-o programs\/clickhouse/-fPIC -Wl,-undefined,dynamic_lookup -shared ${PYINIT_ENTRY} -o ${CHDB_PY_MODULE}/' CMakeFiles/pychdb.rsp
# ${SED_INPLACE} 's/ -Wl,-undefined,error/ -Wl,-undefined,dynamic_lookup/g' CMakeFiles/pychdb.rsp
# ${SED_INPLACE} 's/ -Xlinker --no-undefined//g' CMakeFiles/pychdb.rsp


if [ "$(uname)" == "Linux" ]; then
# remove src/CMakeFiles/clickhouse_malloc.dir/Common/stubFree.c.o
PYCHDB_CMD=$(echo ${PYCHDB_CMD} | sed 's/ src\/CMakeFiles\/clickhouse_malloc.dir\/Common\/stubFree.c.o//g')
${SED_INPLACE} 's/ src\/CMakeFiles\/clickhouse_malloc.dir\/Common\/stubFree.c.o//g' CMakeFiles/pychdb.rsp
# put -Wl,-wrap,malloc ... after -DUSE_JEMALLOC=1
PYCHDB_CMD=$(echo ${PYCHDB_CMD} | sed 's/ -DUSE_JEMALLOC=1/ -DUSE_JEMALLOC=1 -Wl,-wrap,malloc -Wl,-wrap,valloc -Wl,-wrap,pvalloc -Wl,-wrap,calloc -Wl,-wrap,realloc -Wl,-wrap,memalign -Wl,-wrap,aligned_alloc -Wl,-wrap,posix_memalign -Wl,-wrap,free/g')
${SED_INPLACE} 's/ -DUSE_JEMALLOC=1/ -DUSE_JEMALLOC=1 -Wl,-wrap,malloc -Wl,-wrap,valloc -Wl,-wrap,pvalloc -Wl,-wrap,calloc -Wl,-wrap,realloc -Wl,-wrap,memalign -Wl,-wrap,aligned_alloc -Wl,-wrap,posix_memalign -Wl,-wrap,free/g' CMakeFiles/pychdb.rsp
if [ ! "${USING_RESPONSE_FILE}" == "" ]; then
${SED_INPLACE} 's/ src\/CMakeFiles\/clickhouse_malloc.dir\/Common\/stubFree.c.o//g' CMakeFiles/pychdb.rsp
${SED_INPLACE} 's/ -DUSE_JEMALLOC=1/ -DUSE_JEMALLOC=1 -Wl,-wrap,malloc -Wl,-wrap,valloc -Wl,-wrap,pvalloc -Wl,-wrap,calloc -Wl,-wrap,realloc -Wl,-wrap,memalign -Wl,-wrap,aligned_alloc -Wl,-wrap,posix_memalign -Wl,-wrap,free/g' CMakeFiles/pychdb.rsp
fi
fi

# save the command to a file for debug
Expand All @@ -264,6 +282,16 @@ LIBCHDB_DIR=${BUILD_DIR}/

PYCHDB=${LIBCHDB_DIR}/${CHDB_PY_MODULE}
LIBCHDB=${LIBCHDB_DIR}/${LIBCHDB_SO}

# Strip debug info plus the .comment/.note sections from both build artifacts.
# Debug builds are left untouched.
# build_type is quoted so an empty value cannot break the test expression.
if [ "${build_type}" == "Debug" ]; then
    echo -e "\nDebug build, skip strip"
else
    echo -e "\nStrip the binary:"
    # ${STRIP} is intentionally left unquoted so a caller-supplied value may
    # carry extra arguments.
    ${STRIP} --strip-debug --remove-section=.comment --remove-section=.note "${PYCHDB}"
    ${STRIP} --strip-debug --remove-section=.comment --remove-section=.note "${LIBCHDB}"
fi

echo -e "\nPYCHDB: ${PYCHDB}"
ls -lh ${PYCHDB}
echo -e "\nLIBCHDB: ${LIBCHDB}"
Expand Down
24 changes: 24 additions & 0 deletions chdb/vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,30 @@ pushd ${PROJ_DIR}
CHDB_VERSION=$(python3 -c 'import setup; print(setup.get_latest_git_tag())')
popd

# Pick the strip tool, unless the caller already exported STRIP.
# Preference order:
#   1. the highest-versioned llvm-strip* found in the known install locations
#      (the last two globs are the macOS locations),
#   2. llvm-strip from PATH,
#   3. strip from PATH.
if [ -z "$STRIP" ]; then
    for strip_glob in \
        "/usr/bin/llvm-strip*" \
        "/usr/local/bin/llvm-strip*" \
        "/usr/local/Cellar/llvm/*/bin/llvm-strip*" \
        "/usr/local/opt/llvm/bin/llvm-strip*"; do
        # ${strip_glob} is unquoted on purpose so the shell expands the pattern.
        # A pattern with no match makes ls fail; its error output is discarded.
        STRIP=$(ls -1 ${strip_glob} 2>/dev/null | sort -V | tail -n 1)
        if [ -n "$STRIP" ]; then
            break
        fi
    done
    unset strip_glob
fi

# if none of them are found, use llvm-strip or strip
# `command -v` is the POSIX lookup; `|| true` keeps a miss from yielding a
# non-zero status so the next fallback can run.
if [ -z "$STRIP" ]; then
    STRIP=$(command -v llvm-strip || true)
fi
if [ -z "$STRIP" ]; then
    STRIP=$(command -v strip || true)
fi
# check current os type, and make ldd command
if [ "$(uname)" == "Darwin" ]; then
LDD="otool -L"
Expand Down
1 change: 1 addition & 0 deletions contrib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ add_contrib (wyhash-cmake wyhash)
add_contrib (cityhash102)
add_contrib (libfarmhash)
add_contrib (icu-cmake icu)
add_contrib (utf8proc-cmake utf8proc)
add_contrib (h3-cmake h3)
add_contrib (mariadb-connector-c-cmake mariadb-connector-c)
add_contrib (libfiu-cmake libfiu)
Expand Down
2 changes: 1 addition & 1 deletion contrib/cassandra-cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
option(ENABLE_CASSANDRA "Enable Cassandra" ${ENABLE_LIBRARIES})
option(ENABLE_CASSANDRA "Enable Cassandra" 0)

if (NOT ENABLE_CASSANDRA)
message(STATUS "Not using cassandra")
Expand Down
1 change: 1 addition & 0 deletions contrib/utf8proc
Submodule utf8proc added at dce381
17 changes: 17 additions & 0 deletions contrib/utf8proc-cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Builds utf8proc from contrib/utf8proc as the `utf8proc` library and exposes
# it under the namespaced alias ch_contrib::utf8proc.
#
# Options:
#   ENABLE_UTF8PROC      - build utf8proc (default 1); when off, no target is created.
# Cache variables:
#   UTF8PROC_INCLUDE_DIR - directory containing the utf8proc sources and headers.
option(ENABLE_UTF8PROC "Enable UTF8PROC" 1)
if (NOT ENABLE_UTF8PROC)
    message(STATUS "Not using utf8proc")
    return()
endif()

set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/utf8proc/")
set(UTF8PROC_INCLUDE_DIR "${LIBRARY_DIR}" CACHE STRING "Path to utf8proc")
message(STATUS "Using utf8proc from ${LIBRARY_DIR}")

add_library(utf8proc "${LIBRARY_DIR}/utf8proc.c")
add_library(ch_contrib::utf8proc ALIAS utf8proc)

# PUBLIC so that every target linking ch_contrib::utf8proc gets the header
# path as a usage requirement; SYSTEM keeps third-party header warnings out of
# consumer builds.
target_include_directories(utf8proc SYSTEM PUBLIC "${LIBRARY_DIR}")
17 changes: 3 additions & 14 deletions programs/local/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,6 @@ if (USE_PYTHON)
endif()
endif()

# add_library(clickhouse-local-lib SHARED ${CLICKHOUSE_LOCAL_SOURCES})

# target_link_libraries(clickhouse-local-lib
# PRIVATE
# boost::program_options
# clickhouse_aggregate_functions
# clickhouse_common_config
# clickhouse_common_io
# clickhouse_functions
# clickhouse_parsers
# clickhouse_storages_system
# clickhouse_table_functions
# )

set (CLICKHOUSE_LOCAL_LINK
PRIVATE
boost::program_options
Expand All @@ -83,6 +69,9 @@ endif()
if (TARGET ch_contrib::azure_sdk)
target_link_libraries(clickhouse-local-lib PRIVATE ch_contrib::azure_sdk)
endif()
if (TARGET ch_contrib::utf8proc)
target_link_libraries(clickhouse-local-lib PRIVATE ch_contrib::utf8proc)
endif()

# Always use internal readpassphrase
target_link_libraries(clickhouse-local-lib PRIVATE readpassphrase)
6 changes: 6 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,12 @@ if (USE_ORC)
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR} "${PROJECT_BINARY_DIR}/contrib/orc/c++/include")
endif ()

# Link utf8proc into dbms and add its include directory to clickhouse_common_io
# when USE_UTF8PROC is set.
if (USE_UTF8PROC)
    # USE_UTF8PROC and ENABLE_UTF8PROC are set independently, so make sure the
    # contrib target exists and fail with an actionable message otherwise.
    if (NOT TARGET ch_contrib::utf8proc)
        message(FATAL_ERROR "USE_UTF8PROC is set but target ch_contrib::utf8proc does not exist; configure with -DENABLE_UTF8PROC=1")
    endif()

    dbms_target_link_libraries(PUBLIC ch_contrib::utf8proc)
    target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC "${UTF8PROC_INCLUDE_DIR}")
    message(STATUS "UTF8PROC_INCLUDE_DIR: ${UTF8PROC_INCLUDE_DIR}")
endif()

if (TARGET ch_contrib::rocksdb)
dbms_target_link_libraries(PUBLIC ch_contrib::rocksdb)
endif()
Expand Down
Loading