Install clickhouse and configure mysql sync
Verify support
https://bbs.huaweicloud.com/blogs/369216
grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
Install script
- 这是2024-08-20根据官网安装命令
curl https://clickhouse.com/ | sh
使用如下shell命令下载到本地后,cat得到的脚本内容
The following code was cat from shell command:curl https://clickhouse.com/ > install_clickhouse.sh && cat install_clickhouse.sh
#!/bin/sh -e
OS=$(uname -s)
ARCH=$(uname -m)
DIR=
if [ "${OS}" = "Linux" ]
then
if [ "${ARCH}" = "x86_64" -o "${ARCH}" = "amd64" ]
then
# Require at least x86-64 + SSE4.2 (introduced in 2006). On older hardware fall back to plain x86-64 (introduced in 1999) which
# guarantees at least SSE2. The caveat is that plain x86-64 builds are much less tested than SSE 4.2 builds.
HAS_SSE42=$(grep sse4_2 /proc/cpuinfo)
if [ "${HAS_SSE42}" ]
then
DIR="amd64"
else
DIR="amd64compat"
fi
elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ]
then
# Dispatch between standard and compatibility builds, see cmake/cpu_features.cmake for details. Unfortunately, (1) the ARM ISA level
# cannot be read directly, we need to guess from the "features" in /proc/cpuinfo, and (2) the flags in /proc/cpuinfo are named
# differently than the flags passed to the compiler in cpu_features.cmake.
HAS_ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/ && /lrcpc/')
if [ "${HAS_ARMV82}" ]
then
DIR="aarch64"
else
DIR="aarch64v80compat"
fi
elif [ "${ARCH}" = "powerpc64le" -o "${ARCH}" = "ppc64le" ]
then
DIR="powerpc64le"
elif [ "${ARCH}" = "riscv64" ]
then
DIR="riscv64"
elif [ "${ARCH}" = "s390x" ]
then
DIR="s390x"
fi
elif [ "${OS}" = "FreeBSD" ]
then
if [ "${ARCH}" = "x86_64" -o "${ARCH}" = "amd64" ]
then
DIR="freebsd"
fi
elif [ "${OS}" = "Darwin" ]
then
if [ "${ARCH}" = "x86_64" -o "${ARCH}" = "amd64" ]
then
DIR="macos"
elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ]
then
DIR="macos-aarch64"
fi
fi
if [ -z "${DIR}" ]
then
echo "Operating system '${OS}' / architecture '${ARCH}' is unsupported."
exit 1
fi
clickhouse_download_filename_prefix="clickhouse"
clickhouse="$clickhouse_download_filename_prefix"
if [ -f "$clickhouse" ]
then
read -p "ClickHouse binary ${clickhouse} already exists. Overwrite? [y/N] " answer
if [ "$answer" = "y" -o "$answer" = "Y" ]
then
rm -f "$clickhouse"
else
i=0
while [ -f "$clickhouse" ]
do
clickhouse="${clickhouse_download_filename_prefix}.${i}"
i=$(($i+1))
done
fi
fi
URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
echo
echo "Will download ${URL} into ${clickhouse}"
echo
curl "${URL}" -o "${clickhouse}" && chmod a+x "${clickhouse}" || exit 1
echo
echo "Successfully downloaded the ClickHouse binary, you can run it as:
./${clickhouse}"
if [ "${OS}" = "Linux" ]
then
echo
echo "You can also install it:
sudo ./${clickhouse} install"
fi
Configure MySQL readablity
- Login to MySQL:
mysql -uroot -p123456
CREATE USER 'mysql_clickhouse'@'%' IDENTIFIED BY '123456';
GRANT ALL PRIVILEGES ON *.* TO 'mysql_clickhouse'@'%';
- Define a Table in ClickHouse:
clickhouse-client
CREATE TABLE mysql_table1 (
id UInt64,
column1 String
)
ENGINE = MySQL('mysql-host.domain.com','db1','table1','mysql_clickhouse','123456')
同步整个数据库
- 配置MySQL的/etc/my.cnf添加如下内容:
default_authentication_plugin = mysql_native_password
gtid_mode = ON
enforce_gtid_consistency = ON
- clickhouse设置:
clickhouse-client
set allow_experimental_database_materialized_mysql = 1;
CREATE DATABASE db1_mysql
ENGINE = MaterializedMySQL(
'mysql-host.domain.com:3306',
'db1',
'mysql_clickhouse',
'123456'
);
配置其他机器访问
- vim /etc/clickhouse-server/config.xml
<listen_host>::</listen_host>
把这行从注释掉,改为非注释掉
然后重启服务:sudo systemctl restart clickhouse-server
使用Python获取数据
- 安装:
pip install clickhouse-connect
import clickhouse_connect
client = clickhouse_connect.get_client(host='localhost')
client.command('CREATE TABLE new_table (key UInt32, value String, metric Float64) ENGINE MergeTree ORDER BY key')
row1 = [1000, 'String Value 1000', 5.233]
row2 = [2000, 'String Value 2000', -107.04]
data = [row1, row2]
client.insert('new_table', data, column_names=['key', 'value', 'metric'])
result = client.query('SELECT max(key), avg(metric) FROM new_table')
result.result_rows
Out[13]: [(2000, -50.9035)]
result = client.query('select count(*) from new_table')
count: int = result.first_row[0]