By default the configuration files live under /etc/clickhouse-server/. Three configuration files are needed; everything required for both single-node and cluster deployments is provided below.
config.xml
<clickhouse>
<logger>
<!-- Log level setting; warning is recommended for production. The log file paths below can be changed as needed.
- none (turns off logging)
- fatal
- critical
- error
- warning
- notice
- information
- debug
- trace
- test (not for production usage)
-->
<level>trace</level>
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
<size>1024M</size>
<count>3</count>
</logger>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<!-- interserver_http_port: port used to exchange data between ClickHouse servers. -->
<interserver_http_port>9009</interserver_http_port>
<!-- interserver_http_host: the hostname other servers can use to reach this one; usually the local IP, though a domain name also works. -->
<interserver_http_host>10.0.1.1</interserver_http_host>
<listen_host>0.0.0.0</listen_host>
<!-- Maximum number of connections -->
<max_connections>4096</max_connections>
<!-- Number of seconds ClickHouse waits for incoming requests before closing a connection -->
<keep_alive_timeout>3</keep_alive_timeout>
<!-- Maximum number of concurrent queries -->
<max_concurrent_queries>100</max_concurrent_queries>
<!-- Maximum memory usage of the server process, held reserved for queries; a query that pushes usage above this limit raises an exception. 0, the ClickHouse default, is recommended. -->
<max_server_memory_usage>0</max_server_memory_usage>
<!-- Maximum number of threads in the global thread pool; suited to large data volumes. On a well-provisioned server, raising this can improve query concurrency. -->
<max_thread_pool_size>10000</max_thread_pool_size>
<!-- Maximum RAM usage ratio; 0.8 or 0.9 is typical. Too high a value can max out the CPU. -->
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
<!-- Per the official explanation, roughly analogous to the temp files a Java process produces on OOM; set to 0 to disable. -->
<total_memory_profiler_step>4194304</total_memory_profiler_step>
<!-- Collects random allocations and deallocations and writes them to the system trace log. The probability applies to every alloc/free, regardless of allocation size. -->
<total_memory_tracker_sample_probability>0</total_memory_tracker_sample_probability>
<!-- The uncompressed cache only benefits very short queries, and only in rare cases; for high-volume workloads such as logs, 0 is recommended. -->
<uncompressed_cache_size>0</uncompressed_cache_size>
<!-- Cache size, in bytes, used when synchronizing data; if set too small, tables drop into read-only mode rather frequently. -->
<mark_cache_size>5368709120</mark_cache_size>
<!-- Data storage path -->
<path>/var/lib/clickhouse/</path>
<!-- Multi-disk configuration -->
<storage_configuration>
<disks>
<default>
<!-- Minimum amount of disk space to keep free -->
<keep_free_space_bytes>1024</keep_free_space_bytes>
</default>
<disk_1>
<path>/var/lib/clickhouse/data1/</path>
<keep_free_space_bytes>1024</keep_free_space_bytes>
</disk_1>
<!-- The following block enables hot/cold tiering, with disk_2 as the cold disk; ClickHouse's built-in TTL can move data to the cold disk automatically. On large data volumes, altering the TTL fails fairly often. -->
<!-- <disk_2>
<path>/var/lib/clickhouse/data2/</path>
<keep_free_space_bytes>1024</keep_free_space_bytes>
</disk_2> -->
<!-- The following blocks can be used for hot/warm/cold tiering. -->
<!-- <s3>
<type>s3</type>
<endpoint>http://path/to/endpoint</endpoint>
<access_key_id>your_access_key_id</access_key_id>
<secret_access_key>your_secret_access_key</secret_access_key>
</s3>
<blob_storage_disk>
<type>azure_blob_storage</type>
<storage_account_url>http://account.blob.core.windows.net</storage_account_url>
<container_name>container</container_name>
<account_name>account</account_name>
<account_key>pass123</account_key>
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
<cache_enabled>true</cache_enabled>
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
<skip_access_check>false</skip_access_check>
</blob_storage_disk> -->
</disks>
<!-- Storage policies, referenced when creating tables (see the CREATE TABLE sketch after this file) -->
<policies>
<william>
<volumes>
<hot>
<disk>disk_1</disk>
</hot>
<!--cold>
<disk>disk_2</disk>
</cold-->
</volumes>
<!-- Move factor: when the hot volume's free space falls below this fraction, data is moved to the cold volume -->
<move_factor>0.2</move_factor>
</william>
</policies>
</storage_configuration>
<!-- User-level configuration -->
<users_config>users.xml</users_config>
<!-- Path for temporary data generated while processing queries -->
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
<!-- Default settings profile -->
<default_profile>default</default_profile>
<!-- Comma-separated list of prefixes for custom settings, useful for projects with special table-creation conventions -->
<custom_settings_prefixes></custom_settings_prefixes>
<!-- Default database -->
<default_database>default</default_database>
<mlock_executable>true</mlock_executable>
<distributed_ddl>
<!-- Path of the DDL query queue in ZooKeeper -->
<path>/clickhouse/task_queue/ddl</path>
</distributed_ddl>
<!-- Maximum number of broken data parts tolerated -->
<merge_tree>
<max_suspicious_broken_parts>5</max_suspicious_broken_parts>
</merge_tree>
<!-- Directory for schema files used by input formats such as Protobuf -->
<format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
<!-- Cluster and replica configuration -->
<include_from>/etc/clickhouse-server/metrika.xml</include_from>
<!-- Macros for replicated tables (used by the example after metrika.xml below) -->
<macros incl="macros" optional="true" />
<!-- Reload interval for built-in dictionaries, in seconds -->
<builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
<!-- Maximum session timeout; 0 means queries will not be interrupted -->
<max_session_timeout>3600</max_session_timeout>
<!-- Default session timeout -->
<default_session_timeout>60</default_session_timeout>
<!-- The settings below configure internal logging -->
<!-- Exposes ClickHouse's own metrics; enable this block if you need monitoring
<prometheus>
<endpoint>/metrics</endpoint>
<port>9363</port>
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
<status_info>true</status_info>
</prometheus>
-->
<!-- Log of executed SQL; effectively ClickHouse's audit log, and very useful for troubleshooting (a sample query follows this block) -->
<query_log>
<database>system</database>
<table>query_log</table>
<!-- Set a default retention period, otherwise this table consumes a lot of storage; units include WEEK and DAY.
For long-term retention, expired data can instead be moved to another disk, e.g.: event_date + INTERVAL 2 WEEK TO DISK 'bbb' -->
<ttl>event_date + INTERVAL 5 DAY DELETE</ttl>
<partition_by>toYYYYMM(event_date)</partition_by>
<!-- Data flush interval -->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
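<!-- A hypothetical sanity check: once query_log is populated, recent entries can be inspected with
SELECT event_time, query_duration_ms, query
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 10 -->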
<!-- Trace log, i.e. call-chain information; also very high-volume. See query_log above for the retention options
<trace_log>
<database>system</database>
<table>trace_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</trace_log> -->
<!-- Query thread log
<query_thread_log>
<database>system</database>
<table>query_thread_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_thread_log> -->
<!-- Query views log.
<query_views_log>
<database>system</database>
<table>query_views_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_views_log> -->
<!-- Part log: records merges, deletions, etc. for every batch of data parts
<part_log>
<database>system</database>
<table>part_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</part_log> -->
<!-- Metric log
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log> -->
<!-- External dictionary configuration. -->
<dictionaries_config>*_dictionary.xml</dictionaries_config>
<!-- Configuration for user-defined executable functions -->
<user_defined_executable_functions_config>*_function.xml</user_defined_executable_functions_config>
</clickhouse>
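To show how the storage configuration above is consumed at table-creation time, here is a minimal CREATE TABLE sketch referencing the william policy. The table name and columns are hypothetical, and the TTL clause assumes the commented-out disk_2/cold volume has been enabled:

CREATE TABLE app_logs -- hypothetical example table
(
    event_date Date,
    message String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(event_date)
ORDER BY event_date
-- move parts to the 'cold' volume after 7 days (requires the disk_2/cold volume above)
TTL event_date + INTERVAL 7 DAY TO VOLUME 'cold'
SETTINGS storage_policy = 'william';

Even without the TTL clause, move_factor = 0.2 would start relocating parts to the cold volume once the hot volume is 80% full.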
metrika.xml
<clickhouse>
<remote_servers>
<!-- Distributed (cluster topology) examples. When the cluster has multiple nodes, simply copy this file to every node. -->
<test_shard_localhost>
<shard>
<!-- Shard weight when writing data -->
<!-- <weight>1</weight> -->
<replica>
<host>localhost</host>
<port>9000</port>
</replica>
</shard>
</test_shard_localhost>
<test_cluster_one_shard_three_replicas_localhost>
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.2</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.3</host>
<port>9000</port>
</replica>
</shard>
<!--shard>
<internal_replication>false</internal_replication>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.2</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.3</host>
<port>9000</port>
</replica>
</shard-->
</test_cluster_one_shard_three_replicas_localhost>
<test_cluster_two_shards_localhost>
<shard>
<replica>
<host>localhost</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>localhost</host>
<port>9000</port>
</replica>
</shard>
</test_cluster_two_shards_localhost>
<test_cluster_two_shards>
<shard>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>127.0.0.2</host>
<port>9000</port>
</replica>
</shard>
</test_cluster_two_shards>
<test_cluster_two_shards_internal_replication>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>127.0.0.2</host>
<port>9000</port>
</replica>
</shard>
</test_cluster_two_shards_internal_replication>
<test_shard_localhost_secure>
<shard>
<replica>
<host>localhost</host>
<port>9440</port>
<secure>1</secure>
</replica>
</shard>
</test_shard_localhost_secure>
<test_unavailable_shard>
<shard>
<replica>
<host>localhost</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>localhost</host>
<port>1</port>
</replica>
</shard>
</test_unavailable_shard>
</remote_servers>
<!-- Clusters require ZooKeeper, which coordinates replica data synchronization -->
<!--
<zookeeper>
<node>
<host>example1</host>
<port>2181</port>
</node>
<node>
<host>example2</host>
<port>2181</port>
</node>
<node>
<host>example3</host>
<port>2181</port>
</node>
</zookeeper>
-->
<!-- Compresses 30-100% of the data, saving disk space -->
<compression>
<case>
<!-- Minimum part size -->
<min_part_size>10000000000</min_part_size>
<!-- Minimum part size relative to the whole table -->
<min_part_size_ratio>0.01</min_part_size_ratio>
<!-- Compression method. -->
<method>zstd</method>
</case>
</compression>
</clickhouse>
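As a sketch of how these pieces come together: assuming the zookeeper block is enabled and each node's metrika.xml additionally defines per-node macros (shard, replica), which config.xml pulls in via incl="macros", a replicated table plus its Distributed front table over one of the clusters above could look like this. The database, table names, and columns are hypothetical:

-- Local replicated table, created on every node via the distributed DDL queue
CREATE TABLE default.events_local ON CLUSTER test_cluster_two_shards_internal_replication
(
    event_date Date,
    message String
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/events_local', '{replica}')
PARTITION BY toYYYYMM(event_date)
ORDER BY event_date;

-- Distributed table that fans reads and writes out across both shards
CREATE TABLE default.events ON CLUSTER test_cluster_two_shards_internal_replication
AS default.events_local
ENGINE = Distributed(test_cluster_two_shards_internal_replication, default, events_local, rand());

Because this cluster sets internal_replication to true, writes through the Distributed table land on one replica per shard and the ReplicatedMergeTree engine handles replication via ZooKeeper.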