Doris 3.0.6 版本 BE 进程常驻内存过高问题求助

Viewed 45

image.png
如图所示,BE 节点的内存上限配置为 30 GB,be.conf 配置如下。元数据内存和内存碎片占用过高,导致查询 SQL 经常受限,求解决思路。另外,已经尝试过碎片清理和表合并的脚本,但作用不大,脚本一并贴在下面。

物理内存上限(30 G 硬顶)

mem_limit=32212254720
soft_mem_limit_frac=0.9

===== 1. 查询层 =====

enable_query_memory_overcommit=false
max_query_instances_per_be=40
query_cache_max_size_mb=256

===== 2. 导入层 =====

load_process_max_memory_limit_percent=20
load_process_soft_mem_limit_percent=15

===== 3. Compaction 层 =====

3 GB = 3221225472 字节,3.0.6 必须纯数字

compaction_memory_bytes_limit=3221225472
vertical_compaction_max_row_source_memory_mb=2048
enable_compaction_pause_on_high_memory=true

===== 4. 缓存层 =====

storage_page_cache_limit=10
inverted_index_searcher_cache_limit=5
row_cache_mem_limit=0

===== 5. 读数据方式 =====

enable_mmap_read_buffer=true
disable_storage_page_cache=false

===== 6. 内存追踪 =====

enable_memory_tracker=true
mem_tracker_consume_min_size_bytes=1048576

===== 7. jemalloc 碎片治理 =====

je_dirty_decay_ms=1000
je_muzzy_decay_ms=1000
je_background_thread=true

===== 8. 必缺项 =====

enable_segment_cache=true
segment_cache_capacity=100000

#!/bin/bash
# Best-effort memory relief for one Doris BE node:
#   1. ask the BE process to trim its heap through gdb,
#   2. try to lower the jemalloc decay times,
#   3. list every base table of the `ods` database,
#   4. request a compaction for each of those tables,
#   5. try to speed up cumulative compaction and clean the trash,
#   6. print selected /memz counters every 30 seconds, 30 times.
# SQL output and errors are appended to $LOG; a failing step never stops the
# steps that follow it.
#
# Optional environment:
#   BE_WEB  base URL of the BE web server (default http://172.20.1.23:8040)

LOG=/tmp/ods_compact.log
TABLE_LIST=/tmp/ods_tables.txt
BE_WEB=${BE_WEB:-http://172.20.1.23:8040}

# Run ONE SQL statement against the local FE and append its output to $LOG.
# One statement per client call: the mysql client stops a batch at the first
# error, which would silently skip every later statement in the same -e string.
# stdin is detached so the client can never consume the table list that the
# compaction loop reads from.
run_sql() {
  mysql -h127.1 -P9030 -uroot -e "$1" >>"$LOG" 2>&1 </dev/null
}

# pgrep -f matches whole command lines and may print several PIDs (or none);
# -o keeps only the oldest match so gdb receives exactly one PID.
# NOTE(review): assumes the oldest match is the BE daemon itself - confirm.
BE_PID=$(pgrep -o -f doris_be)

echo "1. 碎片回收 ..."
if [[ -n "$BE_PID" ]]; then
  # The trailing backslashes matter: without them every "-ex ..." line is
  # executed as a separate shell command and gdb never calls malloc_trim.
  # NOTE(review): malloc_trim() belongs to the glibc allocator. The BE memory
  # profile reports JemallocMemory, so this call presumably frees nothing on a
  # jemalloc build - confirm before relying on it.
  gdb -n -q -batch -p "$BE_PID" \
    -ex "call (int)malloc_trim(0)" \
    -ex "detach" -ex "quit"
else
  echo "doris_be process not found, skipping malloc_trim" >&2
fi

echo "2. 设置 je 持续回收 ..."
# NOTE(review): je_dirty_decay_ms / je_muzzy_decay_ms look like jemalloc
# options, not FE configs; they are presumably meant to be set through
# JEMALLOC_CONF (dirty_decay_ms / muzzy_decay_ms) in be.conf - confirm.
# Any rejection by the FE ends up in $LOG.
run_sql "ADMIN SET FRONTEND CONFIG ('je_dirty_decay_ms' = '100');"
run_sql "ADMIN SET FRONTEND CONFIG ('je_muzzy_decay_ms' = '100');"

echo "3. 生成全库 ODS 表清单 ..."
# -N suppresses the column header so the file holds one table name per line.
mysql -h127.1 -P9030 -uroot -N -e "
SELECT TABLE_NAME
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_SCHEMA = 'ods' AND TABLE_TYPE = 'BASE TABLE';" > "$TABLE_LIST"

echo "4. 全库合并(FORCE 绕过无分区报错) ..."
# NOTE(review): the documented statement form appears to be
#   ADMIN COMPACT TABLE <table> PARTITION <name> WHERE TYPE = 'BASE'|'CUMULATIVE'
# so "PARTITION (*) FORCE" may be rejected by the FE - check $LOG and confirm.
while IFS= read -r tbl; do
  # Skip blank lines (e.g. when the listing query returned nothing).
  [[ -n "$tbl" ]] || continue
  echo "[$(date '+%F %T')] 开始合并 ods.$tbl ..."
  # Backticks keep table names with unusual characters intact.
  run_sql "ADMIN COMPACT TABLE ods.\`$tbl\` PARTITION (*) FORCE;"
done < "$TABLE_LIST"

echo "5. 加快合并频率 + 清垃圾 ..."
# NOTE(review): cumulative_compaction_check_interval_seconds looks like a BE
# config rather than an FE config - confirm. It is issued separately so that
# ADMIN CLEAN TRASH still runs when the FE rejects it.
run_sql "ADMIN SET FRONTEND CONFIG ('cumulative_compaction_check_interval_seconds' = '10');"
run_sql "ADMIN CLEAN TRASH;"

echo "6. 每 30 秒观察下降 ..."
for _ in {1..30}; do
  echo "===== $(date '+%F %T') ====="
  # The timestamp is passed in with -v instead of awk's strftime(), which only
  # gawk provides.
  curl -s "$BE_WEB/memz" |
    awk -v now="$(date '+%H:%M:%S')" \
      '/UntrackedMemory|RowsetMetaMemBytes|PhysicalMemory/ {print now, $0}'
  sleep 30
done

1 Answer

补充配置信息如下
Memory Properties

System Physical Mem: 62.43 GB
System Page Size: 65536
Mem Limit: 30.00 GB
Soft Mem Limit: 27.00 GB
System Mem Available Low Water Mark: 3.12 GB
System Mem Available Warning Water Mark: 6.24 GB
Cgroup Mem Limit: 8589934592.00 GB
Cgroup Mem Usage: 34.65 GB
Cgroup Mem Refresh State: 1


Memory Option Settings

block_cache_wait_timeout_ms=1000
cache_capacity_reduce_mem_limit_frac=0.6
cache_lock_held_long_tail_threshold_us=30000000
cache_lock_wait_long_tail_threshold_us=30000000
cache_periodic_prune_stale_sweep_sec=60
cache_prune_interval_sec=10
clear_file_cache=0
common_obj_lru_cache_stale_sweep_time_sec=900
compaction_memory_bytes_limit=3221225472
crash_in_alloc_large_memory_bytes=-1
crash_in_memory_tracker_inaccurate=0
data_page_cache_stale_sweep_time_sec=300
delete_bitmap_agg_cache_capacity=104857600
delete_bitmap_agg_cache_stale_sweep_time_sec=1800
delete_bitmap_dynamic_agg_cache_limit=1.0%
disable_memory_gc=0
disable_pk_storage_page_cache=0
disable_segment_cache=0
disable_storage_page_cache=0
disable_storage_row_cache=1
enable_compaction_pause_on_high_memory=1
enable_evict_file_cache_in_advance=1
enable_file_cache=0
enable_file_cache_keep_base_compaction_output=0
enable_file_cache_query_limit=0
enable_flush_file_cache_async=1
enable_hdfs_mem_limiter=1
enable_inverted_index_cache_check_timestamp=1
enable_low_cardinality_cache_code=1
enable_memory_orphan_check=1
enable_mow_get_agg_by_cache=1
enable_query_memory_overcommit=0
enable_read_cache_file_directly=0
enable_reader_dryrun_when_download_file_cache=1
enable_shrink_memory=0
enable_ttl_cache_evict_using_lru=1
enable_use_cgroup_memory_info=1
enable_workload_group_memory_gc=1
enable_write_index_searcher_cache=1
estimated_mem_per_column_reader=512
file_cache_background_gc_interval_ms=100
file_cache_background_monitor_interval_ms=5000
file_cache_background_ttl_gc_batch=1000
file_cache_background_ttl_gc_interval_ms=3000
file_cache_each_block_size=1048576
file_cache_enable_evict_from_other_queue_by_size=1
file_cache_enter_disk_resource_limit_mode_percent=90
file_cache_enter_need_evict_cache_in_advance_percent=88
file_cache_error_log_limit_bytes=209715200
file_cache_evict_in_advance_batch_bytes=31457280
file_cache_evict_in_advance_interval_ms=1000
file_cache_evict_in_advance_recycle_keys_num_threshold=1000
file_cache_exit_disk_resource_limit_mode_percent=88
file_cache_exit_need_evict_cache_in_advance_percent=85
file_cache_max_file_reader_cache_size=1000000
file_cache_path=[{"path":"/data/doris/doris3.0.6/be/file_cache"}]
file_cache_remove_block_qps_limit=1000
group_commit_memory_rows_for_max_filter_ratio=10000
group_commit_queue_mem_limit=67108864
ignore_file_cache_dir_upgrade_failure=0
in_memory_file_size=1048576
index_cache_entry_stay_time_after_lookup_s=1800
index_page_cache_percentage=10
index_page_cache_stale_sweep_time_sec=600
inverted_index_cache_stale_sweep_time_sec=600
inverted_index_query_cache_limit=10%
inverted_index_query_cache_shards=256
inverted_index_searcher_cache_limit=5
jdbc_connection_pool_cache_clear_time_sec=28800
je_dirty_pages_mem_limit_percent=2%
kerberos_ccache_path=
load_process_max_memory_limit_percent=20
load_process_safe_mem_permit_percent=5
load_process_soft_mem_limit_percent=15
local_exchange_buffer_mem_limit=134217728
lookup_connection_cache_capacity=2048
max_client_cache_size_per_host=10
max_external_file_meta_cache_num=1000
max_hdfs_file_handle_cache_num=1000
max_hdfs_file_handle_cache_time_sec=3600
max_master_fe_client_cache_size=10
max_memory_sink_batch_count=20
max_sys_mem_available_low_water_mark_bytes=-1
mem_alloc_fault_probability=0
mem_limit=32212254720
mem_tracker_consume_min_size_bytes=1048576
memory_gc_sleep_time_ms=500
memory_limitation_per_thread_for_schema_change_bytes=2147483648
memory_limitation_per_thread_for_storage_migration_bytes=100000000
memory_maintenance_sleep_time_ms=20
memory_mode=moderate
memtable_flush_running_count_limit=2
memtable_limiter_reserved_memory_bytes=838860800
pk_index_page_cache_stale_sweep_time_sec=600
pk_storage_page_cache_limit=10%
point_query_row_cache_stale_sweep_time_sec=300
query_cache_elasticity_size_mb=128
query_cache_max_partition_count=1024
query_cache_max_size_mb=256
query_cache_size=512
row_cache_mem_limit=0
schema_cache_capacity=1024
schema_cache_sweep_time_sec=100
schema_change_mem_limit_frac=0.6
schema_dict_cache_capacity=4096
segment_cache_capacity=100000
segment_cache_fd_percentage=20
segment_cache_memory_percentage=5
soft_mem_limit_frac=0.9
stacktrace_in_alloc_large_memory_bytes=2147483648
storage_page_cache_limit=10
storage_page_cache_shard_size=256
tablet_cache_capacity=100000
tablet_cache_shards=16
tablet_lookup_cache_stale_sweep_time_sec=30
tablet_schema_cache_capacity=102400
tablet_schema_cache_recycle_interval=3600
variant_use_cloud_schema_dict_cache=1
vertical_compaction_max_row_source_memory_mb=2048

MemoryProfile:
MemoryOverviewSnapshot:
- PhysicalMemory(VmRSS) Current: 28.02 GB (Peak: 34.29 GB)
- VirtualMemory(VmSize) Current: 81.61 GB (Peak: 81.61 GB)
UntrackedMemory:
- Memory Current: 17.15 GB (Peak: 22.39 GB)
TrackedMemory:
- Memory Current: 10.87 GB (Peak: 20.13 GB)
TasksMemory:
- Memory Current: 0 (Peak: 39.73 MB)
- ReservedMemory Current: 0 (Peak: 0)
Details:
- Compaction Current: 0 (Peak: 12.88 KB)
- Load Current: 0 (Peak: 39.72 MB)
- AllMemTablesMemory Current: 0 (Peak: 7.20 MB)
- Other Current: 0 (Peak: 0)
- Query Current: 0 (Peak: 21.54 MB)
- SchemaChange Current: 0 (Peak: 0)
GlobalMemory:
- Memory Current: 95.53 MB (Peak: 95.54 MB)
MetadataMemory:
- Memory Current: 9.81 GB (Peak: 11.07 GB)
CacheMemory:
- Memory Current: 661.44 KB (Peak: 3.73 MB)
JemallocMemory:
- Memory Current: 992.79 MB (Peak: 9.31 GB)
Details:
- Cache Current: 480.20 MB (Peak: 8.90 GB)
- Metadata Current: 512.59 MB (Peak: 512.78 MB)
GlobalMemorySnapshot:
Orphan@global@id=0e4c95f51ec581d2-b8bf3c07367832ba:
- Memory Current: 0 (Peak: 0)
IOBufBlockMemory@global@id=f844304449fb8a6b-acf2b2de2b2b3f8c:
- Memory Current: 95.28 MB (Peak: 95.29 MB)
PointQueryExecutor@global@id=6547e69af1bfdbe4-fb81da6d23253b90:
- Memory Current: 0 (Peak: 0)
BlockCompression@global@id=9a4f9b666151e471-5f29a6a797d8a09b:
- Memory Current: 258.22 KB (Peak: 258.22 KB)
RowIdStorageReader@global@id=4f4319c968a71917-a9255d7d874c05bf:
- Memory Current: 0 (Peak: 0)
SubcolumnsTree@global@id=9f49c641ec6d9858-272430431a0c47a2:
- Memory Current: 0 (Peak: 0)
S3FileBuffer@global@id=674a72e7f72b0872-cf9a433bfaac8ca2:
- Memory Current: 0 (Peak: 0)
MetadataMemorySnapshot:
Tablets(not in SchemaCache, TabletSchemaCache)@metadata@id=2e4a16a886fa8a05-3501309129d01888:
- Memory Current: -3367021585.00 B (Peak: 636.91 MB)
Segments(not in SegmentCache)@metadata@id=62488a843fda50e9-22bbddf4784575ac:
- Memory Current: 0 (Peak: 12.10 MB)
Rowsets@metadata@id=444b6305b58d57b2-b4ff81ad12d464b2:
- Memory Current: 12.94 GB (Peak: 12.95 GB)
ParquetMeta@metadata@id=27435399180553d8-df91e9a02030a4ab:
- Memory Current: 0 (Peak: 0)
SegmentCache[size]@metadata@id=e34cdb847fd15f42-32dff5f98c72a790:
- Memory Current: 0 (Peak: 237.49 MB)
SchemaCache[number]@metadata@id=98481f5a8206fb16-176a9843584e339f:
- Memory Current: 0 (Peak: 1.01 MB)
TabletSchemaCache[number]@metadata@id=674e60b547ac50f0-352505b071c7999a:
- Memory Current: 669.17 KB (Peak: 669.17 KB)
CacheMemorySnapshot:
QueryCache@cache@id=d64e225285b40f1c-ac5ba8ec30936ea0:
- Memory Current: 0 (Peak: 0)
DataPageCache[size]@cache@id=ec4d0bee9df6a2dd-3d71311bd72793b4:
- Memory Current: 0 (Peak: 1.65 MB)
IndexPageCache[size]@cache@id=9f48e5e8a71000c9-2da9627481059894:
- Memory Current: 0 (Peak: 3.06 MB)
PKIndexPageCache[size]@cache@id=5e47a0dc23e5ef73-5ad6fabe18f2a080:
- Memory Current: 0 (Peak: 169.15 KB)
PointQueryRowCache[size]@cache@id=ec4247cc77921ac5-366320697fc27d88:
- Memory Current: 0 (Peak: 0)
CommonObjLRUCache[number]@cache@id=36469425b207433b-71735aea65979eb7:
- Memory Current: 0 (Peak: 0)
PointQueryLookupConnectionCache[number]@cache@id=c14b4ef8ae876444-e5f5dbc91339759d:
- Memory Current: 0 (Peak: 0)
InvertedIndexSearcherCache[size]@cache@id=04419700a1827210-8d727fa7735c1697:
- Memory Current: 0 (Peak: 0)
InvertedIndexQueryCache[size]@cache@id=ae48b8390434b03a-a536b36e5a88d393:
- Memory Current: 0 (Peak: 0)
QueryCache[size]@cache@id=b84793821b7dc332-af0d6ee3f759319e:
- Memory Current: 0 (Peak: 0)
LastSuccessChannelCache[size]@cache@id=c649f36d1cadcc6f-dbdc40d6c59ba995:
- Memory Current: 0 (Peak: 0)
TabletColumnObjectPool[number]@cache@id=9e41a8a33b8fadc9-b256b5081e3a16ba:
- Memory Current: 517.92 KB (Peak: 517.92 KB)
MowTabletVersionCache[number]@cache@id=634a4df0cfd6e424-84e927d8011d34a7:
- Memory Current: 143.52 KB (Peak: 143.52 KB)
CreateTabletRRIdxCache[number]@cache@id=ba4c712516da4750-995a847367f27a93:
- Memory Current: 0 (Peak: 114.00 B)
MowDeleteBitmapAggCache[size]@cache@id=8a46ea82d00c9baf-705ee643649dc3a7:
- Memory Current: 0 (Peak: 505.97 KB)
TopMemoryTasksSnapshot:
SnapshotManager@other_task@id=704452716cf06751-6106b6094a01cbb2:
- Memory Current: 0 (Peak: 0)
StreamLoadPipe@load@id=774d53decde8353c-265027b6c427e487:
- Memory Current: 0 (Peak: 0)
SegCompaction@compaction@id=8945c6e8ac1af51e-3854220227cabfa5:
- Memory Current: 0 (Peak: 0)
AllTasksMemorySnapshot:
QueryTasks:
LoadTasks:
StreamLoadPipe@load@id=774d53decde8353c-265027b6c427e487:
- Memory Current: 0 (Peak: 0)
CompactionTasks:
SegCompaction@compaction@id=8945c6e8ac1af51e-3854220227cabfa5:
- Memory Current: 0 (Peak: 0)
SchemaChangeTasks:
OtherTasks:
SnapshotManager@other_task@id=704452716cf06751-6106b6094a01cbb2:
- Memory Current: 0 (Peak: 0)
ObjectHeapDump:

  • BitmapIndexReaderMemBytes: 0
  • BitmapIndexReaderNum: 0
  • BloomFilterIndexReaderMemBytes: 0
  • BloomFilterIndexReaderNum: 0
  • ColumnReaderMemBytes: 0
  • ColumnReaderNum: 0
  • IndexPageReaderMemBytes: 0
  • IndexPageReaderNum: 0
  • IndexedColumnReaderMemBytes: 0
  • IndexedColumnReaderNum: 0
  • InvertedIndexReaderMemBytes: 0
  • InvertedIndexReaderNum: 0
  • OrdinalIndexReaderMemBytes: 0
  • OrdinalIndexReaderNum: 0
  • RowsetMemBytes: 3.54 GB
  • RowsetMetaMemBytes: 9.40 GB
  • RowsetMetaNum: 16.39M
  • RowsetNum: 16.39M
  • SegmentMemBytes: 0
  • SegmentNum: 0
  • TabletColumnMemBytes: 19.52 MB
  • TabletColumnNum: 71.09K
  • TabletIndexMemBytes: 336.00 B
  • TabletIndexNum: 2
  • TabletMetaMemBytes: 462.17 MB
  • TabletMetaNum: 1.19M
  • TabletSchemaMemBytes: -3871496803.00 B
  • TabletSchemaNum: 2.49K
  • ZoneMapIndexReaderMemBytes: 0
  • ZoneMapIndexReaderNum: 0