使用group_concat(distinct split_part()),be报错,全部挂掉,版本2.1.3

Viewed 85

执行了这样一个sql:
select
group_concat(distinct split_part(intelligence_content, ';', 1), ',') as high_risk_port
from
intelligence_application_model im
left join intelligence_content_info ic on
im.library_id = ic.library_id
where
model_status = 1
and im.is_delete = 1
and model_name = '高危端口通信'
and ic.intelligence_status = 1
and ic.is_delete = 1
be报错如下:
image.png

be.out内容如下:
start time: 2024年 09月 20日 星期五 10:36:18 CST
INFO: java_cmd /usr/java/jdk1.8.0_171/bin/java
INFO: jdk_version 8
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/data01/doris/be/lib/java_extensions/preload-extensions/preload-extensions-jar-with-dependencies.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/data01/doris/be/lib/java_extensions/java-udf/java-udf-jar-with-dependencies.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/data01/doris/be/lib/hadoop_hdfs/common/lib/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Reload4jLoggerFactory]
[WARNING!] /sys/kernel/mm/transparent_hugepage/enabled: [always] madvise never, Doris not recommend turning on THP, which may cause the BE process to use more memory and cannot be freed in time. Turn off THP: echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
*** Query id: 4e9e01764954e5e-a39a0b31905c3e0e ***
*** is nereids: 1 ***
*** tablet id: 0 ***
*** Aborted at 1727061423 (unix time) try "date -d @1727061423" if you are using GNU date ***
*** Current BE git commitID: 2dc65ce356 ***
*** SIGSEGV unknown detail explain (@0x0) received by PID 45533 (TID 48199 OR 0x7f3e04d2b700) from PID 0; stack trace: ***
0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /home/zcp/repo_center/doris_release/doris/be/src/common/signal_handler.h:421
1# os::Linux::chained_handler(int, siginfo*, void*) in /usr/java/jdk1.8.0_171/jre/lib/amd64/server/libjvm.so
2# JVM_handle_linux_signal in /usr/java/jdk1.8.0_171/jre/lib/amd64/server/libjvm.so
3# signalHandler(int, siginfo*, void*) in /usr/java/jdk1.8.0_171/jre/lib/amd64/server/libjvm.so
4# __open_catalog in /lib64/libc.so.6
5# doris::vectorized::AggregateFunctionNullVariadicInline<doris::vectorized::AggregateFunctionGroupConcat, true>::add(char*, doris::vectorized::IColumn const**, long, doris::vectorized::Arena*) const at /home/zcp/repo_center/doris_release/doris/be/src/vec/aggregate_functions/aggregate_function_null.h:325
6# doris::vectorized::IAggregateFunctionHelper<doris::vectorized::AggregateFunctionNullVariadicInline<doris::vectorized::AggregateFunctionGroupConcat, true> >::add_batch_single_place(unsigned long, char*, doris::vectorized::IColumn const**, doris::vectorized::Arena*) const in /data01/doris/be/lib/doris_be
7# doris::vectorized::AggFnEvaluator::execute_single_add(doris::vectorized::Block*, char*, doris::vectorized::Arena*) at /home/zcp/repo_center/doris_release/doris/be/src/vec/exprs/vectorized_agg_fn.cpp:235
8# doris::pipeline::AggSinkLocalState::_execute_without_key(doris::vectorized::Block*) at /home/zcp/repo_center/doris_release/doris/be/src/pipeline/exec/aggregation_sink_operator.cpp:179
9# doris::pipeline::AggSinkLocalState::Executor<true, false>::execute(doris::pipeline::AggSinkLocalState*, doris::vectorized::Block*) at /home/zcp/repo_center/doris_release/doris/be/src/pipeline/exec/aggregation_sink_operator.h:75
10# doris::pipeline::AggSinkOperatorX::sink(doris::RuntimeState*, doris::vectorized::Block*, bool) at /home/zcp/repo_center/doris_release/doris/be/src/pipeline/exec/aggregation_sink_operator.cpp:743
11# doris::pipeline::PipelineXTask::execute(bool*) at /home/zcp/repo_center/doris_release/doris/be/src/pipeline/pipeline_x/pipeline_x_task.cpp:315
12# doris::pipeline::TaskScheduler::_do_work(unsigned long) at /home/zcp/repo_center/doris_release/doris/be/src/pipeline/task_scheduler.cpp:343
13# doris::ThreadPool::dispatch_thread() in /data01/doris/be/lib/doris_be
14# doris::Thread::supervise_thread(void*) at /home/zcp/repo_center/doris_release/doris/be/src/util/thread.cpp:499
15# __nptl_deallocate_tsd in /lib64/libpthread.so.0
16# __futimes in /lib64/libc.so.6

start time: 2024年 09月 23日 星期一 15:58:20 CST
INFO: java_cmd /usr/java/jdk1.8.0_171/bin/java
INFO: jdk_version 8
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/data01/doris/be/lib/java_extensions/preload-extensions/preload-extensions-jar-with-dependencies.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/data01/doris/be/lib/java_extensions/java-udf/java-udf-jar-with-dependencies.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/data01/doris/be/lib/hadoop_hdfs/common/lib/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Reload4jLoggerFactory]
[WARNING!] /sys/kernel/mm/transparent_hugepage/enabled: [always] madvise never, Doris not recommend turning on THP, which may cause the BE process to use more memory and cannot be freed in time. Turn off THP: echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
*** Query id: feb7936640b84c01-98e1bf53600a9836 ***
*** is nereids: 1 ***
*** tablet id: 0 ***
*** Aborted at 1727078406 (unix time) try "date -d @1727078406" if you are using GNU date ***
*** Current BE git commitID: 2dc65ce356 ***
*** SIGSEGV invalid permissions for mapped object (@0x7fb71c293000) received by PID 33153 (TID 35316 OR 0x7fb83c23d700) from PID 472461312; stack trace: ***
0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /home/zcp/repo_center/doris_release/doris/be/src/common/signal_handler.h:421
1# os::Linux::chained_handler(int, siginfo*, void*) in /usr/java/jdk1.8.0_171/jre/lib/amd64/server/libjvm.so
2# JVM_handle_linux_signal in /usr/java/jdk1.8.0_171/jre/lib/amd64/server/libjvm.so
3# signalHandler(int, siginfo*, void*) in /usr/java/jdk1.8.0_171/jre/lib/amd64/server/libjvm.so
4# 0x00007FBC30745400 in /lib64/libc.so.6
5# memcpy at /home/zcp/repo_center/doris_release/doris/be/src/glibc-compatibility/memcpy/memcpy_x86_64.cpp:219
6# std::__cxx11::basic_string<char, std::char_traits, std::allocator >::_M_mutate(unsigned long, unsigned long, char const*, unsigned long) at /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.tcc:312
7# std::__cxx11::basic_string<char, std::char_traits, std::allocator >::_M_replace(unsigned long, unsigned long, char const*, unsigned long) in /data01/doris/be/lib/doris_be
8# doris::vectorized::AggregateFunctionGroupConcatData::add(doris::StringRef, doris::StringRef) at /home/zcp/repo_center/doris_release/doris/be/src/vec/aggregate_functions/aggregate_function_group_concat.h:57
9# doris::vectorized::IAggregateFunctionHelper<doris::vectorized::AggregateFunctionNullVariadicInline<doris::vectorized::AggregateFunctionGroupConcat, true> >::add_batch_single_place(unsigned long, char*, doris::vectorized::IColumn const**, doris::vectorized::Arena*) const in /data01/doris/be/lib/doris_be
10# doris::vectorized::AggFnEvaluator::execute_single_add(doris::vectorized::Block*, char*, doris::vectorized::Arena*) at /home/zcp/repo_center/doris_release/doris/be/src/vec/exprs/vectorized_agg_fn.cpp:235
11# doris::pipeline::AggSinkLocalState::_execute_without_key(doris::vectorized::Block*) at /home/zcp/repo_center/doris_release/doris/be/src/pipeline/exec/aggregation_sink_operator.cpp:179
12# doris::pipeline::AggSinkLocalState::Executor<true, false>::execute(doris::pipeline::AggSinkLocalState*, doris::vectorized::Block*) at /home/zcp/repo_center/doris_release/doris/be/src/pipeline/exec/aggregation_sink_operator.h:75
13# doris::pipeline::AggSinkOperatorX::sink(doris::RuntimeState*, doris::vectorized::Block*, bool) at /home/zcp/repo_center/doris_release/doris/be/src/pipeline/exec/aggregation_sink_operator.cpp:743
14# doris::pipeline::PipelineXTask::execute(bool*) at /home/zcp/repo_center/doris_release/doris/be/src/pipeline/pipeline_x/pipeline_x_task.cpp:315
15# doris::pipeline::TaskScheduler::_do_work(unsigned long) at /home/zcp/repo_center/doris_release/doris/be/src/pipeline/task_scheduler.cpp:343
16# doris::ThreadPool::dispatch_thread() in /data01/doris/be/lib/doris_be
17# doris::Thread::supervise_thread(void*) at /home/zcp/repo_center/doris_release/doris/be/src/util/thread.cpp:499
18# start_thread in /lib64/libpthread.so.0
19# clone in /lib64/libc.so.6

3 Answers

可能是旧版本GROUP_CONCAT造成be core,建议升级至>=2.1.6稳定版本:https://github.com/apache/doris/pull/34816

麻烦补齐下表结构和be.out的文字版,这边看看的

CREATE TABLE `intelligence_application_model` (

model_id VARCHAR(64) NOT NULL COMMENT '情报模型 ID',
model_name VARCHAR(255) REPLACE_IF_NOT_NULL NULL COMMENT '情报模型名称',
clue_type_id INT REPLACE_IF_NOT_NULL NULL COMMENT '线索大类 ID',
clue_sub_type_id INT REPLACE_IF_NOT_NULL NULL COMMENT '线索小类 ID',
clue_type_name VARCHAR(255) REPLACE_IF_NOT_NULL NULL COMMENT '线索大类名称',
clue_sub_type_name VARCHAR(255) REPLACE_IF_NOT_NULL NULL COMMENT '线索小类名称',
risk_level INT REPLACE_IF_NOT_NULL NULL COMMENT '危害等级:1,很低;2,低;3,中;4,高;5,很高',
model_status INT REPLACE_IF_NOT_NULL NULL COMMENT '模型状态:0,关闭;1,开启',
model_description TEXT REPLACE_IF_NOT_NULL NULL COMMENT '模型描述',
library_id VARCHAR(64) REPLACE_IF_NOT_NULL NULL COMMENT '采用情报库 ID',
intelligence_name TEXT REPLACE_IF_NOT_NULL NULL COMMENT '采用情报名称',
analysis_type INT REPLACE_IF_NOT_NULL NULL COMMENT '数据分析方式:0,实时;1,批量',
pattern_field TEXT REPLACE_IF_NOT_NULL NULL COMMENT '匹配字段:多个字段之间用英文逗号隔开',
execute_type INT REPLACE_IF_NOT_NULL NULL COMMENT '执行类型:0,一次性;1,间隔',
execute_time DATETIME REPLACE_IF_NOT_NULL NULL COMMENT '执行时间:只在【一次性】时使用',
data_range VARCHAR(255) REPLACE_IF_NOT_NULL NULL COMMENT '数据范围:只在【一次性】时使用',
lose_efficacy_time VARCHAR(255) REPLACE_IF_NOT_NULL NULL COMMENT '有效期限:只在【间隔】时使用',
interval_size INT REPLACE_IF_NOT_NULL NULL COMMENT '间隔时间大小:只在【间隔】时使用',
interval_unit INT REPLACE_IF_NOT_NULL NULL COMMENT '间隔时间单位 只在【间隔】时使用:0,秒;1,分钟;2:小时;3,天;4,月',
is_delete INT REPLACE_IF_NOT_NULL NULL COMMENT '是否删除:0,已删除;1,存在',
update_time DATETIME REPLACE_IF_NOT_NULL NULL COMMENT '更新时间',
is_alarm INT REPLACE_IF_NOT_NULL NULL COMMENT '是否告警:0,不告警;1,告警',
is_build_in INT REPLACE_IF_NOT_NULL NULL DEFAULT "0" COMMENT '是否为内置模型:0,非内置;1,内置模型',
pattern_field_logic INT REPLACE_IF_NOT_NULL NULL DEFAULT "0" COMMENT '匹配字段逻辑:0,与;1,或',
create_time DATETIME REPLACE_IF_NOT_NULL NULL COMMENT '创建时间'
) ENGINE=OLAP
AGGREGATE KEY(model_id)
COMMENT '情报应用模型表'
DISTRIBUTED BY HASH(model_id) BUCKETS 6
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"min_load_replica_num" = "-1",
"is_being_synced" = "false",
"storage_medium" = "hdd",
"storage_format" = "V2",
"inverted_index_storage_format" = "V1",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"group_commit_interval_ms" = "10000",
"group_commit_data_bytes" = "134217728"
);

CREATE TABLE intelligence_content_info (
intelligence_id VARCHAR(64) NOT NULL COMMENT '情报内容 ID',
library_id VARCHAR(64) REPLACE_IF_NOT_NULL NULL COMMENT '情报库 ID',
intelligence_name TEXT REPLACE_IF_NOT_NULL NULL COMMENT '采用情报名称',
intelligence_content TEXT REPLACE_IF_NOT_NULL NULL COMMENT '情报内容:多个内容之间用英文逗号分割',
lose_efficacy_type INT REPLACE_IF_NOT_NULL NULL COMMENT '失效类型:0,长期有效;1,自定义',
lose_efficacy_time DATETIME REPLACE_IF_NOT_NULL NULL COMMENT '失效时间:只有【自定义】时有效',
lose_efficacy_size INT REPLACE_IF_NOT_NULL NULL COMMENT '失效时长:只有【自定义】时有效',
lose_efficacy_unit INT REPLACE_IF_NOT_NULL NULL COMMENT '失效单位:只有自【定义】时有效',
intelligence_status INT REPLACE_IF_NOT_NULL NULL COMMENT '情报状态:0,关闭;1,开启',
intelligence_description TEXT REPLACE_IF_NOT_NULL NULL COMMENT '情报库描述',
is_delete INT REPLACE_IF_NOT_NULL NULL COMMENT '是否删除:0,已删除;1,存在',
update_time DATETIME REPLACE_IF_NOT_NULL NULL COMMENT '更新时间',
is_build_in INT REPLACE_IF_NOT_NULL NULL COMMENT '是否为内置模型0:非内置1:内置模型',
dataSource TEXT REPLACE_IF_NOT_NULL NULL COMMENT '数据来源',
reliability INT REPLACE_IF_NOT_NULL NULL COMMENT '可信度,1~10整数',
create_time DATETIME REPLACE_IF_NOT_NULL NULL COMMENT '创建时间'
) ENGINE=OLAP
AGGREGATE KEY(intelligence_id)
COMMENT '情报内容信息表'
DISTRIBUTED BY HASH(intelligence_id) BUCKETS 6
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"min_load_replica_num" = "-1",
"is_being_synced" = "false",
"storage_medium" = "hdd",
"storage_format" = "V2",
"inverted_index_storage_format" = "V1",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"group_commit_interval_ms" = "10000",
"group_commit_data_bytes" = "134217728"
);