doris 2.1.7, streamload 导入数据,数据有丢失

Viewed 87

通过 Stream Load 导入数据,任务显示导入正常,但实际查看数据时发现数据少了:日志里显示导入了 149 条,实际只查到 51 条。表的导入日志、参数、配置等都看不出什么问题,但新建一个配置完全相同的表后导入正常,求解!(急急)

I20250225 15:41:12.642174 73811 stream_load_executor.cpp:72] begin to execute stream load. label=sync_dm_gk_dws_ads_inv_wh_org_invt_turn_days_946673130804346881_2_1740469267558, txn_id=63148630, query_id=14412688282d8f5e-16a85dbca664ada5
I20250225 15:41:12.642206 73811 fragment_mgr.cpp:778] query_id: 14412688282d8f5e-16a85dbca664ada5, coord_addr: TNetworkAddress(hostname=gykg-401-sz01p
-ecs-prod-doris-fe02, port=9020), total fragment num on current host: 0, fe process uuid: 0, query type: LOAD, report audit fe:TNetworkAddress(hostnam
e=gykg-401-sz01p-ecs-prod-doris-fe02, port=9020)
I20250225 15:41:12.642385 73811 fragment_mgr.cpp:819] Query/load id: 14412688282d8f5e-16a85dbca664ada5, use workload group: TG[id = 1, name = normal,
cpu_share = 1024, memory_limit = 16.94 GB, enable_memory_overcommit = true, version = 0, cpu_hard_limit = -1, scan_thread_num = 48, max_remote_scan_th
read_num = 512, min_remote_scan_thread_num = 8, spill_low_watermark=50, spill_high_watermark=80, is_shutdown=false, query_num=7, read_bytes_per_second
=-1, remote_read_bytes_per_second=-1], is pipeline: 1
I20250225 15:41:12.642436 73811 fragment_mgr.cpp:830] Register query/load memory tracker, query/load id: 14412688282d8f5e-16a85dbca664ada5 limit: 0
I20250225 15:41:12.642458 73811 pipeline_fragment_context.cpp:253] Preparing instance 14412688282d8f5e-16a85dbca664ada5|14412688282d8f5e-16a85dbca664a
da6, backend_num 0
I20250225 15:41:12.643635 73811 stream_load.cpp:214] finished to handle HTTP header, id=14412688282d8f5e-16a85dbca664ada5, job_id=-1, txn_id=63148630,
label=sync_dm_gk_dws_ads_inv_wh_org_invt_turn_days_946673130804346881_2_1740469267558, elapse(s)=0
I20250225 15:41:12.645391 835538 vtablet_writer.cpp:127] init new node for instance 0, incremantal:0
I20250225 15:41:12.645414 835538 vtablet_writer.cpp:127] init new node for instance 0, incremantal:0
I20250225 15:41:12.645427 835538 vtablet_writer.cpp:127] init new node for instance 0, incremantal:0
I20250225 15:41:12.645437 835538 vtablet_writer.cpp:127] init new node for instance 0, incremantal:0
I20250225 15:41:12.645445 835538 vtablet_writer.cpp:127] init new node for instance 0, incremantal:0
I20250225 15:41:12.645453 835538 vtablet_writer.cpp:127] init new node for instance 0, incremantal:0
I20250225 15:41:12.645462 835538 vtablet_writer.cpp:127] init new node for instance 0, incremantal:0
I20250225 15:41:12.645475 835538 vtablet_writer.cpp:127] init new node for instance 0, incremantal:0
I20250225 15:41:12.645495 835538 vtablet_writer.cpp:127] init new node for instance 0, incremantal:0
I20250225 15:41:12.647866 73397 tablets_channel.cpp:136] open tablets channel (load_id=14412688282d8f5e-16a85dbca664ada5, index_id=33936686), tablets
num: 118 timeout(s): 259200
I20250225 15:41:12.648041 73397 tablets_channel.cpp:165] txn 63148630: TabletsChannel of index 33936686 init senders 1 with incremental off
I20250225 15:41:12.658761 73942 fragment_mgr.cpp:736] Query 280dce0bb1ca4405-ad887648115aa398 finished
I20250225 15:41:12.665510 73942 query_context.cpp:156] Query 280dce0bb1ca4405-ad887648115aa398 deconstructed, , deregister query/load memory tracker,
queryId=280dce0bb1ca4405-ad887648115aa398, Limit=2.00 GB, CurrUsed=4.00 KB, PeakUsed=2.50 MB
I20250225 15:41:12.665575 73942 query_context.cpp:188] Query 280dce0bb1ca4405-ad887648115aa398 deconstructed, , deregister query/load memory tracker,
queryId=280dce0bb1ca4405-ad887648115aa398, Limit=2.00 GB, CurrUsed=4.00 KB, PeakUsed=2.50 MB
I20250225 15:41:12.670948 73391 fragment_mgr.cpp:778] query_id: 94eec104f5fe4d4a-acc9195788a48281, coord_addr: TNetworkAddress(hostname=gykg-401-sz01p
-ecs-prod-doris-fe01, port=9020), total fragment num on current host: 24, fe process uuid: 1739783405835, query type: SELECT, report audit fe:TNetwork
Address(hostname=gykg-401-sz01p-ecs-prod-doris-fe01, port=9020)
I20250225 15:41:12.671242 73391 fragment_mgr.cpp:819] Query/load id: 94eec104f5fe4d4a-acc9195788a48281, use workload group: TG[id = 1, name = normal,
cpu_share = 1024, memory_limit = 16.94 GB, enable_memory_overcommit = true, version = 0, cpu_hard_limit = -1, scan_thread_num = 48, max_remote_scan_th
read_num = 512, min_remote_scan_thread_num = 8, spill_low_watermark=50, spill_high_watermark=80, is_shutdown=false, query_num=7, read_bytes_per_second
=-1, remote_read_bytes_per_second=-1], is pipeline: 1
I20250225 15:41:12.671276 73391 fragment_mgr.cpp:830] Register query/load memory tracker, query/load id: 94eec104f5fe4d4a-acc9195788a48281 limit: 0
I20250225 15:41:12.671295 73391 pipeline_x_fragment_context.cpp:207] PipelineXFragmentContext::prepare|query_id=94eec104f5fe4d4a-acc9195788a48281|frag
ment_id=8|pthread_id=139790169786112
I20250225 15:41:12.674357 73391 pipeline_x_fragment_context.cpp:207] PipelineXFragmentContext::prepare|query_id=94eec104f5fe4d4a-acc9195788a48281|frag
ment_id=7|pthread_id=139790169786112
I20250225 15:41:12.677444 73391 pipeline_x_fragment_context.cpp:207] PipelineXFragmentContext::prepare|query_id=94eec104f5fe4d4a-acc9195788a48281|frag
ment_id=6|pthread_id=139790169786112
I20250225 15:41:12.679598 73364 fragment_mgr.cpp:778] query_id: ac235b3576074f26-98d68b3bccb89e5e, coord_addr: TNetworkAddress(hostname=gykg-401-sz01p
-ecs-prod-doris-fe02, port=9020), total fragment num on current host: 25, fe process uuid: 1739784483160, query type: SELECT, report audit fe:TNetwork
Address(hostname=gykg-401-sz01p-ecs-prod-doris-fe02, port=9020)
I20250225 15:41:12.679697 73364 fragment_mgr.cpp:819] Query/load id: ac235b3576074f26-98d68b3bccb89e5e, use workload group: TG[id = 1, name = normal,
cpu_share = 1024, memory_limit = 16.94 GB, enable_memory_overcommit = true, version = 0, cpu_hard_limit = -1, scan_thread_num = 48, max_remote_scan_th
read_num = 512, min_remote_scan_thread_num = 8, spill_low_watermark=50, spill_high_watermark=80, is_shutdown=false, query_num=8, read_bytes_per_second
=-1, remote_read_bytes_per_second=-1], is pipeline: 1
I20250225 15:41:12.679718 73364 fragment_mgr.cpp:830] Register query/load memory tracker, query/load id: ac235b3576074f26-98d68b3bccb89e5e limit: 0
I20250225 15:41:12.679733 73364 pipeline_x_fragment_context.cpp:207] PipelineXFragmentContext::prepare|query_id=ac235b3576074f26-98d68b3bccb89e5e|frag
ment_id=3|pthread_id=139790396389120
I20250225 15:41:12.679946 73364 pipeline_x_fragment_context.cpp:207] PipelineXFragmentContext::prepare|query_id=ac235b3576074f26-98d68b3bccb89e5e|frag
ment_id=2|pthread_id=139790396389120
I20250225 15:41:12.681603 73364 pipeline_x_fragment_context.cpp:207] PipelineXFragmentContext::prepare|query_id=ac235b3576074f26-98d68b3bccb89e5e|frag
ment_id=1|pthread_id=139790396389120
I20250225 15:41:12.685524 73364 pipeline_x_fragment_context.cpp:207] PipelineXFragmentContext::prepare|query_id=ac235b3576074f26-98d68b3bccb89e5e|frag
ment_id=0|pthread_id=139790396389120
I20250225 15:41:12.686415 73941 fragment_mgr.cpp:736] Query 4cf53189dde5494a-ae1f19a549e3098e finished
I20250225 15:41:12.687808 73941 query_context.cpp:156] Query 4cf53189dde5494a-ae1f19a549e3098e deconstructed, , deregister query/load memory tracker,
queryId=4cf53189dde5494a-ae1f19a549e3098e, Limit=2.00 GB, CurrUsed=310.00 KB, PeakUsed=1.48 MB
I20250225 15:41:12.687880 73941 query_context.cpp:188] Query 4cf53189dde5494a-ae1f19a549e3098e deconstructed, , deregister query/load memory tracker,
queryId=4cf53189dde5494a-ae1f19a549e3098e, Limit=2.00 GB, CurrUsed=310.00 KB, PeakUsed=1.48 MB
I20250225 15:41:12.711304 835538 vtablet_writer.cpp:973] VNodeChannel[33936686-12334], load_id=14412688282d8f5e-16a85dbca664ada5, txn_id=63148630, nod
e=gykg-401-sz01p-ecs-prod-sjzt-doris-be01:8060 mark closed, left pending batch size: 1
I20250225 15:41:12.711342 835538 vtablet_writer.cpp:973] VNodeChannel[33936686-58047094], load_id=14412688282d8f5e-16a85dbca664ada5, txn_id=63148630,
node=gykg-gksk-sz02p-ecs-pdt-sjzt-doris-be07:8060 mark closed, left pending batch size: 1
I20250225 15:41:12.711350 835538 vtablet_writer.cpp:973] VNodeChannel[33936686-51557863], load_id=14412688282d8f5e-16a85dbca664ada5, txn_id=63148630,
node=gykg-gksk-sz02p-ecs-pdt-sjzt-doris-be04:8060 mark closed, left pending batch size: 1
I20250225 15:41:12.711359 835538 vtablet_writer.cpp:973] VNodeChannel[33936686-12335], load_id=14412688282d8f5e-16a85dbca664ada5, txn_id=63148630, nod
e=gykg-401-sz01p-ecs-prod-sjzt-doris-be03:8060 mark closed, left pending batch size: 1
I20250225 15:41:12.711364 835538 vtablet_writer.cpp:973] VNodeChannel[33936686-51558824], load_id=14412688282d8f5e-16a85dbca664ada5, txn_id=63148630,
node=gykg-gksk-sz02p-ecs-pdt-sjzt-doris-be05:8060 mark closed, left pending batch size: 1
I20250225 15:41:12.711372 835538 vtablet_writer.cpp:973] VNodeChannel[33936686-58049358], load_id=14412688282d8f5e-16a85dbca664ada5, txn_id=63148630,
node=gykg-gksk-sz02p-ecs-pdt-sjzt-doris-be08:8060 mark closed, left pending batch size: 1
I20250225 15:41:12.711380 835538 vtablet_writer.cpp:973] VNodeChannel[33936686-51558827], load_id=14412688282d8f5e-16a85dbca664ada5, txn_id=63148630,
node=gykg-gksk-sz02p-ecs-pdt-sjzt-doris-be06:8060 mark closed, left pending batch size: 1
I20250225 15:41:12.711386 835538 vtablet_writer.cpp:973] VNodeChannel[33936686-58051573], load_id=14412688282d8f5e-16a85dbca664ada5, txn_id=63148630,
node=gykg-gksk-sz02p-ecs-pdt-sjzt-doris-be09:8060 mark closed, left pending batch size: 1
I20250225 15:41:12.711473 835538 vtablet_writer.cpp:973] VNodeChannel[33936686-12240], load_id=14412688282d8f5e-16a85dbca664ada5, txn_id=63148630, nod
e=gykg-401-sz01p-ecs-prod-sjzt-doris-be02:8060 mark closed, left pending batch size: 1
I20250225 15:41:12.713649 73566 vtablet_writer.cpp:1013] All node channels are stopped(maybe finished/offending/cancelled), sender thread exit. 144126
88282d8f5e-16a85dbca664ada5
I20250225 15:41:12.713796 73248 tablets_channel.cpp:271] close tablets channel: (load_id=14412688282d8f5e-16a85dbca664ada5, index_id=33936686), sender
id: 0, backend id: 51557863
I20250225 15:41:12.775559 73811 stream_load.cpp:738] put stream_load_record rocksdb successfully. label: sync_dm_gk_dws_ads_inv_wh_org_invt_turn_days_
946673130804346881_2_1740469267558, key: 1740469272775_sync_dm_gk_dws_ads_inv_wh_org_invt_turn_days_946673130804346881_2_1740469267558
I20250225 15:41:12.775575 73811 stream_load.cpp:137] finished to execute stream load. label=sync_dm_gk_dws_ads_inv_wh_org_invt_turn_days_946673130804346881_2_1740469267558, txn_id=63148630, query_id=14412688282d8f5e-16a85dbca664ada5, load_cost_ms=140, receive_data_cost_ms=65, read_data_cost_ms=1, write_data_cost_ms=107, commit_and_publish_txn_cost_ms=25, number_total_rows=149, number_loaded_rows=149, receive_bytes=275085, loaded_bytes=136324
I20250225 15:41:12.776594 163766 task_worker_pool.cpp:337] successfully submit task|type=UPDATE_VISIBLE_VERSION|signature=-1

2 Answers

数据没有导入的原因是表使用了 sequence 列:如果待插入数据的 sequence 列时间小于当前表中相同主键已有数据的 sequence 列时间,数据不会插入。这主要是为了防止数据乱序导致更新顺序紊乱而设计的。

为了排查问题,我们从手动 Stream Load 一路缩小到插入单条数据,发现下面两条语句:第一条能正常插入,第二条运行成功但没有数据插入。两者的区别在于加粗字段(即 oper_org_id 的值)。然后我们尝试了一下,8 位可以,9 位就插入不进去了,但是这个字段的类型是 varchar(65533)。这张表是历史表,不是最近新建的表,之前没有问题,好像是从我给 FE 节点增加内存并重启之后才出现的。
-- Minimal repro from the reporter. The two statements are identical except for
-- oper_org_id ('6131' vs '6131000000'). As reported, the first row is inserted,
-- while the second statement succeeds but no row becomes visible.
-- Each statement is kept on one line so the hoodie_event_time literal does not
-- pick up an embedded newline when copied.
insert into dm_gk_dws.test (hoodie_event_time,year,month,biz_date,busi_form_id,oper_org_id,wh_id,system_code,cdc_partition_key,org_code_lvl_2)values ('2025/1/1 3:10','2024','202412','2024/12/31','00','6131','400105','1178','20241231','6130000000');
insert into dm_gk_dws.test (hoodie_event_time,year,month,biz_date,busi_form_id,oper_org_id,wh_id,system_code,cdc_partition_key,org_code_lvl_2)values ('2025/1/1 3:10','2024','202412','2024/12/31','00','6131000000','400105','1178','20241231','6130000000');

建表语句:
-- Table definition for the affected table (Doris UNIQUE KEY model, merge-on-write).
-- The first six columns form the unique key; org_code_lvl_2 is also the
-- AUTO LIST partition column, and hoodie_event_time is the sequence column
-- (see "function_column.sequence_col" in PROPERTIES).
CREATE TABLE test (
system_code varchar(65533) NOT NULL COMMENT '',
biz_date date NOT NULL COMMENT '',
busi_form_id varchar(65533) NOT NULL COMMENT '',
oper_org_id varchar(65533) NOT NULL COMMENT '',
wh_id varchar(65533) NOT NULL COMMENT '',
org_code_lvl_2 varchar(65533) NOT NULL COMMENT '',
hoodie_event_time datetime(3) NULL COMMENT '',
year varchar(65533) NULL COMMENT '',
month varchar(65533) NULL COMMENT '',
busi_form_name varchar(65533) NULL COMMENT '',
wh_support_col varchar(65533) NULL COMMENT '',
org_code_lvl_1 varchar(65533) NULL COMMENT '',
org_name_lvl_1 varchar(65533) NULL COMMENT '',
org_name_lvl_2 varchar(65533) NULL COMMENT '',
org_code_lvl_3 varchar(65533) NULL COMMENT '',
org_name_lvl_3 varchar(65533) NULL COMMENT '',
org_code_lvl_4 varchar(65533) NULL COMMENT '',
org_name_lvl_4 varchar(65533) NULL COMMENT '',
org_code_lvl_5 varchar(65533) NULL COMMENT '',
org_name_lvl_5 varchar(65533) NULL COMMENT '',
org_code_lvl_6 varchar(65533) NULL COMMENT '',
org_name_lvl_6 varchar(65533) NULL COMMENT '',
org_code_lvl_7 varchar(65533) NULL COMMENT '',
org_name_lvl_7 varchar(65533) NULL COMMENT '',
org_code_lvl_8 varchar(65533) NULL COMMENT '',
org_name_lvl_8 varchar(65533) NULL COMMENT '',
org_code_lvl_9 varchar(65533) NULL COMMENT '',
org_name_lvl_9 varchar(65533) NULL COMMENT '',
lst_lvl varchar(65533) NULL COMMENT '',
wh_name varchar(65533) NULL COMMENT '',
invt_tot_amt decimal(38,6) NULL COMMENT '',
invt_tot_amt_begin decimal(38,6) NULL COMMENT '',
invt_tot_amt_end decimal(38,6) NULL COMMENT '',
ytd_sale_cost decimal(38,6) NULL COMMENT '',
ytd_sale_cost_wrt decimal(38,6) NULL COMMENT '',
invt_tot_amt_begin_ly decimal(38,6) NULL COMMENT '',
invt_tot_amt_end_ly decimal(38,6) NULL COMMENT '',
ytd_sale_cost_ly decimal(38,6) NULL COMMENT '',
ytd_sale_cost_wrt_ly decimal(38,6) NULL COMMENT '',
cdc_partition_key varchar(65533) NULL COMMENT '',
sync_ts datetime NULL
) ENGINE=OLAP
UNIQUE KEY(system_code, biz_date, busi_form_id, oper_org_id, wh_id, org_code_lvl_2)
COMMENT ''
AUTO PARTITION BY LIST (org_code_lvl_2)
()
DISTRIBUTED BY HASH(system_code, biz_date, busi_form_id, oper_org_id, wh_id) BUCKETS AUTO
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"min_load_replica_num" = "-1",
"is_being_synced" = "false",
"storage_medium" = "hdd",
"storage_format" = "V2",
"inverted_index_storage_format" = "V1",
"enable_unique_key_merge_on_write" = "true",
"light_schema_change" = "true",
-- NOTE(review): per the Doris sequence-column docs, a row whose sequence value
-- is smaller than that of the existing row with the same unique key does not
-- replace it; confirm whether this accounts for the "missing" rows.
"function_column.sequence_col" = "hoodie_event_time",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"group_commit_interval_ms" = "2000",
"group_commit_data_bytes" = "134217728",
"enable_mow_light_delete" = "false"
);