把presto on hive的数据迁移到doris,通过官方 SQL-convertor 将用户查询的 Presto SQL 转换为 Doris SQL 后进行双查校验,发现第二列数据存在差异,为什么两边查询的数据不一致?主要是第二列涉及的日期函数、聚合函数在两引擎中的返回值差异,第二列presto查出来due_week是2025-05-05,doris查出来是2025-05-07,这个转换是有问题吗?比如是否考虑了闰年之类的,doris查询出来比如presto的第二列多2天。
presto涉及的第二列:
date_add('day', 1 - day_of_week(due_date), due_date) as due_week
doris转换后的第二列:
DATE_ADD(`due_date`, INTERVAL (1 - (DAYOFWEEK(`due_date`) + 5) % 7 + 1) DAY) AS `due_week`
presto查询:
select
due_date
,date_add('day', 1 - day_of_week(due_date), due_date) as due_week
,data_from
,expected_confirm_data_from
,sum(due_amt_principal_profit) / 10000 as due_amt_principal_profit
,sum(coalesce(can_operate_amt, 0)) / 10000 as can_operate_amt
,sum(coalesce(end_date_quota, 0)) / 10000 as end_date_quota
,sum(coalesce(expected_open_amt, 0)) / 10000 as expected_open_amt
,sum(coalesce(end_date_collect, 0)) / 10000 as renewal_apply_amt
,sum(coalesce(expected_confirm_amt, 0)) / 10000 as expected_confirm_amt
,sum(coalesce(renewal_success_amt, 0)) / 10000 as renewal_success_amt
,sum(coalesce(end_date_quota3, 0)) / 10000 as end_date_quota3
,sum(coalesce(end_date_collect3, 0)) / 10000 as renewal_apply_amt3
,sum(coalesce(renewal_success_amt3, 0)) / 10000 as renewal_success_amt3
,sum(coalesce(end_date_quota6, 0)) / 10000 as end_date_quota6
,sum(coalesce(end_date_collect6, 0)) / 10000 as renewal_apply_amt6
,sum(coalesce(renewal_success_amt6, 0)) / 10000 as renewal_success_amt6
,sum(coalesce(end_date_quota9, 0)) / 10000 as end_date_quota9
,sum(coalesce(end_date_collect9, 0)) / 10000 as renewal_apply_amt9
,sum(coalesce(renewal_success_amt9, 0)) / 10000 as renewal_success_amt9
,sum(coalesce(end_date_quota12, 0)) / 10000 as end_date_quota12
,sum(coalesce(end_date_collect12, 0)) / 10000 as renewal_apply_amt12
,sum(coalesce(renewal_success_amt12, 0)) / 10000 as renewal_success_amt12
from operation_tmp.hcl_renew_routine_expected_confirm_amt
where due_date between date'2025-05-05' and date'2025-07-06'
group by 1, 2, 3, 4
order by 1, 2, 3, 4;
doris查询:
SELECT
due_date,
DATE_ADD(due_date, INTERVAL (1 - (DAYOFWEEK(due_date) + 5) % 7 + 1) DAY) AS due_week,
data_from,
expected_confirm_data_from,
SUM(due_amt_principal_profit) / 10000 AS due_amt_principal_profit,
SUM(COALESCE(can_operate_amt, 0)) / 10000 AS can_operate_amt,
SUM(COALESCE(end_date_quota, 0)) / 10000 AS end_date_quota,
SUM(COALESCE(expected_open_amt, 0)) / 10000 AS expected_open_amt,
SUM(COALESCE(end_date_collect, 0)) / 10000 AS renewal_apply_amt,
SUM(COALESCE(expected_confirm_amt, 0)) / 10000 AS expected_confirm_amt,
SUM(COALESCE(renewal_success_amt, 0)) / 10000 AS renewal_success_amt,
SUM(COALESCE(end_date_quota3, 0)) / 10000 AS end_date_quota3,
SUM(COALESCE(end_date_collect3, 0)) / 10000 AS renewal_apply_amt3,
SUM(COALESCE(renewal_success_amt3, 0)) / 10000 AS renewal_success_amt3,
SUM(COALESCE(end_date_quota6, 0)) / 10000 AS end_date_quota6,
SUM(COALESCE(end_date_collect6, 0)) / 10000 AS renewal_apply_amt6,
SUM(COALESCE(renewal_success_amt6, 0)) / 10000 AS renewal_success_amt6,
SUM(COALESCE(end_date_quota9, 0)) / 10000 AS end_date_quota9,
SUM(COALESCE(end_date_collect9, 0)) / 10000 AS renewal_apply_amt9,
SUM(COALESCE(renewal_success_amt9, 0)) / 10000 AS renewal_success_amt9,
SUM(COALESCE(end_date_quota12, 0)) / 10000 AS end_date_quota12,
SUM(COALESCE(end_date_collect12, 0)) / 10000 AS renewal_apply_amt12,
SUM(COALESCE(renewal_success_amt12, 0)) / 10000 AS renewal_success_amt12
FROM operation_tmp.hcl_renew_routine_expected_confirm_amt AS hcl_renew_routine_expected_confirm_amt
WHERE
due_date BETWEEN CAST('2025-05-05' AS DATE) AND CAST('2025-07-06' AS DATE)
GROUP BY
1,
2,
3,
4
ORDER BY
1 NULLS LAST,
2 NULLS LAST,
3 NULLS LAST,
4 NULLS LAST;